Merge v5 anchor into research-block256

Performance relative to v5 anchor:
| CONFIG |  b2  | PSNR_YUV | Enc Time | Dec Time |
| :----: | :--: | :------: | :------: | :------: |
|   AI   |  w/o |  +0.04%  |   102%   |   109%   |
|        | only |  +0.02%  |   104%   |   109%   |
|        |      |          |          |          |
|   RA   |  w/o |  -0.47%  |   116%   |   105%   |
|        | only |  -0.16%  |   132%   |   104%   |
|        |      |          |          |          |
|   LD   |  w/o |  -0.41%  |   118%   |   107%   |
|        | only |  -0.47%  |   129%   |   103%   |
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 4ea7422..7b4fbe2 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -39,13 +39,23 @@
       # Run clang-format check.
       for f in $(git diff --diff-filter=ACMR --name-only $DIFF_REF '*.[hc]pp' '*.cc' '*.[ch]' \
         | grep -v third_party); do
-        clang-format -i --style=file $f -n -Werror
+        clang-format -i --style=file $f -n -Werror || exit_code=$?
+        if [ "${exit_code:-0}" -ne 0 ]; then  # exit_code is only set when the check failed
+          echo "Please format your code by following instructions here:"
+          echo "https://gitlab.com/AOMediaCodec/avm/-/wikis/Reproducing-CI-Test-Failures-Locally#style-check"
+          exit 1
+        fi
       done
     - |
       # Run cmake-format check.
       for f in $(git diff --diff-filter=ACMR --name-only $DIFF_REF '*.cmake' 'CMakeLists.txt' \
         | grep -v third_party); do
-        cmake-format --check $f
+        cmake-format --check $f || exit_code=$?
+        if [ "${exit_code:-0}" -ne 0 ]; then  # exit_code is only set when the check failed
+          echo "Please format your code by following instructions here:"
+          echo "https://gitlab.com/AOMediaCodec/avm/-/wikis/Reproducing-CI-Test-Failures-Locally#style-check"
+          exit 1
+        fi
       done
   rules:
     - if: '$CI_PIPELINE_SOURCE == "schedule"'
@@ -237,7 +247,16 @@
       echo "Extra CMake Flags: $EXTRA_CMAKE_FLAGS"
       echo "Configuration:     $AOM_BUILD_CONFIG"
     - cmake -B aom_build -S . -GNinja -DCMAKE_BUILD_TYPE=Release $CMAKE_FLAGS $EXTRA_CMAKE_FLAGS
-    - cmake --build aom_build -j 2
+    - cmake --build aom_build -j 2 || exit_code=$?
+    - |
+      if [ "${exit_code:-0}" -ne 0 ]; then  # exit_code is only set when the build failed
+        echo "You may reproduce the compile failure by following instructions here:"
+        echo "https://gitlab.com/AOMediaCodec/avm/-/wikis/Reproducing-CI-Test-Failures-Locally#build-avm-in-various-configurations"
+        echo "Using following values:"
+        echo "CMAKE_FLAGS = ${CMAKE_FLAGS}"
+        echo "EXTRA_CMAKE_FLAGS = ${EXTRA_CMAKE_FLAGS}"
+        exit 1
+      fi
     - cmake --build aom_build --target dist
     - DESTDIR="${CI_PROJECT_DIR}/${INSTALLROOT_FOLDER}" cmake --build aom_build --target install/strip
   needs: []
@@ -549,14 +568,20 @@
     - |
       # Looking for sanitizer output in log...
       grep -q "\(ERROR\|WARNING\): \(Address\|Thread\|Memory\|Leak\)Sanitizer:" sanitizer.log && {
-        echo "Found sanitizer errors or warnings, check the log:"
+        echo "Found sanitizer errors or warnings, check the log below:"
         cat sanitizer.log
+        echo "You may reproduce sanitizer builds and tests by following instructions below: "
+        echo "https://gitlab.com/AOMediaCodec/avm/-/wikis/Reproducing-CI-Test-Failures-Locally#build-unit-tests-with-sanitizers and"
+        echo "https://gitlab.com/AOMediaCodec/avm/-/wikis/Reproducing-CI-Test-Failures-Locally#run-unit-tests-with-sanitizers"
         exit 1
       }
       # Looking for UBSan output in log (differs from the common format)
       grep -q ":[[:digit:]]\+:[[:digit:]]\+: runtime error:" sanitizer.log && {
-        echo "Found sanitizer errors or warnings, check the log:"
+        echo "Found sanitizer errors or warnings, check the log below:"
         cat sanitizer.log
+        echo "You may reproduce sanitizer builds and tests by following instructions below: "
+        echo "https://gitlab.com/AOMediaCodec/avm/-/wikis/Reproducing-CI-Test-Failures-Locally#build-unit-tests-with-sanitizers and"
+        echo "https://gitlab.com/AOMediaCodec/avm/-/wikis/Reproducing-CI-Test-Failures-Locally#run-unit-tests-with-sanitizers"
         exit 1
       }
       echo "No sanitizer errors found"
@@ -621,7 +646,7 @@
   interruptible: true
   variables:
     AOMENC_LIMIT: 30
-    AOMENC_QP: 128
+    AOMENC_QP: 210
     AOMENC_INPUT: Vertical_Bayshore_270x480_2997.y4m
     AOMENC: installroot/usr/local/bin/aomenc
   before_script:
@@ -757,6 +782,67 @@
   needs:
     - 'Previous Build (x86_64-linux-gcc): [encode-only]'
 
+# Decode encoded streams and verify that number of frames is as expected.
+.dec-run-common:
+  stage: test
+  interruptible: true
+  variables:
+    AOMENC_LIMIT: 30  # Should match the same variable in `.enc-run-common`
+    AOMDEC: installroot/usr/local/bin/aomdec
+  script:
+    - ${AOMDEC} ${AOMENC_OUTPUT}.obu -o ${AOMDEC_OUTPUT}.decoded.y4m --summary
+       2>&1 | tee "${AOMDEC_OUTPUT}.summary.log"
+    - '[ -f "${AOMDEC_OUTPUT}.decoded.y4m" ] || exit 1'
+    - '[ -f "${AOMDEC_OUTPUT}.summary.log" ] || exit 1'
+    - |
+      for str in 'decoded frames' 'showed frames'; do
+        frame_count=$(grep -E -o "[0-9]+ ${str}" "${AOMDEC_OUTPUT}.summary.log" | sed -E "s/([0-9]+) ${str}/\1/g")
+        echo "${str} = ${frame_count}"
+        if [[ ${frame_count} -ne ${AOMENC_LIMIT} ]]; then
+          echo "ERROR: Unexpected number of ${str}. Got ${frame_count}, expected ${AOMENC_LIMIT}"
+          exit 1
+        fi
+      done
+  artifacts:
+    when: always
+    paths:
+      - ${AOMDEC_OUTPUT}.*
+  rules:
+    - if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
+    - if: '$CI_PIPELINE_SOURCE == "schedule"'
+      when: never
+
+
+Dec Run (All-intra):
+  extends: .dec-run-common
+  variables:
+    AOMENC_OUTPUT: all-intra
+    AOMDEC_OUTPUT: dec-all-intra
+  needs:
+    - 'Enc Run (All-intra)'
+    - 'Build (x86_64-linux-gcc): [decode-only]'
+
+
+Dec Run (Random Access):
+  extends: .dec-run-common
+  variables:
+    AOMENC_OUTPUT: random-access
+    AOMDEC_OUTPUT: dec-random-access
+  needs:
+    - 'Enc Run (Random Access)'
+    - 'Build (x86_64-linux-gcc): [decode-only]'
+
+
+Dec Run (Low-delay):
+  extends: .dec-run-common
+  variables:
+    AOMENC_OUTPUT: low-delay
+    AOMDEC_OUTPUT: dec-low-delay
+  needs:
+    - 'Enc Run (Low-delay)'
+    - 'Build (x86_64-linux-gcc): [decode-only]'
+
+
 Enc compare:
   stage: report
   interruptible: true
diff --git a/.gitlab/UBSan.supp b/.gitlab/UBSan.supp
index b0000e8..357d7d1 100644
--- a/.gitlab/UBSan.supp
+++ b/.gitlab/UBSan.supp
@@ -38,8 +38,6 @@
 implicit-signed-integer-truncation:av1_fdct8x64_new_sse2
 
 # nullptr-with-offset warnings.
-pointer-overflow:file_read
-pointer-overflow:av1_pack_bitstream
 pointer-overflow:vfilter8
 pointer-overflow:highbd_vfilter8
 
diff --git a/.gitlab/ci_nightly.yml b/.gitlab/ci_nightly.yml
index d5adad8..fd0544d 100644
--- a/.gitlab/ci_nightly.yml
+++ b/.gitlab/ci_nightly.yml
@@ -261,6 +261,7 @@
 
 Linux Sanitizer (thread) Test Nightly:
   extends: .sanitizer-common-nigtly
+  parallel: 16
   variables:
     AOM_SANITIZER_TYPE: thread
   needs:
diff --git a/aom/aom_encoder.h b/aom/aom_encoder.h
index 63aa16c..b544fa3 100644
--- a/aom/aom_encoder.h
+++ b/aom/aom_encoder.h
@@ -122,8 +122,12 @@
       int partition_id;
       /*!\brief size of the visible frame in this packet */
       size_t vis_frame_size;
-    } frame;                            /**< data for compressed frame packet */
-    aom_fixed_buf_t twopass_stats;      /**< data for two-pass packet */
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+      /*!\brief the number of frames in this packet */
+      int frame_count;
+#endif                             // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+    } frame;                       /**< data for compressed frame packet */
+    aom_fixed_buf_t twopass_stats; /**< data for two-pass packet */
     aom_fixed_buf_t firstpass_mb_stats; /**< first pass mb packet */
     struct aom_psnr_pkt {
       unsigned int samples[4]; /**< Number of samples, total/y/u/v */
@@ -324,6 +328,18 @@
    */
   unsigned int enable_bawp;
 #endif  // CONFIG_BAWP
+#if CONFIG_CWP
+  /*!\brief enable compound weighted prediction
+   *
+   */
+  unsigned int enable_cwp;
+#endif  // CONFIG_CWP
+#if CONFIG_D071_IMP_MSK_BLD
+  /*!\brief enable implicit masked blending
+   *
+   */
+  unsigned int enable_imp_msk_bld;
+#endif  // CONFIG_D071_IMP_MSK_BLD
   /*!\brief enable Forward skip coding
    *
    */
@@ -334,6 +350,12 @@
    */
   unsigned int enable_orip;
 #endif  // CONFIG_ORIP
+#if CONFIG_IDIF
+  /*!\brief enable Intra Directional Interpolation Filter
+   *
+   */
+  unsigned int enable_idif;
+#endif  // CONFIG_IDIF
   /*!\brief enable Intra secondary transform
    *
    */
@@ -372,6 +394,13 @@
    */
   unsigned int enable_joint_mvd;
 #endif
+#if CONFIG_REFINEMV
+  /*!\brief enable refine MV mode
+   *
+   */
+  unsigned int enable_refinemv;
+#endif  // CONFIG_REFINEMV
+
   /*!\brief enable flip and identity transform type
    *
    */
@@ -539,6 +568,12 @@
    *
    */
   unsigned int explicit_ref_frame_map;
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+  /*!\brief enable frame output order derivation based on order hint
+   *
+   */
+  unsigned int enable_frame_output_order;
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
   /*!\brief use reduced transform type set
    *
    */
diff --git a/aom/aomcx.h b/aom/aomcx.h
index 3d1150f..d8ed487 100644
--- a/aom/aomcx.h
+++ b/aom/aomcx.h
@@ -1261,6 +1261,11 @@
   /*!\brief Control to get frame info
    */
   AV1E_GET_FRAME_INFO = 165,
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+  /*!\brief Control to set frame output order derivation method
+   */
+  AV1E_SET_FRAME_OUTPUT_ORDER_DERIVATION = 166,
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
 };
 
 /*!\brief aom 1-D scaling mode
@@ -1751,6 +1756,11 @@
 AOM_CTRL_USE_TYPE(AV1E_SET_SUBGOP_CONFIG_PATH, const char *)
 #define AOM_CTRL_AV1E_SET_SUBGOP_CONFIG_PATH
 
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+AOM_CTRL_USE_TYPE(AV1E_SET_FRAME_OUTPUT_ORDER_DERIVATION, int)
+#define AOM_CTRL_AV1E_SET_FRAME_OUTPUT_ORDER_DERIVATION
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+
 /*!\endcond */
 /*! @} - end defgroup aom_encoder */
 #ifdef __cplusplus
diff --git a/aom/aomdx.h b/aom/aomdx.h
index d8f3579..3755132 100644
--- a/aom/aomdx.h
+++ b/aom/aomdx.h
@@ -63,8 +63,10 @@
  * context.
  */
 typedef struct aom_inspect_init {
-  /*! Inspection callback. */
+  /*! Inspection callback (per frame). */
   aom_inspect_cb inspect_cb;
+  /*! Inspection callback (per superblock). */
+  aom_inspect_cb inspect_sb_cb;
 
   /*! Inspection context. */
   void *inspect_ctx;
diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl
index e340390..89b6b7b 100755
--- a/aom_dsp/aom_dsp_rtcd_defs.pl
+++ b/aom_dsp/aom_dsp_rtcd_defs.pl
@@ -263,6 +263,22 @@
 specialize "aom_highbd_sad8x8", qw/sse2/;
 specialize qw/aom_highbd_sad8x8 sse2/;
 
+add_proto qw/unsigned int/, "aom_highbd_sad8x16", "const uint16_t *src_ptr, int src_stride, const uint16_t *ref_ptr, int ref_stride";
+specialize "aom_highbd_sad8x16", qw/sse2/;
+specialize qw/aom_highbd_sad8x16 sse2/;
+
+add_proto qw/unsigned int/, "aom_highbd_sad16x8", "const uint16_t *src_ptr, int src_stride, const uint16_t *ref_ptr, int ref_stride";
+if (aom_config("CONFIG_UNEVEN_4WAY") ne "yes") {
+specialize "aom_highbd_sad16x8", qw/sse2/;
+specialize qw/aom_highbd_sad16x8 sse2/;
+}
+
+add_proto qw/unsigned int/, "aom_highbd_sad16x16", "const uint16_t *src_ptr, int src_stride, const uint16_t *ref_ptr, int ref_stride";
+if (aom_config("CONFIG_UNEVEN_4WAY") ne "yes") {
+specialize "aom_highbd_sad16x16", qw/sse2/;
+specialize qw/aom_highbd_sad16x16 sse2/;
+}
+
 if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
   add_proto qw/void/, "aom_get_blk_sse_sum", "const int16_t *data, int stride, int bw, int bh, int *x_sum, int64_t *x2_sum";
   specialize qw/aom_get_blk_sse_sum sse2 avx2/;
@@ -296,8 +312,15 @@
     add_proto qw/unsigned int/, "aom_highbd_sad_skip_${w}x${h}", "const uint16_t *src_ptr, int src_stride, const uint16_t *ref_ptr, int ref_stride";
     add_proto qw/unsigned int/, "aom_highbd_sad${w}x${h}_avg", "const uint16_t *src_ptr, int src_stride, const uint16_t *ref_ptr, int ref_stride, const uint16_t *second_pred";
     if ($w != 128 && $h != 128 && $w != 4 && $w != 256 && $h != 256) {
-      specialize "aom_highbd_sad${w}x${h}", qw/sse2/;
-      specialize "aom_highbd_sad${w}x${h}_avg", qw/sse2/;
+      if (aom_config("CONFIG_UNEVEN_4WAY") eq "yes") {
+        if (!($w == 16 && $h == 16) && !($w == 16 && $h == 8) && !($w == 16 && $h == 4)) {
+          specialize "aom_highbd_sad${w}x${h}", qw/sse2/;
+          specialize "aom_highbd_sad${w}x${h}_avg", qw/sse2/;
+        }
+      } else {
+        specialize "aom_highbd_sad${w}x${h}", qw/sse2/;
+        specialize "aom_highbd_sad${w}x${h}_avg", qw/sse2/;
+      }  # CONFIG_UNEVEN_4WAY
     }
     add_proto qw/unsigned int/, "aom_highbd_dist_wtd_sad${w}x${h}_avg", "const uint16_t *src_ptr, int src_stride, const uint16_t *ref_ptr, int ref_stride, const uint16_t *second_pred, const DIST_WTD_COMP_PARAMS* jcp_param";
   }
@@ -313,8 +336,15 @@
   specialize qw/aom_highbd_sad32x32   avx2 sse2/;
   specialize qw/aom_highbd_sad32x16   avx2 sse2/;
   specialize qw/aom_highbd_sad16x32   avx2 sse2/;
-  specialize qw/aom_highbd_sad16x16   avx2 sse2/;
-  specialize qw/aom_highbd_sad16x8    avx2 sse2/;
+  if (aom_config("CONFIG_UNEVEN_4WAY") eq "yes") {
+    specialize qw/aom_highbd_sad16x16   avx2/;
+    specialize qw/aom_highbd_sad16x8    avx2/;
+    specialize qw/aom_highbd_sad16x4    avx2/;
+  } else {
+    specialize qw/aom_highbd_sad16x16   avx2 sse2/;
+    specialize qw/aom_highbd_sad16x8    avx2 sse2/;
+    specialize qw/aom_highbd_sad16x4    avx2 sse2/;
+  } # CONFIG_UNEVEN_4WAY
   specialize qw/aom_highbd_sad8x16         sse2/;
   specialize qw/aom_highbd_sad8x8          sse2/;
   specialize qw/aom_highbd_sad8x4          sse2/;
@@ -322,7 +352,6 @@
   specialize qw/aom_highbd_sad4x4          sse2/;
 
   specialize qw/aom_highbd_sad4x16         sse2/;
-  specialize qw/aom_highbd_sad16x4    avx2 sse2/;
   specialize qw/aom_highbd_sad8x32         sse2/;
   specialize qw/aom_highbd_sad32x8    avx2 sse2/;
   specialize qw/aom_highbd_sad16x64   avx2 sse2/;
@@ -340,8 +369,13 @@
   specialize qw/aom_highbd_sad_skip_32x32   avx2 sse2/;
   specialize qw/aom_highbd_sad_skip_32x16   avx2 sse2/;
   specialize qw/aom_highbd_sad_skip_16x32   avx2 sse2/;
-  specialize qw/aom_highbd_sad_skip_16x16   avx2 sse2/;
-  specialize qw/aom_highbd_sad_skip_16x8    avx2 sse2/;
+  if (aom_config("CONFIG_UNEVEN_4WAY") eq "yes") {
+    specialize qw/aom_highbd_sad_skip_16x16   avx2/;
+    specialize qw/aom_highbd_sad_skip_16x8    avx2/;
+  } else {
+    specialize qw/aom_highbd_sad_skip_16x16   avx2 sse2/;
+    specialize qw/aom_highbd_sad_skip_16x8    avx2 sse2/;
+  }
   specialize qw/aom_highbd_sad_skip_8x16         sse2/;
   specialize qw/aom_highbd_sad_skip_8x8          sse2/;
   specialize qw/aom_highbd_sad_skip_4x8          sse2/;
@@ -364,8 +398,13 @@
   specialize qw/aom_highbd_sad32x32_avg   avx2 sse2/;
   specialize qw/aom_highbd_sad32x16_avg   avx2 sse2/;
   specialize qw/aom_highbd_sad16x32_avg   avx2 sse2/;
-  specialize qw/aom_highbd_sad16x16_avg   avx2 sse2/;
-  specialize qw/aom_highbd_sad16x8_avg    avx2 sse2/;
+  if (aom_config("CONFIG_UNEVEN_4WAY") eq "yes") {
+    specialize qw/aom_highbd_sad16x16_avg   avx2/;
+    specialize qw/aom_highbd_sad16x8_avg    avx2/;
+  } else {
+    specialize qw/aom_highbd_sad16x16_avg   avx2 sse2/;
+    specialize qw/aom_highbd_sad16x8_avg    avx2 sse2/;
+  }
   specialize qw/aom_highbd_sad8x4_avg     sse2/;
   specialize qw/aom_highbd_sad4x8_avg     sse2/;
   specialize qw/aom_highbd_sad4x4_avg     sse2/;
@@ -558,14 +597,22 @@
       # TODO(rachelbarker): When ext-partition-types is enabled, we currently
       # don't have vectorized 4x16 highbd variance functions
       if ($w == 4 && $h == 4) {
-          specialize "aom_highbd_${bd}_variance${w}x${h}", "sse4_1";
+        specialize "aom_highbd_${bd}_variance${w}x${h}", "sse4_1";
         specialize "aom_highbd_${bd}_sub_pixel_variance${w}x${h}", "sse4_1";
         specialize "aom_highbd_${bd}_sub_pixel_avg_variance${w}x${h}", "sse4_1";
       }
-      if ($w != 128 && $h != 128 && $w != 4 && $w != 256 && $h != 256) {
-        specialize "aom_highbd_${bd}_sub_pixel_variance${w}x${h}", qw/sse2/;
-        specialize "aom_highbd_${bd}_sub_pixel_avg_variance${w}x${h}", qw/sse2/;
-      }
+      if (aom_config("CONFIG_UNEVEN_4WAY") eq "yes") {
+        if ($w != 128 && $h != 128 && $w != 4 && !($w == 16 && $h == 16) &&
+            !($w == 16 && $h == 8) && !($w == 16 && $h == 4) && $w != 256 && $h != 256) {
+          specialize "aom_highbd_${bd}_sub_pixel_variance${w}x${h}", qw/sse2/;
+          specialize "aom_highbd_${bd}_sub_pixel_avg_variance${w}x${h}", qw/sse2/;
+        }
+      } else {
+        if ($w != 128 && $h != 128 && $w != 4 && $w != 256 && $h != 256) {
+          specialize "aom_highbd_${bd}_sub_pixel_variance${w}x${h}", qw/sse2/;
+          specialize "aom_highbd_${bd}_sub_pixel_avg_variance${w}x${h}", qw/sse2/;
+        }
+      }  # CONFIG_UNEVEN_4WAY
 
       add_proto qw/uint32_t/, "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance${w}x${h}", "const uint16_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse, const uint16_t *second_pred, const DIST_WTD_COMP_PARAMS* jcp_param";
     }
@@ -874,11 +921,19 @@
   specialize qw/aom_highbd_12_sub_pixel_variance16x32 sse2/;
 
   add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance16x16/, "const uint16_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse";
-  # specialize qw/aom_highbd_12_sub_pixel_variance16x16 sse2 avx2/;
-  specialize qw/aom_highbd_12_sub_pixel_variance16x16 sse2/;
+  if (aom_config("CONFIG_UNEVEN_4WAY") eq "yes") {
+    # specialize qw/aom_highbd_12_sub_pixel_variance16x16 sse2/;
+  } else {
+    # specialize qw/aom_highbd_12_sub_pixel_variance16x16 sse2 avx2/;
+    specialize qw/aom_highbd_12_sub_pixel_variance16x16 sse2/;
+  }
 
   add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance16x8/, "const uint16_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse";
-  specialize qw/aom_highbd_12_sub_pixel_variance16x8 sse2 avx2/;
+  if (aom_config("CONFIG_UNEVEN_4WAY") eq "yes") {
+    specialize qw/aom_highbd_12_sub_pixel_variance16x8 avx2/;
+  } else {
+    specialize qw/aom_highbd_12_sub_pixel_variance16x8 sse2 avx2/;
+  }
 
   add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance8x16/, "const uint16_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse";
   specialize qw/aom_highbd_12_sub_pixel_variance8x16 sse2/;
@@ -906,8 +961,11 @@
   specialize qw/aom_highbd_12_sub_pixel_variance32x8 sse2/;
 
   add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance16x4/, "const uint16_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse";
-  specialize qw/aom_highbd_12_sub_pixel_variance16x4 sse2 avx2/;
-
+  if (aom_config("CONFIG_UNEVEN_4WAY") eq "yes") {
+    specialize qw/aom_highbd_12_sub_pixel_variance16x4 avx2/;
+  } else {
+    specialize qw/aom_highbd_12_sub_pixel_variance16x4 sse2 avx2/;
+  }
 
   if (aom_config("CONFIG_BLOCK_256") eq "yes"){
     specialize qw/aom_highbd_10_sub_pixel_variance256x256 avx2/;
@@ -943,10 +1001,18 @@
   specialize qw/aom_highbd_10_sub_pixel_variance16x32 sse2 avx2/;
 
   add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance16x16/, "const uint16_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse";
-  specialize qw/aom_highbd_10_sub_pixel_variance16x16 sse2 avx2/;
+  if (aom_config("CONFIG_UNEVEN_4WAY") eq "yes") {
+    specialize qw/aom_highbd_10_sub_pixel_variance16x16 avx2/;
+  } else {
+    specialize qw/aom_highbd_10_sub_pixel_variance16x16 sse2 avx2/;
+  }
 
   add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance16x8/, "const uint16_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse";
-  specialize qw/aom_highbd_10_sub_pixel_variance16x8 sse2 avx2/;
+  if (aom_config("CONFIG_UNEVEN_4WAY") eq "yes") {
+    specialize qw/aom_highbd_10_sub_pixel_variance16x8 avx2/;
+  } else {
+    specialize qw/aom_highbd_10_sub_pixel_variance16x8 sse2 avx2/;
+  }
 
   add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance8x16/, "const uint16_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse";
   specialize qw/aom_highbd_10_sub_pixel_variance8x16 sse2/;
@@ -971,7 +1037,11 @@
   specialize qw/aom_highbd_10_sub_pixel_variance32x8 sse2 avx2/;
 
   add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance16x4/, "const uint16_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse";
-  specialize qw/aom_highbd_10_sub_pixel_variance16x4 sse2 avx2/;
+  if (aom_config("CONFIG_UNEVEN_4WAY") eq "yes") {
+    specialize qw/aom_highbd_10_sub_pixel_variance16x4 avx2/;
+  } else {
+    specialize qw/aom_highbd_10_sub_pixel_variance16x4 sse2 avx2/;
+  }
 
 
   if (aom_config("CONFIG_BLOCK_256") eq "yes"){
@@ -1008,10 +1078,18 @@
   specialize qw/aom_highbd_8_sub_pixel_variance16x32 sse2 avx2/;
 
   add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance16x16/, "const uint16_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse";
-  specialize qw/aom_highbd_8_sub_pixel_variance16x16 sse2 avx2/;
+  if (aom_config("CONFIG_UNEVEN_4WAY") eq "yes") {
+    specialize qw/aom_highbd_8_sub_pixel_variance16x16 avx2/;
+  } else {
+    specialize qw/aom_highbd_8_sub_pixel_variance16x16 sse2 avx2/;
+  }
 
   add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance16x8/, "const uint16_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse";
-  specialize qw/aom_highbd_8_sub_pixel_variance16x8 sse2 avx2/;
+  if (aom_config("CONFIG_UNEVEN_4WAY") eq "yes") {
+    specialize qw/aom_highbd_8_sub_pixel_variance16x8 avx2/;
+  } else {
+    specialize qw/aom_highbd_8_sub_pixel_variance16x8 sse2 avx2/;
+  }
 
   add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance8x16/, "const uint16_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse";
   specialize qw/aom_highbd_8_sub_pixel_variance8x16 sse2/;
@@ -1036,7 +1114,11 @@
   specialize qw/aom_highbd_8_sub_pixel_variance32x8 sse2 avx2/;
 
   add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance16x4/, "const uint16_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse";
-  specialize qw/aom_highbd_8_sub_pixel_variance16x4 sse2 avx2/;
+  if (aom_config("CONFIG_UNEVEN_4WAY") eq "yes") {
+    specialize qw/aom_highbd_8_sub_pixel_variance16x4 avx2/;
+  } else {
+    specialize qw/aom_highbd_8_sub_pixel_variance16x4 sse2 avx2/;
+  }
 
   #
   # Subpixel Avg Variance
@@ -1061,10 +1143,14 @@
   specialize qw/aom_highbd_12_sub_pixel_avg_variance16x32 sse2/;
 
   add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance16x16/, "const uint16_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse, const uint16_t *second_pred";
-  specialize qw/aom_highbd_12_sub_pixel_avg_variance16x16 sse2/;
+  if (aom_config("CONFIG_UNEVEN_4WAY") ne "yes") {
+    specialize qw/aom_highbd_12_sub_pixel_avg_variance16x16 sse2/;
+  }
 
   add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance16x8/, "const uint16_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse, const uint16_t *second_pred";
-  specialize qw/aom_highbd_12_sub_pixel_avg_variance16x8 sse2/;
+  if (aom_config("CONFIG_UNEVEN_4WAY") ne "yes") {
+    specialize qw/aom_highbd_12_sub_pixel_avg_variance16x8 sse2/;
+  }
 
   add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance8x16/, "const uint16_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse, const uint16_t *second_pred";
   specialize qw/aom_highbd_12_sub_pixel_avg_variance8x16 sse2/;
@@ -1097,10 +1183,14 @@
   specialize qw/aom_highbd_10_sub_pixel_avg_variance16x32 sse2/;
 
   add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance16x16/, "const uint16_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse, const uint16_t *second_pred";
-  specialize qw/aom_highbd_10_sub_pixel_avg_variance16x16 sse2/;
+  if (aom_config("CONFIG_UNEVEN_4WAY") ne "yes") {
+    specialize qw/aom_highbd_10_sub_pixel_avg_variance16x16 sse2/;
+  }
 
   add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance16x8/, "const uint16_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse, const uint16_t *second_pred";
-  specialize qw/aom_highbd_10_sub_pixel_avg_variance16x8 sse2/;
+  if (aom_config("CONFIG_UNEVEN_4WAY") ne "yes") {
+    specialize qw/aom_highbd_10_sub_pixel_avg_variance16x8 sse2/;
+  }
 
   add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance8x16/, "const uint16_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse, const uint16_t *second_pred";
   specialize qw/aom_highbd_10_sub_pixel_avg_variance8x16 sse2/;
@@ -1133,10 +1223,14 @@
   specialize qw/aom_highbd_8_sub_pixel_avg_variance16x32 sse2/;
 
   add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance16x16/, "const uint16_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse, const uint16_t *second_pred";
-  specialize qw/aom_highbd_8_sub_pixel_avg_variance16x16 sse2/;
+  if (aom_config("CONFIG_UNEVEN_4WAY") ne "yes") {
+    specialize qw/aom_highbd_8_sub_pixel_avg_variance16x16 sse2/;
+  }
 
   add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance16x8/, "const uint16_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse, const uint16_t *second_pred";
-  specialize qw/aom_highbd_8_sub_pixel_avg_variance16x8 sse2/;
+  if (aom_config("CONFIG_UNEVEN_4WAY") ne "yes") {
+    specialize qw/aom_highbd_8_sub_pixel_avg_variance16x8 sse2/;
+  }
 
   add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance8x16/, "const uint16_t *src_ptr, int source_stride, int xoffset, int  yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse, const uint16_t *second_pred";
   specialize qw/aom_highbd_8_sub_pixel_avg_variance8x16 sse2/;
diff --git a/aom_dsp/binary_codes_reader.c b/aom_dsp/binary_codes_reader.c
index b9172b0..486b623 100644
--- a/aom_dsp/binary_codes_reader.c
+++ b/aom_dsp/binary_codes_reader.c
@@ -15,18 +15,18 @@
 #include "av1/common/common.h"
 
 uint16_t aom_read_primitive_quniform_(aom_reader *r,
-                                      uint16_t n ACCT_STR_PARAM) {
+                                      uint16_t n ACCT_INFO_PARAM) {
   if (n <= 1) return 0;
   const int l = get_msb(n) + 1;
   const int m = (1 << l) - n;
-  const int v = aom_read_literal(r, l - 1, ACCT_STR_NAME);
-  return v < m ? v : (v << 1) - m + aom_read_bit(r, ACCT_STR_NAME);
+  const int v = aom_read_literal(r, l - 1, ACCT_INFO_NAME);
+  return v < m ? v : (v << 1) - m + aom_read_bit(r, ACCT_INFO_NAME);
 }
 
 // Decode finite subexponential code that for a symbol v in [0, n-1] with
 // parameter k
 uint16_t aom_read_primitive_subexpfin_(aom_reader *r, uint16_t n,
-                                       uint16_t k ACCT_STR_PARAM) {
+                                       uint16_t k ACCT_INFO_PARAM) {
   int i = 0;
   int mk = 0;
 
@@ -35,11 +35,11 @@
     int a = (1 << b);
 
     if (n <= mk + 3 * a) {
-      return aom_read_primitive_quniform(r, n - mk, ACCT_STR_NAME) + mk;
+      return aom_read_primitive_quniform(r, n - mk, ACCT_INFO_NAME) + mk;
     }
 
-    if (!aom_read_bit(r, ACCT_STR_NAME)) {
-      return aom_read_literal(r, b, ACCT_STR_NAME) + mk;
+    if (!aom_read_bit(r, ACCT_INFO_NAME)) {
+      return aom_read_literal(r, b, ACCT_INFO_NAME) + mk;
     }
 
     i = i + 1;
@@ -51,7 +51,7 @@
 }
 
 uint16_t aom_read_primitive_refsubexpfin_(aom_reader *r, uint16_t n, uint16_t k,
-                                          uint16_t ref ACCT_STR_PARAM) {
+                                          uint16_t ref ACCT_INFO_PARAM) {
   return inv_recenter_finite_nonneg(
-      n, ref, aom_read_primitive_subexpfin(r, n, k, ACCT_STR_NAME));
+      n, ref, aom_read_primitive_subexpfin(r, n, k, ACCT_INFO_NAME));
 }
diff --git a/aom_dsp/binary_codes_reader.h b/aom_dsp/binary_codes_reader.h
index 4559ff2..4e35483 100644
--- a/aom_dsp/binary_codes_reader.h
+++ b/aom_dsp/binary_codes_reader.h
@@ -25,18 +25,19 @@
 #include "aom_dsp/bitreader.h"
 #include "aom_dsp/bitreader_buffer.h"
 
-#define aom_read_primitive_quniform(r, n, ACCT_STR_NAME) \
-  aom_read_primitive_quniform_(r, n ACCT_STR_ARG(ACCT_STR_NAME))
-#define aom_read_primitive_subexpfin(r, n, k, ACCT_STR_NAME) \
-  aom_read_primitive_subexpfin_(r, n, k ACCT_STR_ARG(ACCT_STR_NAME))
-#define aom_read_primitive_refsubexpfin(r, n, k, ref, ACCT_STR_NAME) \
-  aom_read_primitive_refsubexpfin_(r, n, k, ref ACCT_STR_ARG(ACCT_STR_NAME))
+#define aom_read_primitive_quniform(r, n, ACCT_INFO_NAME) \
+  aom_read_primitive_quniform_(r, n ACCT_INFO_ARG(ACCT_INFO_NAME))
+#define aom_read_primitive_subexpfin(r, n, k, ACCT_INFO_NAME) \
+  aom_read_primitive_subexpfin_(r, n, k ACCT_INFO_ARG(ACCT_INFO_NAME))
+#define aom_read_primitive_refsubexpfin(r, n, k, ref, ACCT_INFO_NAME) \
+  aom_read_primitive_refsubexpfin_(r, n, k, ref ACCT_INFO_ARG(ACCT_INFO_NAME))
 
-uint16_t aom_read_primitive_quniform_(aom_reader *r, uint16_t n ACCT_STR_PARAM);
+uint16_t aom_read_primitive_quniform_(aom_reader *r,
+                                      uint16_t n ACCT_INFO_PARAM);
 uint16_t aom_read_primitive_subexpfin_(aom_reader *r, uint16_t n,
-                                       uint16_t k ACCT_STR_PARAM);
+                                       uint16_t k ACCT_INFO_PARAM);
 uint16_t aom_read_primitive_refsubexpfin_(aom_reader *r, uint16_t n, uint16_t k,
-                                          uint16_t ref ACCT_STR_PARAM);
+                                          uint16_t ref ACCT_INFO_PARAM);
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/aom_dsp/bitreader.c b/aom_dsp/bitreader.c
index 96fc999..b1c346e 100644
--- a/aom_dsp/bitreader.c
+++ b/aom_dsp/bitreader.c
@@ -31,7 +31,7 @@
 
 uint32_t aom_reader_tell(const aom_reader *r) { return od_ec_dec_tell(&r->ec); }
 
-uint32_t aom_reader_tell_frac(const aom_reader *r) {
+uint64_t aom_reader_tell_frac(const aom_reader *r) {
   return od_ec_dec_tell_frac(&r->ec);
 }
 
diff --git a/aom_dsp/bitreader.h b/aom_dsp/bitreader.h
index ee999ba..1a03823 100644
--- a/aom_dsp/bitreader.h
+++ b/aom_dsp/bitreader.h
@@ -33,41 +33,42 @@
 
 #if CONFIG_ACCOUNTING
 #include "av1/decoder/accounting.h"
-#define ACCT_STR_NAME acct_str
-#define ACCT_STR_PARAM , const char *ACCT_STR_NAME
-#define ACCT_STR_ARG(s) , s
+#define ACCT_INFO_NAME acct_info
+#define ACCT_INFO_PARAM , AccountingSymbolInfo acct_info
+#define ACCT_INFO_ARG(s) , s
 #else
-#define ACCT_STR_PARAM
-#define ACCT_STR_ARG(s)
+#define ACCT_INFO_PARAM
+#define ACCT_INFO_ARG(s)
 #endif
 
-#define aom_read(r, prob, ACCT_STR_NAME) \
-  aom_read_(r, prob ACCT_STR_ARG(ACCT_STR_NAME))
+#define aom_read(r, prob, ACCT_INFO_NAME) \
+  aom_read_(r, prob ACCT_INFO_ARG(ACCT_INFO_NAME))
 #if CONFIG_BYPASS_IMPROVEMENT
-#define aom_read_bypass(r, ACCT_STR_NAME) \
-  aom_read_bypass_(r ACCT_STR_ARG(ACCT_STR_NAME))
+#define aom_read_bypass(r, ACCT_INFO_NAME) \
+  aom_read_bypass_(r ACCT_INFO_ARG(ACCT_INFO_NAME))
 #endif  // CONFIG_BYPASS_IMPROVEMENT
-#define aom_read_bit(r, ACCT_STR_NAME) \
-  aom_read_bit_(r ACCT_STR_ARG(ACCT_STR_NAME))
-#define aom_read_tree(r, tree, probs, ACCT_STR_NAME) \
-  aom_read_tree_(r, tree, probs ACCT_STR_ARG(ACCT_STR_NAME))
-#define aom_read_literal(r, bits, ACCT_STR_NAME) \
-  aom_read_literal_(r, bits ACCT_STR_ARG(ACCT_STR_NAME))
-#define aom_read_cdf(r, cdf, nsymbs, ACCT_STR_NAME) \
-  aom_read_cdf_(r, cdf, nsymbs ACCT_STR_ARG(ACCT_STR_NAME))
-#define aom_read_symbol(r, cdf, nsymbs, ACCT_STR_NAME) \
-  aom_read_symbol_(r, cdf, nsymbs ACCT_STR_ARG(ACCT_STR_NAME))
+#define aom_read_bit(r, ACCT_INFO_NAME) \
+  aom_read_bit_(r ACCT_INFO_ARG(ACCT_INFO_NAME))
+#define aom_read_tree(r, tree, probs, ACCT_INFO_NAME) \
+  aom_read_tree_(r, tree, probs ACCT_INFO_ARG(ACCT_INFO_NAME))
+#define aom_read_literal(r, bits, ACCT_INFO_NAME) \
+  aom_read_literal_(r, bits ACCT_INFO_ARG(ACCT_INFO_NAME))
+#define aom_read_cdf(r, cdf, nsymbs, ACCT_INFO_NAME) \
+  aom_read_cdf_(r, cdf, nsymbs ACCT_INFO_ARG(ACCT_INFO_NAME))
+#define aom_read_symbol(r, cdf, nsymbs, ACCT_INFO_NAME) \
+  aom_read_symbol_(r, cdf, nsymbs ACCT_INFO_ARG(ACCT_INFO_NAME))
 
 #if CONFIG_BYPASS_IMPROVEMENT
-#define aom_read_unary(r, bits, ACCT_STR_NAME) \
-  aom_read_unary_(r, bits ACCT_STR_ARG(ACCT_STR_NAME))
+#define aom_read_unary(r, bits, ACCT_INFO_NAME) \
+  aom_read_unary_(r, bits ACCT_INFO_ARG(ACCT_INFO_NAME))
 #endif  // CONFIG_BYPASS_IMPROVEMENT
 
 #if ENABLE_LR_4PART_CODE
-#define aom_read_4part(r, cdf, nsymb_bits, ACCT_STR_NAME) \
-  aom_read_4part_(r, cdf, nsymb_bits ACCT_STR_ARG(ACCT_STR_NAME))
-#define aom_read_4part_wref(r, ref_symb, cdf, nsymb_bits, ACCT_STR_NAME) \
-  aom_read_4part_wref_(r, ref_symb, cdf, nsymb_bits ACCT_STR_ARG(ACCT_STR_NAME))
+#define aom_read_4part(r, cdf, nsymb_bits, ACCT_INFO_NAME) \
+  aom_read_4part_(r, cdf, nsymb_bits ACCT_INFO_ARG(ACCT_INFO_NAME))
+#define aom_read_4part_wref(r, ref_symb, cdf, nsymb_bits, ACCT_INFO_NAME) \
+  aom_read_4part_wref_(r, ref_symb, cdf,                                  \
+                       nsymb_bits ACCT_INFO_ARG(ACCT_INFO_NAME))
 #endif  // ENABLE_LR_4PART_CODE
 
 #ifdef __cplusplus
@@ -99,15 +100,17 @@
 // Returns the position in the bit reader in bits.
 uint32_t aom_reader_tell(const aom_reader *r);
 
-// Returns the position in the bit reader in 1/8th bits.
-uint32_t aom_reader_tell_frac(const aom_reader *r);
+// Returns the position in the bit reader in 1/65536th bits.
+uint64_t aom_reader_tell_frac(const aom_reader *r);
 
 #if CONFIG_ACCOUNTING
-static INLINE void aom_process_accounting(const aom_reader *r ACCT_STR_PARAM) {
+static INLINE void aom_process_accounting(const aom_reader *r, int value,
+                                          SYMBOL_CODING_MODE coding_mode
+                                              ACCT_INFO_PARAM) {
   if (r->accounting != NULL) {
-    uint32_t tell_frac;
+    uint64_t tell_frac;
     tell_frac = aom_reader_tell_frac(r);
-    aom_accounting_record(r->accounting, ACCT_STR_NAME,
+    aom_accounting_record(r->accounting, value, coding_mode, ACCT_INFO_NAME,
                           tell_frac - r->accounting->last_tell_frac);
     r->accounting->last_tell_frac = tell_frac;
   }
@@ -134,7 +137,7 @@
 }
 #endif
 
-static INLINE int aom_read_(aom_reader *r, int prob ACCT_STR_PARAM) {
+static INLINE int aom_read_(aom_reader *r, int prob ACCT_INFO_PARAM) {
   int p = (0x7FFFFF - (prob << 15) + prob) >> 8;
   int bit = od_ec_decode_bool_q15(&r->ec, p);
 
@@ -173,7 +176,8 @@
 #endif  // CONFIG_BITSTREAM_DEBUG
 
 #if CONFIG_ACCOUNTING
-  if (ACCT_STR_NAME) aom_process_accounting(r, ACCT_STR_NAME);
+  if (ACCT_INFO_NAME.c_file)
+    aom_process_accounting(r, bit, SYMBOL_BIT, ACCT_INFO_NAME);
 #if CONFIG_THROUGHPUT_ANALYSIS
   aom_update_symb_counts(r, 1, 0, 1);
 #else
@@ -223,13 +227,14 @@
 #endif  // CONFIG_BITSTREAM_DEBUG
 
 #if CONFIG_BYPASS_IMPROVEMENT
-static INLINE int aom_read_bypass_(aom_reader *r ACCT_STR_PARAM) {
+static INLINE int aom_read_bypass_(aom_reader *r ACCT_INFO_PARAM) {
   int ret = od_ec_decode_literal_bypass(&r->ec, 1);
 #if CONFIG_BITSTREAM_DEBUG
   bitstream_queue_pop_literal(ret, 1);
 #endif  // CONFIG_BITSTREAM_DEBUG
 #if CONFIG_ACCOUNTING
-  if (ACCT_STR_NAME) aom_process_accounting(r, ACCT_STR_NAME);
+  if (ACCT_INFO_NAME.c_file)
+    aom_process_accounting(r, ret, SYMBOL_BIT_BYPASS, ACCT_INFO_NAME);
 #if CONFIG_THROUGHPUT_ANALYSIS
   aom_update_symb_counts(r, 1, 0, 1);
 #else
@@ -240,20 +245,21 @@
 }
 #endif  // CONFIG_BYPASS_IMPROVEMENT
 
-static INLINE int aom_read_bit_(aom_reader *r ACCT_STR_PARAM) {
+static INLINE int aom_read_bit_(aom_reader *r ACCT_INFO_PARAM) {
   int ret;
 #if CONFIG_BYPASS_IMPROVEMENT
-  ret = aom_read_bypass(r, NULL);
+  ret = aom_read_bypass(r, ACCT_INFO_NAME);
 #else
-  ret = aom_read(r, 128, NULL);  // aom_prob_half
+  ret = aom_read(r, 128, ACCT_INFO_NAME);  // aom_prob_half
 #endif  // CONFIG_BYPASS_IMPROVEMENT
 #if CONFIG_ACCOUNTING
-  if (ACCT_STR_NAME) aom_process_accounting(r, ACCT_STR_NAME);
+  if (ACCT_INFO_NAME.c_file)
+    aom_process_accounting(r, ret, SYMBOL_BIT_BYPASS, ACCT_INFO_NAME);
 #endif
   return ret;
 }
 
-static INLINE int aom_read_literal_(aom_reader *r, int bits ACCT_STR_PARAM) {
+static INLINE int aom_read_literal_(aom_reader *r, int bits ACCT_INFO_PARAM) {
 #if CONFIG_BYPASS_IMPROVEMENT
   int literal = 0;
   int n_bits = bits;
@@ -268,7 +274,8 @@
   bitstream_queue_pop_literal(literal, bits);
 #endif  // CONFIG_BITSTREAM_DEBUG
 #if CONFIG_ACCOUNTING
-  if (ACCT_STR_NAME) aom_process_accounting(r, ACCT_STR_NAME);
+  if (ACCT_INFO_NAME.c_file)
+    aom_process_accounting(r, literal, SYMBOL_LITERAL_BYPASS, ACCT_INFO_NAME);
 #if CONFIG_THROUGHPUT_ANALYSIS
   aom_update_symb_counts(r, 1, 0, bits);
 #else
@@ -285,7 +292,8 @@
 
 #if CONFIG_BYPASS_IMPROVEMENT
 // Deocode unary coded symbol with truncation at max_nbits.
-static INLINE int aom_read_unary_(aom_reader *r, int max_nbits ACCT_STR_PARAM) {
+static INLINE int aom_read_unary_(aom_reader *r,
+                                  int max_nbits ACCT_INFO_PARAM) {
   int ret = od_ec_decode_unary_bypass(&r->ec, max_nbits);
 #if CONFIG_BITSTREAM_DEBUG
   int nbits = ret < max_nbits ? ret + 1 : max_nbits;
@@ -294,7 +302,8 @@
 #endif  // CONFIG_BITSTREAM_DEBUG
 #if CONFIG_ACCOUNTING
   int n_bits = ret < max_nbits ? ret + 1 : max_nbits;
-  if (ACCT_STR_NAME) aom_process_accounting(r, ACCT_STR_NAME);
+  if (ACCT_INFO_NAME.c_file)
+    aom_process_accounting(r, ret, SYMBOL_UNARY, ACCT_INFO_NAME);
 #if CONFIG_THROUGHPUT_ANALYSIS
   aom_update_symb_counts(r, 1, 0, n_bits);
 #else
@@ -306,7 +315,7 @@
 #endif  // CONFIG_BYPASS_IMPROVEMENT
 
 static INLINE int aom_read_cdf_(aom_reader *r, const aom_cdf_prob *cdf,
-                                int nsymbs ACCT_STR_PARAM) {
+                                int nsymbs ACCT_INFO_PARAM) {
   int symb;
   assert(cdf != NULL);
   symb = od_ec_decode_cdf_q15(&r->ec, cdf, nsymbs);
@@ -351,7 +360,8 @@
 #endif  // CONFIG_BITSTREAM_DEBUG
 
 #if CONFIG_ACCOUNTING
-  if (ACCT_STR_NAME) aom_process_accounting(r, ACCT_STR_NAME);
+  if (ACCT_INFO_NAME.c_file)
+    aom_process_accounting(r, symb, SYMBOL_CDF, ACCT_INFO_NAME);
 #if CONFIG_THROUGHPUT_ANALYSIS
   aom_update_symb_counts(r, (nsymbs == 2), 1, 1);
 #else
@@ -362,9 +372,9 @@
 }
 
 static INLINE int aom_read_symbol_(aom_reader *r, aom_cdf_prob *cdf,
-                                   int nsymbs ACCT_STR_PARAM) {
+                                   int nsymbs ACCT_INFO_PARAM) {
   int ret;
-  ret = aom_read_cdf(r, cdf, nsymbs, ACCT_STR_NAME);
+  ret = aom_read_cdf(r, cdf, nsymbs, ACCT_INFO_NAME);
   if (r->allow_update_cdf) update_cdf(cdf, ret, nsymbs);
   return ret;
 }
@@ -378,22 +388,22 @@
 // (nsymb_bits - 3), (nsymb_bits - 3), (nsymb_bits - 2) or (nsymb_bits - 1)
 // bits, depending on the part.
 static INLINE int aom_read_4part_(aom_reader *r, aom_cdf_prob *cdf,
-                                  int nsymb_bits ACCT_STR_PARAM) {
+                                  int nsymb_bits ACCT_INFO_PARAM) {
   assert(nsymb_bits >= 3);
   int part_bits[4] = { (nsymb_bits - 3), (nsymb_bits - 3), (nsymb_bits - 2),
                        (nsymb_bits - 1) };
   int part_offs[4] = { 0, 1 << (nsymb_bits - 3), 1 << (nsymb_bits - 2),
                        1 << (nsymb_bits - 1) };
-  const int part = aom_read_symbol(r, cdf, 4, ACCT_STR_NAME);
-  return aom_read_literal(r, part_bits[part], ACCT_STR_NAME) + part_offs[part];
+  const int part = aom_read_symbol(r, cdf, 4, ACCT_INFO_NAME);
+  return aom_read_literal(r, part_bits[part], ACCT_INFO_NAME) + part_offs[part];
 }
 
 // Implements a nsymb_bits bit 4-part code that codes a symbol symb given a
 // reference ref_symb after recentering symb around ref_symb.
 static INLINE int aom_read_4part_wref_(aom_reader *r, int ref_symb,
                                        aom_cdf_prob *cdf,
-                                       int nsymb_bits ACCT_STR_PARAM) {
-  const int symb = aom_read_4part(r, cdf, nsymb_bits, ACCT_STR_NAME);
+                                       int nsymb_bits ACCT_INFO_PARAM) {
+  const int symb = aom_read_4part(r, cdf, nsymb_bits, ACCT_INFO_NAME);
   return inv_recenter_finite_nonneg(1 << nsymb_bits, ref_symb, symb);
 }
 #endif  // ENABLE_LR_4PART_CODE
diff --git a/aom_dsp/bitwriter_buffer.c b/aom_dsp/bitwriter_buffer.c
index 7e41949..5f76f15 100644
--- a/aom_dsp/bitwriter_buffer.c
+++ b/aom_dsp/bitwriter_buffer.c
@@ -92,6 +92,13 @@
 void aom_wb_write_primitive_quniform(struct aom_write_bit_buffer *wb,
                                      uint16_t n, uint16_t v) {
   if (n <= 1) return;
+  assert(v < n);
+  // Split the valid range into two.
+  // The encoded value is in the range [0, n), but in order to map a range
+  // which may not be a power of 2 onto a binary code, we split into the
+  // sub-ranges [0, m) and [m, n), where m is an intermediate point.
+  // Values in the range [0, m) then use one fewer bit than values in
+  // the range [m, n).
   const int l = get_msb(n) + 1;
   const int m = (1 << l) - n;
   if (v < m) {
diff --git a/aom_dsp/entcode.c b/aom_dsp/entcode.c
index c794edb..a49ae0d 100644
--- a/aom_dsp/entcode.c
+++ b/aom_dsp/entcode.c
@@ -21,9 +21,9 @@
   Return: The number of bits scaled by 2**OD_BITRES.
           This will always be slightly larger than the exact value (e.g., all
            rounding error is in the positive direction).*/
-uint32_t od_ec_tell_frac(uint32_t nbits_total, uint32_t rng) {
-  uint32_t nbits;
-  int l;
+uint64_t od_ec_tell_frac(uint32_t nbits_total, uint32_t rng) {
+  uint64_t nbits;
+  int64_t l;
   int i;
   /*To handle the non-integral number of bits still left in the encoder/decoder
      state, we compute the worst-case number of bits of val that must be
@@ -37,7 +37,7 @@
      probability of 1/(1 << n) might sometimes appear to use more than n bits.
     This may help explain the surprising result that a newly initialized
      encoder or decoder claims to have used 1 bit.*/
-  nbits = nbits_total << OD_BITRES;
+  nbits = (uint64_t)nbits_total << OD_BITRES;
   l = 0;
   for (i = OD_BITRES; i-- > 0;) {
     int b;
diff --git a/aom_dsp/entcode.h b/aom_dsp/entcode.h
index 0227c52..2e95945 100644
--- a/aom_dsp/entcode.h
+++ b/aom_dsp/entcode.h
@@ -33,14 +33,14 @@
 #define OD_EC_WINDOW_SIZE ((int)sizeof(od_ec_window) * CHAR_BIT)
 
 /*The resolution of fractional-precision bit usage measurements, i.e.,
-   3 => 1/8th bits.*/
-#define OD_BITRES (3)
+   16 => 1/65536th bits.*/
+#define OD_BITRES (16)
 
 #define OD_ICDF AOM_ICDF
 
 /*See entcode.c for further documentation.*/
 
-OD_WARN_UNUSED_RESULT uint32_t od_ec_tell_frac(uint32_t nbits_total,
+OD_WARN_UNUSED_RESULT uint64_t od_ec_tell_frac(uint32_t nbits_total,
                                                uint32_t rng);
 
 #endif  // AOM_AOM_DSP_ENTCODE_H_
diff --git a/aom_dsp/entdec.c b/aom_dsp/entdec.c
index 02188ec..8cfb826 100644
--- a/aom_dsp/entdec.c
+++ b/aom_dsp/entdec.c
@@ -352,6 +352,6 @@
   Return: The number of bits scaled by 2**OD_BITRES.
           This will always be slightly larger than the exact value (e.g., all
            rounding error is in the positive direction).*/
-uint32_t od_ec_dec_tell_frac(const od_ec_dec *dec) {
+uint64_t od_ec_dec_tell_frac(const od_ec_dec *dec) {
   return od_ec_tell_frac(od_ec_dec_tell(dec), dec->rng);
 }
diff --git a/aom_dsp/entdec.h b/aom_dsp/entdec.h
index 7ca2f29..442ae2e 100644
--- a/aom_dsp/entdec.h
+++ b/aom_dsp/entdec.h
@@ -82,7 +82,7 @@
 
 OD_WARN_UNUSED_RESULT int od_ec_dec_tell(const od_ec_dec *dec)
     OD_ARG_NONNULL(1);
-OD_WARN_UNUSED_RESULT uint32_t od_ec_dec_tell_frac(const od_ec_dec *dec)
+OD_WARN_UNUSED_RESULT uint64_t od_ec_dec_tell_frac(const od_ec_dec *dec)
     OD_ARG_NONNULL(1);
 
 #ifdef __cplusplus
diff --git a/aom_dsp/entenc.c b/aom_dsp/entenc.c
index 8517cee..5f19c0e 100644
--- a/aom_dsp/entenc.c
+++ b/aom_dsp/entenc.c
@@ -467,7 +467,7 @@
   Return: The number of bits scaled by 2**OD_BITRES.
           This will always be slightly larger than the exact value (e.g., all
            rounding error is in the positive direction).*/
-uint32_t od_ec_enc_tell_frac(const od_ec_enc *enc) {
+uint64_t od_ec_enc_tell_frac(const od_ec_enc *enc) {
   return od_ec_tell_frac(od_ec_enc_tell(enc), enc->rng);
 }
 
diff --git a/aom_dsp/entenc.h b/aom_dsp/entenc.h
index fbb35a0..9831601 100644
--- a/aom_dsp/entenc.h
+++ b/aom_dsp/entenc.h
@@ -78,7 +78,7 @@
 
 OD_WARN_UNUSED_RESULT int od_ec_enc_tell(const od_ec_enc *enc)
     OD_ARG_NONNULL(1);
-OD_WARN_UNUSED_RESULT uint32_t od_ec_enc_tell_frac(const od_ec_enc *enc)
+OD_WARN_UNUSED_RESULT uint64_t od_ec_enc_tell_frac(const od_ec_enc *enc)
     OD_ARG_NONNULL(1);
 
 void od_ec_enc_checkpoint(od_ec_enc *dst, const od_ec_enc *src);
diff --git a/aom_dsp/intrapred.c b/aom_dsp/intrapred.c
index 5d39cd0..225d5a4 100644
--- a/aom_dsp/intrapred.c
+++ b/aom_dsp/intrapred.c
@@ -296,11 +296,51 @@
   }
 }
 
+#if CONFIG_BLEND_MODE
+#define BLEND_WEIGHT_MAX 32
+static const uint8_t blk_size_log2[65] = {
+  0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4,
+  4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6
+};
+#endif  // CONFIG_BLEND_MODE
+
 static INLINE void highbd_smooth_predictor(uint16_t *dst, ptrdiff_t stride,
                                            int bw, int bh,
                                            const uint16_t *above,
                                            const uint16_t *left, int bd) {
   (void)bd;
+#if CONFIG_BLEND_MODE
+  const uint16_t bl = left[bh];   // estimated by bottom-left pixel
+  const uint16_t tr = above[bw];  // estimated by top-right pixel
+
+  uint16_t *pred = dst;
+  const int scale =
+      ROUND_POWER_OF_TWO((blk_size_log2[bh] - 2 + blk_size_log2[bw] - 2), 2);
+  assert(scale >= 0 && scale <= BLEND_WEIGHT_MAX - 1);
+  for (int r = 0; r < bh; r++) {
+    const int s_top =
+        BLEND_WEIGHT_MAX >>
+        AOMMIN(blk_size_log2[BLEND_WEIGHT_MAX << 1], ((r << 1) >> scale));
+    const uint32_t l = left[r];
+    for (int c = 0; c < bw; c++) {
+      const int s_left =
+          BLEND_WEIGHT_MAX >>
+          AOMMIN(blk_size_log2[BLEND_WEIGHT_MAX << 1], ((c << 1) >> scale));
+      const uint32_t top = above[c];
+      uint32_t predv = (above[c] * (bh - 1 - r) + bl * (r + 1)) * bw;
+      uint32_t predh = (left[r] * (bw - 1 - c) + tr * (c + 1)) * bh;
+      predv = (s_top * top * bw * bh + (BLEND_WEIGHT_MAX * 2 - s_top) * predv);
+      assert(predv < UINT_MAX);
+      predh = (s_left * l * bw * bh + (BLEND_WEIGHT_MAX * 2 - s_left) * predh);
+      assert(predh < UINT_MAX);
+
+      const int bits = 1 + 6 + blk_size_log2[bh] + blk_size_log2[bw];
+      pred[c] = divide_round((predv + predh), bits);
+    }
+    pred += stride;
+  }
+#else
   const uint16_t below_pred = left[bh - 1];   // estimated by bottom-left pixel
   const uint16_t right_pred = above[bw - 1];  // estimated by top-right pixel
   const uint8_t *const sm_weights_w = sm_weight_arrays + bw;
@@ -327,6 +367,7 @@
     }
     dst += stride;
   }
+#endif  // CONFIG_BLEND_MODE
 }
 
 static INLINE void highbd_smooth_v_predictor(uint16_t *dst, ptrdiff_t stride,
@@ -334,6 +375,29 @@
                                              const uint16_t *above,
                                              const uint16_t *left, int bd) {
   (void)bd;
+#if CONFIG_BLEND_MODE
+  const uint16_t bl = left[bh];  // estimated by bottom-left pixel
+
+  uint16_t *pred = dst;
+  const int scale =
+      ROUND_POWER_OF_TWO((blk_size_log2[bh] - 2 + blk_size_log2[bw] - 2), 2);
+  assert(scale >= 0 && scale <= BLEND_WEIGHT_MAX - 1);
+  for (int r = 0; r < bh; ++r) {
+    const int s_top =
+        BLEND_WEIGHT_MAX >>
+        AOMMIN(blk_size_log2[BLEND_WEIGHT_MAX << 1], ((r << 1) >> scale));
+    for (int c = 0; c < bw; ++c) {
+      const uint32_t top = above[c];
+      uint32_t predv = (above[c] * (bh - 1 - r) + bl * (r + 1)) * bw;
+      assert(predv < UINT_MAX);
+      const int bits = 6 + blk_size_log2[bh] + blk_size_log2[bw];
+      pred[c] = divide_round(
+          (s_top * top * bw * bh + (BLEND_WEIGHT_MAX * 2 - s_top) * predv),
+          bits);
+    }
+    pred += stride;
+  }
+#else
   const uint16_t below_pred = left[bh - 1];  // estimated by bottom-left pixel
   const uint8_t *const sm_weights = sm_weight_arrays + bh;
   // scale = 2^sm_weight_log2_scale
@@ -358,6 +422,7 @@
     }
     dst += stride;
   }
+#endif  // CONFIG_BLEND_MODE
 }
 
 static INLINE void highbd_smooth_h_predictor(uint16_t *dst, ptrdiff_t stride,
@@ -365,6 +430,29 @@
                                              const uint16_t *above,
                                              const uint16_t *left, int bd) {
   (void)bd;
+#if CONFIG_BLEND_MODE
+  const uint16_t tr = above[bw];  // estimated by top-right pixel
+
+  uint16_t *pred = dst;
+  const int scale =
+      ROUND_POWER_OF_TWO((blk_size_log2[bh] - 2 + blk_size_log2[bw] - 2), 2);
+  assert(scale >= 0 && scale <= BLEND_WEIGHT_MAX - 1);
+  for (int r = 0; r < bh; r++) {
+    const uint32_t l = left[r];
+    for (int c = 0; c < bw; c++) {
+      const int s_left =
+          BLEND_WEIGHT_MAX >>
+          AOMMIN(blk_size_log2[BLEND_WEIGHT_MAX << 1], ((c << 1) >> scale));
+      uint32_t predh = (left[r] * (bw - 1 - c) + tr * (c + 1)) * bh;
+      assert(predh < UINT_MAX);
+      const int bits = 6 + blk_size_log2[bh] + blk_size_log2[bw];
+      pred[c] = divide_round(
+          (s_left * l * (bw * bh) + (BLEND_WEIGHT_MAX * 2 - s_left) * predh),
+          bits);
+    }
+    pred += stride;
+  }
+#else
   const uint16_t right_pred = above[bw - 1];  // estimated by top-right pixel
   const uint8_t *const sm_weights = sm_weight_arrays + bw;
   // scale = 2^sm_weight_log2_scale
@@ -389,6 +477,7 @@
     }
     dst += stride;
   }
+#endif  // CONFIG_BLEND_MODE
 }
 
 static INLINE void highbd_dc_128_predictor(uint16_t *dst, ptrdiff_t stride,
diff --git a/aom_dsp/psnr.c b/aom_dsp/psnr.c
index d6241e5..3220358 100644
--- a/aom_dsp/psnr.c
+++ b/aom_dsp/psnr.c
@@ -18,13 +18,15 @@
 #include "aom_dsp/psnr.h"
 #include "aom_scale/yv12config.h"
 
+#define MIN_SSE 0.5
+
 double aom_sse_to_psnr(double samples, double peak, double sse) {
-  if (sse > 0.0) {
-    const double psnr = 10.0 * log10(samples * peak * peak / sse);
-    return psnr > MAX_PSNR ? MAX_PSNR : psnr;
-  } else {
-    return MAX_PSNR;
-  }
+  const bool zero_sse = (sse < MIN_SSE);
+  if (zero_sse) sse = MIN_SSE;
+  assert(sse > 0.0);
+  double psnr = 10.0 * log10(samples * peak * peak / sse);
+  if (zero_sse) psnr = ceil(psnr);
+  return psnr;
 }
 
 static void encoder_highbd_variance64(const uint16_t *a, int a_stride,
diff --git a/aom_dsp/x86/highbd_intrapred_asm_sse2.asm b/aom_dsp/x86/highbd_intrapred_asm_sse2.asm
index 8ea91d2..022c60e 100644
--- a/aom_dsp/x86/highbd_intrapred_asm_sse2.asm
+++ b/aom_dsp/x86/highbd_intrapred_asm_sse2.asm
@@ -106,14 +106,14 @@
   pshuflw               m0, m0, 0x0
   punpcklqdq            m0, m0
 .loop:
-  mova   [dstq              ], m0
-  mova   [dstq           +16], m0
-  mova   [dstq+strideq*2    ], m0
-  mova   [dstq+strideq*2 +16], m0
-  mova   [dstq+strideq*4    ], m0
-  mova   [dstq+strideq*4 +16], m0
-  mova   [dstq+stride3q*2   ], m0
-  mova   [dstq+stride3q*2+16], m0
+  movu   [dstq              ], m0
+  movu   [dstq           +16], m0
+  movu   [dstq+strideq*2    ], m0
+  movu   [dstq+strideq*2 +16], m0
+  movu   [dstq+strideq*4    ], m0
+  movu   [dstq+strideq*4 +16], m0
+  movu   [dstq+stride3q*2   ], m0
+  movu   [dstq+stride3q*2+16], m0
   lea                 dstq, [dstq+strideq*8]
   dec              lines4d
   jnz .loop
@@ -214,14 +214,14 @@
   lea             stride3q, [strideq*3]
   mov              nlines4d, 4
 .loop:
-  mova    [dstq              ], m0
-  mova    [dstq           +16], m1
-  mova    [dstq+strideq*2    ], m0
-  mova    [dstq+strideq*2 +16], m1
-  mova    [dstq+strideq*4    ], m0
-  mova    [dstq+strideq*4 +16], m1
-  mova    [dstq+stride3q*2   ], m0
-  mova    [dstq+stride3q*2+16], m1
+  movu    [dstq              ], m0
+  movu    [dstq           +16], m1
+  movu    [dstq+strideq*2    ], m0
+  movu    [dstq+strideq*2 +16], m1
+  movu    [dstq+strideq*4    ], m0
+  movu    [dstq+strideq*4 +16], m1
+  movu    [dstq+stride3q*2   ], m0
+  movu    [dstq+stride3q*2+16], m1
   lea                 dstq, [dstq+strideq*8]
   dec             nlines4d
   jnz .loop
diff --git a/aom_dsp/x86/highbd_intrapred_sse2.c b/aom_dsp/x86/highbd_intrapred_sse2.c
index 4cc07a9..da7d2f3 100644
--- a/aom_dsp/x86/highbd_intrapred_sse2.c
+++ b/aom_dsp/x86/highbd_intrapred_sse2.c
@@ -107,16 +107,16 @@
 static INLINE void h_store_16_unpacklo(uint16_t **dst, const ptrdiff_t stride,
                                        const __m128i *row) {
   const __m128i val = _mm_unpacklo_epi64(*row, *row);
-  _mm_store_si128((__m128i *)*dst, val);
-  _mm_store_si128((__m128i *)(*dst + 8), val);
+  _mm_storeu_si128((__m128i *)*dst, val);
+  _mm_storeu_si128((__m128i *)(*dst + 8), val);
   *dst += stride;
 }
 
 static INLINE void h_store_16_unpackhi(uint16_t **dst, const ptrdiff_t stride,
                                        const __m128i *row) {
   const __m128i val = _mm_unpackhi_epi64(*row, *row);
-  _mm_store_si128((__m128i *)(*dst), val);
-  _mm_store_si128((__m128i *)(*dst + 8), val);
+  _mm_storeu_si128((__m128i *)(*dst), val);
+  _mm_storeu_si128((__m128i *)(*dst + 8), val);
   *dst += stride;
 }
 
@@ -483,8 +483,8 @@
   const __m128i dc_dup = _mm_unpacklo_epi64(dc_dup_lo, dc_dup_lo);
   int i;
   for (i = 0; i < height; ++i, dst += stride) {
-    _mm_store_si128((__m128i *)dst, dc_dup);
-    _mm_store_si128((__m128i *)(dst + 8), dc_dup);
+    _mm_storeu_si128((__m128i *)dst, dc_dup);
+    _mm_storeu_si128((__m128i *)(dst + 8), dc_dup);
   }
 }
 
@@ -739,17 +739,17 @@
   const __m128i above1_u16 = _mm_load_si128((const __m128i *)(above + 8));
   int i;
   for (i = 0; i < 2; ++i) {
-    _mm_store_si128((__m128i *)dst, above0_u16);
-    _mm_store_si128((__m128i *)(dst + 8), above1_u16);
+    _mm_storeu_si128((__m128i *)dst, above0_u16);
+    _mm_storeu_si128((__m128i *)(dst + 8), above1_u16);
     dst += stride;
-    _mm_store_si128((__m128i *)dst, above0_u16);
-    _mm_store_si128((__m128i *)(dst + 8), above1_u16);
+    _mm_storeu_si128((__m128i *)dst, above0_u16);
+    _mm_storeu_si128((__m128i *)(dst + 8), above1_u16);
     dst += stride;
-    _mm_store_si128((__m128i *)dst, above0_u16);
-    _mm_store_si128((__m128i *)(dst + 8), above1_u16);
+    _mm_storeu_si128((__m128i *)dst, above0_u16);
+    _mm_storeu_si128((__m128i *)(dst + 8), above1_u16);
     dst += stride;
-    _mm_store_si128((__m128i *)dst, above0_u16);
-    _mm_store_si128((__m128i *)(dst + 8), above1_u16);
+    _mm_storeu_si128((__m128i *)dst, above0_u16);
+    _mm_storeu_si128((__m128i *)(dst + 8), above1_u16);
     dst += stride;
   }
 }
@@ -901,17 +901,17 @@
   const __m128i row = _mm_set1_epi16((uint16_t)sum32);
   int i;
   for (i = 0; i < 2; ++i) {
-    _mm_store_si128((__m128i *)dst, row);
-    _mm_store_si128((__m128i *)(dst + 8), row);
+    _mm_storeu_si128((__m128i *)dst, row);
+    _mm_storeu_si128((__m128i *)(dst + 8), row);
     dst += stride;
-    _mm_store_si128((__m128i *)dst, row);
-    _mm_store_si128((__m128i *)(dst + 8), row);
+    _mm_storeu_si128((__m128i *)dst, row);
+    _mm_storeu_si128((__m128i *)(dst + 8), row);
     dst += stride;
-    _mm_store_si128((__m128i *)dst, row);
-    _mm_store_si128((__m128i *)(dst + 8), row);
+    _mm_storeu_si128((__m128i *)dst, row);
+    _mm_storeu_si128((__m128i *)(dst + 8), row);
     dst += stride;
-    _mm_store_si128((__m128i *)dst, row);
-    _mm_store_si128((__m128i *)(dst + 8), row);
+    _mm_storeu_si128((__m128i *)dst, row);
+    _mm_storeu_si128((__m128i *)(dst + 8), row);
     dst += stride;
   }
 }
diff --git a/aom_dsp/x86/highbd_variance_sse2.c b/aom_dsp/x86/highbd_variance_sse2.c
index 1e29924..d288ef7 100644
--- a/aom_dsp/x86/highbd_variance_sse2.c
+++ b/aom_dsp/x86/highbd_variance_sse2.c
@@ -251,6 +251,7 @@
       const uint16_t *src, ptrdiff_t src_stride, int x_offset, int y_offset, \
       const uint16_t *dst, ptrdiff_t dst_stride, int height,                 \
       unsigned int *sse, void *unused0, void *unused);
+
 #define DECLS(opt) \
   DECL(8, opt);    \
   DECL(16, opt)
@@ -394,6 +395,26 @@
     return (var >= 0) ? (uint32_t)var : 0;                                     \
   }
 
+#if CONFIG_UNEVEN_4WAY
+// TODO(any): Add back 16X16, 16X8, 16X4 after fixing alignment issues.
+#define FNS(opt)                          \
+  FN(128, 128, 16, 7, 7, opt, (int64_t)); \
+  FN(128, 64, 16, 7, 6, opt, (int64_t));  \
+  FN(64, 128, 16, 6, 7, opt, (int64_t));  \
+  FN(64, 64, 16, 6, 6, opt, (int64_t));   \
+  FN(64, 32, 16, 6, 5, opt, (int64_t));   \
+  FN(32, 64, 16, 5, 6, opt, (int64_t));   \
+  FN(32, 32, 16, 5, 5, opt, (int64_t));   \
+  FN(32, 16, 16, 5, 4, opt, (int64_t));   \
+  FN(16, 32, 16, 4, 5, opt, (int64_t));   \
+  FN(8, 16, 8, 3, 4, opt, (int64_t));     \
+  FN(8, 8, 8, 3, 3, opt, (int64_t));      \
+  FN(8, 4, 8, 3, 2, opt, (int64_t));      \
+  FN(8, 32, 8, 3, 5, opt, (int64_t));     \
+  FN(32, 8, 16, 5, 3, opt, (int64_t));    \
+  FN(16, 64, 16, 4, 6, opt, (int64_t));   \
+  FN(64, 16, 16, 6, 4, opt, (int64_t))
+#else
 #define FNS(opt)                          \
   FN(128, 128, 16, 7, 7, opt, (int64_t)); \
   FN(128, 64, 16, 7, 6, opt, (int64_t));  \
@@ -414,6 +435,7 @@
   FN(32, 8, 16, 5, 3, opt, (int64_t));    \
   FN(16, 64, 16, 4, 6, opt, (int64_t));   \
   FN(64, 16, 16, 6, 4, opt, (int64_t))
+#endif  // CONFIG_UNEVEN_4WAY
 
 FNS(sse2);
 
@@ -552,6 +574,23 @@
     return (var >= 0) ? (uint32_t)var : 0;                                     \
   }
 
+#if CONFIG_UNEVEN_4WAY
+// TODO(any): Add back 16X16, 16X8, 16X4 after fixing alignment issues.
+#define FNS(opt)                        \
+  FN(64, 64, 16, 6, 6, opt, (int64_t)); \
+  FN(64, 32, 16, 6, 5, opt, (int64_t)); \
+  FN(32, 64, 16, 5, 6, opt, (int64_t)); \
+  FN(32, 32, 16, 5, 5, opt, (int64_t)); \
+  FN(32, 16, 16, 5, 4, opt, (int64_t)); \
+  FN(16, 32, 16, 4, 5, opt, (int64_t)); \
+  FN(8, 16, 8, 3, 4, opt, (int64_t));   \
+  FN(8, 8, 8, 3, 3, opt, (int64_t));    \
+  FN(8, 4, 8, 3, 2, opt, (int64_t));    \
+  FN(8, 32, 8, 3, 5, opt, (int64_t));   \
+  FN(32, 8, 16, 5, 3, opt, (int64_t));  \
+  FN(16, 64, 16, 4, 6, opt, (int64_t)); \
+  FN(64, 16, 16, 6, 4, opt, (int64_t));
+#else
 #define FNS(opt)                        \
   FN(64, 64, 16, 6, 6, opt, (int64_t)); \
   FN(64, 32, 16, 6, 5, opt, (int64_t)); \
@@ -569,6 +608,7 @@
   FN(32, 8, 16, 5, 3, opt, (int64_t));  \
   FN(16, 64, 16, 4, 6, opt, (int64_t)); \
   FN(64, 16, 16, 6, 4, opt, (int64_t));
+#endif  // CONFIG_UNEVEN_4WAY
 
 FNS(sse2);
 
diff --git a/aom_dsp/x86/intrapred_avx2.c b/aom_dsp/x86/intrapred_avx2.c
index bbc2d51..aeb08e3 100644
--- a/aom_dsp/x86/intrapred_avx2.c
+++ b/aom_dsp/x86/intrapred_avx2.c
@@ -15,6 +15,9 @@
 #include "config/aom_dsp_rtcd.h"
 #include "aom_dsp/x86/intrapred_x86.h"
 #include "aom_dsp/x86/lpf_common_sse2.h"
+#if CONFIG_IDIF
+#include "av1/common/reconintra.h"
+#endif  // CONFIG_IDIF
 
 static DECLARE_ALIGNED(16, uint8_t, HighbdLoadMaskx[8][16]) = {
   { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
@@ -2692,3 +2695,2059 @@
   }
   return;
 }
+
+#if CONFIG_IDIF
+// Clamp each signed 16-bit lane of u to [0, upper]; upper is derived from bd.
+static INLINE __m256i highbd_clamp_epi16_avx2(__m256i u, int bd) {
+  // Largest bd-bit value, left-shifted by POWER_DR_INTERP_FILTER bits.
+  const int upper_i = ((1 << bd) - 1) << POWER_DR_INTERP_FILTER;
+  const __m256i upper = _mm256_set1_epi16(upper_i);
+  const __m256i lower = _mm256_setzero_si256();
+
+  // Raise negative lanes to zero first, then cap at the upper bound.
+  const __m256i floored = _mm256_max_epi16(u, lower);
+
+  return _mm256_min_epi16(floored, upper);
+}
+// Clamp each signed 32-bit lane of u to [0, upper]; upper is derived from bd.
+static INLINE __m256i highbd_clamp_epi32_avx2(__m256i u, int bd) {
+  // Largest bd-bit value, left-shifted by POWER_DR_INTERP_FILTER bits.
+  const int upper_i = ((1 << bd) - 1) << POWER_DR_INTERP_FILTER;
+  const __m256i upper = _mm256_set1_epi32(upper_i);
+  const __m256i lower = _mm256_setzero_si256();
+
+  // Raise negative lanes to zero first, then cap at the upper bound.
+  const __m256i floored = _mm256_max_epi32(u, lower);
+
+  return _mm256_min_epi32(floored, upper);
+}
+// 16-bit z1 row builder, 4-wide: writes one predicted row into each dst[r].
+static AOM_FORCE_INLINE void highbd_dr_prediction_z1_4xN_internal_idif_avx2(
+    int N, __m128i *dst, const uint16_t *above, int dx, int mrl_index, int bd) {
+  const int frac_bits = 6;  // x holds 6 fractional bits
+  const int max_base_x = ((N + 4) - 1 + (mrl_index << 1));
+  // Lanes whose reference index reaches max_base_x reuse above[max_base_x].
+  assert(dx > 0);
+  __m256i a0, a1, a2, a3;
+  __m256i val0, val1;
+  __m128i a_mbase_x, max_base_x128, base_inc128, mask128;
+  __m256i f0, f1, f2, f3;
+  // Rounding term: half of the scale removed by the final right shift.
+  __m256i rnding = _mm256_set1_epi16(1 << (POWER_DR_INTERP_FILTER - 1));
+
+  a_mbase_x = _mm_set1_epi16(above[max_base_x]);
+  max_base_x128 = _mm_set1_epi16(max_base_x);
+
+  int shift_i;
+  int x = dx * (1 + mrl_index);  // position for row 0; grows by dx per row
+  for (int r = 0; r < N; r++) {
+    __m128i res1;
+
+    int base = x >> frac_bits;  // integer part of x
+    if (base >= max_base_x) {
+      for (int i = r; i < N; ++i) {
+        dst[i] = a_mbase_x;  // all remaining rows: replicated edge sample
+      }
+      return;
+    }
+
+    // load refs: tap t of lane k reads above[base - 1 + t + k]
+    a0 = _mm256_castsi128_si256(_mm_loadu_si128((__m128i *)(above + base - 1)));
+    a1 = _mm256_castsi128_si256(_mm_loadu_si128((__m128i *)(above + base)));
+    a2 = _mm256_castsi128_si256(_mm_loadu_si128((__m128i *)(above + base + 1)));
+    a3 = _mm256_castsi128_si256(_mm_loadu_si128((__m128i *)(above + base + 2)));
+
+    // load filter: table row picked by the fractional part of x, halved
+    shift_i = (x & 0x3F) >> 1;
+    f0 = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][0]);
+    f1 = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][1]);
+    f2 = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][2]);
+    f3 = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][3]);
+
+    // multiply and sum (16-bit lanes, saturating adds)
+    val0 = _mm256_adds_epi16(_mm256_mullo_epi16(a0, f0),
+                             _mm256_mullo_epi16(a1, f1));
+    val1 = _mm256_adds_epi16(_mm256_mullo_epi16(a2, f2),
+                             _mm256_mullo_epi16(a3, f3));
+    val0 = _mm256_adds_epi16(val0, val1);
+    // clamp the scaled sum, then round and shift the filter scale out
+    val0 = highbd_clamp_epi16_avx2(val0, bd);
+    val0 = _mm256_adds_epi16(val0, rnding);
+    val0 = _mm256_srli_epi16(val0, POWER_DR_INTERP_FILTER);
+
+    // lanes with index < max_base_x keep res1; the rest take a_mbase_x
+    res1 = _mm256_castsi256_si128(val0);  // only the low 128 bits are used
+    base_inc128 = _mm_setr_epi16(base, base + 1, base + 2, base + 3, base + 4,
+                                 base + 5, base + 6, base + 7);
+    mask128 = _mm_cmpgt_epi16(max_base_x128, base_inc128);
+    dst[r] = _mm_blendv_epi8(a_mbase_x, res1, mask128);
+    x += dx;
+  }
+}
+// 32-bit-accumulator z1 row builder, 4-wide: one predicted row per dst[r].
+static AOM_FORCE_INLINE void
+highbd_dr_prediction_32bit_z1_4xN_internal_idif_avx2(int N, __m128i *dst,
+                                                     const uint16_t *above,
+                                                     int dx, int mrl_index,
+                                                     int bd) {
+  const int frac_bits = 6;  // x holds 6 fractional bits
+  const int max_base_x = ((N + 4) - 1 + (mrl_index << 1));
+  // Lanes whose reference index reaches max_base_x reuse above[max_base_x].
+  assert(dx > 0);
+  __m256i a0, a1, a2, a3;
+  __m256i val0, val1;
+  __m128i a_mbase_x, max_base_x128, base_inc128, mask128;
+  __m256i f0, f1, f2, f3;
+  // Rounding term: half of the scale removed by the final right shift.
+  __m256i rnding = _mm256_set1_epi32(1 << (POWER_DR_INTERP_FILTER - 1));
+
+  a_mbase_x = _mm_set1_epi16(above[max_base_x]);
+  max_base_x128 = _mm_set1_epi32(max_base_x);
+
+  int x = dx * (1 + mrl_index);  // position for row 0; grows by dx per row
+  int shift_i;
+  for (int r = 0; r < N; r++) {
+    __m128i res1;
+
+    int base = x >> frac_bits;  // integer part of x
+    if (base >= max_base_x) {
+      for (int i = r; i < N; ++i) {
+        dst[i] = a_mbase_x;  // all remaining rows: replicated edge sample
+      }
+      return;
+    }
+
+    // load refs: 8 samples per tap, zero-extended to 32-bit lanes
+    a0 = _mm256_cvtepu16_epi32(_mm_loadu_si128((__m128i *)(above + base - 1)));
+    a1 = _mm256_cvtepu16_epi32(_mm_loadu_si128((__m128i *)(above + base)));
+    a2 = _mm256_cvtepu16_epi32(_mm_loadu_si128((__m128i *)(above + base + 1)));
+    a3 = _mm256_cvtepu16_epi32(_mm_loadu_si128((__m128i *)(above + base + 2)));
+
+    // load filter: table row picked by the fractional part of x, halved
+    shift_i = (x & 0x3F) >> 1;
+    f0 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][0]);
+    f1 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][1]);
+    f2 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][2]);
+    f3 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][3]);
+
+    // multiply and sum (32-bit lanes)
+    val0 = _mm256_add_epi32(_mm256_mullo_epi32(a0, f0),
+                            _mm256_mullo_epi32(a1, f1));
+    val1 = _mm256_add_epi32(_mm256_mullo_epi32(a2, f2),
+                            _mm256_mullo_epi32(a3, f3));
+    val0 = _mm256_add_epi32(val0, val1);
+
+    // clamp the scaled sum, then round and shift the filter scale out
+    val0 = highbd_clamp_epi32_avx2(val0, bd);
+    val0 = _mm256_add_epi32(val0, rnding);
+    val0 = _mm256_srli_epi32(val0, POWER_DR_INTERP_FILTER);
+
+    // keep the low four 32-bit results and narrow them to 16 bits
+    res1 = _mm256_castsi256_si128(val0);
+    res1 = _mm_packus_epi32(res1, res1);
+    // lanes with index < max_base_x keep res1; the rest take a_mbase_x
+    base_inc128 = _mm_setr_epi32(base, base + 1, base + 2, base + 3);
+    mask128 = _mm_cmpgt_epi32(max_base_x128, base_inc128);
+    mask128 = _mm_packs_epi32(mask128, mask128);  // go to 16 bit
+    dst[r] = _mm_blendv_epi8(a_mbase_x, res1, mask128);
+    x += dx;
+  }
+}
+// z1 IDIF predictor for 4-wide blocks: builds bh rows, then stores them.
+static void highbd_dr_prediction_z1_4xN_idif_avx2(
+    uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above,
+    const uint16_t *left, int dx, int dy, int bd, int mrl_index) {
+  __m128i rows[16];  // one vector per output row
+  (void)left;
+  (void)dy;
+  (void)bw;  // only read by the assert below
+  assert(bw == 4);
+  // bd >= 10 uses 32-bit accumulation; lower depths use the 16-bit path.
+  if (bd >= 10) {
+    highbd_dr_prediction_32bit_z1_4xN_internal_idif_avx2(bh, rows, above, dx,
+                                                         mrl_index, bd);
+  } else {
+    highbd_dr_prediction_z1_4xN_internal_idif_avx2(bh, rows, above, dx,
+                                                   mrl_index, bd);
+  }
+  // Only the low 64 bits (4 pixels) of each row vector are written.
+  for (int r = 0; r < bh; ++r) {
+    _mm_storel_epi64((__m128i *)(dst + stride * r), rows[r]);
+  }
+}
+// 16-bit z1 row builder, 8-wide: writes one predicted row into each dst[r].
+static AOM_FORCE_INLINE void highbd_dr_prediction_z1_8xN_internal_idif_avx2(
+    int N, __m128i *dst, const uint16_t *above, int dx, int mrl_index, int bd) {
+  const int frac_bits = 6;  // x holds 6 fractional bits
+  const int max_base_x = ((N + 8) - 1 + (mrl_index << 1));
+  // Lanes whose reference index reaches max_base_x reuse above[max_base_x].
+  assert(dx > 0);
+  __m256i a0, a1, a2, a3;
+  __m256i val0, val1;
+  __m256i a_mbase_x, max_base_x256, base_inc256, mask256;
+  __m256i f0, f1, f2, f3;
+  // Rounding term: half of the scale removed by the final right shift.
+  __m256i rnding = _mm256_set1_epi16(1 << (POWER_DR_INTERP_FILTER - 1));
+
+  a_mbase_x = _mm256_set1_epi16(above[max_base_x]);
+  max_base_x256 = _mm256_set1_epi16(max_base_x);
+
+  int shift_i;
+  int x = dx * (1 + mrl_index);  // position for row 0; grows by dx per row
+  for (int r = 0; r < N; r++) {
+    __m256i res1;
+
+    int base = x >> frac_bits;  // integer part of x
+    if (base >= max_base_x) {
+      for (int i = r; i < N; ++i) {
+        dst[i] = _mm256_castsi256_si128(a_mbase_x);  // save 8 values
+      }
+      return;
+    }
+
+    // load refs: tap t of lane k reads above[base - 1 + t + k]
+    a0 = _mm256_castsi128_si256(_mm_loadu_si128((__m128i *)(above + base - 1)));
+    a1 = _mm256_castsi128_si256(_mm_loadu_si128((__m128i *)(above + base)));
+    a2 = _mm256_castsi128_si256(_mm_loadu_si128((__m128i *)(above + base + 1)));
+    a3 = _mm256_castsi128_si256(_mm_loadu_si128((__m128i *)(above + base + 2)));
+
+    // load filter: table row picked by the fractional part of x, halved
+    shift_i = (x & 0x3F) >> 1;
+    f0 = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][0]);
+    f1 = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][1]);
+    f2 = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][2]);
+    f3 = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][3]);
+    // multiply and sum (16-bit lanes, saturating adds)
+    val0 = _mm256_adds_epi16(_mm256_mullo_epi16(a0, f0),
+                             _mm256_mullo_epi16(a1, f1));
+    val1 = _mm256_adds_epi16(_mm256_mullo_epi16(a2, f2),
+                             _mm256_mullo_epi16(a3, f3));
+    val0 = _mm256_adds_epi16(val0, val1);
+
+    // clamp the scaled sum, then round and shift the filter scale out
+    val0 = highbd_clamp_epi16_avx2(val0, bd);
+    val0 = _mm256_adds_epi16(val0, rnding);
+    val0 = _mm256_srli_epi16(val0, POWER_DR_INTERP_FILTER);
+    // reference index per lane; the upper 8 lanes are zero and never stored
+    base_inc256 =
+        _mm256_setr_epi16(base, base + 1, base + 2, base + 3, base + 4,
+                          base + 5, base + 6, base + 7, 0, 0, 0, 0, 0, 0, 0, 0);
+    // lanes with index < max_base_x keep val0; the rest take a_mbase_x
+    mask256 = _mm256_cmpgt_epi16(max_base_x256, base_inc256);
+    res1 = _mm256_blendv_epi8(a_mbase_x, val0, mask256);
+    dst[r] = _mm256_castsi256_si128(res1);  // low 128 bits = 8 pixels
+    x += dx;
+  }
+}
+// 32-bit-accumulator z1 row builder, 8-wide: one predicted row per dst[r].
+static AOM_FORCE_INLINE void
+highbd_dr_prediction_32bit_z1_8xN_internal_idif_avx2(int N, __m128i *dst,
+                                                     const uint16_t *above,
+                                                     int dx, int mrl_index,
+                                                     int bd) {
+  const int frac_bits = 6;  // x holds 6 fractional bits
+  const int max_base_x = ((N + 8) - 1 + (mrl_index << 1));
+  // Lanes whose reference index reaches max_base_x reuse above[max_base_x].
+  assert(dx > 0);
+  __m256i a0, a1, a2, a3;
+  __m256i val0, val1;
+  __m256i a_mbase_x, max_base_x256, base_inc256, mask256;
+  __m256i f0, f1, f2, f3;
+  // Rounding term: half of the scale removed by the final right shift.
+  __m256i rnding = _mm256_set1_epi32(1 << (POWER_DR_INTERP_FILTER - 1));
+
+  a_mbase_x = _mm256_set1_epi16(above[max_base_x]);
+  max_base_x256 = _mm256_set1_epi32(max_base_x);
+
+  int shift_i;
+  int x = dx * (1 + mrl_index);  // position for row 0; grows by dx per row
+  for (int r = 0; r < N; r++) {
+    __m256i res1;
+
+    int base = x >> frac_bits;  // integer part of x
+    if (base >= max_base_x) {
+      for (int i = r; i < N; ++i) {
+        dst[i] = _mm256_castsi256_si128(a_mbase_x);  // save 8 values
+      }
+      return;
+    }
+
+    // load refs: 8 samples per tap, zero-extended to 32-bit lanes
+    a0 = _mm256_cvtepu16_epi32(_mm_loadu_si128((__m128i *)(above + base - 1)));
+    a1 = _mm256_cvtepu16_epi32(_mm_loadu_si128((__m128i *)(above + base)));
+    a2 = _mm256_cvtepu16_epi32(_mm_loadu_si128((__m128i *)(above + base + 1)));
+    a3 = _mm256_cvtepu16_epi32(_mm_loadu_si128((__m128i *)(above + base + 2)));
+
+    // load filter: table row picked by the fractional part of x, halved
+    shift_i = (x & 0x3F) >> 1;
+    f0 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][0]);
+    f1 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][1]);
+    f2 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][2]);
+    f3 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][3]);
+
+    // multiply and sum (32-bit lanes)
+    val0 = _mm256_add_epi32(_mm256_mullo_epi32(a0, f0),
+                            _mm256_mullo_epi32(a1, f1));
+    val1 = _mm256_add_epi32(_mm256_mullo_epi32(a2, f2),
+                            _mm256_mullo_epi32(a3, f3));
+    val0 = _mm256_add_epi32(val0, val1);
+
+    // clamp the scaled sum, then round and shift the filter scale out
+    val0 = highbd_clamp_epi32_avx2(val0, bd);
+    val0 = _mm256_add_epi32(val0, rnding);
+    val0 = _mm256_srli_epi32(val0, POWER_DR_INTERP_FILTER);
+    // narrow to 16 bits; the low 128 bits end up holding all 8 results
+    res1 = _mm256_packus_epi32(
+        val0, _mm256_castsi128_si256(_mm256_extracti128_si256(val0, 1)));
+
+    base_inc256 = _mm256_setr_epi32(base, base + 1, base + 2, base + 3,
+                                    base + 4, base + 5, base + 6, base + 7);
+    // lanes with index < max_base_x keep res1; the rest take a_mbase_x
+    mask256 = _mm256_cmpgt_epi32(max_base_x256, base_inc256);
+    mask256 = _mm256_packs_epi32(
+        mask256, _mm256_castsi128_si256(
+                     _mm256_extracti128_si256(mask256, 1)));  // go to 16 bit
+    res1 = _mm256_blendv_epi8(a_mbase_x, res1, mask256);
+    dst[r] = _mm256_castsi256_si128(res1);  // low 128 bits = 8 pixels
+    x += dx;
+  }
+}
+// z1 IDIF predictor, 8-wide. NOTE(review): non-static unlike its siblings.
+void highbd_dr_prediction_z1_8xN_idif_avx2(uint16_t *dst, ptrdiff_t stride,
+                                           int bw, int bh,
+                                           const uint16_t *above,
+                                           const uint16_t *left, int dx, int dy,
+                                           int bd, int mrl_index) {
+  __m128i rows[32];  // one vector per output row
+  (void)dy;
+  (void)left;
+  (void)bw;  // only read by the assert below
+  assert(bw == 8);
+  // bd >= 10 uses 32-bit accumulation; lower depths use the 16-bit path.
+  if (bd >= 10) {
+    highbd_dr_prediction_32bit_z1_8xN_internal_idif_avx2(bh, rows, above, dx,
+                                                         mrl_index, bd);
+  } else {
+    highbd_dr_prediction_z1_8xN_internal_idif_avx2(bh, rows, above, dx,
+                                                   mrl_index, bd);
+  }
+  for (int r = 0; r < bh; ++r) {
+    _mm_storeu_si128((__m128i *)(dst + stride * r), rows[r]);
+  }
+}
+// 16-bit z1 row builder, 16-wide: one predicted row (16 lanes) per dstvec[r].
+static AOM_FORCE_INLINE void highbd_dr_prediction_z1_16xN_internal_idif_avx2(
+    int N, __m256i *dstvec, const uint16_t *above, int dx, int mrl_index,
+    int bd) {
+  const int frac_bits = 6;  // x holds 6 fractional bits
+  const int max_base_x = ((16 + N) - 1 + (mrl_index << 1));
+  // Lanes whose reference index reaches max_base_x reuse above[max_base_x].
+  __m256i a_mbase_x, max_base_x256, base_inc256, mask256;
+
+  __m256i a0, a1, a2, a3;
+  __m256i val0, val1;
+  __m256i f0, f1, f2, f3;
+  // Rounding term: half of the scale removed by the final right shift.
+  __m256i rnding = _mm256_set1_epi16(1 << (POWER_DR_INTERP_FILTER - 1));
+
+  a_mbase_x = _mm256_set1_epi16(above[max_base_x]);
+  max_base_x256 = _mm256_set1_epi16(max_base_x);
+
+  int shift_i;
+  int x = dx * (1 + mrl_index);  // position for row 0; grows by dx per row
+  for (int r = 0; r < N; r++) {
+    int base = x >> frac_bits;  // integer part of x
+    if (base >= max_base_x) {
+      for (int i = r; i < N; ++i) {
+        dstvec[i] = a_mbase_x;  // save 16 values
+      }
+      return;
+    }
+
+    // load refs: tap t of lane k reads above[base - 1 + t + k]
+    a0 = _mm256_loadu_si256((__m256i *)(above + base - 1));
+    a1 = _mm256_loadu_si256((__m256i *)(above + base));
+    a2 = _mm256_loadu_si256((__m256i *)(above + base + 1));
+    a3 = _mm256_loadu_si256((__m256i *)(above + base + 2));
+
+    // load filter: table row picked by the fractional part of x, halved
+    shift_i = (x & 0x3F) >> 1;
+    f0 = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][0]);
+    f1 = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][1]);
+    f2 = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][2]);
+    f3 = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][3]);
+    // multiply and sum (16-bit lanes, saturating adds)
+    val0 = _mm256_adds_epi16(_mm256_mullo_epi16(a0, f0),
+                             _mm256_mullo_epi16(a1, f1));
+    val1 = _mm256_adds_epi16(_mm256_mullo_epi16(a2, f2),
+                             _mm256_mullo_epi16(a3, f3));
+    val0 = _mm256_adds_epi16(val0, val1);
+
+    // clamp the scaled sum, then round and shift the filter scale out
+    val0 = highbd_clamp_epi16_avx2(val0, bd);
+    val0 = _mm256_adds_epi16(val0, rnding);
+    val0 = _mm256_srli_epi16(val0, POWER_DR_INTERP_FILTER);
+    // reference index of each of the 16 lanes
+    base_inc256 = _mm256_setr_epi16(base, base + 1, base + 2, base + 3,
+                                    base + 4, base + 5, base + 6, base + 7,
+                                    base + 8, base + 9, base + 10, base + 11,
+                                    base + 12, base + 13, base + 14, base + 15);
+    // lanes with index < max_base_x keep val0; the rest take a_mbase_x
+    mask256 = _mm256_cmpgt_epi16(max_base_x256, base_inc256);
+    dstvec[r] = _mm256_blendv_epi8(a_mbase_x, val0, mask256);
+    x += dx;
+  }
+}
+// 32-bit-accumulator z1 row builder, 16-wide: one row per dstvec[r].
+static AOM_FORCE_INLINE void
+highbd_dr_prediction_32bit_z1_16xN_internal_idif_avx2(int N, __m256i *dstvec,
+                                                      const uint16_t *above,
+                                                      int dx, int mrl_index,
+                                                      int bd) {
+  const int frac_bits = 6;  // x holds 6 fractional bits
+  const int max_base_x = ((16 + N) - 1 + (mrl_index << 1));
+  __m256i a0, a1, a2, a3;
+  __m256i val0, val1;
+  __m256i f0, f1, f2, f3;
+  __m256i a_mbase_x, max_base_x256, base_inc256, mask256;
+  // Rounding term: half of the scale removed by the final right shift.
+  __m256i rnding = _mm256_set1_epi32(1 << (POWER_DR_INTERP_FILTER - 1));
+
+  a_mbase_x = _mm256_set1_epi16(above[max_base_x]);
+  max_base_x256 = _mm256_set1_epi16(max_base_x);
+
+  int shift_i;
+  int x = dx * (1 + mrl_index);  // position for row 0; grows by dx per row
+  for (int r = 0; r < N; r++) {
+    __m256i res[2], res1;  // res[0]/res[1]: lanes 0-7 / 8-15 of the row
+
+    int base = x >> frac_bits;  // integer part of x
+    if (base >= max_base_x) {
+      for (int i = r; i < N; ++i) {
+        dstvec[i] = a_mbase_x;  // save 16 values
+      }
+      return;
+    }
+    // load refs for lanes 0-7, zero-extended to 32-bit lanes
+    a0 = _mm256_cvtepu16_epi32(_mm_loadu_si128((__m128i *)(above + base - 1)));
+    a1 = _mm256_cvtepu16_epi32(_mm_loadu_si128((__m128i *)(above + base)));
+    a2 = _mm256_cvtepu16_epi32(_mm_loadu_si128((__m128i *)(above + base + 1)));
+    a3 = _mm256_cvtepu16_epi32(_mm_loadu_si128((__m128i *)(above + base + 2)));
+
+    // load filter: table row picked by the fractional part of x, halved
+    shift_i = (x & 0x3F) >> 1;
+    f0 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][0]);
+    f1 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][1]);
+    f2 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][2]);
+    f3 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][3]);
+
+    // multiply and sum (32-bit lanes)
+    val0 = _mm256_add_epi32(_mm256_mullo_epi32(a0, f0),
+                            _mm256_mullo_epi32(a1, f1));
+    val1 = _mm256_add_epi32(_mm256_mullo_epi32(a2, f2),
+                            _mm256_mullo_epi32(a3, f3));
+    val0 = _mm256_add_epi32(val0, val1);
+
+    // clamp the scaled sum, then round and shift the filter scale out
+    val0 = highbd_clamp_epi32_avx2(val0, bd);
+    val0 = _mm256_add_epi32(val0, rnding);
+    val0 = _mm256_srli_epi32(val0, POWER_DR_INTERP_FILTER);
+    // narrow to 16 bits; the low 128 bits end up holding lanes 0-7
+    res[0] = _mm256_packus_epi32(
+        val0, _mm256_castsi128_si256(_mm256_extracti128_si256(val0, 1)));
+
+    int mdif = max_base_x - base;  // reference samples left before the edge
+    if (mdif > 8) {  // lanes 8-15 still have at least one lane below the edge
+      a0 =
+          _mm256_cvtepu16_epi32(_mm_loadu_si128((__m128i *)(above + base + 7)));
+      a1 =
+          _mm256_cvtepu16_epi32(_mm_loadu_si128((__m128i *)(above + base + 8)));
+      a2 =
+          _mm256_cvtepu16_epi32(_mm_loadu_si128((__m128i *)(above + base + 9)));
+      a3 = _mm256_cvtepu16_epi32(
+          _mm_loadu_si128((__m128i *)(above + base + 10)));
+
+      // multiply and sum (32-bit lanes)
+      val0 = _mm256_add_epi32(_mm256_mullo_epi32(a0, f0),
+                              _mm256_mullo_epi32(a1, f1));
+      val1 = _mm256_add_epi32(_mm256_mullo_epi32(a2, f2),
+                              _mm256_mullo_epi32(a3, f3));
+      val0 = _mm256_add_epi32(val0, val1);
+
+      // clamp the scaled sum, then round and shift the filter scale out
+      val0 = highbd_clamp_epi32_avx2(val0, bd);
+      val0 = _mm256_add_epi32(val0, rnding);
+      val0 = _mm256_srli_epi32(val0, POWER_DR_INTERP_FILTER);
+
+      res[1] = _mm256_packus_epi32(
+          val0, _mm256_castsi128_si256(_mm256_extracti128_si256(val0, 1)));
+    } else {
+      res[1] = a_mbase_x;  // lanes 8-15 are all at or past the edge
+    }
+    res1 = _mm256_inserti128_si256(res[0], _mm256_castsi256_si128(res[1]),
+                                   1);  // 16 16bit values
+    // lanes with index < max_base_x keep res1; the rest take a_mbase_x
+    base_inc256 = _mm256_setr_epi16(base, base + 1, base + 2, base + 3,
+                                    base + 4, base + 5, base + 6, base + 7,
+                                    base + 8, base + 9, base + 10, base + 11,
+                                    base + 12, base + 13, base + 14, base + 15);
+    mask256 = _mm256_cmpgt_epi16(max_base_x256, base_inc256);
+    dstvec[r] = _mm256_blendv_epi8(a_mbase_x, res1, mask256);
+    x += dx;
+  }
+}
+// z1 IDIF predictor for 16-wide blocks: builds bh rows, then stores them.
+static void highbd_dr_prediction_z1_16xN_idif_avx2(
+    uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above,
+    const uint16_t *left, int dx, int dy, int bd, int mrl_index) {
+  __m256i rows[64];  // one 16-lane vector per output row
+  (void)dy;
+  (void)left;
+  (void)bw;  // only read by the assert below
+  assert(bw == 16);
+  // bd >= 10 uses 32-bit accumulation; lower depths use the 16-bit path.
+  if (bd >= 10) {
+    highbd_dr_prediction_32bit_z1_16xN_internal_idif_avx2(bh, rows, above, dx,
+                                                          mrl_index, bd);
+  } else {
+    highbd_dr_prediction_z1_16xN_internal_idif_avx2(bh, rows, above, dx,
+                                                    mrl_index, bd);
+  }
+  for (int r = 0; r < bh; ++r) {
+    _mm256_storeu_si256((__m256i *)(dst + stride * r), rows[r]);
+  }
+}
+// 16-bit z1 row builder, 32-wide: each row is two 16-lane vectors in dstvec.
+static AOM_FORCE_INLINE void highbd_dr_prediction_z1_32xN_internal_idif_avx2(
+    int N, __m256i *dstvec, const uint16_t *above, int dx, int mrl_index,
+    int bd) {
+  const int frac_bits = 6;  // x holds 6 fractional bits
+  const int max_base_x = ((32 + N) - 1 + (mrl_index << 1));
+  // Lanes whose reference index reaches max_base_x reuse above[max_base_x].
+  __m256i a_mbase_x, max_base_x256, base_inc256, mask256;
+
+  __m256i a0, a1, a2, a3;
+  __m256i val0, val1;
+  __m256i f0, f1, f2, f3;
+  // Rounding term: half of the scale removed by the final right shift.
+  __m256i rnding = _mm256_set1_epi16(1 << (POWER_DR_INTERP_FILTER - 1));
+
+  a_mbase_x = _mm256_set1_epi16(above[max_base_x]);
+  max_base_x256 = _mm256_set1_epi16(max_base_x);
+
+  int shift_i;
+  int x = dx * (1 + mrl_index);  // position for row 0; grows by dx per row
+  for (int r = 0; r < N; r++) {
+    __m256i res;
+
+    int base = x >> frac_bits;  // integer part of x
+    if (base >= max_base_x) {
+      for (int i = r; i < N; ++i) {
+        dstvec[i] = a_mbase_x;  // save 32 values
+        dstvec[i + N] = a_mbase_x;
+      }
+      return;
+    }
+
+    // load filter: table row picked by the fractional part of x, halved
+    shift_i = (x & 0x3F) >> 1;
+    f0 = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][0]);
+    f1 = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][1]);
+    f2 = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][2]);
+    f3 = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][3]);
+    // j selects the 16-lane half of the row: columns j .. j + 15
+    for (int j = 0; j < 32; j += 16) {
+      int mdif = max_base_x - (base + j);  // samples left before the edge
+      if (mdif <= 0) {
+        res = a_mbase_x;  // whole half is at or past the edge
+      } else {
+        // load refs: tap t of lane k reads above[base + j - 1 + t + k]
+        a0 = _mm256_loadu_si256((__m256i *)(above + base - 1 + j));
+        a1 = _mm256_loadu_si256((__m256i *)(above + base + j));
+        a2 = _mm256_loadu_si256((__m256i *)(above + base + 1 + j));
+        a3 = _mm256_loadu_si256((__m256i *)(above + base + 2 + j));
+        // multiply and sum (16-bit lanes, saturating adds)
+        val0 = _mm256_adds_epi16(_mm256_mullo_epi16(a0, f0),
+                                 _mm256_mullo_epi16(a1, f1));
+        val1 = _mm256_adds_epi16(_mm256_mullo_epi16(a2, f2),
+                                 _mm256_mullo_epi16(a3, f3));
+        val0 = _mm256_adds_epi16(val0, val1);
+
+        // clamp the scaled sum, then round and shift the filter scale out
+        val0 = highbd_clamp_epi16_avx2(val0, bd);
+        val0 = _mm256_adds_epi16(val0, rnding);
+        val0 = _mm256_srli_epi16(val0, POWER_DR_INTERP_FILTER);
+        // reference index of each of the 16 lanes in this half
+        base_inc256 = _mm256_setr_epi16(
+            base + j, base + j + 1, base + j + 2, base + j + 3, base + j + 4,
+            base + j + 5, base + j + 6, base + j + 7, base + j + 8,
+            base + j + 9, base + j + 10, base + j + 11, base + j + 12,
+            base + j + 13, base + j + 14, base + j + 15);
+        // lanes with index < max_base_x keep val0; the rest take a_mbase_x
+        mask256 = _mm256_cmpgt_epi16(max_base_x256, base_inc256);
+        res = _mm256_blendv_epi8(a_mbase_x, val0, mask256);
+      }
+      if (!j) {
+        dstvec[r] = res;  // columns 0-15 of row r
+      } else {
+        dstvec[r + N] = res;  // columns 16-31 of row r
+      }
+    }
+    x += dx;
+  }
+}
+// 32-bit-accumulator z1 row builder, 32-wide: two 16-lane vectors per row.
+static AOM_FORCE_INLINE void
+highbd_dr_prediction_32bit_z1_32xN_internal_idif_avx2(int N, __m256i *dstvec,
+                                                      const uint16_t *above,
+                                                      int dx, int mrl_index,
+                                                      int bd) {
+  const int frac_bits = 6;  // x holds 6 fractional bits
+  const int max_base_x = ((32 + N) - 1 + (mrl_index << 1));
+  // Lanes whose reference index reaches max_base_x reuse above[max_base_x].
+  __m256i a_mbase_x, max_base_x256, base_inc256, mask256;
+
+  __m256i a0, a1, a2, a3;
+  __m256i val0, val1;
+  __m256i f0, f1, f2, f3;
+  // Rounding term: half of the scale removed by the final right shift.
+  __m256i rnding = _mm256_set1_epi32(1 << (POWER_DR_INTERP_FILTER - 1));
+
+  a_mbase_x = _mm256_set1_epi16(above[max_base_x]);
+  max_base_x256 = _mm256_set1_epi16(max_base_x);
+
+  int shift_i;
+  int x = dx * (1 + mrl_index);  // position for row 0; grows by dx per row
+  for (int r = 0; r < N; r++) {
+    __m256i res[2], res1;  // res[0]/res[1]: lanes 0-7 / 8-15 of one half
+
+    int base = x >> frac_bits;  // integer part of x
+    if (base >= max_base_x) {
+      for (int i = r; i < N; ++i) {
+        dstvec[i] = a_mbase_x;  // save 32 values
+        dstvec[i + N] = a_mbase_x;
+      }
+      return;
+    }
+
+    // load filter: table row picked by the fractional part of x, halved
+    shift_i = (x & 0x3F) >> 1;
+    f0 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][0]);
+    f1 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][1]);
+    f2 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][2]);
+    f3 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][3]);
+    // j selects the 16-lane half of the row: columns j .. j + 15
+    for (int j = 0; j < 32; j += 16) {
+      int mdif = max_base_x - (base + j);  // samples left before the edge
+      if (mdif <= 0) {
+        res1 = a_mbase_x;  // whole half is at or past the edge
+      } else {
+        a0 = _mm256_cvtepu16_epi32(
+            _mm_loadu_si128((__m128i *)(above + base - 1 + j)));
+        a1 = _mm256_cvtepu16_epi32(
+            _mm_loadu_si128((__m128i *)(above + base + j)));
+        a2 = _mm256_cvtepu16_epi32(
+            _mm_loadu_si128((__m128i *)(above + base + 1 + j)));
+        a3 = _mm256_cvtepu16_epi32(
+            _mm_loadu_si128((__m128i *)(above + base + 2 + j)));
+
+        // multiply and sum (32-bit lanes)
+        val0 = _mm256_add_epi32(_mm256_mullo_epi32(a0, f0),
+                                _mm256_mullo_epi32(a1, f1));
+        val1 = _mm256_add_epi32(_mm256_mullo_epi32(a2, f2),
+                                _mm256_mullo_epi32(a3, f3));
+        val0 = _mm256_add_epi32(val0, val1);
+
+        // clamp the scaled sum, then round and shift the filter scale out
+        val0 = highbd_clamp_epi32_avx2(val0, bd);
+        val0 = _mm256_add_epi32(val0, rnding);
+        val0 = _mm256_srli_epi32(val0, POWER_DR_INTERP_FILTER);
+        // narrow to 16 bits; the low 128 bits end up holding lanes 0-7
+        res[0] = _mm256_packus_epi32(
+            val0, _mm256_castsi128_si256(_mm256_extracti128_si256(val0, 1)));
+
+        if (mdif > 8) {  // lanes 8-15 still reach below the edge
+          a0 = _mm256_cvtepu16_epi32(
+              _mm_loadu_si128((__m128i *)(above + base + 7 + j)));
+          a1 = _mm256_cvtepu16_epi32(
+              _mm_loadu_si128((__m128i *)(above + base + 8 + j)));
+          a2 = _mm256_cvtepu16_epi32(
+              _mm_loadu_si128((__m128i *)(above + base + 9 + j)));
+          a3 = _mm256_cvtepu16_epi32(
+              _mm_loadu_si128((__m128i *)(above + base + 10 + j)));
+
+          // multiply and sum (32-bit lanes)
+          val0 = _mm256_add_epi32(_mm256_mullo_epi32(a0, f0),
+                                  _mm256_mullo_epi32(a1, f1));
+          val1 = _mm256_add_epi32(_mm256_mullo_epi32(a2, f2),
+                                  _mm256_mullo_epi32(a3, f3));
+          val0 = _mm256_add_epi32(val0, val1);
+
+          // clamp the scaled sum, then round and shift the filter scale out
+          val0 = highbd_clamp_epi32_avx2(val0, bd);
+          val0 = _mm256_add_epi32(val0, rnding);
+          val0 = _mm256_srli_epi32(val0, POWER_DR_INTERP_FILTER);
+
+          res[1] = _mm256_packus_epi32(
+              val0, _mm256_castsi128_si256(_mm256_extracti128_si256(val0, 1)));
+        } else {
+          res[1] = a_mbase_x;  // lanes 8-15 are all at or past the edge
+        }
+        res1 = _mm256_inserti128_si256(res[0], _mm256_castsi256_si128(res[1]),
+                                       1);  // 16 16bit values
+        base_inc256 = _mm256_setr_epi16(
+            base + j, base + j + 1, base + j + 2, base + j + 3, base + j + 4,
+            base + j + 5, base + j + 6, base + j + 7, base + j + 8,
+            base + j + 9, base + j + 10, base + j + 11, base + j + 12,
+            base + j + 13, base + j + 14, base + j + 15);
+        // lanes with index < max_base_x keep res1; the rest take a_mbase_x
+        mask256 = _mm256_cmpgt_epi16(max_base_x256, base_inc256);
+        res1 = _mm256_blendv_epi8(a_mbase_x, res1, mask256);
+      }
+      if (!j) {
+        dstvec[r] = res1;  // columns 0-15 of row r
+      } else {
+        dstvec[r + N] = res1;  // columns 16-31 of row r
+      }
+    }
+    x += dx;
+  }
+}
+// z1 IDIF predictor for 32-wide blocks: builds N rows, then stores them.
+static void highbd_dr_prediction_z1_32xN_idif_avx2(int N, uint16_t *dst,
+                                                   ptrdiff_t stride,
+                                                   const uint16_t *above,
+                                                   int dx, int bd,
+                                                   int mrl_index) {
+  __m256i halves[128];  // [r]: columns 0-15 of row r, [r + N]: columns 16-31
+  if (bd >= 10) {
+    highbd_dr_prediction_32bit_z1_32xN_internal_idif_avx2(N, halves, above, dx,
+                                                          mrl_index, bd);
+  } else {
+    highbd_dr_prediction_z1_32xN_internal_idif_avx2(N, halves, above, dx,
+                                                    mrl_index, bd);
+  }
+  for (int r = 0; r < N; ++r) {
+    _mm256_storeu_si256((__m256i *)(dst + stride * r), halves[r]);
+    _mm256_storeu_si256((__m256i *)(dst + stride * r + 16), halves[r + N]);
+  }
+}
+// 16-bit z1 predictor for 64-wide blocks; writes N rows straight into dst.
+static void highbd_dr_prediction_z1_64xN_internal_idif_avx2(
+    int N, uint16_t *dst, ptrdiff_t stride, const uint16_t *above, int dx,
+    int mrl_index, int bd) {
+  const int frac_bits = 6;  // x holds 6 fractional bits
+  const int max_base_x = ((64 + N) - 1 + (mrl_index << 1));
+  // Lanes whose reference index reaches max_base_x reuse above[max_base_x].
+  __m256i a_mbase_x, max_base_x256, base_inc256, mask256;
+
+  __m256i a0, a1, a2, a3;
+  __m256i val0, val1;
+  __m256i f0, f1, f2, f3;
+  // Rounding term: half of the scale removed by the final right shift.
+  __m256i rnding = _mm256_set1_epi16(1 << (POWER_DR_INTERP_FILTER - 1));
+
+  a_mbase_x = _mm256_set1_epi16(above[max_base_x]);
+  max_base_x256 = _mm256_set1_epi16(max_base_x);
+
+  int shift_i;
+  int x = dx * (1 + mrl_index);  // position for row 0; grows by dx per row
+  for (int r = 0; r < N; r++, dst += stride) {
+    __m256i res;
+
+    int base = x >> frac_bits;  // integer part of x
+    if (base >= max_base_x) {
+      for (int i = r; i < N; ++i) {
+        _mm256_storeu_si256((__m256i *)dst, a_mbase_x);  // save 64 values
+        _mm256_storeu_si256((__m256i *)(dst + 16), a_mbase_x);
+        _mm256_storeu_si256((__m256i *)(dst + 32), a_mbase_x);
+        _mm256_storeu_si256((__m256i *)(dst + 48), a_mbase_x);
+        dst += stride;
+      }
+      return;
+    }
+
+    // load filter: table row picked by the fractional part of x, halved
+    shift_i = (x & 0x3F) >> 1;
+    f0 = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][0]);
+    f1 = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][1]);
+    f2 = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][2]);
+    f3 = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][3]);
+    // j selects a 16-lane quarter of the row: columns j .. j + 15
+    for (int j = 0; j < 64; j += 16) {
+      int mdif = max_base_x - (base + j);  // samples left before the edge
+      if (mdif <= 0) {
+        _mm256_storeu_si256((__m256i *)(dst + j), a_mbase_x);
+      } else {
+        // load refs: tap t of lane k reads above[base + j - 1 + t + k]
+        a0 = _mm256_loadu_si256((__m256i *)(above + base - 1 + j));
+        a1 = _mm256_loadu_si256((__m256i *)(above + base + j));
+        a2 = _mm256_loadu_si256((__m256i *)(above + base + 1 + j));
+        a3 = _mm256_loadu_si256((__m256i *)(above + base + 2 + j));
+        // multiply and sum (16-bit lanes, saturating adds)
+        val0 = _mm256_adds_epi16(_mm256_mullo_epi16(a0, f0),
+                                 _mm256_mullo_epi16(a1, f1));
+        val1 = _mm256_adds_epi16(_mm256_mullo_epi16(a2, f2),
+                                 _mm256_mullo_epi16(a3, f3));
+        val0 = _mm256_adds_epi16(val0, val1);
+
+        // clamp the scaled sum, then round and shift the filter scale out
+        val0 = highbd_clamp_epi16_avx2(val0, bd);
+        val0 = _mm256_adds_epi16(val0, rnding);
+        val0 = _mm256_srli_epi16(val0, POWER_DR_INTERP_FILTER);
+        // reference index of each of the 16 lanes in this quarter
+        base_inc256 = _mm256_setr_epi16(
+            base + j, base + j + 1, base + j + 2, base + j + 3, base + j + 4,
+            base + j + 5, base + j + 6, base + j + 7, base + j + 8,
+            base + j + 9, base + j + 10, base + j + 11, base + j + 12,
+            base + j + 13, base + j + 14, base + j + 15);
+        // lanes with index < max_base_x keep val0; the rest take a_mbase_x
+        mask256 = _mm256_cmpgt_epi16(max_base_x256, base_inc256);
+        res = _mm256_blendv_epi8(a_mbase_x, val0, mask256);
+        _mm256_storeu_si256((__m256i *)(dst + j), res);  // 16 16bit values
+      }
+    }
+    x += dx;
+  }
+}
+
+// Zone-1 directional prediction (IDIF 4-tap filter) for 64-wide blocks using
+// 32-bit intermediate arithmetic.
+//
+// Parameters:
+//   N          number of rows to produce.
+//   dst        output; 64 uint16_t samples are written per row.
+//   stride     distance between output rows, in uint16_t units.
+//   above      reference row the filter taps are read from.
+//   dx         position increment per row, in 1/64-sample units.
+//   mrl_index  reference-line index; scales the starting position and
+//              extends max_base_x by 2 * mrl_index.
+//   bd         bit depth, passed through to highbd_clamp_epi32_avx2().
+//
+// Columns whose integer position reaches max_base_x are filled with
+// above[max_base_x] instead of a filtered value.
+static void highbd_dr_prediction_32bit_z1_64xN_internal_idif_avx2(
+    int N, uint16_t *dst, ptrdiff_t stride, const uint16_t *above, int dx,
+    int mrl_index, int bd) {
+  const int frac_bits = 6;
+  const int max_base_x = ((64 + N) - 1 + (mrl_index << 1));
+
+  __m256i a0, a1, a2, a3;
+
+  __m256i a_mbase_x, max_base_x256, base_inc256, mask256;
+
+  __m256i val0, val1;
+  __m256i f0, f1, f2, f3;
+
+  __m256i rnding = _mm256_set1_epi32(1 << (POWER_DR_INTERP_FILTER - 1));
+
+  // Broadcast of the fill sample and of the limit index, both as 16-bit lanes.
+  a_mbase_x = _mm256_set1_epi16(above[max_base_x]);
+  max_base_x256 = _mm256_set1_epi16(max_base_x);
+
+  int shift_i;
+  // x is the position for the current row in 1/64-sample units.
+  int x = dx * (1 + mrl_index);
+  for (int r = 0; r < N; r++, dst += stride) {
+    __m256i res[2], res1;
+
+    int base = x >> frac_bits;
+    if (base >= max_base_x) {
+      // This row and every remaining one lies entirely at or past the limit:
+      // fill them with the broadcast sample and stop.
+      for (int i = r; i < N; ++i) {
+        _mm256_storeu_si256((__m256i *)dst, a_mbase_x);  // 4 x 16 = 64 values
+        _mm256_storeu_si256((__m256i *)(dst + 16), a_mbase_x);
+        _mm256_storeu_si256((__m256i *)(dst + 32), a_mbase_x);
+        _mm256_storeu_si256((__m256i *)(dst + 48), a_mbase_x);
+        dst += stride;
+      }
+      return;
+    }
+
+    // load filter
+    // The 6-bit fractional position is halved to index av1_dr_interp_filter;
+    // each of the four taps is broadcast to all 32-bit lanes.
+    shift_i = (x & 0x3F) >> 1;
+    f0 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][0]);
+    f1 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][1]);
+    f2 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][2]);
+    f3 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][3]);
+
+    // The row is produced 16 columns at a time.
+    for (int j = 0; j < 64; j += 16) {
+      // Number of columns in this group that are still below max_base_x.
+      int mdif = max_base_x - (base + j);
+      if (mdif <= 0) {
+        _mm256_storeu_si256((__m256i *)(dst + j), a_mbase_x);
+      } else {
+        // First 8 columns: taps at offsets -1, 0, +1, +2 widened to 32 bits.
+        a0 = _mm256_cvtepu16_epi32(
+            _mm_loadu_si128((__m128i *)(above + base - 1 + j)));
+        a1 = _mm256_cvtepu16_epi32(
+            _mm_loadu_si128((__m128i *)(above + base + j)));
+        a2 = _mm256_cvtepu16_epi32(
+            _mm_loadu_si128((__m128i *)(above + base + 1 + j)));
+        a3 = _mm256_cvtepu16_epi32(
+            _mm_loadu_si128((__m128i *)(above + base + 2 + j)));
+
+        // multiply and sum
+        val0 = _mm256_add_epi32(_mm256_mullo_epi32(a0, f0),
+                                _mm256_mullo_epi32(a1, f1));
+        val1 = _mm256_add_epi32(_mm256_mullo_epi32(a2, f2),
+                                _mm256_mullo_epi32(a3, f3));
+        val0 = _mm256_add_epi32(val0, val1);
+
+        // round shift
+        // NOTE(review): the clamp is applied to the unshifted sum, before
+        // rounding; highbd_clamp_epi32_avx2 is defined outside this view.
+        val0 = highbd_clamp_epi32_avx2(val0, bd);
+        val0 = _mm256_add_epi32(val0, rnding);
+        val0 = _mm256_srli_epi32(val0, POWER_DR_INTERP_FILTER);
+
+        // Pack val0 against its own upper lane so that the low 128 bits of
+        // res[0] hold all eight results as 16-bit values, in order.
+        res[0] = _mm256_packus_epi32(
+            val0, _mm256_castsi128_si256(_mm256_extracti128_si256(val0, 1)));
+
+        if (mdif > 8) {
+          // Second 8 columns of the group: same taps, 8 samples further on.
+          a0 = _mm256_cvtepu16_epi32(
+              _mm_loadu_si128((__m128i *)(above + base + 7 + j)));
+          a1 = _mm256_cvtepu16_epi32(
+              _mm_loadu_si128((__m128i *)(above + base + 8 + j)));
+          a2 = _mm256_cvtepu16_epi32(
+              _mm_loadu_si128((__m128i *)(above + base + 9 + j)));
+          a3 = _mm256_cvtepu16_epi32(
+              _mm_loadu_si128((__m128i *)(above + base + 10 + j)));
+
+          // multiply and sum
+          val0 = _mm256_add_epi32(_mm256_mullo_epi32(a0, f0),
+                                  _mm256_mullo_epi32(a1, f1));
+          val1 = _mm256_add_epi32(_mm256_mullo_epi32(a2, f2),
+                                  _mm256_mullo_epi32(a3, f3));
+          val0 = _mm256_add_epi32(val0, val1);
+
+          // round shift
+          val0 = highbd_clamp_epi32_avx2(val0, bd);
+          val0 = _mm256_add_epi32(val0, rnding);
+          val0 = _mm256_srli_epi32(val0, POWER_DR_INTERP_FILTER);
+
+          res[1] = _mm256_packus_epi32(
+              val0, _mm256_castsi128_si256(_mm256_extracti128_si256(val0, 1)));
+        } else {
+          // All of the second 8 columns are at or past the limit.
+          res[1] = a_mbase_x;
+        }
+        // Combine the low halves of res[0] and res[1] into one 256-bit row.
+        res1 = _mm256_inserti128_si256(res[0], _mm256_castsi256_si128(res[1]),
+                                       1);  // 16 16bit values
+        // Per-column integer positions for this group of 16.
+        base_inc256 = _mm256_setr_epi16(
+            base + j, base + j + 1, base + j + 2, base + j + 3, base + j + 4,
+            base + j + 5, base + j + 6, base + j + 7, base + j + 8,
+            base + j + 9, base + j + 10, base + j + 11, base + j + 12,
+            base + j + 13, base + j + 14, base + j + 15);
+
+        // Keep the filtered value where position < max_base_x, otherwise use
+        // the broadcast fill sample.
+        mask256 = _mm256_cmpgt_epi16(max_base_x256, base_inc256);
+        res1 = _mm256_blendv_epi8(a_mbase_x, res1, mask256);
+        _mm256_storeu_si256((__m256i *)(dst + j), res1);
+      }
+    }
+    x += dx;
+  }
+}
+
+// 64-wide zone-1 IDIF predictor: selects the 16-bit or 32-bit kernel from the
+// bit depth. `left` and `dy` are unused; `bw` is only checked (debug builds)
+// to be 64.
+static void highbd_dr_prediction_z1_64xN_idif_avx2(
+    uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above,
+    const uint16_t *left, int dx, int dy, int bd, int mrl_index) {
+  assert(bw == 64);
+  (void)bw;
+  (void)dy;
+  (void)left;
+
+  if (bd >= 10) {
+    // 32-bit intermediates.
+    highbd_dr_prediction_32bit_z1_64xN_internal_idif_avx2(
+        bh, dst, stride, above, dx, mrl_index, bd);
+    return;
+  }
+  // 16-bit intermediates.
+  highbd_dr_prediction_z1_64xN_internal_idif_avx2(bh, dst, stride, above, dx,
+                                                  mrl_index, bd);
+}
+
+// Zone-1 IDIF directional prediction entry point: forwards to the
+// implementation for the given block width. Widths other than 4, 8, 16, 32
+// and 64 fall through without writing anything to dst.
+void av1_highbd_dr_prediction_z1_idif_avx2(uint16_t *dst, ptrdiff_t stride,
+                                           int bw, int bh,
+                                           const uint16_t *above,
+                                           const uint16_t *left, int dx, int dy,
+                                           int bd, int mrl_index) {
+  if (bw == 4) {
+    highbd_dr_prediction_z1_4xN_idif_avx2(dst, stride, bw, bh, above, left, dx,
+                                          dy, bd, mrl_index);
+  } else if (bw == 8) {
+    highbd_dr_prediction_z1_8xN_idif_avx2(dst, stride, bw, bh, above, left, dx,
+                                          dy, bd, mrl_index);
+  } else if (bw == 16) {
+    highbd_dr_prediction_z1_16xN_idif_avx2(dst, stride, bw, bh, above, left,
+                                           dx, dy, bd, mrl_index);
+  } else if (bw == 32) {
+    // The 32-wide helper is called with a shorter argument list than the
+    // other widths.
+    highbd_dr_prediction_z1_32xN_idif_avx2(bh, dst, stride, above, dx, bd,
+                                           mrl_index);
+  } else if (bw == 64) {
+    highbd_dr_prediction_z1_64xN_idif_avx2(dst, stride, bw, bh, above, left,
+                                           dx, dy, bd, mrl_index);
+  }
+}
+
+// Applies the 4-tap IDIF filter to eight reference samples using 16-bit
+// lanes. The four taps are read at offsets -1, 0, +1 and +2 from
+// above + base_x + base_shift, and each load is permuted with the
+// HighbdLoadMaskx entry for base_shift. Only the low 128 bits of the return
+// value are defined, because the inputs are widened with
+// _mm256_castsi128_si256.
+static AOM_FORCE_INLINE __m256i highbd_dr_row8_idif_avx2(const uint16_t *above,
+                                                         const __m256i *filter,
+                                                         int base_x,
+                                                         int base_shift,
+                                                         int bd) {
+  const uint16_t *const center = above + base_x + base_shift;
+  const __m128i shuffle = *(__m128i *)HighbdLoadMaskx[base_shift];
+
+  // Load and permute the four tap vectors.
+  const __m256i tap0 = _mm256_castsi128_si256(
+      _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)(center - 1)), shuffle));
+  const __m256i tap1 = _mm256_castsi128_si256(
+      _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)(center)), shuffle));
+  const __m256i tap2 = _mm256_castsi128_si256(
+      _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)(center + 1)), shuffle));
+  const __m256i tap3 = _mm256_castsi128_si256(
+      _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)(center + 2)), shuffle));
+
+  // Weighted sum. The additions saturate, so the pairing (0+1) + (2+3) is
+  // part of the result and must not be regrouped.
+  const __m256i sum01 = _mm256_adds_epi16(_mm256_mullo_epi16(tap0, filter[0]),
+                                          _mm256_mullo_epi16(tap1, filter[1]));
+  const __m256i sum23 = _mm256_adds_epi16(_mm256_mullo_epi16(tap2, filter[2]),
+                                          _mm256_mullo_epi16(tap3, filter[3]));
+  __m256i acc = _mm256_adds_epi16(sum01, sum23);
+
+  // Clamp, add the rounding offset, then shift down.
+  acc = highbd_clamp_epi16_avx2(acc, bd);
+  acc = _mm256_adds_epi16(
+      acc, _mm256_set1_epi16(1 << (POWER_DR_INTERP_FILTER - 1)));
+  return _mm256_srli_epi16(acc, POWER_DR_INTERP_FILTER);
+}
+
+// Applies the 4-tap IDIF filter to eight reference samples using 32-bit
+// lanes. The four taps are read at offsets -1, 0, +1 and +2 from
+// above + base_x + base_shift, permuted with the HighbdLoadMaskx entry for
+// base_shift and zero-extended to 32 bits. The eight results are returned as
+// 16-bit values in the low 128 bits of the return value.
+static AOM_FORCE_INLINE __m256i
+highbd_dr_row8_32bit_idif_avx2(const uint16_t *above, const __m256i *filter,
+                               int base_x, int base_shift, int bd) {
+  const uint16_t *const center = above + base_x + base_shift;
+  const __m128i shuffle = *(__m128i *)HighbdLoadMaskx[base_shift];
+
+  // Load, permute and widen the four tap vectors.
+  const __m256i tap0 = _mm256_cvtepu16_epi32(
+      _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)(center - 1)), shuffle));
+  const __m256i tap1 = _mm256_cvtepu16_epi32(
+      _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)(center)), shuffle));
+  const __m256i tap2 = _mm256_cvtepu16_epi32(
+      _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)(center + 1)), shuffle));
+  const __m256i tap3 = _mm256_cvtepu16_epi32(
+      _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)(center + 2)), shuffle));
+
+  // Weighted sum of the four taps.
+  const __m256i sum01 = _mm256_add_epi32(_mm256_mullo_epi32(tap0, filter[0]),
+                                         _mm256_mullo_epi32(tap1, filter[1]));
+  const __m256i sum23 = _mm256_add_epi32(_mm256_mullo_epi32(tap2, filter[2]),
+                                         _mm256_mullo_epi32(tap3, filter[3]));
+  __m256i acc = _mm256_add_epi32(sum01, sum23);
+
+  // Clamp, add the rounding offset, then shift down.
+  acc = highbd_clamp_epi32_avx2(acc, bd);
+  acc = _mm256_add_epi32(
+      acc, _mm256_set1_epi32(1 << (POWER_DR_INTERP_FILTER - 1)));
+  acc = _mm256_srli_epi32(acc, POWER_DR_INTERP_FILTER);
+
+  // Pack against the accumulator's own upper lane so the low 128 bits carry
+  // all eight results in order.
+  const __m256i upper = _mm256_castsi128_si256(_mm256_extracti128_si256(acc, 1));
+  return _mm256_packus_epi32(acc, upper);
+}
+
+// Zone-2 helper, 16-bit arithmetic: predicts the rows r .. r + rows - 1 of one
+// tile (columns starting at j) from `above` and writes one __m128i per row to
+// dest[0 .. rows - 1]. The tile extent is 4 along a dimension whose size is
+// exactly 4, otherwise 8. Rows whose base_shift reaches the tile width are
+// written as zero.
+static INLINE void highbd_dr_z2_8x8_idif_avx2(int H, int W,
+                                              const uint16_t *above,
+                                              __m128i *dest, int r, int j,
+                                              int dx, int mrl_index, int bd) {
+  const int rows = (H == 4) ? 4 : 8;
+  const int cols = (W == 4) ? 4 : 8;
+  const int min_base_x = -(1 + mrl_index);
+
+  for (int k = 0; k < rows; ++k) {
+    const int i = r + k;
+    assert(i < H);
+    assert(j < W);
+
+    // Integer reference position of the tile's first column on this row.
+    const int base_x = ((j << 6) - (i + 1 + mrl_index) * dx) >> 6;
+    // How far base_x falls below min_base_x - 1; zero when it does not.
+    const int deficit = min_base_x - base_x - 1;
+    const int base_shift = (deficit > 0) ? deficit : 0;
+
+    if (base_shift >= cols) {
+      dest[k] = _mm_setzero_si128();
+      continue;
+    }
+
+    // Broadcast the four filter taps selected by the fractional position.
+    const int shift_i = ((-(i + 1 + mrl_index) * dx) & 0x3F) >> 1;
+    __m256i f[4];
+    for (int t = 0; t < 4; ++t) {
+      f[t] = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][t]);
+    }
+
+    dest[k] = _mm256_castsi256_si128(
+        highbd_dr_row8_idif_avx2(above, f, base_x, base_shift, bd));
+  }
+}
+
+// Zone-2 helper, 32-bit arithmetic: predicts the rows r .. r + rows - 1 of one
+// tile (columns starting at j) from `above` and writes one __m128i per row to
+// dest[0 .. rows - 1]. The tile extent is 4 along a dimension whose size is
+// exactly 4, otherwise 8. Rows whose base_shift reaches the tile width are
+// written as zero.
+static INLINE void highbd_dr_z2_32bit_8x8_idif_avx2(int H, int W,
+                                                    const uint16_t *above,
+                                                    __m128i *dest, int r, int j,
+                                                    int dx, int mrl_index,
+                                                    int bd) {
+  const int rows = (H == 4) ? 4 : 8;
+  const int cols = (W == 4) ? 4 : 8;
+  const int min_base_x = -(1 + mrl_index);
+
+  for (int k = 0; k < rows; ++k) {
+    const int i = r + k;
+    assert(i < H);
+    assert(j < W);
+
+    // Integer reference position of the tile's first column on this row.
+    const int base_x = ((j << 6) - (i + 1 + mrl_index) * dx) >> 6;
+    // How far base_x falls below min_base_x - 1; zero when it does not.
+    const int deficit = min_base_x - base_x - 1;
+    const int base_shift = (deficit > 0) ? deficit : 0;
+
+    if (base_shift >= cols) {
+      dest[k] = _mm_setzero_si128();
+      continue;
+    }
+
+    // Broadcast the four filter taps selected by the fractional position.
+    const int shift_i = ((-(i + 1 + mrl_index) * dx) & 0x3F) >> 1;
+    __m256i f[4];
+    for (int t = 0; t < 4; ++t) {
+      f[t] = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][t]);
+    }
+
+    dest[k] = _mm256_castsi256_si128(
+        highbd_dr_row8_32bit_idif_avx2(above, f, base_x, base_shift, bd));
+  }
+}
+
+// Zone-2 IDIF prediction computed in tiles of up to 8x8 samples. Despite the
+// "32bit" in the name, both arithmetic widths are dispatched from here based
+// on bd.
+//
+//   H, W        block height and width; a dimension equal to 4 uses a tile
+//               extent of 4 along that dimension, anything else uses 8.
+//   dst/stride  output buffer and row pitch in uint16_t units.
+//   above/left  reference row and reference column.
+//   dx, dy      position steps for the above-based and left-based passes.
+//   mrl_index   reference-line index.
+//   bd          bit depth; bd < 10 selects the 16-bit helper.
+static void highbd_dr_32bit_z2_8x8_tiling_idif_avx2(
+    int H, int W, uint16_t *dst, ptrdiff_t stride, const uint16_t *above,
+    const uint16_t *left, int dx, int dy, int mrl_index, int bd) {
+  // Directional prediction in a 8x8 tile.
+  // Sizes of 4x4, 4x8 and 8x4 are supported as well.
+  // Step 1. Predict from above.
+  // Step 2. Predict from left and transpose.
+  // Step 3. Merge results.
+
+  const int min_base_x = -((1 + mrl_index));
+  const int frac_bits_x = 6;
+
+  __m128i x_pred[8];
+  __m128i y_pred[8];
+  __m128i _y_pred[8];
+
+  // The helpers write only 4 entries when a dimension is 4, while the
+  // transpose below always reads all 8, so start from a defined state.
+  for (int i = 0; i < 8; i++) {
+    x_pred[i] = _mm_setzero_si128();
+    y_pred[i] = _mm_setzero_si128();
+    _y_pred[i] = _mm_setzero_si128();
+  }
+
+  int min_h = (H == 4) ? 4 : 8;
+  int min_w = (W == 4) ? 4 : 8;
+
+  // Rows always advance by 8; with H == 4 the outer loop body runs once.
+  for (int r = 0; r < H; r += 8) {
+    for (int j = 0; j < W; j += min_w) {
+      assert((W - j) >= min_w);
+      assert((H - r) >= min_h);
+
+      // The left-based pass reuses the same helper with H/W and r/j swapped,
+      // so its output is indexed by column and needs the transpose below.
+      if (bd < 10) {
+        highbd_dr_z2_8x8_idif_avx2(H, W, above, x_pred, r, j, dx, mrl_index,
+                                   bd);
+        highbd_dr_z2_8x8_idif_avx2(W, H, left, _y_pred, j, r, dy, mrl_index,
+                                   bd);
+      } else {
+        highbd_dr_z2_32bit_8x8_idif_avx2(H, W, above, x_pred, r, j, dx,
+                                         mrl_index, bd);
+        highbd_dr_z2_32bit_8x8_idif_avx2(W, H, left, _y_pred, j, r, dy,
+                                         mrl_index, bd);
+      }
+      highbd_transpose8x8_sse2(&_y_pred[0], &_y_pred[1], &_y_pred[2],
+                               &_y_pred[3], &_y_pred[4], &_y_pred[5],
+                               &_y_pred[6], &_y_pred[7], &y_pred[0], &y_pred[1],
+                               &y_pred[2], &y_pred[3], &y_pred[4], &y_pred[5],
+                               &y_pred[6], &y_pred[7]);
+
+      for (int k = 0; k < min_h; ++k) {
+        int y = r + k + 1;
+        int base_x = ((j << 6) - (y + mrl_index) * dx) >> frac_bits_x;
+        // Clamped to [0, min_w] and used as the HighbdBaseMask row index.
+        int base_min_diff = (min_base_x - base_x);
+        if (base_min_diff > min_w) {
+          base_min_diff = min_w;
+        } else {
+          if (base_min_diff < 0) base_min_diff = 0;
+        }
+
+        __m128i resx, resy, resxy;
+        resx = x_pred[k];
+        resy = y_pred[k];
+
+        // Lanes set in the mask take the left-based value, the rest keep the
+        // above-based value. NOTE(review): presumably the mask covers the
+        // leading base_min_diff samples; the table is not visible here.
+        resxy = _mm_blendv_epi8(resx, resy,
+                                *(__m128i *)HighbdBaseMask[base_min_diff]);
+
+        // 8-wide tiles store a full vector, 4-wide tiles only the low 64 bits.
+        if (min_w == 8) {
+          _mm_storeu_si128((__m128i *)(dst + k * stride + j), resxy);
+        } else {
+          _mm_storel_epi64((__m128i *)(dst + k * stride + j), resxy);
+        }
+      }
+    }
+    // Advance to the next band of 8 rows, except after the last band.
+    if (r + 8 < H) dst += 8 * stride;
+  }
+}
+
+// Zone-2 helper, 16-bit arithmetic: predicts 16 rows (r .. r + 15) of a
+// 16-wide tile starting at column j from `above`, writing one __m256i per row
+// to dest[0 .. 15]. H and W are only used by the asserts.
+//
+// Each row is assembled from two 8-sample halves. base_shift counts how far
+// the row's first reference position lies below min_base_x - 1; a half whose
+// shift reaches 8 is not loaded.
+static void highbd_dr_z2_16x16_idif_avx2(int H, int W, const uint16_t *above,
+                                         __m256i *dest, int r, int j, int dx,
+                                         int mrl_index, int bd) {
+  (void)H;
+  (void)W;
+
+  const int min_base_x = -(1 + mrl_index);
+  const int frac_bits_x = 6;
+
+  __m128i a0_x128, a1_x128, a2_x128, a3_x128;
+  __m256i a0_x, a1_x, a2_x, a3_x;
+  __m256i f0_x, f1_x, f2_x, f3_x;
+  __m256i rnding = _mm256_set1_epi16(1 << (POWER_DR_INTERP_FILTER - 1));
+  __m256i val0, val1;
+
+  for (int i = r; i < r + 16; ++i) {
+    assert(i < H);
+    assert(j < W);
+    int y = i + 1;
+
+    int base_x = ((j << 6) - (y + mrl_index) * dx) >> frac_bits_x;
+    int base_shift = 0;
+    if ((base_x) < (min_base_x - 1)) {
+      base_shift = (min_base_x - (base_x)-1);
+    }
+
+    // Low half (columns 0..7): taps at offsets -1, 0, +1, +2, permuted by the
+    // HighbdLoadMaskx entry for base_shift. Zeroed when base_shift >= 8.
+    if (base_shift < 8) {
+      a0_x128 = _mm_loadu_si128((__m128i *)(above + base_x + base_shift - 1));
+      a1_x128 = _mm_loadu_si128((__m128i *)(above + base_x + base_shift));
+      a2_x128 = _mm_loadu_si128((__m128i *)(above + base_x + base_shift + 1));
+      a3_x128 = _mm_loadu_si128((__m128i *)(above + base_x + base_shift + 2));
+
+      a0_x128 =
+          _mm_shuffle_epi8(a0_x128, *(__m128i *)HighbdLoadMaskx[base_shift]);
+      a1_x128 =
+          _mm_shuffle_epi8(a1_x128, *(__m128i *)HighbdLoadMaskx[base_shift]);
+      a2_x128 =
+          _mm_shuffle_epi8(a2_x128, *(__m128i *)HighbdLoadMaskx[base_shift]);
+      a3_x128 =
+          _mm_shuffle_epi8(a3_x128, *(__m128i *)HighbdLoadMaskx[base_shift]);
+
+      a0_x = _mm256_castsi128_si256(a0_x128);
+      a1_x = _mm256_castsi128_si256(a1_x128);
+      a2_x = _mm256_castsi128_si256(a2_x128);
+      a3_x = _mm256_castsi128_si256(a3_x128);
+    } else {
+      a0_x = _mm256_setzero_si256();
+      a1_x = _mm256_setzero_si256();
+      a2_x = _mm256_setzero_si256();
+      a3_x = _mm256_setzero_si256();
+    }
+
+    // High half (columns 8..15): same taps 8 samples further on, with the
+    // shift reduced by 8 (and zero when base_shift <= 8).
+    int base_shift1 = 0;
+    if (base_shift > 8) {
+      base_shift1 = base_shift - 8;
+    }
+    if (base_shift1 < 8) {
+      a0_x128 = _mm_loadu_si128((__m128i *)(above + base_x + base_shift1 + 7));
+      a1_x128 = _mm_loadu_si128((__m128i *)(above + base_x + base_shift1 + 8));
+      a2_x128 = _mm_loadu_si128((__m128i *)(above + base_x + base_shift1 + 9));
+      a3_x128 = _mm_loadu_si128((__m128i *)(above + base_x + base_shift1 + 10));
+
+      a0_x128 =
+          _mm_shuffle_epi8(a0_x128, *(__m128i *)HighbdLoadMaskx[base_shift1]);
+      a1_x128 =
+          _mm_shuffle_epi8(a1_x128, *(__m128i *)HighbdLoadMaskx[base_shift1]);
+      a2_x128 =
+          _mm_shuffle_epi8(a2_x128, *(__m128i *)HighbdLoadMaskx[base_shift1]);
+      a3_x128 =
+          _mm_shuffle_epi8(a3_x128, *(__m128i *)HighbdLoadMaskx[base_shift1]);
+
+      a0_x = _mm256_inserti128_si256(a0_x, a0_x128, 1);
+      a1_x = _mm256_inserti128_si256(a1_x, a1_x128, 1);
+      a2_x = _mm256_inserti128_si256(a2_x, a2_x128, 1);
+      a3_x = _mm256_inserti128_si256(a3_x, a3_x128, 1);
+    }
+    // Filter only if at least one half was loaded; otherwise the row is zero.
+    if ((base_shift < 8) || base_shift1 < 8) {
+      // load filter
+      int shift_x = ((-(i + 1 + mrl_index) * dx) & 0x3F) >> 1;
+      f0_x = _mm256_set1_epi16(av1_dr_interp_filter[shift_x][0]);
+      f1_x = _mm256_set1_epi16(av1_dr_interp_filter[shift_x][1]);
+      f2_x = _mm256_set1_epi16(av1_dr_interp_filter[shift_x][2]);
+      f3_x = _mm256_set1_epi16(av1_dr_interp_filter[shift_x][3]);
+
+      // Weighted sum with saturating 16-bit adds.
+      val0 = _mm256_adds_epi16(_mm256_mullo_epi16(a0_x, f0_x),
+                               _mm256_mullo_epi16(a1_x, f1_x));
+      val1 = _mm256_adds_epi16(_mm256_mullo_epi16(a2_x, f2_x),
+                               _mm256_mullo_epi16(a3_x, f3_x));
+      val0 = _mm256_adds_epi16(val0, val1);
+
+      // Clamp, add the rounding offset, then shift down.
+      val0 = highbd_clamp_epi16_avx2(val0, bd);
+      val0 = _mm256_adds_epi16(val0, rnding);
+      dest[i - r] = _mm256_srli_epi16(val0, POWER_DR_INTERP_FILTER);
+    } else {
+      dest[i - r] = _mm256_setzero_si256();
+    }
+  }
+}
+
+// Zone-2 IDIF prediction in 16x16 tiles, 16-bit arithmetic. For every tile:
+//   1. predict from `above`;
+//   2. predict from `left` and transpose;
+//   3. blend the two per row through HighbdBaseMask and store.
+static void highbd_dr_prediction_z2_HxW_idif_avx2(
+    int H, int W, uint16_t *dst, ptrdiff_t stride, const uint16_t *above,
+    const uint16_t *left, int dx, int dy, int mrl_index, int bd) {
+  const int min_base_x = -(1 + mrl_index);
+  __m256i from_above[16];
+  __m256i from_left[16];
+
+  for (int r = 0; r < H; r += 16) {
+    // First output row of this band of tiles.
+    uint16_t *const band = dst + r * stride;
+
+    for (int j = 0; j < W; j += 16) {
+      assert((W - j) >= 16);
+      assert((H - r) >= 16);
+
+      highbd_dr_z2_16x16_idif_avx2(H, W, above, from_above, r, j, dx,
+                                   mrl_index, bd);
+
+      // Same kernel along `left` with rows and columns swapped; the in-place
+      // transpose turns its output back into rows.
+      highbd_dr_z2_16x16_idif_avx2(W, H, left, from_left, j, r, dy, mrl_index,
+                                   bd);
+      highbd_transpose16x16_avx2(from_left, from_left);
+
+      for (int k = 0; k < 16; ++k) {
+        const int base_x = ((j << 6) - (k + r + 1 + mrl_index) * dx) >> 6;
+        // Mask row index, clamped to [0, 16].
+        int mask_idx = min_base_x - base_x;
+        mask_idx = (mask_idx > 16) ? 16 : ((mask_idx < 0) ? 0 : mask_idx);
+
+        const __m256i merged = _mm256_blendv_epi8(
+            from_above[k], from_left[k], *(__m256i *)HighbdBaseMask[mask_idx]);
+        _mm256_storeu_si256((__m256i *)(band + k * stride + j), merged);
+      }
+    }
+  }
+}
+
+// Zone-2 helper, 32-bit arithmetic: predicts 16 rows (r .. r + 15) of a
+// 16-wide tile starting at column j from `above`, writing one __m256i per row
+// to dest[0 .. 15]. H and W are only used by the asserts.
+//
+// Each row is built from two calls to highbd_dr_row8_32bit_idif_avx2, one per
+// 8-sample half; a half whose shift reaches 8 is skipped.
+static void highbd_dr_z2_16x16_32bit_idif_avx2(int H, int W,
+                                               const uint16_t *above,
+                                               __m256i *dest, int r, int j,
+                                               int dx, int mrl_index, int bd) {
+  (void)H;
+  (void)W;
+  const int min_base_x = -(1 + mrl_index);
+  const int frac_bits_x = 6;
+  __m256i resx[2];
+
+  for (int i = r; i < r + 16; ++i) {
+    assert(i < H);
+    assert(j < W);
+
+    int y = i + 1;
+
+    int base_x = ((j << 6) - (y + mrl_index) * dx) >> frac_bits_x;
+    // How far base_x lies below min_base_x - 1 (zero when it does not).
+    int base_shift = 0;
+    if ((base_x) < (min_base_x - 1)) {
+      base_shift = (min_base_x - (base_x)-1);
+    }
+
+    // load filter
+    int shift_x = ((-(i + 1 + mrl_index) * dx) & 0x3F) >> 1;
+    __m256i f[4];
+    f[0] = _mm256_set1_epi32(av1_dr_interp_filter[shift_x][0]);
+    f[1] = _mm256_set1_epi32(av1_dr_interp_filter[shift_x][1]);
+    f[2] = _mm256_set1_epi32(av1_dr_interp_filter[shift_x][2]);
+    f[3] = _mm256_set1_epi32(av1_dr_interp_filter[shift_x][3]);
+
+    // Low half (columns 0..7).
+    if (base_shift < 8) {
+      resx[0] =
+          highbd_dr_row8_32bit_idif_avx2(above, f, base_x, base_shift, bd);
+
+    } else {
+      resx[0] = _mm256_setzero_si256();
+    }
+
+    // High half (columns 8..15): shift reduced by 8, zero when
+    // base_shift <= 8.
+    int base_shift1 = 0;
+    if (base_shift > 8) {
+      base_shift1 = base_shift - 8;
+    }
+    // resx[1] is left unassigned only when base_shift1 >= 8. That implies
+    // base_shift >= 16, so the else branch below runs and resx[1] is not read.
+    if (base_shift1 < 8) {
+      resx[1] =
+          highbd_dr_row8_32bit_idif_avx2(above, f, base_x + 8, base_shift1, bd);
+    }
+    if ((base_shift < 8) || base_shift1 < 8) {
+      // Join the low 128 bits of both halves into one 16-sample row.
+      dest[i - r] =
+          _mm256_inserti128_si256(resx[0], _mm256_castsi256_si128(resx[1]),
+                                  1);  // 16 16bit values
+    } else {
+      dest[i - r] = _mm256_setzero_si256();
+    }
+  }
+}
+
+// Zone-2 IDIF prediction in 16x16 tiles, 32-bit arithmetic. For every tile:
+//   1. predict from `above`;
+//   2. predict from `left` and transpose;
+//   3. blend the two per row through HighbdBaseMask and store.
+static void highbd_dr_prediction_32bit_z2_HxW_idif_avx2(
+    int H, int W, uint16_t *dst, ptrdiff_t stride, const uint16_t *above,
+    const uint16_t *left, int dx, int dy, int mrl_index, int bd) {
+  const int min_base_x = -(1 + mrl_index);
+  __m256i from_above[16];
+  __m256i from_left[16];
+
+  for (int r = 0; r < H; r += 16) {
+    // First output row of this band of tiles.
+    uint16_t *const band = dst + r * stride;
+
+    for (int j = 0; j < W; j += 16) {
+      assert((W - j) >= 16);
+      assert((H - r) >= 16);
+
+      highbd_dr_z2_16x16_32bit_idif_avx2(H, W, above, from_above, r, j, dx,
+                                         mrl_index, bd);
+
+      // Same kernel along `left` with rows and columns swapped; the in-place
+      // transpose turns its output back into rows.
+      highbd_dr_z2_16x16_32bit_idif_avx2(W, H, left, from_left, j, r, dy,
+                                         mrl_index, bd);
+      highbd_transpose16x16_avx2(from_left, from_left);
+
+      for (int k = 0; k < 16; ++k) {
+        const int base_x = ((j << 6) - (k + r + 1 + mrl_index) * dx) >> 6;
+        // Mask row index, clamped to [0, 16].
+        int mask_idx = min_base_x - base_x;
+        mask_idx = (mask_idx > 16) ? 16 : ((mask_idx < 0) ? 0 : mask_idx);
+
+        const __m256i merged = _mm256_blendv_epi8(
+            from_above[k], from_left[k], *(__m256i *)HighbdBaseMask[mask_idx]);
+        _mm256_storeu_si256((__m256i *)(band + k * stride + j), merged);
+      }
+    }
+  }
+}
+
+// Directional prediction, zone 2: 90 < angle < 180 using IDIF.
+// Blocks of width 4 or 8, and any block shorter than 16 rows, go through the
+// 8x8 tiling path (which picks its own arithmetic width). Everything else is
+// processed in 16x16 tiles with the arithmetic width chosen from bd.
+void av1_highbd_dr_prediction_z2_idif_avx2(uint16_t *dst, ptrdiff_t stride,
+                                           int bw, int bh,
+                                           const uint16_t *above,
+                                           const uint16_t *left, int dx, int dy,
+                                           int bd, int mrl_index) {
+  assert(dx > 0);
+  assert(dy > 0);
+
+  const int use_small_tiles = (bw == 4) || (bw == 8) || (bh < 16);
+  if (use_small_tiles) {
+    highbd_dr_32bit_z2_8x8_tiling_idif_avx2(bh, bw, dst, stride, above, left,
+                                            dx, dy, mrl_index, bd);
+    return;
+  }
+
+  if (bd < 10) {
+    highbd_dr_prediction_z2_HxW_idif_avx2(bh, bw, dst, stride, above, left, dx,
+                                          dy, mrl_index, bd);
+  } else {
+    highbd_dr_prediction_32bit_z2_HxW_idif_avx2(bh, bw, dst, stride, above,
+                                                left, dx, dy, mrl_index, bd);
+  }
+}
+
+//  Directional prediction, zone 3 functions
+// Zone-3 IDIF prediction for a 4x4 block: runs the 4-wide zone-1 kernel along
+// `left` (16-bit path for bd < 10, 32-bit otherwise), transposes the result
+// and stores four 4-sample rows.
+static void highbd_dr_prediction_z3_4x4_idif_avx2(uint16_t *dst,
+                                                  ptrdiff_t stride,
+                                                  const uint16_t *left, int dy,
+                                                  int bd, int mrl_index) {
+  __m128i pred[4];
+  __m128i out[4];
+
+  if (bd >= 10) {
+    highbd_dr_prediction_32bit_z1_4xN_internal_idif_avx2(4, pred, left, dy,
+                                                         mrl_index, bd);
+  } else {
+    highbd_dr_prediction_z1_4xN_internal_idif_avx2(4, pred, left, dy,
+                                                   mrl_index, bd);
+  }
+
+  highbd_transpose4x8_8x4_low_sse2(&pred[0], &pred[1], &pred[2], &pred[3],
+                                   &out[0], &out[1], &out[2], &out[3]);
+
+  // Only the low 64 bits (4 samples) of each vector are written.
+  for (int i = 0; i < 4; ++i) {
+    _mm_storel_epi64((__m128i *)(dst + i * stride), out[i]);
+  }
+}
+
+// Zone-3 IDIF prediction for an 8x8 block: runs the 8-wide zone-1 kernel along
+// `left` (16-bit path for bd < 10, 32-bit otherwise), transposes the result
+// and stores eight 8-sample rows.
+static void highbd_dr_prediction_z3_8x8_idif_avx2(uint16_t *dst,
+                                                  ptrdiff_t stride,
+                                                  const uint16_t *left, int dy,
+                                                  int bd, int mrl_index) {
+  __m128i pred[8];
+  __m128i out[8];
+
+  if (bd >= 10) {
+    highbd_dr_prediction_32bit_z1_8xN_internal_idif_avx2(8, pred, left, dy,
+                                                         mrl_index, bd);
+  } else {
+    highbd_dr_prediction_z1_8xN_internal_idif_avx2(8, pred, left, dy,
+                                                   mrl_index, bd);
+  }
+
+  highbd_transpose8x8_sse2(&pred[0], &pred[1], &pred[2], &pred[3], &pred[4],
+                           &pred[5], &pred[6], &pred[7], &out[0], &out[1],
+                           &out[2], &out[3], &out[4], &out[5], &out[6],
+                           &out[7]);
+
+  uint16_t *row = dst;
+  for (int i = 0; i < 8; ++i, row += stride) {
+    _mm_storeu_si128((__m128i *)row, out[i]);
+  }
+}
+
+// Zone-3 IDIF prediction for a 16x16 block: runs the 16-wide zone-1 kernel
+// along `left` (16-bit path for bd < 10, 32-bit otherwise), transposes the
+// result and stores sixteen 16-sample rows.
+static void highbd_dr_prediction_z3_16x16_idif_avx2(uint16_t *dst,
+                                                    ptrdiff_t stride,
+                                                    const uint16_t *left,
+                                                    int dy, int bd,
+                                                    int mrl_index) {
+  __m256i pred[16];
+  __m256i out[16];
+
+  if (bd >= 10) {
+    highbd_dr_prediction_32bit_z1_16xN_internal_idif_avx2(16, pred, left, dy,
+                                                          mrl_index, bd);
+  } else {
+    highbd_dr_prediction_z1_16xN_internal_idif_avx2(16, pred, left, dy,
+                                                    mrl_index, bd);
+  }
+
+  highbd_transpose16x16_avx2(pred, out);
+
+  uint16_t *row = dst;
+  for (int i = 0; i < 16; ++i, row += stride) {
+    _mm256_storeu_si256((__m256i *)row, out[i]);
+  }
+}
+
+// Zone-3 IDIF prediction for a 32x32 block: runs the 32-wide zone-1 kernel
+// along `left` into 64 vectors, then transposes them 16 at a time into the
+// four 16x16 quadrants of dst.
+static void highbd_dr_prediction_z3_32x32_idif_avx2(uint16_t *dst,
+                                                    ptrdiff_t stride,
+                                                    const uint16_t *left,
+                                                    int dy, int bd,
+                                                    int mrl_index) {
+  __m256i pred[64];
+  __m256i out[16];
+
+  if (bd >= 10) {
+    highbd_dr_prediction_32bit_z1_32xN_internal_idif_avx2(32, pred, left, dy,
+                                                          mrl_index, bd);
+  } else {
+    highbd_dr_prediction_z1_32xN_internal_idif_avx2(32, pred, left, dy,
+                                                    mrl_index, bd);
+  }
+
+  // Quadrant order: top-left, top-right, bottom-left, bottom-right.
+  for (int q = 0; q < 4; ++q) {
+    const int row0 = (q >> 1) * 16;
+    const int col0 = (q & 1) * 16;
+
+    highbd_transpose16x16_avx2(pred + 16 * q, out);
+    for (int j = 0; j < 16; ++j) {
+      _mm256_storeu_si256((__m256i *)(dst + (j + row0) * stride + col0),
+                          out[j]);
+    }
+  }
+}
+
+// Zone-3 IDIF prediction for a 64x64 block: runs the 64-wide zone-1 kernel
+// along `left` into a 64x64 scratch buffer (row pitch 64), then transposes the
+// scratch buffer into dst.
+static void highbd_dr_prediction_z3_64x64_idif_avx2(uint16_t *dst,
+                                                    ptrdiff_t stride,
+                                                    const uint16_t *left,
+                                                    int dy, int bd,
+                                                    int mrl_index) {
+  DECLARE_ALIGNED(16, uint16_t, scratch[64 * 64]);
+
+  if (bd >= 10) {
+    highbd_dr_prediction_32bit_z1_64xN_internal_idif_avx2(64, scratch, 64, left,
+                                                          dy, mrl_index, bd);
+  } else {
+    highbd_dr_prediction_z1_64xN_internal_idif_avx2(64, scratch, 64, left, dy,
+                                                    mrl_index, bd);
+  }
+
+  highbd_transpose(scratch, 64, dst, stride, 64, 64);
+}
+
+// Zone-3 IDIF prediction for a 4-wide, 8-tall block: runs the 8-wide zone-1
+// kernel along `left` for 4 lines, transposes the result and stores eight
+// 4-sample rows.
+static void highbd_dr_prediction_z3_4x8_idif_avx2(uint16_t *dst,
+                                                  ptrdiff_t stride,
+                                                  const uint16_t *left, int dy,
+                                                  int bd, int mrl_index) {
+  __m128i pred[4];
+  __m128i out[8];
+
+  if (bd >= 10) {
+    highbd_dr_prediction_32bit_z1_8xN_internal_idif_avx2(4, pred, left, dy,
+                                                         mrl_index, bd);
+  } else {
+    highbd_dr_prediction_z1_8xN_internal_idif_avx2(4, pred, left, dy,
+                                                   mrl_index, bd);
+  }
+
+  highbd_transpose4x8_8x4_sse2(&pred[0], &pred[1], &pred[2], &pred[3], &out[0],
+                               &out[1], &out[2], &out[3], &out[4], &out[5],
+                               &out[6], &out[7]);
+
+  // Only the low 64 bits (4 samples) of each vector are written.
+  uint16_t *row = dst;
+  for (int i = 0; i < 8; ++i, row += stride) {
+    _mm_storel_epi64((__m128i *)row, out[i]);
+  }
+}
+
+// Zone-3 IDIF prediction for an 8-wide, 4-tall block: runs the 4-wide zone-1
+// kernel along `left` for 8 lines, transposes the result and stores four
+// 8-sample rows.
+static void highbd_dr_prediction_z3_8x4_idif_avx2(uint16_t *dst,
+                                                  ptrdiff_t stride,
+                                                  const uint16_t *left, int dy,
+                                                  int bd, int mrl_index) {
+  __m128i pred[8];
+  __m128i out[4];
+
+  if (bd >= 10) {
+    highbd_dr_prediction_32bit_z1_4xN_internal_idif_avx2(8, pred, left, dy,
+                                                         mrl_index, bd);
+  } else {
+    highbd_dr_prediction_z1_4xN_internal_idif_avx2(8, pred, left, dy,
+                                                   mrl_index, bd);
+  }
+
+  highbd_transpose8x8_low_sse2(&pred[0], &pred[1], &pred[2], &pred[3],
+                               &pred[4], &pred[5], &pred[6], &pred[7], &out[0],
+                               &out[1], &out[2], &out[3]);
+
+  for (int i = 0; i < 4; ++i) {
+    _mm_storeu_si128((__m128i *)(dst + i * stride), out[i]);
+  }
+}
+
+// Zone-3 IDIF prediction for an 8-wide, 16-tall block: runs the 16-wide zone-1
+// kernel along `left` for 8 lines and transposes the result. Each transposed
+// vector carries two output rows: its low 128 bits are row i and its high
+// 128 bits are row i + 8.
+static void highbd_dr_prediction_z3_8x16_idif_avx2(uint16_t *dst,
+                                                   ptrdiff_t stride,
+                                                   const uint16_t *left, int dy,
+                                                   int bd, int mrl_index) {
+  __m256i pred[8];
+  __m256i out[8];
+
+  if (bd >= 10) {
+    highbd_dr_prediction_32bit_z1_16xN_internal_idif_avx2(8, pred, left, dy,
+                                                          mrl_index, bd);
+  } else {
+    highbd_dr_prediction_z1_16xN_internal_idif_avx2(8, pred, left, dy,
+                                                    mrl_index, bd);
+  }
+
+  highbd_transpose8x16_16x8_avx2(pred, out);
+
+  // Rows 0..7 from the low halves.
+  for (int i = 0; i < 8; ++i) {
+    const __m128i lo = _mm256_castsi256_si128(out[i]);
+    _mm_storeu_si128((__m128i *)(dst + i * stride), lo);
+  }
+  // Rows 8..15 from the high halves.
+  for (int i = 0; i < 8; ++i) {
+    const __m128i hi = _mm256_extracti128_si256(out[i], 1);
+    _mm_storeu_si128((__m128i *)(dst + (i + 8) * stride), hi);
+  }
+}
+
+static void highbd_dr_prediction_z3_16x8_idif_avx2(uint16_t *dst,
+                                                   ptrdiff_t stride,
+                                                   const uint16_t *left, int dy,
+                                                   int bd, int mrl_index) {
+  __m128i dstvec[16], d[16];
+  if (bd < 10) {
+    highbd_dr_prediction_z1_8xN_internal_idif_avx2(16, dstvec, left, dy,
+                                                   mrl_index, bd);
+  } else {
+    highbd_dr_prediction_32bit_z1_8xN_internal_idif_avx2(16, dstvec, left, dy,
+                                                         mrl_index, bd);
+  }
+  for (int i = 0; i < 16; i += 8) {
+    highbd_transpose8x8_sse2(&dstvec[0 + i], &dstvec[1 + i], &dstvec[2 + i],
+                             &dstvec[3 + i], &dstvec[4 + i], &dstvec[5 + i],
+                             &dstvec[6 + i], &dstvec[7 + i], &d[0 + i],
+                             &d[1 + i], &d[2 + i], &d[3 + i], &d[4 + i],
+                             &d[5 + i], &d[6 + i], &d[7 + i]);
+  }
+  for (int i = 0; i < 8; i++) {
+    _mm_storeu_si128((__m128i *)(dst + i * stride), d[i]);
+    _mm_storeu_si128((__m128i *)(dst + i * stride + 8), d[i + 8]);
+  }
+}
+
+static void highbd_dr_prediction_z3_4x16_idif_avx2(uint16_t *dst,
+                                                   ptrdiff_t stride,
+                                                   const uint16_t *left, int dy,
+                                                   int bd, int mrl_index) {
+  __m256i dstvec[4], d[4], d1;
+  if (bd < 10) {
+    highbd_dr_prediction_z1_16xN_internal_idif_avx2(4, dstvec, left, dy,
+                                                    mrl_index, bd);
+  } else {
+    highbd_dr_prediction_32bit_z1_16xN_internal_idif_avx2(4, dstvec, left, dy,
+                                                          mrl_index, bd);
+  }
+  highbd_transpose4x16_avx2(dstvec, d);
+  for (int i = 0; i < 4; i++) {
+    _mm_storel_epi64((__m128i *)(dst + i * stride),
+                     _mm256_castsi256_si128(d[i]));
+    d1 = _mm256_bsrli_epi128(d[i], 8);
+    _mm_storel_epi64((__m128i *)(dst + (i + 4) * stride),
+                     _mm256_castsi256_si128(d1));
+    _mm_storel_epi64((__m128i *)(dst + (i + 8) * stride),
+                     _mm256_extracti128_si256(d[i], 1));
+    _mm_storel_epi64((__m128i *)(dst + (i + 12) * stride),
+                     _mm256_extracti128_si256(d1, 1));
+  }
+}
+
+static void highbd_dr_prediction_z3_16x4_idif_avx2(uint16_t *dst,
+                                                   ptrdiff_t stride,
+                                                   const uint16_t *left, int dy,
+                                                   int bd, int mrl_index) {
+  __m128i dstvec[16], d[8];
+  if (bd < 10) {
+    highbd_dr_prediction_z1_4xN_internal_idif_avx2(16, dstvec, left, dy,
+                                                   mrl_index, bd);
+  } else {
+    highbd_dr_prediction_32bit_z1_4xN_internal_idif_avx2(16, dstvec, left, dy,
+                                                         mrl_index, bd);
+  }
+  highbd_transpose16x4_8x8_sse2(dstvec, d);
+
+  _mm_storeu_si128((__m128i *)(dst + 0 * stride), d[0]);
+  _mm_storeu_si128((__m128i *)(dst + 0 * stride + 8), d[1]);
+  _mm_storeu_si128((__m128i *)(dst + 1 * stride), d[2]);
+  _mm_storeu_si128((__m128i *)(dst + 1 * stride + 8), d[3]);
+  _mm_storeu_si128((__m128i *)(dst + 2 * stride), d[4]);
+  _mm_storeu_si128((__m128i *)(dst + 2 * stride + 8), d[5]);
+  _mm_storeu_si128((__m128i *)(dst + 3 * stride), d[6]);
+  _mm_storeu_si128((__m128i *)(dst + 3 * stride + 8), d[7]);
+}
+
+static void highbd_dr_prediction_z3_8x32_idif_avx2(uint16_t *dst,
+                                                   ptrdiff_t stride,
+                                                   const uint16_t *left, int dy,
+                                                   int bd, int mrl_index) {
+  __m256i dstvec[16], d[16];
+  if (bd < 10) {
+    highbd_dr_prediction_z1_32xN_internal_idif_avx2(8, dstvec, left, dy,
+                                                    mrl_index, bd);
+  } else {
+    highbd_dr_prediction_32bit_z1_32xN_internal_idif_avx2(8, dstvec, left, dy,
+                                                          mrl_index, bd);
+  }
+
+  for (int i = 0; i < 16; i += 8) {
+    highbd_transpose8x16_16x8_avx2(dstvec + i, d + i);
+  }
+
+  for (int i = 0; i < 8; i++) {
+    _mm_storeu_si128((__m128i *)(dst + i * stride),
+                     _mm256_castsi256_si128(d[i]));
+  }
+  for (int i = 0; i < 8; i++) {
+    _mm_storeu_si128((__m128i *)(dst + (i + 8) * stride),
+                     _mm256_extracti128_si256(d[i], 1));
+  }
+  for (int i = 8; i < 16; i++) {
+    _mm_storeu_si128((__m128i *)(dst + (i + 8) * stride),
+                     _mm256_castsi256_si128(d[i]));
+  }
+  for (int i = 8; i < 16; i++) {
+    _mm_storeu_si128((__m128i *)(dst + (i + 16) * stride),
+                     _mm256_extracti128_si256(d[i], 1));
+  }
+}
+
+static void highbd_dr_prediction_z3_32x8_idif_avx2(uint16_t *dst,
+                                                   ptrdiff_t stride,
+                                                   const uint16_t *left, int dy,
+                                                   int bd, int mrl_index) {
+  __m128i dstvec[32], d[32];
+  if (bd < 10) {
+    highbd_dr_prediction_z1_8xN_internal_idif_avx2(32, dstvec, left, dy,
+                                                   mrl_index, bd);
+  } else {
+    highbd_dr_prediction_32bit_z1_8xN_internal_idif_avx2(32, dstvec, left, dy,
+                                                         mrl_index, bd);
+  }
+
+  for (int i = 0; i < 32; i += 8) {
+    highbd_transpose8x8_sse2(&dstvec[0 + i], &dstvec[1 + i], &dstvec[2 + i],
+                             &dstvec[3 + i], &dstvec[4 + i], &dstvec[5 + i],
+                             &dstvec[6 + i], &dstvec[7 + i], &d[0 + i],
+                             &d[1 + i], &d[2 + i], &d[3 + i], &d[4 + i],
+                             &d[5 + i], &d[6 + i], &d[7 + i]);
+  }
+  for (int i = 0; i < 8; i++) {
+    _mm_storeu_si128((__m128i *)(dst + i * stride), d[i]);
+    _mm_storeu_si128((__m128i *)(dst + i * stride + 8), d[i + 8]);
+    _mm_storeu_si128((__m128i *)(dst + i * stride + 16), d[i + 16]);
+    _mm_storeu_si128((__m128i *)(dst + i * stride + 24), d[i + 24]);
+  }
+}
+
+static void highbd_dr_prediction_z3_16x32_idif_avx2(uint16_t *dst,
+                                                    ptrdiff_t stride,
+                                                    const uint16_t *left,
+                                                    int dy, int bd,
+                                                    int mrl_index) {
+  __m256i dstvec[32], d[32];
+  if (bd < 10) {
+    highbd_dr_prediction_z1_32xN_internal_idif_avx2(16, dstvec, left, dy,
+                                                    mrl_index, bd);
+  } else {
+    highbd_dr_prediction_32bit_z1_32xN_internal_idif_avx2(16, dstvec, left, dy,
+                                                          mrl_index, bd);
+  }
+  for (int i = 0; i < 32; i += 8) {
+    highbd_transpose8x16_16x8_avx2(dstvec + i, d + i);
+  }
+  // store
+  for (int j = 0; j < 32; j += 16) {
+    for (int i = 0; i < 8; i++) {
+      _mm_storeu_si128((__m128i *)(dst + (i + j) * stride),
+                       _mm256_castsi256_si128(d[(i + j)]));
+    }
+    for (int i = 0; i < 8; i++) {
+      _mm_storeu_si128((__m128i *)(dst + (i + j) * stride + 8),
+                       _mm256_castsi256_si128(d[(i + j) + 8]));
+    }
+    for (int i = 8; i < 16; i++) {
+      _mm256_storeu_si256(
+          (__m256i *)(dst + (i + j) * stride),
+          _mm256_inserti128_si256(
+              d[(i + j)], _mm256_extracti128_si256(d[(i + j) - 8], 1), 0));
+    }
+  }
+}
+
+static void highbd_dr_prediction_z3_32x16_idif_avx2(uint16_t *dst,
+                                                    ptrdiff_t stride,
+                                                    const uint16_t *left,
+                                                    int dy, int bd,
+                                                    int mrl_index) {
+  __m256i dstvec[32], d[16];
+  if (bd < 10) {
+    highbd_dr_prediction_z1_16xN_internal_idif_avx2(32, dstvec, left, dy,
+                                                    mrl_index, bd);
+  } else {
+    highbd_dr_prediction_32bit_z1_16xN_internal_idif_avx2(32, dstvec, left, dy,
+                                                          mrl_index, bd);
+  }
+  for (int i = 0; i < 32; i += 16) {
+    highbd_transpose16x16_avx2((dstvec + i), d);
+    for (int j = 0; j < 16; j++) {
+      _mm256_storeu_si256((__m256i *)(dst + j * stride + i), d[j]);
+    }
+  }
+}
+
+static void highbd_dr_prediction_z3_32x64_idif_avx2(uint16_t *dst,
+                                                    ptrdiff_t stride,
+                                                    const uint16_t *left,
+                                                    int dy, int bd,
+                                                    int mrl_index) {
+  uint16_t dstT[64 * 32];
+  if (bd < 10) {
+    highbd_dr_prediction_z1_64xN_internal_idif_avx2(32, dstT, 64, left, dy,
+                                                    mrl_index, bd);
+  } else {
+    highbd_dr_prediction_32bit_z1_64xN_internal_idif_avx2(32, dstT, 64, left,
+                                                          dy, mrl_index, bd);
+  }
+  highbd_transpose(dstT, 64, dst, stride, 32, 64);
+}
+
+static void highbd_dr_prediction_z3_64x32_idif_avx2(uint16_t *dst,
+                                                    ptrdiff_t stride,
+                                                    const uint16_t *left,
+                                                    int dy, int bd,
+                                                    int mrl_index) {
+  DECLARE_ALIGNED(16, uint16_t, dstT[32 * 64]);
+  highbd_dr_prediction_z1_32xN_idif_avx2(64, dstT, 32, left, dy, bd, mrl_index);
+  highbd_transpose(dstT, 32, dst, stride, 64, 32);
+  return;
+}
+
+static void highbd_dr_prediction_z3_16x64_idif_avx2(uint16_t *dst,
+                                                    ptrdiff_t stride,
+                                                    const uint16_t *left,
+                                                    int dy, int bd,
+                                                    int mrl_index) {
+  DECLARE_ALIGNED(16, uint16_t, dstT[64 * 16]);
+  if (bd < 10) {
+    highbd_dr_prediction_z1_64xN_internal_idif_avx2(16, dstT, 64, left, dy,
+                                                    mrl_index, bd);
+  } else {
+    highbd_dr_prediction_32bit_z1_64xN_internal_idif_avx2(16, dstT, 64, left,
+                                                          dy, mrl_index, bd);
+  }
+  highbd_transpose(dstT, 64, dst, stride, 16, 64);
+}
+
+static void highbd_dr_prediction_z3_64x16_idif_avx2(uint16_t *dst,
+                                                    ptrdiff_t stride,
+                                                    const uint16_t *left,
+                                                    int dy, int bd,
+                                                    int mrl_index) {
+  __m256i dstvec[64], d[16];
+  if (bd < 10) {
+    highbd_dr_prediction_z1_16xN_internal_idif_avx2(64, dstvec, left, dy,
+                                                    mrl_index, bd);
+  } else {
+    highbd_dr_prediction_32bit_z1_16xN_internal_idif_avx2(64, dstvec, left, dy,
+                                                          mrl_index, bd);
+  }
+  for (int i = 0; i < 64; i += 16) {
+    highbd_transpose16x16_avx2((dstvec + i), d);
+    for (int j = 0; j < 16; j++) {
+      _mm256_storeu_si256((__m256i *)(dst + j * stride + i), d[j]);
+    }
+  }
+}
+
+void av1_highbd_dr_prediction_z3_idif_avx2(uint16_t *dst, ptrdiff_t stride,
+                                           int bw, int bh,
+                                           const uint16_t *above,
+                                           const uint16_t *left, int dx, int dy,
+                                           int bd, int mrl_index) {
+  (void)above;
+  (void)dx;
+
+  assert(dx == 1);
+  assert(dy > 0);
+
+  if (bw == bh) {
+    switch (bw) {
+      case 4:
+        highbd_dr_prediction_z3_4x4_idif_avx2(dst, stride, left, dy, bd,
+                                              mrl_index);
+        break;
+      case 8:
+        highbd_dr_prediction_z3_8x8_idif_avx2(dst, stride, left, dy, bd,
+                                              mrl_index);
+        break;
+      case 16:
+        highbd_dr_prediction_z3_16x16_idif_avx2(dst, stride, left, dy, bd,
+                                                mrl_index);
+        break;
+      case 32:
+        highbd_dr_prediction_z3_32x32_idif_avx2(dst, stride, left, dy, bd,
+                                                mrl_index);
+        break;
+      case 64:
+        highbd_dr_prediction_z3_64x64_idif_avx2(dst, stride, left, dy, bd,
+                                                mrl_index);
+        break;
+    }
+  } else {
+    if (bw < bh) {
+      if (bw + bw == bh) {
+        switch (bw) {
+          case 4:
+            highbd_dr_prediction_z3_4x8_idif_avx2(dst, stride, left, dy, bd,
+                                                  mrl_index);
+            break;
+          case 8:
+            highbd_dr_prediction_z3_8x16_idif_avx2(dst, stride, left, dy, bd,
+                                                   mrl_index);
+            break;
+          case 16:
+            highbd_dr_prediction_z3_16x32_idif_avx2(dst, stride, left, dy, bd,
+                                                    mrl_index);
+            break;
+          case 32:
+            highbd_dr_prediction_z3_32x64_idif_avx2(dst, stride, left, dy, bd,
+                                                    mrl_index);
+            break;
+        }
+      } else {
+        switch (bw) {
+          case 4:
+            highbd_dr_prediction_z3_4x16_idif_avx2(dst, stride, left, dy, bd,
+                                                   mrl_index);
+            break;
+          case 8:
+            highbd_dr_prediction_z3_8x32_idif_avx2(dst, stride, left, dy, bd,
+                                                   mrl_index);
+            break;
+          case 16:
+            highbd_dr_prediction_z3_16x64_idif_avx2(dst, stride, left, dy, bd,
+                                                    mrl_index);
+            break;
+        }
+      }
+    } else {
+      if (bh + bh == bw) {
+        switch (bh) {
+          case 4:
+            highbd_dr_prediction_z3_8x4_idif_avx2(dst, stride, left, dy, bd,
+                                                  mrl_index);
+            break;
+          case 8:
+            highbd_dr_prediction_z3_16x8_idif_avx2(dst, stride, left, dy, bd,
+                                                   mrl_index);
+            break;
+          case 16:
+            highbd_dr_prediction_z3_32x16_idif_avx2(dst, stride, left, dy, bd,
+                                                    mrl_index);
+            break;
+          case 32:
+            highbd_dr_prediction_z3_64x32_idif_avx2(dst, stride, left, dy, bd,
+                                                    mrl_index);
+            break;
+        }
+      } else {
+        switch (bh) {
+          case 4:
+            highbd_dr_prediction_z3_16x4_idif_avx2(dst, stride, left, dy, bd,
+                                                   mrl_index);
+            break;
+          case 8:
+            highbd_dr_prediction_z3_32x8_idif_avx2(dst, stride, left, dy, bd,
+                                                   mrl_index);
+            break;
+          case 16:
+            highbd_dr_prediction_z3_64x16_idif_avx2(dst, stride, left, dy, bd,
+                                                    mrl_index);
+            break;
+        }
+      }
+    }
+  }
+  return;
+}
+#endif  // CONFIG_IDIF
diff --git a/aom_dsp/x86/sad_highbd_avx2.c b/aom_dsp/x86/sad_highbd_avx2.c
index 3fc91d8..bae6691 100644
--- a/aom_dsp/x86/sad_highbd_avx2.c
+++ b/aom_dsp/x86/sad_highbd_avx2.c
@@ -20,7 +20,8 @@
 #include "aom_ports/mem.h"
 
 // SAD
-static INLINE unsigned int get_sad_from_mm256_epi32(const __m256i *v) {
+static AOM_FORCE_INLINE unsigned int get_sad_from_mm256_epi32(
+    const __m256i *v) {
   // input 8 32-bit summation
   __m128i lo128, hi128;
   __m256i u = _mm256_srli_si256(*v, 8);
@@ -38,8 +39,8 @@
   return (unsigned int)_mm_cvtsi128_si32(lo128);
 }
 
-static INLINE void highbd_sad16x4_core_avx2(__m256i *s, __m256i *r,
-                                            __m256i *sad_acc) {
+static AOM_FORCE_INLINE void highbd_sad16x4_core_avx2(__m256i *s, __m256i *r,
+                                                      __m256i *sad_acc) {
   const __m256i zero = _mm256_setzero_si256();
   int i;
   for (i = 0; i < 4; i++) {
@@ -59,9 +60,10 @@
 }
 
 // If sec_ptr = 0, calculate regular SAD. Otherwise, calculate average SAD.
-static INLINE void sad16x4(const uint16_t *src_ptr, int src_stride,
-                           const uint16_t *ref_ptr, int ref_stride,
-                           const uint16_t *sec_ptr, __m256i *sad_acc) {
+static AOM_FORCE_INLINE void sad16x4(const uint16_t *src_ptr, int src_stride,
+                                     const uint16_t *ref_ptr, int ref_stride,
+                                     const uint16_t *sec_ptr,
+                                     __m256i *sad_acc) {
   __m256i s[4], r[4];
   s[0] = _mm256_loadu_si256((const __m256i *)src_ptr);
   s[1] = _mm256_loadu_si256((const __m256i *)(src_ptr + src_stride));
@@ -98,9 +100,10 @@
   return (unsigned int)get_sad_from_mm256_epi32(&sad);
 }
 
-static void sad32x4(const uint16_t *src_ptr, int src_stride,
-                    const uint16_t *ref_ptr, int ref_stride,
-                    const uint16_t *sec_ptr, __m256i *sad_acc) {
+static AOM_FORCE_INLINE void sad32x4(const uint16_t *src_ptr, int src_stride,
+                                     const uint16_t *ref_ptr, int ref_stride,
+                                     const uint16_t *sec_ptr,
+                                     __m256i *sad_acc) {
   __m256i s[4], r[4];
   int row_sections = 0;
 
@@ -149,9 +152,10 @@
   return get_sad_from_mm256_epi32(&sad);
 }
 
-static void sad64x2(const uint16_t *src_ptr, int src_stride,
-                    const uint16_t *ref_ptr, int ref_stride,
-                    const uint16_t *sec_ptr, __m256i *sad_acc) {
+static AOM_FORCE_INLINE void sad64x2(const uint16_t *src_ptr, int src_stride,
+                                     const uint16_t *ref_ptr, int ref_stride,
+                                     const uint16_t *sec_ptr,
+                                     __m256i *sad_acc) {
   __m256i s[4], r[4];
   int i;
   for (i = 0; i < 2; i++) {
@@ -589,8 +593,8 @@
 
 // SAD 4D
 // Combine 4 __m256i input vectors  v to uint32_t result[4]
-static INLINE void get_4d_sad_from_mm256_epi32(const __m256i *v,
-                                               uint32_t *res) {
+static AOM_FORCE_INLINE void get_4d_sad_from_mm256_epi32(const __m256i *v,
+                                                         uint32_t *res) {
   __m256i u0, u1, u2, u3;
   const __m256i mask = yy_set1_64_from_32i(UINT32_MAX);
   __m128i sad;
@@ -628,7 +632,7 @@
   _mm_storeu_si128((__m128i *)res, sad);
 }
 
-static void init_sad(__m256i *s) {
+static AOM_FORCE_INLINE void init_sad(__m256i *s) {
   s[0] = _mm256_setzero_si256();
   s[1] = _mm256_setzero_si256();
   s[2] = _mm256_setzero_si256();
diff --git a/apps/aomdec.c b/apps/aomdec.c
index b9f3981..f8de253 100644
--- a/apps/aomdec.c
+++ b/apps/aomdec.c
@@ -889,6 +889,11 @@
     got_data = 0;
     while ((img = aom_codec_get_frame(&decoder, &iter))) {
       ++frame_out;
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+      if (frame_in < frame_out) {  // No OBUs for show_existing_frame.
+        frame_in = frame_out;
+      }
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
       got_data = 1;
 
       if (AOM_CODEC_CONTROL_TYPECHECKED(&decoder, AOMD_GET_FRAME_CORRUPTED,
diff --git a/apps/aomenc.c b/apps/aomenc.c
index a3f88e8..88a8da3 100644
--- a/apps/aomenc.c
+++ b/apps/aomenc.c
@@ -221,6 +221,9 @@
 #endif
                                         AV1E_SET_SUBGOP_CONFIG_STR,
                                         AV1E_SET_SUBGOP_CONFIG_PATH,
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+                                        AV1E_SET_FRAME_OUTPUT_ORDER_DERIVATION,
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
                                         0 };
 
 const arg_def_t *main_args[] = { &g_av1_codec_arg_defs.help,
@@ -443,16 +446,28 @@
 #if CONFIG_BAWP
   &g_av1_codec_arg_defs.enable_bawp,
 #endif  // CONFIG_BAWP
+#if CONFIG_CWP
+  &g_av1_codec_arg_defs.enable_cwp,
+#endif  // CONFIG_CWP
+#if CONFIG_D071_IMP_MSK_BLD
+  &g_av1_codec_arg_defs.enable_imp_msk_bld,
+#endif  // CONFIG_D071_IMP_MSK_BLD
   &g_av1_codec_arg_defs.enable_fsc,
 #if CONFIG_ORIP
   &g_av1_codec_arg_defs.enable_orip,
 #endif
+#if CONFIG_IDIF
+  &g_av1_codec_arg_defs.enable_idif,
+#endif  // CONFIG_IDIF
   &g_av1_codec_arg_defs.enable_ist,
 #if CONFIG_CROSS_CHROMA_TX
   &g_av1_codec_arg_defs.enable_cctx,
 #endif  // CONFIG_CROSS_CHROMA_TX
   &g_av1_codec_arg_defs.enable_ibp,
   &g_av1_codec_arg_defs.explicit_ref_frame_map,
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+  &g_av1_codec_arg_defs.enable_frame_output_order,
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
   &g_av1_codec_arg_defs.max_drl_refmvs,
 #if CONFIG_REF_MV_BANK
   &g_av1_codec_arg_defs.enable_refmvbank,
@@ -481,6 +496,9 @@
 #if CONFIG_JOINT_MVD
   &g_av1_codec_arg_defs.enable_joint_mvd,
 #endif  // CONFIG_JOINT_MVD
+#if CONFIG_REFINEMV
+  &g_av1_codec_arg_defs.enable_refinemv,
+#endif  // CONFIG_REFINEMV
 #if CONFIG_PAR_HIDING
   &g_av1_codec_arg_defs.enable_parity_hiding,
 #endif  // CONFIG_PAR_HIDING
@@ -632,11 +650,7 @@
 #if CONFIG_EXT_RECUR_PARTITIONS
   config->erp_pruning_level = 5;
   config->use_ml_erp_pruning = 0;
-#if CONFIG_H_PARTITION
   config->enable_ext_partitions = 1;
-#else
-  config->enable_ext_partitions = 0;
-#endif  // CONFIG_H_PARTITION
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
   config->enable_sdp = 1;
   config->enable_mrls = 1;
@@ -646,10 +660,19 @@
 #if CONFIG_BAWP
   config->enable_bawp = 1;
 #endif  // CONFIG_BAWP
+#if CONFIG_CWP
+  config->enable_cwp = 1;
+#endif  // CONFIG_CWP
+#if CONFIG_D071_IMP_MSK_BLD
+  config->enable_imp_msk_bld = 1;
+#endif  // CONFIG_D071_IMP_MSK_BLD
   config->enable_fsc = 1;
 #if CONFIG_ORIP
   config->enable_orip = 1;
 #endif
+#if CONFIG_IDIF
+  config->enable_idif = 1;
+#endif  // CONFIG_IDIF
   config->enable_ist = 1;
 #if CONFIG_CROSS_CHROMA_TX
   config->enable_cctx = 1;
@@ -667,6 +690,9 @@
 #if CONFIG_JOINT_MVD
   config->enable_joint_mvd = 1;
 #endif
+#if CONFIG_REFINEMV
+  config->enable_refinemv = 1;
+#endif  // CONFIG_REFINEMV
   config->enable_flip_idtx = 1;
   config->enable_deblocking = 1;
   config->enable_cdef = 1;
@@ -710,6 +736,9 @@
   config->enable_opfl_refine = 1;
 #endif  // CONFIG_OPTFLOW_REFINEMENT
   config->explicit_ref_frame_map = 0;
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+  config->enable_frame_output_order = 1;
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
   config->enable_intra_edge_filter = 1;
   config->enable_tx64 = 1;
   config->enable_smooth_interintra = 1;
@@ -1513,6 +1542,9 @@
 #if CONFIG_ORIP
           ", ORIP(%d)"
 #endif  // CONFIG_CONFIG_ORIP
+#if CONFIG_IDIF
+          ", IDIF(%d)"
+#endif  // CONFIG_IDIF
           ", IBP(%d)"
           "\n",
           encoder_cfg->enable_intra_edge_filter,
@@ -1522,8 +1554,18 @@
           ,
           encoder_cfg->enable_orip
 #endif  //  CONFIG_ORIP
+#if CONFIG_IDIF
+          ,
+          encoder_cfg->enable_idif
+#endif  //  CONFIG_IDIF
           ,
           encoder_cfg->enable_ibp);
+#if CONFIG_ADAPTIVE_DS_FILTER
+  fprintf(
+      stdout,
+      "                               : Adaptive Down sample filter: (%d)\n",
+      encoder_cfg->enable_cfl_ds_filter);
+#endif  // CONFIG_ADAPTIVE_DS_FILTER
 
   fprintf(stdout,
           "Tool setting (Inter)           : InterIntra (%d), OBMC (%d), "
@@ -1549,6 +1591,15 @@
   fprintf(stdout, "                               : BAWP (%d)\n",
           encoder_cfg->enable_bawp);
 #endif  // CONFIG_BAWP
+#if CONFIG_CWP
+  fprintf(stdout, "                               : CWP (%d)\n",
+          encoder_cfg->enable_cwp);
+#endif  // CONFIG_CWP
+
+#if CONFIG_D071_IMP_MSK_BLD
+  fprintf(stdout, "                               : ImpMskBld (%d)\n",
+          encoder_cfg->enable_imp_msk_bld);
+#endif  // CONFIG_D071_IMP_MSK_BLD
 
   fprintf(stdout,
           "                               : GlobalMotion (%d), "
@@ -1570,16 +1621,14 @@
           "                               : Flexible MV precisions: (%d)\n",
           encoder_cfg->enable_flex_mvres);
 #endif  // CONFIG_FLEX_MVRES
-#if CONFIG_ADAPTIVE_DS_FILTER
-  fprintf(
-      stdout,
-      "                               : Adaptive Down sample filter: (%d)\n",
-      encoder_cfg->enable_cfl_ds_filter);
-#endif  // CONFIG_ADAPTIVE_DS_FILTER
 #if CONFIG_JOINT_MVD
   fprintf(stdout, "                               : Joint MVD coding: (%d)\n",
           encoder_cfg->enable_joint_mvd);
 #endif
+#if CONFIG_REFINEMV
+  fprintf(stdout, "                               : RefineMV mode: (%d)\n",
+          encoder_cfg->enable_refinemv);
+#endif  // CONFIG_REFINEMV
   fprintf(stdout,
           "                               : InterInterWedge (%d), "
           "InterIntraWedge (%d), RefFrameMv (%d)\n",
@@ -1926,7 +1975,11 @@
 
     switch (pkt->kind) {
       case AOM_CODEC_CX_FRAME_PKT:
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+        stream->frames_out += pkt->data.frame.frame_count;
+#else   // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
         ++stream->frames_out;
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
         update_rate_histogram(stream->rate_hist, cfg, pkt);
 #if CONFIG_WEBM_IO
         if (stream->config.write_webm) {
@@ -2358,7 +2411,11 @@
     }
 
     // Keep track of the total number of frames passed to the encoder.
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+    unsigned int seen_frames = 0;
+#else   // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
     int seen_frames = 0;
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
     // Does the encoder have queued data that needs retrieval?
     int got_data = 0;
     // Is there a frame available for processing?
@@ -2425,6 +2482,11 @@
         }
       }
       fflush(stdout);
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+      FOREACH_STREAM(stream, streams) {
+        if (stream->frames_out < seen_frames) got_data = 1;
+      }
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
     }
 
     if (stream_cnt > 1) fprintf(stderr, "\n");
diff --git a/av1/arg_defs.c b/av1/arg_defs.c
index ca367cb..b34a95b 100644
--- a/av1/arg_defs.c
+++ b/av1/arg_defs.c
@@ -410,6 +410,16 @@
                          "Enable block adaptive weighted prediction (BAWP)"
                          "(0: false, 1: true (default))"),
 #endif  // CONFIG_BAWP
+#if CONFIG_CWP
+  .enable_cwp = ARG_DEF(NULL, "enable-cwp", 1,
+                        "Enable compound weighted prediction (CWP)"
+                        "(0: false, 1: true (default))"),
+#endif  // CONFIG_CWP
+#if CONFIG_D071_IMP_MSK_BLD
+  .enable_imp_msk_bld = ARG_DEF(NULL, "enable-imp-msk-bld", 1,
+                                "Enable implicit masked blending"
+                                "(0: false, 1: true (default))"),
+#endif  // CONFIG_D071_IMP_MSK_BLD
   .enable_fsc = ARG_DEF(NULL, "enable-fsc", 1,
                         "Enable forward skip coding"
                         "(0: false, 1: true (default))"),
@@ -418,6 +428,11 @@
                          "Enable Offset Based refinement of intra prediction"
                          "(0: false, 1: true (default))"),
 #endif
+#if CONFIG_IDIF
+  .enable_idif = ARG_DEF(NULL, "enable-idif", 1,
+                         "Enable Intra Directional Interpolation Filter"
+                         "(0: false, 1: true (default))"),
+#endif  // CONFIG_IDIF
   .enable_ist = ARG_DEF(NULL, "enable-ist", 1,
                         "Enable intra secondary transform"
                         "(0: false, 1: true (default))"),
@@ -452,6 +467,12 @@
                               "Enable joint MVD coding"
                               "(0: false, 1: true (default))"),
 #endif  // CONFIG_JOINT_MVD
+
+#if CONFIG_REFINEMV
+  .enable_refinemv = ARG_DEF(NULL, "enable-refinemv", 1,
+                             "Enable RefineMV mode"
+                             "(0: false, 1: true (default))"),
+#endif  // CONFIG_REFINEMV
   .min_partition_size =
       ARG_DEF(NULL, "min-partition-size", 1,
               "Set min partition size "
@@ -511,7 +532,7 @@
                                      "(0: false, 1: true (default))"),
   .enable_global_motion = ARG_DEF(NULL, "enable-global-motion", 1,
                                   "Enable global motion "
-                                  "(0: false, 1: true (default))"),
+                                  "(0: false (default), 1: true)"),
   .enable_warped_motion = ARG_DEF(NULL, "enable-warped-motion", 1,
                                   "Enable local warped motion "
                                   "(0: false, 1: true (default))"),
@@ -679,6 +700,12 @@
       ARG_DEF(NULL, "explicit-ref-frame-map", 1,
               "Explicitly signal the reference frame mapping (0: off "
               "(default), 1: on)"),
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+  .enable_frame_output_order =
+      ARG_DEF(NULL, "enable-frame-output-order", 1,
+              "Enable frame output order derivation based on order hint"
+              "(0: off, 1: on (default))"),
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
   .target_seq_level_idx = ARG_DEF(
       NULL, "target-seq-level-idx", 1,
       "Target sequence level index. "
diff --git a/av1/arg_defs.h b/av1/arg_defs.h
index ecc6d6b..0d6389a 100644
--- a/av1/arg_defs.h
+++ b/av1/arg_defs.h
@@ -163,10 +163,19 @@
 #if CONFIG_BAWP
   arg_def_t enable_bawp;
 #endif  // CONFIG_BAWP
+#if CONFIG_CWP
+  arg_def_t enable_cwp;
+#endif  // CONFIG_CWP
+#if CONFIG_D071_IMP_MSK_BLD
+  arg_def_t enable_imp_msk_bld;
+#endif  // CONFIG_D071_IMP_MSK_BLD
   arg_def_t enable_fsc;
 #if CONFIG_ORIP
   arg_def_t enable_orip;
 #endif
+#if CONFIG_IDIF
+  arg_def_t enable_idif;
+#endif  // CONFIG_IDIF
   arg_def_t enable_ist;
 #if CONFIG_CROSS_CHROMA_TX
   arg_def_t enable_cctx;
@@ -184,6 +193,9 @@
 #if CONFIG_JOINT_MVD
   arg_def_t enable_joint_mvd;
 #endif  // CONFIG_JOINT_MVD
+#if CONFIG_REFINEMV
+  arg_def_t enable_refinemv;
+#endif  // CONFIG_REFINEMV
   arg_def_t min_partition_size;
   arg_def_t max_partition_size;
   arg_def_t enable_chroma_deltaq;
@@ -261,6 +273,9 @@
   arg_def_t max_reference_frames;
   arg_def_t reduced_reference_set;
   arg_def_t explicit_ref_frame_map;
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+  arg_def_t enable_frame_output_order;
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
   arg_def_t target_seq_level_idx;
   arg_def_t set_min_cr;
   arg_def_t input_color_primaries;
diff --git a/av1/av1.cmake b/av1/av1.cmake
index 87796e5..409084b 100644
--- a/av1/av1.cmake
+++ b/av1/av1.cmake
@@ -283,7 +283,11 @@
   "${AOM_ROOT}/third_party/vector/vector.c"
   "${AOM_ROOT}/third_party/vector/vector.h"
   "${AOM_ROOT}/av1/encoder/dwt.c"
-  "${AOM_ROOT}/av1/encoder/dwt.h")
+  "${AOM_ROOT}/av1/encoder/dwt.h"
+  "${AOM_ROOT}/common/md5_utils.c"
+  "${AOM_ROOT}/common/md5_utils.h"
+  "${AOM_ROOT}/common/rawenc.c"
+  "${AOM_ROOT}/common/rawenc.h")
 
 if(CONFIG_TUNE_VMAF)
   list(APPEND AOM_AV1_ENCODER_SOURCES "${AOM_ROOT}/av1/encoder/tune_vmaf.c"
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c
index e9a969f..f949c19 100644
--- a/av1/av1_cx_iface.c
+++ b/av1/av1_cx_iface.c
@@ -137,10 +137,19 @@
 #if CONFIG_BAWP
   int enable_bawp;  // enable block adaptive weighted prediction
 #endif              // CONFIG_BAWP
-  int enable_fsc;   // enable forward skip coding
+#if CONFIG_CWP
+  int enable_cwp;  // enable compound weighted prediction
+#endif             // CONFIG_CWP
+#if CONFIG_D071_IMP_MSK_BLD
+  int enable_imp_msk_bld;
+#endif             // CONFIG_D071_IMP_MSK_BLD
+  int enable_fsc;  // enable forward skip coding
 #if CONFIG_ORIP
   int enable_orip;  // enable ORIP
 #endif              // CONFIG_ORIP
+#if CONFIG_IDIF
+  int enable_idif;  // enable IDIF
+#endif              // CONFIG_IDIF
   int enable_ist;   // enable intra secondary transform
 #if CONFIG_CROSS_CHROMA_TX
   int enable_cctx;  // enable cross-chroma component transform
@@ -159,8 +168,11 @@
 #endif                       // CONFIG_ADAPTIVE_DS_FILTER
 
 #if CONFIG_JOINT_MVD
-  int enable_joint_mvd;          // enable joint MVD coding
-#endif                           // CONFIG_ADAPTIVE_MVD
+  int enable_joint_mvd;  // enable joint MVD coding
+#endif                   // CONFIG_JOINT_MVD
+#if CONFIG_REFINEMV
+  int enable_refinemv;           // enable refineMV mode
+#endif                           // CONFIG_REFINEMV
   int min_partition_size;        // min partition size [4,8,16,32,64,128]
   int max_partition_size;        // max partition size [4,8,16,32,64,128]
   int enable_intra_edge_filter;  // enable intra-edge filter for sequence
@@ -169,18 +181,22 @@
   int enable_flip_idtx;          // enable flip and identity transform types
   int max_reference_frames;      // maximum number of references per frame
   int enable_reduced_reference_set;  // enable reduced set of references
-  int explicit_ref_frame_map;    // explicitly signal reference frame mapping
-  int enable_ref_frame_mvs;      // sequence level
-  int allow_ref_frame_mvs;       // frame level
-  int enable_masked_comp;        // enable masked compound for sequence
-  int enable_onesided_comp;      // enable one sided compound for sequence
-  int enable_interintra_comp;    // enable interintra compound for sequence
-  int enable_smooth_interintra;  // enable smooth interintra mode usage
-  int enable_diff_wtd_comp;      // enable diff-wtd compound usage
-  int enable_interinter_wedge;   // enable interinter-wedge compound usage
-  int enable_interintra_wedge;   // enable interintra-wedge compound usage
-  int enable_global_motion;      // enable global motion usage for sequence
-  int enable_warped_motion;      // enable local warped motion for sequence
+  int explicit_ref_frame_map;  // explicitly signal reference frame mapping
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+  int enable_frame_output_order;  // enable frame output order derivation based
+                                  // on order hint value
+#endif                            // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+  int enable_ref_frame_mvs;       // sequence level
+  int allow_ref_frame_mvs;        // frame level
+  int enable_masked_comp;         // enable masked compound for sequence
+  int enable_onesided_comp;       // enable one sided compound for sequence
+  int enable_interintra_comp;     // enable interintra compound for sequence
+  int enable_smooth_interintra;   // enable smooth interintra mode usage
+  int enable_diff_wtd_comp;       // enable diff-wtd compound usage
+  int enable_interinter_wedge;    // enable interinter-wedge compound usage
+  int enable_interintra_wedge;    // enable interintra-wedge compound usage
+  int enable_global_motion;       // enable global motion usage for sequence
+  int enable_warped_motion;       // enable local warped motion for sequence
 #if CONFIG_EXTENDED_WARP_PREDICTION
   int enable_warped_causal;  // enable spatial warp prediction for sequence
   int enable_warp_delta;     // enable explicit warp models for sequence
@@ -448,12 +464,8 @@
   1,  // disable ML based partition speed up features
   5,  // aggressiveness for erp pruning
   0,  // use ml model for erp pruning
-#if CONFIG_H_PARTITION
   1,  // enable extended partitions
 #else
-  0,        // enable extended partitions
-#endif
-#else
   0,                        // disable ML based partition speed up features
 #endif
   1,  // enable rectangular partitions
@@ -468,10 +480,19 @@
 #if CONFIG_BAWP
   1,    // enable block adaptive weighted prediction (BAWP)
 #endif  // CONFIG_BAWP
+#if CONFIG_CWP
+  1,    // enable compound weighted prediction (CWP)
+#endif  // CONFIG_CWP
+#if CONFIG_D071_IMP_MSK_BLD
+  1,    // enable implicit masked blending
+#endif  // CONFIG_D071_IMP_MSK_BLD
   1,    // enable forward skip coding
 #if CONFIG_ORIP
   1,    // enable ORIP
 #endif  // CONFIG_ORIP
+#if CONFIG_IDIF
+  1,    // enable IDIF
+#endif  // CONFIG_IDIF
   1,    // enable intra secondary transform
 #if CONFIG_CROSS_CHROMA_TX
   1,    // enable cross-chroma component transform
@@ -489,6 +510,9 @@
 #if CONFIG_JOINT_MVD
   1,    // enable joint mvd coding
 #endif  // CONFIG_JOINT_MVD
+#if CONFIG_REFINEMV
+  1,    // enable refineMV mode
+#endif  // CONFIG_REFINEMV
   4,    // min_partition_size
 #if CONFIG_BLOCK_256
   256,  // max_partition_size
@@ -503,17 +527,20 @@
   7,  // max_reference_frames
   0,  // enable_reduced_reference_set
   0,  // explicit_ref_frame_map
-  1,  // enable_ref_frame_mvs sequence level
-  1,  // allow ref_frame_mvs frame level
-  1,  // enable masked compound at sequence level
-  1,  // enable one sided compound at sequence level
-  1,  // enable interintra compound at sequence level
-  1,  // enable smooth interintra mode
-  1,  // enable difference-weighted compound
-  1,  // enable interinter wedge compound
-  1,  // enable interintra wedge compound
-  1,  // enable_global_motion usage
-  1,  // enable_warped_motion at sequence level
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+  1,    // enable frame output order derivation based on order hint value
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+  1,    // enable_ref_frame_mvs sequence level
+  1,    // allow ref_frame_mvs frame level
+  1,    // enable masked compound at sequence level
+  1,    // enable one sided compound at sequence level
+  1,    // enable interintra compound at sequence level
+  1,    // enable smooth interintra mode
+  1,    // enable difference-weighted compound
+  1,    // enable interinter wedge compound
+  1,    // enable interintra wedge compound
+  0,    // enable_global_motion usage
+  1,    // enable_warped_motion at sequence level
 #if CONFIG_EXTENDED_WARP_PREDICTION
   1,  // enable_warped_causal at sequence level
   1,  // enable_warp_delta at sequence level
@@ -831,6 +858,9 @@
   RANGE_CHECK(extra_cfg, max_reference_frames, 3, 7);
   RANGE_CHECK(extra_cfg, enable_reduced_reference_set, 0, 1);
   RANGE_CHECK(extra_cfg, explicit_ref_frame_map, 0, 1);
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+  RANGE_CHECK(extra_cfg, enable_frame_output_order, 0, 1);
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
   RANGE_CHECK_HI(extra_cfg, chroma_subsampling_x, 1);
   RANGE_CHECK_HI(extra_cfg, chroma_subsampling_y, 1);
 
@@ -974,10 +1004,19 @@
 #if CONFIG_BAWP
   cfg->enable_bawp = extra_cfg->enable_bawp;
 #endif  // CONFIG_BAWP
+#if CONFIG_CWP
+  cfg->enable_cwp = extra_cfg->enable_cwp;
+#endif  // CONFIG_CWP
+#if CONFIG_D071_IMP_MSK_BLD
+  cfg->enable_imp_msk_bld = extra_cfg->enable_imp_msk_bld;
+#endif  // CONFIG_D071_IMP_MSK_BLD
   cfg->enable_fsc = extra_cfg->enable_fsc;
 #if CONFIG_ORIP
   cfg->enable_orip = extra_cfg->enable_orip;
 #endif
+#if CONFIG_IDIF
+  cfg->enable_idif = extra_cfg->enable_idif;
+#endif  // CONFIG_IDIF
   cfg->enable_ist = extra_cfg->enable_ist;
 #if CONFIG_CROSS_CHROMA_TX
   cfg->enable_cctx = extra_cfg->enable_cctx;
@@ -997,6 +1036,9 @@
 #if CONFIG_JOINT_MVD
   cfg->enable_joint_mvd = extra_cfg->enable_joint_mvd;
 #endif  // CONFIG_JOINT_MVD
+#if CONFIG_REFINEMV
+  cfg->enable_refinemv = extra_cfg->enable_refinemv;
+#endif  // CONFIG_REFINEMV
   cfg->max_partition_size = extra_cfg->max_partition_size;
   cfg->min_partition_size = extra_cfg->min_partition_size;
   cfg->enable_intra_edge_filter = extra_cfg->enable_intra_edge_filter;
@@ -1029,6 +1071,9 @@
   cfg->enable_onesided_comp = extra_cfg->enable_onesided_comp;
   cfg->enable_reduced_reference_set = extra_cfg->enable_reduced_reference_set;
   cfg->explicit_ref_frame_map = extra_cfg->explicit_ref_frame_map;
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+  cfg->enable_frame_output_order = extra_cfg->enable_frame_output_order;
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
   cfg->reduced_tx_type_set = extra_cfg->reduced_tx_type_set;
   cfg->max_drl_refmvs = extra_cfg->max_drl_refmvs;
 #if CONFIG_REF_MV_BANK
@@ -1088,10 +1133,19 @@
 #if CONFIG_BAWP
   extra_cfg->enable_bawp = cfg->enable_bawp;
 #endif  // CONFIG_BAWP
+#if CONFIG_CWP
+  extra_cfg->enable_cwp = cfg->enable_cwp;
+#endif  // CONFIG_CWP
+#if CONFIG_D071_IMP_MSK_BLD
+  extra_cfg->enable_imp_msk_bld = cfg->enable_imp_msk_bld;
+#endif  // CONFIG_D071_IMP_MSK_BLD
   extra_cfg->enable_fsc = cfg->enable_fsc;
 #if CONFIG_ORIP
   extra_cfg->enable_orip = cfg->enable_orip;
 #endif
+#if CONFIG_IDIF
+  extra_cfg->enable_idif = cfg->enable_idif;
+#endif  // CONFIG_IDIF
   extra_cfg->enable_ist = cfg->enable_ist;
 #if CONFIG_CROSS_CHROMA_TX
   extra_cfg->enable_cctx = cfg->enable_cctx;
@@ -1111,6 +1165,10 @@
 #if CONFIG_JOINT_MVD
   extra_cfg->enable_joint_mvd = cfg->enable_joint_mvd;
 #endif  // CONFIG_JOINT_MVD
+
+#if CONFIG_REFINEMV
+  extra_cfg->enable_refinemv = cfg->enable_refinemv;
+#endif  // CONFIG_REFINEMV
   extra_cfg->max_partition_size = cfg->max_partition_size;
   extra_cfg->min_partition_size = cfg->min_partition_size;
   extra_cfg->enable_intra_edge_filter = cfg->enable_intra_edge_filter;
@@ -1142,6 +1200,9 @@
   extra_cfg->enable_onesided_comp = cfg->enable_onesided_comp;
   extra_cfg->enable_reduced_reference_set = cfg->enable_reduced_reference_set;
   extra_cfg->explicit_ref_frame_map = cfg->explicit_ref_frame_map;
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+  extra_cfg->enable_frame_output_order = cfg->enable_frame_output_order;
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
   extra_cfg->reduced_tx_type_set = cfg->reduced_tx_type_set;
   extra_cfg->max_drl_refmvs = cfg->max_drl_refmvs;
 #if CONFIG_REF_MV_BANK
@@ -1390,6 +1451,9 @@
 #if CONFIG_JOINT_MVD
   tool_cfg->enable_joint_mvd = extra_cfg->enable_joint_mvd;
 #endif  // CONFIG_JOINT_MVD
+#if CONFIG_REFINEMV
+  tool_cfg->enable_refinemv = extra_cfg->enable_refinemv;
+#endif  // CONFIG_REFINEMV
 #if CONFIG_TIP
   tool_cfg->enable_tip = extra_cfg->enable_tip;
   if (tool_cfg->enable_tip) {
@@ -1405,6 +1469,12 @@
 #if CONFIG_BAWP
   tool_cfg->enable_bawp = extra_cfg->enable_bawp;
 #endif  // CONFIG_BAWP
+#if CONFIG_CWP
+  tool_cfg->enable_cwp = extra_cfg->enable_cwp;
+#endif  // CONFIG_CWP
+#if CONFIG_D071_IMP_MSK_BLD
+  tool_cfg->enable_imp_msk_bld = extra_cfg->enable_imp_msk_bld;
+#endif  // CONFIG_D071_IMP_MSK_BLD
   tool_cfg->force_video_mode = extra_cfg->force_video_mode;
   tool_cfg->enable_palette = extra_cfg->enable_palette;
   // FIXME(debargha): Should this be:
@@ -1601,6 +1671,16 @@
       extra_cfg->enable_reduced_reference_set;
   oxcf->ref_frm_cfg.enable_onesided_comp = extra_cfg->enable_onesided_comp;
   oxcf->ref_frm_cfg.explicit_ref_frame_map = extra_cfg->explicit_ref_frame_map;
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+  // Disable the implicit derivation of frame output order
+  // when order_hint is not available, S-frame is used or error resilience mode
+  // is used.
+  oxcf->ref_frm_cfg.enable_frame_output_order =
+      (!tool_cfg->enable_order_hint || kf_cfg->enable_sframe ||
+       tool_cfg->error_resilient_mode)
+          ? 0
+          : extra_cfg->enable_frame_output_order;
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
 
   oxcf->row_mt = extra_cfg->row_mt;
 
@@ -1662,6 +1742,9 @@
 #if CONFIG_ORIP
   intra_mode_cfg->enable_orip = extra_cfg->enable_orip;
 #endif
+#if CONFIG_IDIF
+  intra_mode_cfg->enable_idif = extra_cfg->enable_idif;
+#endif  // CONFIG_IDIF
   intra_mode_cfg->enable_ibp = extra_cfg->enable_ibp;
 
   // Set transform size/type configuration.
@@ -1673,7 +1756,7 @@
   txfm_cfg->use_intra_default_tx_only = extra_cfg->use_intra_default_tx_only;
   txfm_cfg->disable_ml_transform_speed_features =
       extra_cfg->disable_ml_transform_speed_features;
-  txfm_cfg->enable_ist = extra_cfg->enable_ist;
+  txfm_cfg->enable_ist = extra_cfg->enable_ist && !extra_cfg->lossless;
 #if CONFIG_CROSS_CHROMA_TX
   txfm_cfg->enable_cctx =
       tool_cfg->enable_monochrome ? 0 : extra_cfg->enable_cctx;
@@ -2666,6 +2749,15 @@
   return AOM_CODEC_OK;
 }
 
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+static aom_codec_err_t ctrl_set_frame_output_order(aom_codec_alg_priv_t *ctx,
+                                                   va_list args) {
+  struct av1_extracfg extra_cfg = ctx->extra_cfg;
+  extra_cfg.enable_frame_output_order =
+      CAST(AV1E_SET_FRAME_OUTPUT_ORDER_DERIVATION, args);
+  return update_extra_cfg(ctx, &extra_cfg);
+}
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
 static aom_codec_err_t create_stats_buffer(FIRSTPASS_STATS **frame_stats_buffer,
                                            STATS_BUFFER_CTX *stats_buf_context,
                                            int num_lap_buffers) {
@@ -3205,7 +3297,20 @@
 
         index_size = MAG_SIZE * (ctx->pending_frame_count - 1) + 2;
 
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+        if (cpi->oxcf.ref_frm_cfg.enable_frame_output_order) {
+          if (cpi->common.current_frame.frame_type == KEY_FRAME ||
+              !cpi->common.show_existing_frame) {
+            is_frame_visible = cpi->common.show_frame;
+          } else {
+            is_frame_visible = 0;
+          }
+        } else {
+          is_frame_visible = cpi->common.show_frame;
+        }
+#else   // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
         is_frame_visible = cpi->common.show_frame;
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
 
         has_no_show_keyframe |=
             (!is_frame_visible &&
@@ -3215,6 +3320,13 @@
           report_stats(cpi, frame_size, cx_time);
         }
       }
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+      if (cpi->oxcf.ref_frm_cfg.enable_frame_output_order &&
+          cpi->common.show_frame && cpi->common.show_existing_frame) {
+        cpi->frames_left = AOMMAX(0, cpi->frames_left - 1);
+        break;
+      }
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
     }
     if (is_frame_visible) {
       // Add the frame packet to the list of returned packets.
@@ -3244,6 +3356,9 @@
       pkt.data.frame.sz = ctx->pending_cx_data_sz;
       pkt.data.frame.partition_id = -1;
       pkt.data.frame.vis_frame_size = frame_size;
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+      pkt.data.frame.frame_count = ctx->pending_frame_count;
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
 
       pkt.data.frame.pts =
           ticks_to_timebase_units(timestamp_ratio, dst_time_stamp) +
@@ -3818,6 +3933,16 @@
                               err_string)) {
     extra_cfg.enable_bawp = arg_parse_int_helper(&arg, err_string);
 #endif  // CONFIG_BAWP
+#if CONFIG_CWP
+  } else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.enable_cwp, argv,
+                              err_string)) {
+    extra_cfg.enable_cwp = arg_parse_int_helper(&arg, err_string);
+#endif  // CONFIG_CWP
+#if CONFIG_D071_IMP_MSK_BLD
+  } else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.enable_imp_msk_bld,
+                              argv, err_string)) {
+    extra_cfg.enable_imp_msk_bld = arg_parse_uint_helper(&arg, err_string);
+#endif  // CONFIG_D071_IMP_MSK_BLD
   } else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.enable_fsc, argv,
                               err_string)) {
     extra_cfg.enable_fsc = arg_parse_int_helper(&arg, err_string);
@@ -3826,6 +3951,11 @@
                               err_string)) {
     extra_cfg.enable_orip = arg_parse_int_helper(&arg, err_string);
 #endif
+#if CONFIG_IDIF
+  } else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.enable_idif, argv,
+                              err_string)) {
+    extra_cfg.enable_idif = arg_parse_int_helper(&arg, err_string);
+#endif  // CONFIG_IDIF
   } else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.enable_ist, argv,
                               err_string)) {
     extra_cfg.enable_ist = arg_parse_int_helper(&arg, err_string);
@@ -3857,6 +3987,12 @@
                               argv, err_string)) {
     extra_cfg.enable_joint_mvd = arg_parse_int_helper(&arg, err_string);
 #endif  // CONFIG_JOINT_MVD
+
+#if CONFIG_REFINEMV
+  } else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.enable_refinemv, argv,
+                              err_string)) {
+    extra_cfg.enable_refinemv = arg_parse_int_helper(&arg, err_string);
+#endif  // CONFIG_REFINEMV
   } else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.min_partition_size,
                               argv, err_string)) {
     extra_cfg.min_partition_size = arg_parse_int_helper(&arg, err_string);
@@ -3888,6 +4024,13 @@
                               &g_av1_codec_arg_defs.explicit_ref_frame_map,
                               argv, err_string)) {
     extra_cfg.explicit_ref_frame_map = arg_parse_int_helper(&arg, err_string);
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+  } else if (arg_match_helper(&arg,
+                              &g_av1_codec_arg_defs.enable_frame_output_order,
+                              argv, err_string)) {
+    extra_cfg.enable_frame_output_order =
+        arg_parse_int_helper(&arg, err_string);
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
   } else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.enable_ref_frame_mvs,
                               argv, err_string)) {
     extra_cfg.enable_ref_frame_mvs = arg_parse_int_helper(&arg, err_string);
@@ -4198,6 +4341,9 @@
   { AV1E_SET_VBR_CORPUS_COMPLEXITY_LAP, ctrl_set_vbr_corpus_complexity_lap },
   { AV1E_ENABLE_SB_MULTIPASS_UNIT_TEST, ctrl_enable_sb_multipass_unit_test },
   { AV1E_ENABLE_SUBGOP_STATS, ctrl_enable_subgop_stats },
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+  { AV1E_SET_FRAME_OUTPUT_ORDER_DERIVATION, ctrl_set_frame_output_order },
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
 
   // Getters
   { AOME_GET_LAST_QUANTIZER, ctrl_get_quantizer },
@@ -4288,14 +4434,10 @@
         1,
         5,  // aggressiveness for erp pruning
         0,  // use ml model for erp pruning
-#if CONFIG_H_PARTITION
         1,  // enable extended partitions
-#else
-        0,  // enable extended partitions
-#endif
-#else   // CONFIG_EXT_RECUR_PARTITIONS
+#else       // CONFIG_EXT_RECUR_PARTITIONS
         0,
-#endif  // CONFIG_EXT_RECUR_PARTITIONS
+#endif      // CONFIG_EXT_RECUR_PARTITIONS
         0, 1,   1,
 #if CONFIG_TIP
         1,
@@ -4303,10 +4445,19 @@
 #if CONFIG_BAWP
         1,
 #endif  // CONFIG_BAWP
+#if CONFIG_CWP
+        1,
+#endif  // CONFIG_CWP
+#if CONFIG_D071_IMP_MSK_BLD
+        1,
+#endif  // CONFIG_D071_IMP_MSK_BLD
         1,
 #if CONFIG_ORIP
         1,
 #endif
+#if CONFIG_IDIF
+        1,
+#endif      // CONFIG_IDIF
         1,  // IST
 #if CONFIG_CROSS_CHROMA_TX
         1,
@@ -4324,6 +4475,9 @@
 #if CONFIG_JOINT_MVD
         1,
 #endif  // CONFIG_JOINT_MVD
+#if CONFIG_REFINEMV
+        1,
+#endif  // CONFIG_REFINEMV
         1, 1,   1,   1, 1, 1,
 #if CONFIG_PC_WIENER
         1,
@@ -4349,7 +4503,11 @@
 #if CONFIG_OPTFLOW_REFINEMENT
         1,
 #endif  // CONFIG_OPTFLOW_REFINEMENT
-        1, 1,   1,   1, 1, 1, 3, 1, 1, 0, 0, 0,
+        1, 1,   1,   1, 1, 1, 3, 1, 1, 0,
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+        1,
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+        0, 0,
 #if CONFIG_REF_MV_BANK
         1,
 #endif  // CONFIG_REF_MV_BANK
diff --git a/av1/av1_dx_iface.c b/av1/av1_dx_iface.c
index 0609e84..c5f1323 100644
--- a/av1/av1_dx_iface.c
+++ b/av1/av1_dx_iface.c
@@ -76,6 +76,7 @@
   unsigned int enable_subgop_stats;
 #if CONFIG_INSPECTION
   aom_inspect_cb inspect_cb;
+  aom_inspect_cb inspect_sb_cb;
   void *inspect_ctx;
 #endif
 };
@@ -566,6 +567,7 @@
   AV1Decoder *const pbi = frame_worker_data->pbi;
   AV1_COMMON *const cm = &pbi->common;
   frame_worker_data->pbi->inspect_cb = ctx->inspect_cb;
+  frame_worker_data->pbi->inspect_sb_cb = ctx->inspect_sb_cb;
   frame_worker_data->pbi->inspect_ctx = ctx->inspect_ctx;
   res = av1_receive_compressed_data(frame_worker_data->pbi, data_sz, &data);
   check_resync(ctx, frame_worker_data->pbi);
@@ -610,9 +612,21 @@
     struct AV1Decoder *pbi = frame_worker_data->pbi;
     if (ctx->enable_subgop_stats)
       memset(&pbi->subgop_stats, 0, sizeof(pbi->subgop_stats));
-    for (size_t j = 0; j < pbi->num_output_frames; j++) {
-      decrease_ref_count(pbi->output_frames[j], pool);
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+    // When multiple layers are enabled, use the mechanism of
+    // show_existing_frame
+    if (pbi->common.seq_params.order_hint_info.enable_order_hint &&
+        pbi->common.seq_params.enable_frame_output_order) {
+      if (!pbi->common.show_existing_frame)
+        decrease_ref_count(pbi->output_frames[0], pool);
+    } else {
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+      for (size_t j = 0; j < pbi->num_output_frames; j++) {
+        decrease_ref_count(pbi->output_frames[j], pool);
+      }
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
     }
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
     pbi->num_output_frames = 0;
     unlock_buffer_pool(pool);
     for (size_t j = 0; j < ctx->num_grain_image_frame_buffers; j++) {
@@ -1590,6 +1604,7 @@
 #else
   aom_inspect_init *init = va_arg(args, aom_inspect_init *);
   ctx->inspect_cb = init->inspect_cb;
+  ctx->inspect_sb_cb = init->inspect_sb_cb;
   ctx->inspect_ctx = init->inspect_ctx;
   return AOM_CODEC_OK;
 #endif
diff --git a/av1/common/alloccommon.c b/av1/common/alloccommon.c
index 1716e87..4256369 100644
--- a/av1/common/alloccommon.c
+++ b/av1/common/alloccommon.c
@@ -129,6 +129,9 @@
   }
 
   aom_free_frame_buffer(&cm->rst_frame);
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+  aom_free_frame_buffer(&cm->pre_rst_frame);
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
 }
 
 void av1_free_above_context_buffers(CommonContexts *above_contexts) {
diff --git a/av1/common/av1_common_int.h b/av1/common/av1_common_int.h
index 5cd51ec..978e300 100644
--- a/av1/common/av1_common_int.h
+++ b/av1/common/av1_common_int.h
@@ -257,6 +257,12 @@
   // Frame's level within the hierarchical structure
   unsigned int pyramid_level;
 
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+  // How many ref frames did this frame use?
+  // This is set to 0 for intra frames
+  int num_ref_frames;
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
+
   MV_REF *mvs;
   uint8_t *seg_map;
   struct segmentation seg;
@@ -410,7 +416,11 @@
   int mib_size;                // Size of the superblock in units of MI blocks
   int mib_size_log2;           // Log 2 of above.
   int explicit_ref_frame_map;  // Explicitly signal the reference frame mapping
-  int max_reference_frames;    // Number of reference frames allowed
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+  int enable_frame_output_order;  // Enable frame output order derivation based
+                                  // on order hint value
+#endif                            // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+  int max_reference_frames;       // Number of reference frames allowed
 #if CONFIG_ALLOW_SAME_REF_COMPOUND
   int num_same_ref_compound;  // Number of the allowed same reference frames for
                               // the compound mode
@@ -435,13 +445,23 @@
 #if CONFIG_BAWP
   uint8_t enable_bawp;  // enables/disables block adaptive weighted prediction
 #endif                  // CONFIG_BAWP
-  uint8_t enable_fsc;   // enables/disables forward skip coding
+#if CONFIG_CWP
+  uint8_t enable_cwp;  // enables/disables compound weighted prediction
+#endif                 // CONFIG_CWP
+#if CONFIG_D071_IMP_MSK_BLD
+  uint8_t enable_imp_msk_bld;        // enable implicit masked blending
+#endif                               // CONFIG_D071_IMP_MSK_BLD
+  uint8_t enable_fsc;                // enables/disables forward skip coding
   uint8_t enable_filter_intra;       // enables/disables filterintra
   uint8_t enable_intra_edge_filter;  // enables/disables edge upsampling
 
 #if CONFIG_ORIP
   uint8_t enable_orip;  // To turn on/off sub-block based ORIP
 #endif
+#if CONFIG_IDIF
+  uint8_t
+      enable_idif;  // enables/disables Intra Directional Interpolation Filter
+#endif              // CONFIG_IDIF
   uint8_t enable_ist;  // enables/disables intra secondary transform
 #if CONFIG_CROSS_CHROMA_TX
   uint8_t enable_cctx;  // enables/disables cross-chroma component transform
@@ -462,6 +482,10 @@
   uint8_t enable_joint_mvd;  // enables/disables joint MVD coding
 #endif                       // CONFIG_JOINT_MVD
 
+#if CONFIG_REFINEMV
+  uint8_t enable_refinemv;  // enables/disables refineMV mode
+#endif                      // CONFIG_REFINEMV
+
 #if CONFIG_EXTENDED_WARP_PREDICTION
   int seq_enabled_motion_modes;  // Bit mask of enabled motion modes for
                                  // sequence
@@ -509,6 +533,9 @@
 #if CONFIG_EXT_RECUR_PARTITIONS
   uint8_t enable_ext_partitions;  // enable extended partitions
 #endif                            // CONFIG_EXT_RECUR_PARTITIONS
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+  bool enable_global_motion;
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
   BITSTREAM_PROFILE profile;
 
   // Color config.
@@ -604,12 +631,7 @@
    */
   bool use_pb_mv_precision;
 #endif  // CONFIG_FLEX_MVRES
-#if DS_FRAME_LEVEL
-  /*!
-   * Dowsample filter type
-   */
-  int ds_filter_type;
-#endif  // DS_FRAME_LEVEl
+
   /*!
    * If true, palette tool and/or intra block copy tools may be used.
    */
@@ -624,6 +646,9 @@
 #if !CONFIG_EXTENDED_WARP_PREDICTION
   bool allow_warped_motion; /*!< If true, frame may use warped motion mode. */
 #endif
+#if CONFIG_CWG_D067_IMPROVED_WARP
+  bool allow_warpmv_mode; /*!< If true, frame may use WARPMV mode. */
+#endif                    // CONFIG_CWG_D067_IMPROVED_WARP
   /*!
    * If true, using previous frames' motion vectors for prediction is allowed.
    */
@@ -715,6 +740,18 @@
    */
   bool enable_bawp;
 #endif  // CONFIG_BAWP
+#if CONFIG_CWP
+  /*!
+   * Enables/disables compound weighted prediction
+   */
+  bool enable_cwp;
+#endif  // CONFIG_CWP
+#if CONFIG_D071_IMP_MSK_BLD
+  /*!
+   * Enables/disables implicit masked blending.
+   */
+  bool enable_imp_msk_bld;
+#endif  // CONFIG_D071_IMP_MSK_BLD
 #if CONFIG_EXTENDED_WARP_PREDICTION
   /*!
    * Bit mask of enabled motion modes for this frame
@@ -1537,6 +1574,11 @@
   int32_t *rst_tmpbuf; /*!< Scratch buffer for self-guided restoration */
   RestorationLineBuffers *rlbs; /*!< Line buffers needed by loop restoration */
   YV12_BUFFER_CONFIG rst_frame; /*!< Stores the output of loop restoration */
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+  YV12_BUFFER_CONFIG pre_rst_frame; /*!< Stores the reconstructed frame before
+                                       loop restoration, only used by encoder,
+                                       to be moved to encoder buffer */
+#endif                              // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
   /**@}*/
 
   /*!
@@ -1561,6 +1603,18 @@
    */
   DeltaQInfo delta_q_info;
 
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+  /*!
+   * Base model used for delta-coding global motion parameters
+   */
+  WarpedMotionParams base_global_motion_model;
+
+  /*!
+   * Temporal length of `base_global_motion_model`
+   */
+  int base_global_motion_distance;
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
+
   /*!
    * Global motion parameters for each reference frame.
    */
@@ -1632,12 +1686,12 @@
    * TODO(jingning): This can be combined with sign_bias later.
    */
   int8_t ref_frame_side[INTER_REFS_PER_FRAME];
-#if CONFIG_SMVP_IMPROVEMENT || CONFIG_JOINT_MVD
+#if CONFIG_MVP_IMPROVEMENT || CONFIG_JOINT_MVD
   /*!
    * relative distance between reference 'k' and current frame.
    */
-  int8_t ref_frame_relative_dist[REF_FRAMES];
-#endif  // CONFIG_SMVP_IMPROVEMENT || CONFIG_JOINT_MVD
+  int ref_frame_relative_dist[REF_FRAMES];
+#endif  // CONFIG_MVP_IMPROVEMENT || CONFIG_JOINT_MVD
   /*!
    * Number of temporal layers: may be > 1 for SVC (scalable vector coding).
    */
@@ -1733,6 +1787,11 @@
    * Log2 of the size of the superblock in units of MI.
    */
   int mib_size_log2;
+
+#if CONFIG_INSPECTION
+  YV12_BUFFER_CONFIG predicted_pixels;
+  YV12_BUFFER_CONFIG prefiltered_pixels;
+#endif  // CONFIG_INSPECTION
 } AV1_COMMON;
 
 /*!\cond */
@@ -2136,6 +2195,10 @@
   xd->mi_col = mi_col;
   xd->mi[0]->mi_row_start = mi_row;
   xd->mi[0]->mi_col_start = mi_col;
+#if CONFIG_EXT_RECUR_PARTITIONS
+  xd->mi[0]->chroma_mi_row_start = mi_row;
+  xd->mi[0]->chroma_mi_col_start = mi_col;
+#endif  // CONFIG_EXT_RECUR_PARTITIONS
 
 #if CONFIG_EXTENDED_WARP_PREDICTION
   xd->tile.mi_col_start = tile->mi_col_start;
@@ -2225,7 +2288,7 @@
   if (xd->width > xd->height)
     if (!(mi_row & (xd->width - 1))) xd->is_first_horizontal_rect = 1;
 
-#if CONFIG_C043_MVP_IMPROVEMENTS
+#if CONFIG_MVP_IMPROVEMENT
   xd->is_last_horizontal_rect = 0;
   if (xd->width > xd->height) {
     if (!((mi_row + xd->height) & (xd->width - 1))) {
@@ -2236,10 +2299,29 @@
   xd->is_first_vertical_rect = 0;
   if (xd->width < xd->height)
     if (!(mi_col & (xd->height - 1))) xd->is_first_vertical_rect = 1;
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
+#endif  // CONFIG_MVP_IMPROVEMENT
 #endif  // !CONFIG_EXT_RECUR_PARTITIONS
 }
 
+#if CONFIG_ATC_DCTX_ALIGNED
+// Return the inter TX context based on last position value.
+static INLINE int get_lp2tx_ctx(TX_SIZE tx_size, int bwl, int eob) {
+  assert(eob != 0);
+  const int lim = 2;
+  const int eoby = (eob - 1) >> bwl;
+  const int eobx = (eob - 1) - (eoby << bwl);
+  const int diag = eobx + eoby;
+  const int max_diag = tx_size_wide[tx_size] + tx_size_high[tx_size] - 2;
+  int ctx_idx = 0;
+  if (diag < lim) {
+    ctx_idx = 1;
+  } else if (diag > (max_diag - lim)) {
+    ctx_idx = 2;
+  }
+  return ctx_idx;
+}
+#endif  // CONFIG_ATC_DCTX_ALIGNED
+
 static INLINE int get_fsc_mode_ctx(const MACROBLOCKD *xd, const int is_key) {
   int ctx = 0;
   if (is_key) {
@@ -2281,6 +2363,19 @@
 }
 #endif  // !CONFIG_AIMC
 
+#if CONFIG_EXT_DIR
+static INLINE int get_mrl_index_ctx(const MB_MODE_INFO *neighbor0,
+                                    const MB_MODE_INFO *neighbor1) {
+  int ctx0 = neighbor0 && !is_inter_block(neighbor0, SHARED_PART) &&
+             !is_intrabc_block(neighbor0, SHARED_PART) &&
+             neighbor0->mrl_index != 0;
+  int ctx1 = neighbor1 && !is_inter_block(neighbor1, SHARED_PART) &&
+             !is_intrabc_block(neighbor1, SHARED_PART) &&
+             neighbor1->mrl_index != 0;
+  return ctx0 + ctx1;
+}
+#endif  // CONFIG_EXT_DIR
+
 static INLINE void update_partition_context(MACROBLOCKD *xd, int mi_row,
                                             int mi_col, BLOCK_SIZE subsize,
                                             BLOCK_SIZE bsize) {
@@ -2708,8 +2803,9 @@
   if (subsize < BLOCK_SIZES_ALL) {
     CHROMA_REF_INFO tmp_chroma_ref_info = { 1,      0,       mi_row,
                                             mi_col, subsize, subsize };
-    set_chroma_ref_info(mi_row, mi_col, 0, subsize, &tmp_chroma_ref_info,
-                        parent_chroma_ref_info, bsize, partition, ss_x, ss_y);
+    set_chroma_ref_info(tree_type, mi_row, mi_col, 0, subsize,
+                        &tmp_chroma_ref_info, parent_chroma_ref_info, bsize,
+                        partition, ss_x, ss_y);
     is_valid = get_plane_block_size(tmp_chroma_ref_info.bsize_base, ss_x,
                                     ss_y) != BLOCK_INVALID;
   }
@@ -2806,6 +2902,38 @@
   return is_implied;
 }
 
+static AOM_INLINE PARTITION_TYPE av1_get_normative_forced_partition_type(
+    const CommonModeInfoParams *const mi_params, TREE_TYPE tree_type, int ss_x,
+    int ss_y, int mi_row, int mi_col, BLOCK_SIZE bsize,
+    const PARTITION_TREE *ptree_luma, const CHROMA_REF_INFO *chroma_ref_info) {
+  // Return NONE if this block size is not splittable
+  if (!is_partition_point(bsize)) {
+    return PARTITION_NONE;
+  }
+
+  // Special case where 8x8 chroma blocks are not splittable.
+  // TODO(chiyotsai@google.com): This should be moved into `is_partition_point`,
+  // but this will require too many lines of change to do right now.
+  if (tree_type == CHROMA_PART && bsize == BLOCK_8X8) {
+    return PARTITION_NONE;
+  }
+
+  // Partitions forced by SDP
+  if (is_luma_chroma_share_same_partition(tree_type, ptree_luma, bsize)) {
+    assert(ptree_luma);
+    return sdp_chroma_part_from_luma(bsize, ptree_luma->partition, ss_x, ss_y);
+  }
+
+  // Partitions forced by boundary
+  PARTITION_TYPE implied_partition;
+  const bool is_part_implied = is_partition_implied_at_boundary(
+      mi_params, tree_type, ss_x, ss_y, mi_row, mi_col, bsize, chroma_ref_info,
+      &implied_partition);
+  if (is_part_implied) return implied_partition;
+
+  // No forced partitions
+  return PARTITION_INVALID;
+}
 #else
 // Return the number of sub-blocks whose width and height are
 // less than half of the parent block.
@@ -3101,11 +3229,13 @@
       // PARTITION_HORZ_B. To distinguish the latter two, check if the lower
       // half was split.
       if (sshigh * 4 == bhigh) {
-#if CONFIG_EXT_RECUR_PARTITIONS
+#if CONFIG_UNEVEN_4WAY
+        return PARTITION_HORZ_4A;
+#elif CONFIG_EXT_RECUR_PARTITIONS
         return PARTITION_HORZ_3;
-#else   // CONFIG_EXT_RECUR_PARTITIONS
+#else   // !CONFIG_UNEVEN_4WAY && !CONFIG_EXT_RECUR_PARTITIONS
         return PARTITION_HORZ_4;
-#endif  // CONFIG_EXT_RECUR_PARTITIONS
+#endif  // CONFIG_UNEVEN_4WAY
       }
 #if !CONFIG_EXT_RECUR_PARTITIONS
       assert(sshigh * 2 == bhigh);
@@ -3120,11 +3250,13 @@
       // PARTITION_VERT_B. To distinguish the latter two, check if the right
       // half was split.
       if (sswide * 4 == bwide) {
-#if CONFIG_EXT_RECUR_PARTITIONS
+#if CONFIG_UNEVEN_4WAY
+        return PARTITION_VERT_4A;
+#elif CONFIG_EXT_RECUR_PARTITIONS
         return PARTITION_VERT_3;
-#else   // CONFIG_EXT_RECUR_PARTITIONS
+#else   // !CONFIG_UNEVEN_4WAY && !CONFIG_EXT_RECUR_PARTITIONS
         return PARTITION_VERT_4;
-#endif  // CONFIG_EXT_RECUR_PARTITIONS
+#endif  // CONFIG_UNEVEN_4WAY
       }
 #if !CONFIG_EXT_RECUR_PARTITIONS
       assert(sswide * 2 == bhigh);
@@ -3279,8 +3411,73 @@
           seq_level_idx != SEQ_LEVEL_7_2 && seq_level_idx != SEQ_LEVEL_7_3);
 }
 
-// Intra derivative for second directional predictor of IBP
+// Intra derivative for directional predictions.
 // second_dr_intra_derivative[x] = 64*64/dr_intra_derivative[x]
+#if CONFIG_EXT_DIR
+static const int16_t dr_intra_derivative[90] = {
+  // Angle in degrees.
+  // Starred (*) values are unused.
+  0,    4096, 2048,            //    *,  0.9,  1.8,
+  1365, 1024, 819,             //  2.7,  3.6,  4.5,
+  682,  585,  512,             //  5.4,  6.2,  7.1,
+  455,  409,  409,  409, 372,  //  8.0,  8.9, *, *,  9.8,
+  341,  292,  273,             // 10.6, 12.4, 13.2,
+  256,  227,  215,             // 14.0, 15.7, 16.6,
+  204,  186,  178,             // 17.4, 19.0, 19.8,
+  170,  157,  151,             // 20.6, 22.2, 23.0,
+  146,  136,  132,             // 23.7, 25.2, 25.9,
+  128,  117,  110,             // 26.6, 28.7, 30.2,
+  107,  99,   97,   97,        // 30.9, 32.9,    *, 33.4,
+  93,   87,   83,              // 34.5, 36.3, 37.6,
+  81,   77,   74,              // 38.3, 39.7, 40.9,
+  73,   69,   66,              // 41.2, 42.8, 44.1,
+  64,   62,   59,              // 45.0, 45.9, 47.3,
+  56,   55,   53,              // 48.8, 49.3, 50.4,
+  50,   49,   47,              // 52.0, 52.6, 53.7,
+  44,   42,   42,   41,        // 55.5, 56.7,    *, 57.4,
+  38,   37,   35,              // 59.3, 60.0, 61.3,
+  32,   31,   30,              // 63.4, 64.2, 64.9,
+  28,   27,   26,              // 66.4, 67.1, 67.9,
+  24,   23,   22,              // 69.4, 70.2, 71.0,
+  20,   19,   18,              // 72.6, 73.5, 74.3,
+  16,   15,   14,              // 76.0, 76.8, 77.7,
+  12,   11,   10,   10,  10,   // 79.4, 80.2, *, *, 81.1,
+  9,    8,    7,               // 82.0, 82.9, 83.8,
+  6,    5,    4,               // 84.6, 85.5, 86.4,
+  3,    2,    1,               // 87.3, 88.2, 89.1,
+};
+#elif CONFIG_IMPROVED_ANGULAR_INTRA
+static const int16_t second_dr_intra_derivative[90] = {
+  0,    0, 0,        //
+  2,    0, 0,        // 3, ...
+  4,    0, 0,        // 6, ...
+  8,    0, 0, 0, 0,  // 9, ...
+  12,   0, 0,        // 14, ...
+  16,   0, 0,        // 17, ...
+  20,   0, 0,        // 20, ...
+  24,   0, 0,        // 23, ... (113 & 203 are base angles)
+  28,   0, 0,        // 26, ...
+  32,   0, 0,        // 29, ...
+  38,   0, 0, 0,     // 32, ...
+  44,   0, 0,        // 36, ...
+  50,   0, 0,        // 39, ...
+  56,   0, 0,        // 42, ...
+  64,   0, 0,        // 45, ... (45 & 135 are base angles)
+  72,   0, 0,        // 48, ...
+  82,   0, 0,        // 51, ...
+  92,   0, 0, 0,     // 54, ...
+  106,  0, 0,        // 58, ...
+  128,  0, 0,        // 61, ...
+  146,  0, 0,        // 64, ...
+  170,  0, 0,        // 67, ... (67 & 157 are base angles)
+  204,  0, 0,        // 70, ...
+  256,  0, 0,        // 73, ...
+  340,  0, 0, 0, 0,  // 76, ...
+  512,  0, 0,        // 81, ...
+  1024, 0, 0,        // 84, ...
+  2048, 0, 0,        // 87, ...
+};
+#else
 static const int16_t second_dr_intra_derivative[90] = {
   0,    0, 0,        //
   4,    0, 0,        // 3, ...
@@ -3311,6 +3508,7 @@
   585,  0, 0,        // 84, ...
   1365, 0, 0,        // 87, ...
 };
+#endif  // CONFIG_EXT_DIR
 
 // Generate the weights per pixel position for IBP
 static void av1_dr_prediction_z1_info(uint8_t *weights, int bw, int bh,
@@ -3393,7 +3591,11 @@
     int delta, int txw, int txh, int txw_log2, int txh_log2) {
   const int angle = mode_to_angle_map[mode] + delta * 3;
   const int mode_idx = angle_to_mode_index[angle];
+#if CONFIG_EXT_DIR
+  const int dy = dr_intra_derivative[90 - angle];
+#else
   const int dy = second_dr_intra_derivative[angle];
+#endif  // CONFIG_EXT_DIR
   weights[block_idx][mode_idx] =
       (uint8_t *)(aom_malloc(txw * txh * sizeof(uint8_t)));
   av1_dr_prediction_z1_info(weights[block_idx][mode_idx], txw, txh, txw_log2,
@@ -3410,7 +3612,11 @@
     const int txh = tx_size_high[iblock];
     const int txw_log2 = tx_size_wide_log2[iblock];
     const int txh_log2 = tx_size_high_log2[iblock];
+#if CONFIG_IMPROVED_ANGULAR_INTRA
+    for (int delta = -2; delta < 0; delta += 2) {
+#else
     for (int delta = -3; delta < 0; delta++) {
+#endif  // CONFIG_IMPROVED_ANGULAR_INTRA
       init_ibp_info_per_mode(weights, iblock, V_PRED, delta, txw, txh, txw_log2,
                              txh_log2);
       init_ibp_info_per_mode(weights, iblock, D67_PRED, delta, txw, txh,
@@ -3418,7 +3624,11 @@
       init_ibp_info_per_mode(weights, iblock, D45_PRED, delta, txw, txh,
                              txw_log2, txh_log2);
     }
+#if CONFIG_IMPROVED_ANGULAR_INTRA
+    for (int delta = 0; delta <= 2; delta += 2) {
+#else
     for (int delta = 0; delta <= 3; delta++) {
+#endif  // CONFIG_IMPROVED_ANGULAR_INTRA
       init_ibp_info_per_mode(weights, iblock, D67_PRED, delta, txw, txh,
                              txw_log2, txh_log2);
       init_ibp_info_per_mode(weights, iblock, D45_PRED, delta, txw, txh,
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl
index 4d53661..e0efb4e 100644
--- a/av1/common/av1_rtcd_defs.pl
+++ b/av1/common/av1_rtcd_defs.pl
@@ -229,6 +229,15 @@
 add_proto qw/void av1_highbd_dr_prediction_z3/, "uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_left, int dx, int dy, int bd, int mrl_index";
 specialize qw/av1_highbd_dr_prediction_z3 avx2/;
 
+if (aom_config("CONFIG_IDIF") eq "yes") {  # z1/z2/z3 predictors, _idif variants
+    add_proto qw/void av1_highbd_dr_prediction_z1_idif/ , "uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int dx, int dy, int bd, int mrl_index";
+    specialize qw/av1_highbd_dr_prediction_z1_idif avx2/;
+    add_proto qw/void av1_highbd_dr_prediction_z2_idif/ , "uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int dx, int dy, int bd, int mrl_index";
+    specialize qw/av1_highbd_dr_prediction_z2_idif avx2/;
+    add_proto qw/void av1_highbd_dr_prediction_z3_idif/ , "uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int dx, int dy, int bd, int mrl_index";
+    specialize qw/av1_highbd_dr_prediction_z3_idif avx2/;
+}
+
 add_proto qw / void av1_highbd_ibp_dr_prediction_z1 /,
     "uint8_t* weights, uint16_t *dst, ptrdiff_t stride, uint16_t* second_pred, ptrdiff_t second_stride, int bw, int bh";
 add_proto qw / void av1_highbd_ibp_dr_prediction_z3 /,
@@ -334,10 +343,14 @@
   # txb
   add_proto qw/void av1_txb_init_levels_skip/, "const tran_low_t *const coeff, const int width, const int height, uint8_t *const levels";
   specialize qw/av1_txb_init_levels_skip sse4_1 avx2/;
-  add_proto qw/void av1_get_nz_map_contexts_skip/, "const uint8_t *const levels, const int16_t *const scan, const uint16_t eob, const TX_SIZE tx_size, int8_t *const coeff_contexts";
-  specialize qw/av1_get_nz_map_contexts_skip sse2/;
+  if (aom_config("CONFIG_ATC_DCTX_ALIGNED") eq "yes") {
+    add_proto qw/void av1_get_nz_map_contexts_skip/, "const uint8_t *const levels, const int16_t *const scan, const uint16_t bob, const uint16_t eob, const TX_SIZE tx_size, int8_t *const coeff_contexts";
+  } else {
+    add_proto qw/void av1_get_nz_map_contexts_skip/, "const uint8_t *const levels, const int16_t *const scan, const uint16_t eob, const TX_SIZE tx_size, int8_t *const coeff_contexts";
+    specialize qw/av1_get_nz_map_contexts_skip sse2/;
+  }
 
-  if (aom_config("CONFIG_ATC_COEFCODING") eq "yes") {
+  if (aom_config("CONFIG_ATC") eq "yes") {
     add_proto qw/void av1_get_nz_map_contexts/, "const uint8_t *const levels, const int16_t *const scan, const uint16_t eob, const TX_SIZE tx_size, const TX_CLASS tx_class, int8_t *const coeff_contexts, const int plane";
   } else {
     add_proto qw/void av1_get_nz_map_contexts/, "const uint8_t *const levels, const int16_t *const scan, const uint16_t eob, const TX_SIZE tx_size, const TX_CLASS tx_class, int8_t *const coeff_contexts";
diff --git a/av1/common/blockd.c b/av1/common/blockd.c
index c629834..26c05fd 100644
--- a/av1/common/blockd.c
+++ b/av1/common/blockd.c
@@ -203,18 +203,27 @@
 #endif  // CONFIG_WIENER_NONSEP
 ) {
   for (int p = plane_start; p < plane_end; ++p) {
-    av1_reset_wiener_bank(&xd->wiener_info[p]);
+    av1_reset_wiener_bank(&xd->wiener_info[p], p != AOM_PLANE_Y);
     av1_reset_sgrproj_bank(&xd->sgrproj_info[p]);
 #if CONFIG_WIENER_NONSEP
     av1_reset_wienerns_bank(&xd->wienerns_info[p], xd->current_base_qindex,
-                            num_filter_classes[p], p != AOM_PLANE_Y);
+                            num_filter_classes[p], p != AOM_PLANE_Y
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+                            ,
+                            0
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+    );
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+    av1_reset_wienerns_bank(&xd->wienerns_cross_info[p],
+                            xd->current_base_qindex, 1, p != AOM_PLANE_Y, 1);
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
 #endif  // CONFIG_WIENER_NONSEP
   }
 }
 
 // Initialize bank
-void av1_reset_wiener_bank(WienerInfoBank *bank) {
-  set_default_wiener(&bank->filter[0]);
+void av1_reset_wiener_bank(WienerInfoBank *bank, int chroma) {
+  set_default_wiener(&bank->filter[0], chroma);
   bank->bank_size = 0;
   bank->bank_ptr = 0;
 }
@@ -262,19 +271,6 @@
   memcpy(av1_ref_from_wiener_bank(bank, ndx), info, sizeof(*info));
 }
 
-// Convenience function to fill the provided info structure with
-// filter at given index
-void av1_get_from_wiener_bank(WienerInfoBank *bank, int ndx, WienerInfo *info) {
-  if (bank->bank_size == 0) {
-    set_default_wiener(info);
-  } else {
-    assert(ndx < bank->bank_size);
-    const int ptr =
-        bank->bank_ptr - ndx + (bank->bank_ptr < ndx ? LR_BANK_SIZE : 0);
-    memcpy(info, &bank->filter[ptr], sizeof(*info));
-  }
-}
-
 // Initialize bank
 void av1_reset_sgrproj_bank(SgrprojInfoBank *bank) {
   set_default_sgrproj(&bank->filter[0]);
@@ -325,29 +321,25 @@
   memcpy(av1_ref_from_sgrproj_bank(bank, ndx), info, sizeof(*info));
 }
 
-// Convenience function to fill the provided info structure with
-// filter at given index
-void av1_get_from_sgrproj_bank(SgrprojInfoBank *bank, int ndx,
-                               SgrprojInfo *info) {
-  if (bank->bank_size == 0) {
-    set_default_sgrproj(info);
-  } else {
-    assert(ndx < bank->bank_size);
-    const int ptr =
-        bank->bank_ptr - ndx + (bank->bank_ptr < ndx ? LR_BANK_SIZE : 0);
-    memcpy(info, &bank->filter[ptr], sizeof(*info));
-  }
-}
-
 #if CONFIG_WIENER_NONSEP
 // Initialize bank
 void av1_reset_wienerns_bank(WienerNonsepInfoBank *bank, int qindex,
-                             int num_classes, int chroma) {
+                             int num_classes, int chroma
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+                             ,
+                             int is_cross
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+) {
   for (int i = 0; i < LR_BANK_SIZE; ++i) {
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+    set_default_wienerns(&bank->filter[i], qindex, num_classes, chroma,
+                         is_cross);
+#else
     set_default_wienerns(&bank->filter[i], qindex, num_classes, chroma);
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
   }
   for (int c_id = 0; c_id < num_classes; ++c_id) {
-    bank->bank_size_for_class[c_id] = 1;
+    bank->bank_size_for_class[c_id] = 0;
     bank->bank_ptr_for_class[c_id] = 0;
   }
 }
@@ -379,26 +371,30 @@
 WienerNonsepInfo *av1_ref_from_wienerns_bank(WienerNonsepInfoBank *bank,
                                              int ndx, int wiener_class_id) {
   assert(wiener_class_id != ALL_WIENERNS_CLASSES);
-  assert(bank->bank_size_for_class[wiener_class_id] > 0);
-
-  assert(ndx < bank->bank_size_for_class[wiener_class_id]);
-  const int ptr =
-      bank->bank_ptr_for_class[wiener_class_id] - ndx +
-      (bank->bank_ptr_for_class[wiener_class_id] < ndx ? LR_BANK_SIZE : 0);
-  return &bank->filter[ptr];
+  if (bank->bank_size_for_class[wiener_class_id] == 0) {
+    return &bank->filter[0];
+  } else {
+    assert(ndx < bank->bank_size_for_class[wiener_class_id]);
+    const int ptr =
+        bank->bank_ptr_for_class[wiener_class_id] - ndx +
+        (bank->bank_ptr_for_class[wiener_class_id] < ndx ? LR_BANK_SIZE : 0);
+    return &bank->filter[ptr];
+  }
 }
 
 // Get a const reference to a filter given the index
 const WienerNonsepInfo *av1_constref_from_wienerns_bank(
     const WienerNonsepInfoBank *bank, int ndx, int wiener_class_id) {
   assert(wiener_class_id != ALL_WIENERNS_CLASSES);
-  assert(bank->bank_size_for_class[wiener_class_id] > 0);
-
-  assert(ndx < bank->bank_size_for_class[wiener_class_id]);
-  const int ptr =
-      bank->bank_ptr_for_class[wiener_class_id] - ndx +
-      (bank->bank_ptr_for_class[wiener_class_id] < ndx ? LR_BANK_SIZE : 0);
-  return &bank->filter[ptr];
+  if (bank->bank_size_for_class[wiener_class_id] == 0) {
+    return &bank->filter[0];
+  } else {
+    assert(ndx < bank->bank_size_for_class[wiener_class_id]);
+    const int ptr =
+        bank->bank_ptr_for_class[wiener_class_id] - ndx +
+        (bank->bank_ptr_for_class[wiener_class_id] < ndx ? LR_BANK_SIZE : 0);
+    return &bank->filter[ptr];
+  }
 }
 
 // Directly replace a filter in the bank at given index
diff --git a/av1/common/blockd.h b/av1/common/blockd.h
index d934852..eab428b 100644
--- a/av1/common/blockd.h
+++ b/av1/common/blockd.h
@@ -365,6 +365,40 @@
   DIFFWTD_MASK_TYPE mask_type;
   COMPOUND_TYPE type;
 } INTERINTER_COMPOUND_DATA;
+#if CONFIG_D071_IMP_MSK_BLD
+// This structure is used for the position check of the implicit masked blending
+typedef struct BacpBlockData {
+  int x0;  // top left sample horizontal coord.
+  int x1;  // x0 + bw
+  int y0;  // top left sample vertical coord.
+  int y1;  // y0 + bh
+} BacpBlockData;
+// This struct contains enable flag and data for implicit masked blending mode
+typedef struct {
+  uint8_t enable_bacp;  // enable boundary aware compound prediction
+  BacpBlockData *bacp_block_data;  // pointer to block bounds (BacpBlockData)
+} INTERINTER_COMPOUND_BORDER_DATA;
+#endif  // CONFIG_D071_IMP_MSK_BLD
+
+#if CONFIG_REFINEMV
+#define REF_BUFFER_WIDTH \
+  (REFINEMV_SUBBLOCK_WIDTH + (AOM_INTERP_EXTEND - 1) + AOM_INTERP_EXTEND)
+#define REF_BUFFER_HEIGHT \
+  (REFINEMV_SUBBLOCK_HEIGHT + (AOM_INTERP_EXTEND - 1) + AOM_INTERP_EXTEND)
+typedef struct PadBlock {  // presumably x/y bounds of a region; confirm
+  int x0;
+  int x1;
+  int y0;
+  int y1;
+} PadBlock;
+
+typedef struct PadArea {
+  PadBlock pad_block;
+  uint16_t paded_ref_buf[(REF_BUFFER_WIDTH) * (REF_BUFFER_HEIGHT)];
+  int paded_ref_buf_stride;  // presumably row stride of paded_ref_buf; confirm
+} ReferenceArea;  // NOTE(review): struct tag is PadArea, typedef name differs
+
+#endif  // CONFIG_REFINEMV
 
 #if CONFIG_OPTFLOW_REFINEMENT
 // Macros for optical flow experiment where offsets are added in nXn blocks
@@ -420,6 +454,12 @@
   int mi_row_start;
   /*! \brief Starting mi_col of current coding block */
   int mi_col_start;
+#if CONFIG_EXT_RECUR_PARTITIONS
+  /*! \brief Starting chroma mi_row of current coding block */
+  int chroma_mi_row_start;
+  /*! \brief Starting chroma mi_col of current coding block */
+  int chroma_mi_col_start;
+#endif  // CONFIG_EXT_RECUR_PARTITIONS
   /*! \brief The partition type of the current coding block. */
   PARTITION_TYPE partition;
   /*! \brief The prediction mode used */
@@ -464,6 +504,11 @@
    */
   uint8_t mb_precision_set;
 #endif
+#if CONFIG_REFINEMV
+  /*! \brief The flag to signal if DMVR is used for the inter prediction. */
+  uint8_t refinemv_flag;
+#endif  // CONFIG_REFINEMV
+
   /*! \brief The motion mode used by the inter prediction. */
   MOTION_MODE motion_mode;
   /*! \brief Number of samples used by spatial warp prediction */
@@ -491,6 +536,11 @@
   /*! \brief The bawp parameters offset*/
   int32_t bawp_beta[3][2];  //[yuv][ref0/1], current only [0][0] is used.
 #endif                      // CONFIG_BAWP
+
+#if CONFIG_CWP
+  //! Index for compound weighted prediction parameters.
+  int8_t cwp_idx;
+#endif  // CONFIG_CWP
   /**@}*/
 
   /*****************************************************************************
@@ -561,7 +611,11 @@
   /*! \brief Only valid when temporal update if off. */
   uint8_t seg_id_predicted : 1;
   /*! \brief Which ref_mv to use */
+#if CONFIG_SEP_COMP_DRL
+  int ref_mv_idx[2];
+#else
   uint8_t ref_mv_idx : 3;
+#endif  // CONFIG_SEP_COMP_DRL
   /*! \brief Inter skip mode */
 #if CONFIG_SKIP_MODE_ENHANCEMENT
   uint8_t skip_mode : 2;
@@ -570,14 +624,14 @@
 #endif  // CONFIG_SKIP_MODE_ENHANCEMENT
   /*! \brief Whether intrabc is used. */
   uint8_t use_intrabc[PARTITION_STRUCTURE_NUM];
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
   /*! \brief Intrabc BV prediction mode. */
   uint8_t intrabc_mode;
   /*! \brief Index of ref_bv. */
   uint8_t intrabc_drl_idx;
   /*! \brief Which ref_bv to use. */
   int_mv ref_bv;
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
 
 #if CONFIG_WARP_REF_LIST
   /*! \brief Which index to use for warp base parameter. */
@@ -585,6 +639,10 @@
   /*! \brief Maximum number of warp reference indices to use for warp base
    * parameter. */
   uint8_t max_num_warp_candidates;
+#if CONFIG_CWG_D067_IMPROVED_WARP
+  /*! \brief warpmv_with_mvd_flag. */
+  uint8_t warpmv_with_mvd_flag;
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
 #endif  // CONFIG_WARP_REF_LIST
 
   /*! \brief Indicates if masked compound is used(1) or not (0). */
@@ -634,6 +692,15 @@
 } SUBMB_INFO;
 #endif  // CONFIG_C071_SUBBLK_WARPMV
 
+#if CONFIG_REFINEMV
+/*! \brief Stores the subblock refinemv motion info of the current coding block
+ */
+typedef struct REFINEMV_SUBMB_INFO {
+  /*! \brief Stored subblock mv for reference. */
+  int_mv refinemv[2];
+} REFINEMV_SUBMB_INFO;
+#endif  // CONFIG_REFINEMV
+
 /*!\cond */
 // Get the start plane for semi-decoupled partitioning
 static INLINE int get_partition_plane_start(int tree_type) {
@@ -750,7 +817,7 @@
   if (bsize > BLOCK_64X64) {
     return false;
   }
-  // At bsize \leq 8X8, extended partitions will lead to dimension < 2.
+  // At bsize <= 8X8, extended partitions will lead to dimension < 2.
   if (bsize <= BLOCK_8X8) {
     return false;
   }
@@ -771,7 +838,7 @@
     return false;
   }
   // A splittable wide block has ratio 2:1. If it performs HORZ_3 split, then
-  // we'll get a block ratio of 2:0.5 == 4:1, which is illegal. So extended
+  // we'll get a block ratio of 2:0.25 == 8:1, which is illegal. So extended
   // partition is disabled. The same goes for tall block.
   if ((is_wide_block(bsize) && rect_type == HORZ) ||
       (is_tall_block(bsize) && rect_type == VERT)) {
@@ -780,6 +847,25 @@
   return true;
 }
 
+#if CONFIG_UNEVEN_4WAY
+/*!\brief Checks whether uneven 4-way partition is allowed for current bsize and
+ * rect_type. */
+static AOM_INLINE bool is_uneven_4way_partition_allowed(
+    BLOCK_SIZE bsize, RECT_PART_TYPE rect_type, TREE_TYPE tree_type) {
+  assert(is_ext_partition_allowed(bsize, rect_type, tree_type));
+
+  if (rect_type == HORZ) {
+    if (bsize == BLOCK_32X64) return true;
+    if (bsize == BLOCK_16X32 && tree_type != CHROMA_PART) return true;
+  } else {
+    assert(rect_type == VERT);
+    if (bsize == BLOCK_64X32) return true;
+    if (bsize == BLOCK_32X16 && tree_type != CHROMA_PART) return true;
+  }
+  return false;
+}
+#endif  // CONFIG_UNEVEN_4WAY
+
 /*!\brief Returns the rect_type that's implied by the bsize. If the rect_type
  * cannot be derived from bsize, returns RECT_INVALID. */
 static AOM_INLINE RECT_PART_TYPE
@@ -807,18 +893,24 @@
 /*!\brief Returns whether square split is allowed for current bsize. */
 static AOM_INLINE bool is_square_split_eligible(BLOCK_SIZE bsize,
                                                 BLOCK_SIZE sb_size) {
-  if (sb_size != BLOCK_256X256) {
-    return false;
-  }
+  (void)sb_size;
   return bsize == BLOCK_128X128 || bsize == BLOCK_256X256;
 }
 
-/*!\brief Returns whether the current partition is horizontal type for vertical
+/*!\brief Returns whether the current partition is horizontal type or vertical
  * type. */
 static AOM_INLINE RECT_PART_TYPE get_rect_part_type(PARTITION_TYPE partition) {
-  if (partition == PARTITION_HORZ || partition == PARTITION_HORZ_3) {
+  if (partition == PARTITION_HORZ || partition == PARTITION_HORZ_3
+#if CONFIG_UNEVEN_4WAY
+      || partition == PARTITION_HORZ_4A || partition == PARTITION_HORZ_4B
+#endif  // CONFIG_UNEVEN_4WAY
+  ) {
     return HORZ;
-  } else if (partition == PARTITION_VERT || partition == PARTITION_VERT_3) {
+  } else if (partition == PARTITION_VERT || partition == PARTITION_VERT_3
+#if CONFIG_UNEVEN_4WAY
+             || partition == PARTITION_VERT_4A || partition == PARTITION_VERT_4B
+#endif  // CONFIG_UNEVEN_4WAY
+  ) {
     return VERT;
   }
   assert(0 && "Rectangular partition expected!");
@@ -830,6 +922,20 @@
   return is_inter_ref_frame(mbmi->ref_frame[1]);
 }
 
+#if CONFIG_SEP_COMP_DRL
+/*!\brief Return whether the current coding block has two separate DRLs */
+static INLINE int has_second_drl(const MB_MODE_INFO *mbmi) {
+  int ret = (mbmi->mode == NEAR_NEARMV || mbmi->mode == NEAR_NEWMV) &&
+            !is_tip_ref_frame(mbmi->ref_frame[0]) && !mbmi->skip_mode;
+  return ret;
+}
+
+/*!\brief Return the ref_mv_idx of the current coding block based on ref_idx */
+static INLINE int get_ref_mv_idx(const MB_MODE_INFO *mbmi, int ref_idx) {
+  return has_second_drl(mbmi) ? mbmi->ref_mv_idx[ref_idx] : mbmi->ref_mv_idx[0];
+}
+#endif  // CONFIG_SEP_COMP_DRL
+
 #if CONFIG_AIMC
 PREDICTION_MODE av1_get_joint_mode(const MB_MODE_INFO *mi);
 #else
@@ -893,7 +999,7 @@
   }
 }
 
-#if CONFIG_H_PARTITION
+#if CONFIG_EXT_RECUR_PARTITIONS
 // Get the block size of the ith sub-block in a block partitioned via an
 // h-partition mode.
 static INLINE BLOCK_SIZE get_h_partition_subsize(BLOCK_SIZE bsize, int index,
@@ -981,7 +1087,7 @@
     }
   }
 }
-#endif  // CONFIG_H_PARTITION
+#endif  // CONFIG_EXT_RECUR_PARTITIONS
 
 static INLINE int is_partition_valid(BLOCK_SIZE bsize, PARTITION_TYPE p) {
   if (is_partition_point(bsize))
@@ -1009,25 +1115,36 @@
                                             int subsampling_y) {
   const int bw = block_size_wide[bsize] >> subsampling_x;
   const int bh = block_size_high[bsize] >> subsampling_y;
+  // Check if block width/height is less than 4.
   const int bw_less_than_4 = bw < 4;
   const int bh_less_than_4 = bh < 4;
+  // Check if half block width/height is less than 4 (full size < 8).
   const int hbw_less_than_4 = bw < 8;
   const int hbh_less_than_4 = bh < 8;
+#if !CONFIG_UNEVEN_4WAY || CONFIG_EXT_RECUR_PARTITIONS
+  // Check if quarter block width/height is less than 4 (full size < 16).
   const int qbw_less_than_4 = bw < 16;
   const int qbh_less_than_4 = bh < 16;
+#endif  // !CONFIG_UNEVEN_4WAY || CONFIG_EXT_RECUR_PARTITIONS
+#if CONFIG_UNEVEN_4WAY
+  // Check if one-eighth block width/height is less than 4 (full size < 32).
+  const int ebw_less_than_4 = bw < 32;
+  const int ebh_less_than_4 = bh < 32;
+#endif  // CONFIG_UNEVEN_4WAY
   switch (partition) {
     case PARTITION_NONE: return bw_less_than_4 || bh_less_than_4;
     case PARTITION_HORZ: return bw_less_than_4 || hbh_less_than_4;
     case PARTITION_VERT: return hbw_less_than_4 || bh_less_than_4;
     case PARTITION_SPLIT: return hbw_less_than_4 || hbh_less_than_4;
 #if CONFIG_EXT_RECUR_PARTITIONS
-#if CONFIG_H_PARTITION
+#if CONFIG_UNEVEN_4WAY
+    case PARTITION_HORZ_4A:
+    case PARTITION_HORZ_4B: return bw_less_than_4 || ebh_less_than_4;
+    case PARTITION_VERT_4A:
+    case PARTITION_VERT_4B: return ebw_less_than_4 || bh_less_than_4;
+#endif  // CONFIG_UNEVEN_4WAY
     case PARTITION_HORZ_3: return hbw_less_than_4 || qbh_less_than_4;
     case PARTITION_VERT_3: return qbw_less_than_4 || hbh_less_than_4;
-#else
-    case PARTITION_HORZ_3: return bw_less_than_4 || qbh_less_than_4;
-    case PARTITION_VERT_3: return qbw_less_than_4 || bh_less_than_4;
-#endif  // CONFIG_H_PARTITION
 #else   // CONFIG_EXT_RECUR_PARTITIONS
     case PARTITION_HORZ_A:
     case PARTITION_HORZ_B:
@@ -1077,13 +1194,14 @@
           return 1;
       }
 #if CONFIG_EXT_RECUR_PARTITIONS
-#if CONFIG_H_PARTITION
+#if CONFIG_UNEVEN_4WAY
+    case PARTITION_HORZ_4A:
+    case PARTITION_HORZ_4B:
+    case PARTITION_VERT_4A:
+    case PARTITION_VERT_4B: return index == 3;
+#endif  // CONFIG_UNEVEN_4WAY
     case PARTITION_VERT_3:
     case PARTITION_HORZ_3: return index == 3;
-#else
-    case PARTITION_VERT_3:
-    case PARTITION_HORZ_3: return index == 2;
-#endif  // CONFIG_H_PARTITION
 #else   // CONFIG_EXT_RECUR_PARTITIONS
     case PARTITION_HORZ_A:
     case PARTITION_HORZ_B:
@@ -1162,6 +1280,12 @@
     case PARTITION_HORZ:
     case PARTITION_VERT:
 #if CONFIG_EXT_RECUR_PARTITIONS
+#if CONFIG_UNEVEN_4WAY
+    case PARTITION_HORZ_4A:
+    case PARTITION_HORZ_4B:
+    case PARTITION_VERT_4A:
+    case PARTITION_VERT_4B:
+#endif  // CONFIG_UNEVEN_4WAY
     case PARTITION_VERT_3:
     case PARTITION_HORZ_3:
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
@@ -1252,14 +1376,23 @@
   }
 }
 
-static INLINE void set_chroma_ref_info(int mi_row, int mi_col, int index,
-                                       BLOCK_SIZE bsize, CHROMA_REF_INFO *info,
+static INLINE void set_chroma_ref_info(TREE_TYPE tree_type, int mi_row,
+                                       int mi_col, int index, BLOCK_SIZE bsize,
+                                       CHROMA_REF_INFO *info,
                                        const CHROMA_REF_INFO *parent_info,
                                        BLOCK_SIZE parent_bsize,
                                        PARTITION_TYPE parent_partition,
                                        int ss_x, int ss_y) {
   assert(bsize < BLOCK_SIZES_ALL);
   initialize_chroma_ref_info(mi_row, mi_col, bsize, info);
+  if (tree_type == LUMA_PART) {
+    info->is_chroma_ref = 0;
+    return;
+  }
+  if (tree_type == CHROMA_PART) {
+    info->is_chroma_ref = 1;
+    return;
+  }
   if (parent_info == NULL) return;
   if (parent_info->is_chroma_ref) {
     if (parent_info->offset_started) {
@@ -1290,7 +1423,7 @@
   }
 }
 
-#if CONFIG_MISMATCH_DEBUG
+#if CONFIG_MISMATCH_DEBUG || CONFIG_INSPECTION
 static INLINE void mi_to_pixel_loc(int *pixel_c, int *pixel_r, int mi_col,
                                    int mi_row, int tx_blk_col, int tx_blk_row,
                                    int subsampling_x, int subsampling_y) {
@@ -1318,8 +1451,21 @@
 
 typedef struct {
   DECLARE_ALIGNED(32, tran_low_t, dqcoeff[MAX_MB_PLANE][MAX_SB_SQUARE]);
+#if CONFIG_INSPECTION
+  // dqcoeff gets clobbered before the inspect callback happens, so keep a
+  // copy here.
+  DECLARE_ALIGNED(32, tran_low_t, dqcoeff_copy[MAX_MB_PLANE][MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, tran_low_t, qcoeff[MAX_MB_PLANE][MAX_SB_SQUARE]);
+  DECLARE_ALIGNED(32, tran_low_t, dequant_values[MAX_MB_PLANE][MAX_SB_SQUARE]);
+#endif
+  // keeps the index that corresponds to end-of-block (eob)
   eob_info eob_data[MAX_MB_PLANE]
                    [MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
+#if CONFIG_ATC_DCTX_ALIGNED
+  // keeps the index that corresponds to beginning-of-block (bob)
+  eob_info bob_data[MAX_MB_PLANE]
+                   [MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
+#endif  // CONFIG_ATC_DCTX_ALIGNED
   DECLARE_ALIGNED(16, uint8_t, color_index_map[2][MAX_SB_SQUARE]);
 } CB_BUFFER;
 
@@ -1351,6 +1497,9 @@
 
   qm_val_t *seg_iqmatrix[MAX_SEGMENTS][TX_SIZES_ALL];
   qm_val_t *seg_qmatrix[MAX_SEGMENTS][TX_SIZES_ALL];
+#if CONFIG_INSPECTION
+  DECLARE_ALIGNED(32, int16_t, predicted_pixels[MAX_SB_SQUARE]);
+#endif
 } MACROBLOCKD_PLANE;
 
 #define BLOCK_OFFSET(i) ((i) << 4)
@@ -1360,7 +1509,6 @@
 #else
 #define LR_BANK_SIZE 1
 #endif  // CONFIG_LR_MERGE_COEFFS
-
 /*!\endcond */
 
 /*!\brief Parameters related to Wiener Filter */
@@ -1456,7 +1604,12 @@
    */
   DECLARE_ALIGNED(16, int16_t,
                   allfiltertaps[WIENERNS_MAX_CLASSES * WIENERNS_YUV_MAX]);
-
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+  /*!
+   * Whether this is a cross-filter, temporarily used
+   */
+  int is_cross_filter;
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
 #if CONFIG_LR_MERGE_COEFFS
   /*!
    * Best Reference from dynamic bank for each class.
@@ -1599,7 +1752,7 @@
 } WARP_PARAM_BANK;
 
 #endif  // CONFIG_WARP_REF_LIST
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
 /*! \brief Variables related to mvp list of skip mode.*/
 typedef struct {
   //! MV list
@@ -1617,7 +1770,7 @@
   //! Global mvs
   int_mv global_mvs[2];
 } SKIP_MODE_MVP_LIST;
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
 
 /*! \brief Variables related to current coding block.
  *
@@ -1644,7 +1797,7 @@
    * \name Reference MV bank info.
    */
   /**@{*/
-#if !CONFIG_C043_MVP_IMPROVEMENTS
+#if !CONFIG_MVP_IMPROVEMENT
   REF_MV_BANK *ref_mv_bank_pt; /*!< Pointer to bank to refer to */
 #endif
   REF_MV_BANK ref_mv_bank; /*!< Ref mv bank to update */
@@ -1902,6 +2055,13 @@
    * Nonseparable Wiener filter information for all planes.
    */
   WienerNonsepInfoBank wienerns_info[MAX_MB_PLANE];
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+  /*!
+   * Nonseparable Wiener cross filter information for all planes; only applied
+   * to Cb and Cr
+   */
+  WienerNonsepInfoBank wienerns_cross_info[MAX_MB_PLANE];
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
 #endif  // CONFIG_WIENER_NONSEP
   /**@}*/
 
@@ -1930,9 +2090,9 @@
 /*!
  * skip_mvp_candidate_list is the MVP list for skip mode.
  */
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
   SKIP_MODE_MVP_LIST skip_mvp_candidate_list;
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
 
 #if CONFIG_WARP_REF_LIST
   /*!
@@ -1958,7 +2118,7 @@
    */
   bool is_first_horizontal_rect;
 
-#if CONFIG_C043_MVP_IMPROVEMENTS
+#if CONFIG_MVP_IMPROVEMENT
   /*!
    * True if this is the last horizontal rectangular block in a HORIZONTAL or
    * HORIZONTAL_4 partition.
@@ -1969,7 +2129,7 @@
    * VERTICAL_4 partition.
    */
   bool is_first_vertical_rect;
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
+#endif  // CONFIG_MVP_IMPROVEMENT
 #endif  // !CONFIG_EXT_RECUR_PARTITIONS
 
   /*!
@@ -2132,6 +2292,11 @@
   /** variable to store eob_u flag */
   uint8_t eob_u_flag;
 #endif  // CONFIG_CONTEXT_DERIVATION
+
+#if CONFIG_REFINEMV
+  /** block level storage to store luma refined MVs for chroma use */
+  REFINEMV_SUBMB_INFO refinemv_subinfo[MAX_MIB_SIZE * MAX_MIB_SIZE];
+#endif  // CONFIG_REFINEMV
 } MACROBLOCKD;
 
 /*!\cond */
@@ -2180,14 +2345,14 @@
 // Number of transform types in each set type for intra blocks
 static const int av1_num_ext_tx_set_intra[EXT_TX_SET_TYPES] = { 1, 1,  4,
                                                                 6, 11, 15,
-#if CONFIG_ATC_NEWTXSETS
+#if CONFIG_ATC
                                                                 7
-#endif  // CONFIG_ATC_NEWTXSETS
+#endif  // CONFIG_ATC
 };
 
-#if CONFIG_ATC_NEWTXSETS && CONFIG_ATC_REDUCED_TXSET
+#if CONFIG_ATC && CONFIG_ATC_REDUCED_TXSET
 static const int av1_num_reduced_tx_set = 2;
-#endif  // CONFIG_ATC_NEWTXSETS && CONFIG_ATC_REDUCED_TXSET
+#endif  // CONFIG_ATC && CONFIG_ATC_REDUCED_TXSET
 
 // Number of transform types in each set type
 static const int av1_num_ext_tx_set[EXT_TX_SET_TYPES] = {
@@ -2201,12 +2366,12 @@
   { 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0 },
   { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 },
   { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
-#if CONFIG_ATC_NEWTXSETS
+#if CONFIG_ATC
   { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
-#endif  // CONFIG_ATC_NEWTXSETS
+#endif  // CONFIG_ATC
 };
 
-#if CONFIG_ATC_NEWTXSETS
+#if CONFIG_ATC
 static const int av1_mdtx_used_flag[EXT_TX_SIZES][INTRA_MODES][TX_TYPES] = {
   {
       { 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0 },
@@ -2269,7 +2434,7 @@
       { 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0 },
   },  // size_class: 3
 };
-#endif  // CONFIG_ATC_NEWTXSETS
+#endif  // CONFIG_ATC
 
 static const uint16_t av1_reduced_intra_tx_used_flag[INTRA_MODES] = {
   0x080F,  // DC_PRED:       0000 1000 0000 1111
@@ -2294,12 +2459,12 @@
   0x0E0F,  // 0000 1110 0000 1111
   0x0FFF,  // 0000 1111 1111 1111
   0xFFFF,  // 1111 1111 1111 1111
-#if CONFIG_ATC_NEWTXSETS
+#if CONFIG_ATC
   0xFFFF,
-#endif  // CONFIG_ATC_NEWTXSETS
+#endif  // CONFIG_ATC
 };
 
-#if CONFIG_ATC_NEWTXSETS
+#if CONFIG_ATC
 static const uint16_t av1_md_trfm_used_flag[EXT_TX_SIZES][INTRA_MODES] = {
   {
       0x218F,
@@ -2362,7 +2527,7 @@
       0x0000,
   },  // size_class: 3
 };
-#endif  // CONFIG_ATC_NEWTXSETS
+#endif  // CONFIG_ATC
 
 static const TxSetType av1_ext_tx_set_lookup[2][2] = {
   { EXT_TX_SET_DTT4_IDTX_1DDCT, EXT_TX_SET_DTT4_IDTX },
@@ -2374,14 +2539,18 @@
   const TX_SIZE tx_size_sqr_up = txsize_sqr_up_map[tx_size];
   if (tx_size_sqr_up > TX_32X32) return EXT_TX_SET_DCTONLY;
   if (tx_size_sqr_up == TX_32X32)
+#if CONFIG_ATC_DCTX_ALIGNED
+    return EXT_TX_SET_DCT_IDTX;
+#else
     return is_inter ? EXT_TX_SET_DCT_IDTX : EXT_TX_SET_DCTONLY;
+#endif  // CONFIG_ATC_DCTX_ALIGNED
 #if CONFIG_ATC_REDUCED_TXSET
   if (use_reduced_set) return is_inter ? EXT_TX_SET_DCT_IDTX : EXT_NEW_TX_SET;
 #else
   if (use_reduced_set)
     return is_inter ? EXT_TX_SET_DCT_IDTX : EXT_TX_SET_DTT4_IDTX;
 #endif  // CONFIG_ATC_REDUCED_TXSET
-#if CONFIG_ATC_NEWTXSETS
+#if CONFIG_ATC
   if (is_inter) {
     const TX_SIZE tx_size_sqr = txsize_sqr_map[tx_size];
     return av1_ext_tx_set_lookup[is_inter][tx_size_sqr == TX_16X16];
@@ -2391,17 +2560,17 @@
 #else
   const TX_SIZE tx_size_sqr = txsize_sqr_map[tx_size];
   return av1_ext_tx_set_lookup[is_inter][tx_size_sqr == TX_16X16];
-#endif  // CONFIG_ATC_NEWTXSETS
+#endif  // CONFIG_ATC
 }
 
 // Maps tx set types to the indices.
 static const int ext_tx_set_index[2][EXT_TX_SET_TYPES] = {
   { // Intra
-#if CONFIG_ATC_NEWTXSETS
+#if CONFIG_ATC
     0, -1, -1, -1, -1, -1, 1 },
 #else
     0, -1, 2, 1, -1, -1 },
-#endif  // CONFIG_ATC_NEWTXSETS
+#endif  // CONFIG_ATC
   {     // Inter
     0, 3, -1, -1, 2, 1 },
 };
@@ -2466,7 +2635,11 @@
                                           int is_screen_content_type) {
   const MB_MODE_INFO *const mbmi = xd->mi[0];
   if (is_inter_block(mbmi, xd->tree_type) || plane_type != PLANE_TYPE_Y ||
+#if CONFIG_ATC_DCTX_ALIGNED
+      xd->lossless[mbmi->segment_id] || tx_size > TX_32X32 ||
+#else
       xd->lossless[mbmi->segment_id] || tx_size >= TX_32X32 ||
+#endif  // CONFIG_ATC_DCTX_ALIGNED
       is_screen_content_type)
     return DCT_DCT;
 
@@ -2846,6 +3019,9 @@
   const int code_stx =
       (primary_tx_type == DCT_DCT || primary_tx_type == ADST_ADST) &&
       (intra_dir < PAETH_PRED) &&
+#if CONFIG_ATC_DCTX_ALIGNED
+      (eob != 1) &&
+#endif  // CONFIG_ATC_DCTX_ALIGNED
       !(mbmi->filter_intra_mode_info.use_filter_intra) && is_depth0 && ist_eob;
   return code_stx;
 }
@@ -2990,14 +3166,13 @@
 
 void av1_reset_loop_filter_delta(MACROBLOCKD *xd, int num_planes);
 
-void av1_reset_wiener_bank(WienerInfoBank *bank);
+void av1_reset_wiener_bank(WienerInfoBank *bank, int chroma);
 void av1_add_to_wiener_bank(WienerInfoBank *bank, const WienerInfo *info);
 WienerInfo *av1_ref_from_wiener_bank(WienerInfoBank *bank, int ndx);
 const WienerInfo *av1_constref_from_wiener_bank(const WienerInfoBank *bank,
                                                 int ndx);
 void av1_upd_to_wiener_bank(WienerInfoBank *bank, int ndx,
                             const WienerInfo *info);
-void av1_get_from_wiener_bank(WienerInfoBank *bank, int ndx, WienerInfo *info);
 
 void av1_reset_sgrproj_bank(SgrprojInfoBank *bank);
 void av1_add_to_sgrproj_bank(SgrprojInfoBank *bank, const SgrprojInfo *info);
@@ -3006,14 +3181,17 @@
                                                   int ndx);
 void av1_upd_to_sgrproj_bank(SgrprojInfoBank *bank, int ndx,
                              const SgrprojInfo *info);
-void av1_get_from_sgrproj_bank(SgrprojInfoBank *bank, int ndx,
-                               SgrprojInfo *info);
 
 #if CONFIG_WIENER_NONSEP
 // Resets the bank data structure holding LR_BANK_SIZE nonseparable Wiener
 // filters. The bank holds a rootating buffer of filters.
 void av1_reset_wienerns_bank(WienerNonsepInfoBank *bank, int qindex,
-                             int num_classes, int chroma);
+                             int num_classes, int chroma
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+                             ,
+                             int is_cross
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+);
 
 // Adds the nonseparable Wiener filter in info into the bank of rotating
 // filters. The add is so that once the bank has LR_BANK_SIZE filters the first
@@ -3149,6 +3327,7 @@
     return 0;
   }
 #if CONFIG_EXT_RECUR_PARTITIONS
+#if !CONFIG_UNEVEN_4WAY
   // TODO(urvang): Enable this special case, if we make OBMC work.
   // TODO(yuec): Enable this case when the alignment issue is fixed. There
   // will be memory leak in global above_pred_buff and left_pred_buff if
@@ -3156,11 +3335,13 @@
   if ((mi_row & 0x01) || (mi_col & 0x01)) {
     return 0;
   }
+#endif  // !CONFIG_UNEVEN_4WAY
 #else
   assert(!(mi_row & 0x01) && !(mi_col & 0x01));
+#endif  // CONFIG_EXT_RECUR_PARTITIONS
   (void)mi_row;
   (void)mi_col;
-#endif  // CONFIG_EXT_RECUR_PARTITIONS
+
   return 1;
 }
 
@@ -3276,10 +3457,10 @@
                               [PALETTE_COLORS];
 /* clang-format on */
 
-#if CONFIG_NEW_COLOR_MAP_CODING
+#if CONFIG_PALETTE_IMPROVEMENTS
 typedef aom_cdf_prob (*IdentityRowCdf)[CDF_SIZE(2)];
 typedef const int (*IdentityRowCost)[PALETTE_ROW_FLAG_CONTEXTS][2];
-#endif  // CONFIG_NEW_COLOR_MAP_CODING
+#endif  // CONFIG_PALETTE_IMPROVEMENTS
 
 typedef struct {
   int rows;
@@ -3290,10 +3471,10 @@
   uint8_t *color_map;
   MapCdf map_cdf;
   ColorCost color_cost;
-#if CONFIG_NEW_COLOR_MAP_CODING
+#if CONFIG_PALETTE_IMPROVEMENTS
   IdentityRowCdf identity_row_cdf;
   IdentityRowCost identity_row_cost;
-#endif  // CONFIG_NEW_COLOR_MAP_CODING
+#endif  // CONFIG_PALETTE_IMPROVEMENTS
 } Av1ColorMapParam;
 
 static INLINE int is_nontrans_global_motion(const MACROBLOCKD *xd,
@@ -3337,6 +3518,29 @@
 }
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
 
+#if CONFIG_CWP
+// Returns 1 if compound weighted prediction (CWP) is allowed, 0 otherwise.
+static INLINE int is_cwp_allowed(const MB_MODE_INFO *mbmi) {
+#if CONFIG_REFINEMV
+  if (mbmi->refinemv_flag) return 0;  // never allowed with refinemv_flag set
+#endif  // CONFIG_REFINEMV
+
+  if (mbmi->skip_mode) return 1;  // skip mode always allows CWP
+  int use_cwp = has_second_ref(mbmi) && mbmi->mode < NEAR_NEARMV_OPTFLOW &&
+                mbmi->interinter_comp.type == COMPOUND_AVERAGE &&
+                mbmi->motion_mode == SIMPLE_TRANSLATION;
+  use_cwp &=
+      (mbmi->mode == NEAR_NEARMV || is_joint_mvd_coding_mode(mbmi->mode));
+  use_cwp &= (mbmi->jmvd_scale_mode == 0);
+  return use_cwp;
+}
+// Returns the block's CWP index; asserts it lies in [CWP_MIN, CWP_MAX].
+static INLINE int8_t get_cwp_idx(const MB_MODE_INFO *mbmi) {
+  assert(mbmi->cwp_idx <= CWP_MAX && mbmi->cwp_idx >= CWP_MIN);
+  return mbmi->cwp_idx;
+}
+#endif  // CONFIG_CWP
+
 /*!\endcond */
 
 #ifdef __cplusplus
diff --git a/av1/common/cfl.c b/av1/common/cfl.c
index b244669..7f1f66f 100644
--- a/av1/common/cfl.c
+++ b/av1/common/cfl.c
@@ -218,17 +218,19 @@
       for (int i = 0; i < width; i += 2) {
         const int bot = i + input_stride;
 #if CONFIG_ADAPTIVE_DS_FILTER
-#if DS_FRAME_LEVEL
-        const int filter_type = cm->features.ds_filter_type;
-#else
         const int filter_type = cm->seq_params.enable_cfl_ds_filter;
-#endif  // DS_FRAME_LEVEL
         if (filter_type == 1) {
           output_q3[i >> 1] = input[AOMMAX(0, i - 1)] + 2 * input[i] +
                               input[i + 1] + input[bot + AOMMAX(-1, -i)] +
                               2 * input[bot] + input[bot + 1];
         } else if (filter_type == 2) {
+#if CONFIG_CFL_IMPROVEMENTS
+          const int top = i - input_stride;
+          output_q3[i >> 1] = input[AOMMAX(0, i - 1)] + 4 * input[i] +
+                              input[i + 1] + input[top] + input[bot];
+#else
           output_q3[i >> 1] = input[i] * 8;
+#endif  // CONFIG_CFL_IMPROVEMENTS
         } else {
           output_q3[i >> 1] =
               (input[i] + input[i + 1] + input[bot] + input[bot + 1] + 2) << 1;
@@ -244,6 +246,25 @@
 #endif
 #endif  // CONFIG_ADAPTIVE_DS_FILTER
       }
+#if CONFIG_ADPTIVE_DS_422
+    } else if (sub_x) {
+      uint16_t *input = dst - input_stride;
+      for (int i = 0; i < width; i += 2) {
+#if CONFIG_ADAPTIVE_DS_FILTER
+        const int filter_type = cm->seq_params.enable_cfl_ds_filter;
+        if (filter_type == 1) {
+          output_q3[i >> 1] =
+              (input[AOMMAX(0, i - 1)] + 2 * input[i] + input[i + 1]) << 1;
+        } else if (filter_type == 2) {
+          output_q3[i >> 1] = input[i] << 3;
+        } else {
+          output_q3[i >> 1] = (input[i] + input[i + 1]) << 2;
+        }
+#else
+        output_q3[i >> 1] = input[i] << 3;
+#endif  // CONFIG_ADAPTIVE_DS_FILTER
+      }
+#endif  // CONFIG_ADPTIVE_DS_422
     } else if (sub_y) {
       uint16_t *input = dst - 2 * input_stride;
       for (int i = 0; i < width; ++i) {
@@ -277,16 +298,18 @@
       for (int j = 0; j < height; j += 2) {
         const int bot = input_stride;
 #if CONFIG_ADAPTIVE_DS_FILTER
-#if DS_FRAME_LEVEL
-        const int filter_type = cm->features.ds_filter_type;
-#else
         const int filter_type = cm->seq_params.enable_cfl_ds_filter;
-#endif  // DS_FRAME_LEVEL
         if (filter_type == 1) {
           output_q3[j >> 1] = input[-1] + 2 * input[0] + input[1] +
                               input[bot - 1] + 2 * input[bot] + input[bot + 1];
         } else if (filter_type == 2) {
+#if CONFIG_CFL_IMPROVEMENTS
+          const int top = (j == 0) ? 0 : (0 - input_stride);
+          output_q3[j >> 1] =
+              input[-1] + 4 * input[0] + input[1] + input[top] + input[bot];
+#else
           output_q3[j >> 1] = input[0] * 8;
+#endif  // CONFIG_CFL_IMPROVEMENTS
         } else {
           output_q3[j >> 1] =
               (input[0] + input[1] + input[bot] + input[bot + 1]) << 1;
@@ -302,6 +325,25 @@
 #endif  // CONFIG_ADAPTIVE_DS_FILTER
         input += input_stride * 2;
       }
+#if CONFIG_ADPTIVE_DS_422
+    } else if (sub_x) {
+      uint16_t *input = dst - 2;
+      for (int j = 0; j < height; ++j) {
+#if CONFIG_ADAPTIVE_DS_FILTER
+        const int filter_type = cm->seq_params.enable_cfl_ds_filter;
+        if (filter_type == 1) {
+          output_q3[j] = (input[-1] + 2 * input[0] + input[1]) << 1;
+        } else if (filter_type == 2) {
+          output_q3[j] = input[0] << 3;
+        } else {
+          output_q3[j] = (input[0] + input[1]) << 2;
+        }
+#else
+        output_q3[j] = input[0] << 3;
+#endif  // CONFIG_ADAPTIVE_DS_FILTER
+        input += input_stride;
+      }
+#endif  // CONFIG_ADPTIVE_DS_422
     } else if (sub_y) {
       uint16_t *input = dst - 1;
       for (int j = 0; j < height; ++j) {
@@ -609,7 +651,14 @@
                                             int height) {
   for (int j = 0; j < height; j += 2) {
     for (int i = 0; i < width; i += 2) {
+#if CONFIG_CFL_IMPROVEMENTS
+      const int top = (j == 0) ? i : (i - input_stride);
+      const int bot = i + input_stride;
+      output_q3[i >> 1] = input[AOMMAX(0, i - 1)] + 4 * input[i] +
+                          input[i + 1] + input[top] + input[bot];
+#else
       output_q3[i >> 1] = input[i] * 8;
+#endif  // CONFIG_CFL_IMPROVEMENTS
     }
     input += input_stride << 1;
     output_q3 += CFL_BUF_LINE;
@@ -631,6 +680,45 @@
   }
 }
 
+#if CONFIG_ADPTIVE_DS_422
+#if CONFIG_ADAPTIVE_DS_FILTER
+void cfl_adaptive_luma_subsampling_422_hbd_c(const uint16_t *input,
+                                             int input_stride,
+                                             uint16_t *output_q3, int width,
+                                             int height, int filter_type) {
+  assert((height - 1) * CFL_BUF_LINE + width <= CFL_BUF_SQUARE);
+  for (int j = 0; j < height; j++) {  // rows kept, columns halved
+    for (int i = 0; i < width; i += 2) {
+      if (filter_type == 1) {  // [1 2 1] taps, left tap clamped at i == 0
+        output_q3[i >> 1] =
+            (input[AOMMAX(0, i - 1)] + 2 * input[i] + input[i + 1]) << 1;
+      } else if (filter_type == 2) {  // co-located sample, scaled by 8
+        output_q3[i >> 1] = (input[i]) << 3;
+      } else {  // sum of the horizontal pair, scaled by 4
+        output_q3[i >> 1] = (input[i] + input[i + 1]) << 2;
+      }
+    }
+    input += input_stride;
+    output_q3 += CFL_BUF_LINE;
+  }
+}
+#else
+void cfl_luma_subsampling_422_hbd_colocated(const uint16_t *input,
+                                            int input_stride,
+                                            uint16_t *output_q3, int width,
+                                            int height) {
+  assert((height - 1) * CFL_BUF_LINE + width <= CFL_BUF_SQUARE);
+  for (int j = 0; j < height; j++) {  // rows kept, columns halved
+    for (int i = 0; i < width; i += 2) {
+      output_q3[i >> 1] = (input[i]) << 3;  // co-located sample * 8
+    }
+    input += input_stride;
+    output_q3 += CFL_BUF_LINE;
+  }
+}
+#endif  // CONFIG_ADAPTIVE_DS_FILTER
+#endif  // CONFIG_ADPTIVE_DS_422
+
 static void cfl_luma_subsampling_444_hbd_c(const uint16_t *input,
                                            int input_stride,
                                            uint16_t *output_q3, int width,
@@ -706,7 +794,14 @@
   uint16_t *recon_buf_q3 =
       cfl->recon_buf_q3 + (store_row * CFL_BUF_LINE + store_col);
 #if CONFIG_ADAPTIVE_DS_FILTER
+#if CONFIG_ADPTIVE_DS_422
+  if (sub_x == 1 && sub_y == 0) {
+    cfl_adaptive_luma_subsampling_422_hbd_c(input, input_stride, recon_buf_q3,
+                                            width, height, filter_type);
+  } else if (filter_type == 1) {
+#else
   if (filter_type == 1) {
+#endif  // CONFIG_ADPTIVE_DS_422
     if (sub_x && sub_y)
       cfl_luma_subsampling_420_hbd_121_c(input, input_stride, recon_buf_q3,
                                          width, height);
@@ -729,6 +824,11 @@
   if (sub_x && sub_y)
     cfl_luma_subsampling_420_hbd_121_c(input, input_stride, recon_buf_q3, width,
                                        height);
+#if CONFIG_ADPTIVE_DS_422
+  else if (sub_x == 1 && sub_y == 0)
+    cfl_luma_subsampling_422_hbd_colocated(input, input_stride, recon_buf_q3,
+                                           width, height);
+#endif  // CONFIG_ADPTIVE_DS_422
   else
 #endif
     cfl_subsampling_hbd(tx_size, sub_x, sub_y)(input, input_stride,
diff --git a/av1/common/cfl.h b/av1/common/cfl.h
index 53cfc7c..2115a19 100644
--- a/av1/common/cfl.h
+++ b/av1/common/cfl.h
@@ -96,13 +96,27 @@
                                             int input_stride,
                                             uint16_t *output_q3, int width,
                                             int height);
+#if CONFIG_ADPTIVE_DS_422
+void cfl_adaptive_luma_subsampling_422_hbd_c(const uint16_t *input,
+                                             int input_stride,
+                                             uint16_t *output_q3, int width,
+                                             int height, int filter_type);
+#endif  // CONFIG_ADPTIVE_DS_422
 #endif  // CONFIG_ADAPTIVE_DS_FILTER
 
+#if CONFIG_ADPTIVE_DS_422 && !CONFIG_ADAPTIVE_DS_FILTER
+void cfl_luma_subsampling_422_hbd_colocated(const uint16_t *input,
+                                            int input_stride,
+                                            uint16_t *output_q3, int width,
+                                            int height);
+#endif  // CONFIG_ADPTIVE_DS_422 && !CONFIG_ADAPTIVE_DS_FILTER
+
 #if CONFIG_IMPROVED_CFL
 // 121 subsample filter
 void cfl_luma_subsampling_420_hbd_121_c(const uint16_t *input, int input_stride,
                                         uint16_t *output_q3, int width,
                                         int height);
+
 // Get neighbor luma reconstruction pixels
 void cfl_implicit_fetch_neighbor_luma(const AV1_COMMON *cm,
                                       MACROBLOCKD *const xd, int row, int col,
diff --git a/av1/common/common_data.h b/av1/common/common_data.h
index ada0d89..6b08776 100644
--- a/av1/common/common_data.h
+++ b/av1/common/common_data.h
@@ -68,7 +68,19 @@
 };
 
 static const uint8_t fsc_bsize_groups[BLOCK_SIZES_ALL] = {
-  0, 1, 1, 2, 3, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 5, 5, 5, 5
+#if CONFIG_ATC_DCTX_ALIGNED
+  0, 1, 1, 2, 3, 3, 4, 5, 5, 5, 6, 6, 6, 6, 6, 6,
+#if CONFIG_BLOCK_256
+  6, 6, 6,
+#endif  // CONFIG_BLOCK_256
+  3, 3, 4, 4, 6, 6
+#else
+  0, 1, 1, 2, 3, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+#if CONFIG_BLOCK_256
+  5, 5, 5,
+#endif  // CONFIG_BLOCK_256
+  3, 3, 5, 5, 5, 5
+#endif  // CONFIG_ATC_DCTX_ALIGNED
 };
 
 static const uint8_t num_pels_log2_lookup[BLOCK_SIZES_ALL] = {
@@ -76,11 +88,19 @@
   13, 13, 14, 15, 15, 16, 6, 6, 8, 8,  10, 10
 };
 
+#if CONFIG_CWP
+// Supported weighting factors for compound weighted prediction
+static const int8_t cwp_weighting_factor[2][MAX_CWP_NUM] = {
+  { 8, 12, 4, 10, 6 },
+  { 8, 12, 4, 20, -4 },
+};
+#endif  // CONFIG_CWP
+
 #if CONFIG_EXT_RECUR_PARTITIONS
 /* clang-format off */
 // This table covers all square blocks and 1:2/2:1 rectangular blocks
 static const BLOCK_SIZE
-    subsize_lookup[EXT_PARTITION_TYPES + 1][BLOCK_SIZES_ALL] = { {
+    subsize_lookup[ALL_PARTITION_TYPES][BLOCK_SIZES_ALL] = { {
     // PARTITION_NONE
     BLOCK_4X4,                                   // 4
     BLOCK_4X8,     BLOCK_8X4,     BLOCK_8X8,     // 8
@@ -140,6 +160,52 @@
     BLOCK_INVALID, BLOCK_4X4,                    // 4,16
     BLOCK_INVALID, BLOCK_8X8,                    // 8,32
     BLOCK_INVALID, BLOCK_16X16,                  // 32,64
+#if CONFIG_UNEVEN_4WAY
+  }, {  // PARTITION_HORZ_4A
+    BLOCK_INVALID,                               // 4
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, // 8
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, // 16
+    BLOCK_16X4,    BLOCK_INVALID, BLOCK_INVALID, // 32
+    BLOCK_32X8,    BLOCK_INVALID, BLOCK_INVALID, // 64
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, // 128
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, // 256
+    BLOCK_INVALID, BLOCK_INVALID,                // 4,16
+    BLOCK_INVALID, BLOCK_INVALID,                // 8,32
+    BLOCK_INVALID, BLOCK_INVALID,                // 32,64
+  }, {  // PARTITION_HORZ_4B
+    BLOCK_INVALID,                               // 4
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, // 8
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, // 16
+    BLOCK_16X4,    BLOCK_INVALID, BLOCK_INVALID, // 32
+    BLOCK_32X8,    BLOCK_INVALID, BLOCK_INVALID, // 64
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, // 128
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, // 256
+    BLOCK_INVALID, BLOCK_INVALID,                // 4,16
+    BLOCK_INVALID, BLOCK_INVALID,                // 8,32
+    BLOCK_INVALID, BLOCK_INVALID,                // 32,64
+  }, {  // PARTITION_VERT_4A
+    BLOCK_INVALID,                               // 4
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, // 8
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, // 16
+    BLOCK_INVALID, BLOCK_4X16,    BLOCK_INVALID, // 32
+    BLOCK_INVALID, BLOCK_8X32,    BLOCK_INVALID, // 64
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, // 128
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, // 256
+    BLOCK_INVALID, BLOCK_INVALID,                // 4,16
+    BLOCK_INVALID, BLOCK_INVALID,                // 8,32
+    BLOCK_INVALID, BLOCK_INVALID,                // 32,64
+  }, {  // PARTITION_VERT_4B
+    BLOCK_INVALID,                               // 4
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, // 8
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, // 16
+    BLOCK_INVALID, BLOCK_4X16, BLOCK_INVALID,    // 32
+    BLOCK_INVALID, BLOCK_8X32, BLOCK_INVALID,    // 64
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, // 128
+    BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, // 256
+    BLOCK_INVALID, BLOCK_INVALID,                // 4,16
+    BLOCK_INVALID, BLOCK_INVALID,                // 8,32
+    BLOCK_INVALID, BLOCK_INVALID,                // 32,64
+#endif  // CONFIG_UNEVEN_4WAY
   }, {
     // PARTITION_SPLIT
     BLOCK_INVALID,                               // 4
@@ -160,16 +226,52 @@
     BLOCK_SIZE bsize, PARTITION_TYPE luma_part, int ssx, int ssy) {
   const int bh_chr = block_size_high[bsize] >> ssy;
   const int bw_chr = block_size_wide[bsize] >> ssx;
+  assert(bh_chr >= 16 && bw_chr >= 16 &&
+         "Current implementation cannot handle SDP for sub 16x16 blocks!");
 
   switch (luma_part) {
     case PARTITION_NONE: return PARTITION_NONE;
     case PARTITION_HORZ: return (bh_chr < 8) ? PARTITION_NONE : PARTITION_HORZ;
+    case PARTITION_VERT: return (bw_chr < 8) ? PARTITION_NONE : PARTITION_VERT;
+#if CONFIG_UNEVEN_4WAY
+    case PARTITION_HORZ_4A:
+      if (bh_chr >= 32) {
+        return PARTITION_HORZ_4A;
+      } else if (bh_chr >= 8) {
+        return PARTITION_HORZ;
+      } else {
+        return PARTITION_NONE;
+      }
+    case PARTITION_HORZ_4B:
+      if (bh_chr >= 32) {
+        return PARTITION_HORZ_4B;
+      } else if (bh_chr >= 8) {
+        return PARTITION_HORZ;
+      } else {
+        return PARTITION_NONE;
+      }
+    case PARTITION_VERT_4A:
+      if (bw_chr >= 32) {
+        return PARTITION_VERT_4A;
+      } else if (bw_chr >= 8) {
+        return PARTITION_VERT;
+      } else {
+        return PARTITION_NONE;
+      }
+    case PARTITION_VERT_4B:
+      if (bw_chr >= 32) {
+        return PARTITION_VERT_4B;
+      } else if (bw_chr >= 8) {
+        return PARTITION_VERT;
+      } else {
+        return PARTITION_NONE;
+      }
+#endif  // CONFIG_UNEVEN_4WAY
     case PARTITION_HORZ_3:
       if (bh_chr >= 16)
         return PARTITION_HORZ_3;
       else
         return (bh_chr < 8) ? PARTITION_NONE : PARTITION_HORZ;
-    case PARTITION_VERT: return (bw_chr < 8) ? PARTITION_NONE : PARTITION_VERT;
     case PARTITION_VERT_3:
       if (bw_chr >= 16)
         return PARTITION_VERT_3;
@@ -632,17 +734,17 @@
   { 13, 3 },
 };
 
-#if CONFIG_ATC_NEWTXSETS
+#if CONFIG_ATC
 // Mapping of mode dependent TX  based on intra modes.
 static const int av1_md_class[INTRA_MODES] = {
   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
 };
 
 // Mapping between mode dependent TX size groups based on allowed TX sizes.
-static const int av1_size_class[MODE_DEPTX_TXSIZES] = {
-  0, 1, 2, 3, 3, 0, 0, 1, 1, 2, 2, 3, 3, 0, 0, 1, 1, 2, 2,
+static const int av1_size_class[TX_SIZES_ALL] = {
+  0, 1, 2, 3, 3, 0, 0, 1, 1, 3, 3, 3, 3, 0, 0, 3, 3, 3, 3,
 };
-#endif  // CONFIG_ATC_NEWTXSETS
+#endif  // CONFIG_ATC
 
 static AOM_INLINE bool is_bsize_geq(BLOCK_SIZE bsize1, BLOCK_SIZE bsize2) {
   if (bsize1 == BLOCK_INVALID || bsize2 == BLOCK_INVALID) {
diff --git a/av1/common/entropy.c b/av1/common/entropy.c
index 2016bc1..bf459eb 100644
--- a/av1/common/entropy.c
+++ b/av1/common/entropy.c
@@ -45,13 +45,13 @@
   av1_copy(cm->fc->v_dc_sign_cdf, av1_default_v_dc_sign_cdfs[index]);
   av1_copy(cm->fc->v_ac_sign_cdf, av1_default_v_ac_sign_cdfs[index]);
 #endif  // CONFIG_CONTEXT_DERIVATION
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
   av1_copy(cm->fc->coeff_base_lf_cdf,
            av1_default_coeff_base_lf_multi_cdfs[index]);
   av1_copy(cm->fc->coeff_base_lf_eob_cdf,
            av1_default_coeff_base_lf_eob_multi_cdfs[index]);
   av1_copy(cm->fc->coeff_br_lf_cdf, av1_default_coeff_lps_lf_multi_cdfs[index]);
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
   av1_copy(cm->fc->coeff_br_cdf, av1_default_coeff_lps_multi_cdfs[index]);
   av1_copy(cm->fc->coeff_base_cdf, av1_default_coeff_base_multi_cdfs[index]);
   av1_copy(cm->fc->idtx_sign_cdf, av1_default_idtx_sign_cdfs[index]);
@@ -72,6 +72,10 @@
   av1_copy(cm->fc->coeff_base_ph_cdf, av1_default_coeff_base_ph_cdfs[index]);
   av1_copy(cm->fc->coeff_br_ph_cdf, av1_default_coeff_br_ph_cdfs[index]);
 #endif  // CONFIG_PAR_HIDING
+#if CONFIG_ATC_DCTX_ALIGNED
+  av1_copy(cm->fc->coeff_base_bob_cdf,
+           av1_default_coeff_base_bob_multi_cdfs[index]);
+#endif  // CONFIG_ATC_DCTX_ALIGNED
 }
 
 static AOM_INLINE void reset_cdf_symbol_counter(aom_cdf_prob *cdf_ptr,
@@ -129,19 +133,22 @@
   RESET_CDF_COUNTER(fc->v_dc_sign_cdf, 2);
   RESET_CDF_COUNTER(fc->v_ac_sign_cdf, 2);
 #endif  // CONFIG_CONTEXT_DERIVATION
-  RESET_CDF_COUNTER(fc->eob_flag_cdf16, 5);
-  RESET_CDF_COUNTER(fc->eob_flag_cdf32, 6);
-  RESET_CDF_COUNTER(fc->eob_flag_cdf64, 7);
-  RESET_CDF_COUNTER(fc->eob_flag_cdf128, 8);
-  RESET_CDF_COUNTER(fc->eob_flag_cdf256, 9);
-  RESET_CDF_COUNTER(fc->eob_flag_cdf512, 10);
-  RESET_CDF_COUNTER(fc->eob_flag_cdf1024, 11);
+  RESET_CDF_COUNTER(fc->eob_flag_cdf16, EOB_MAX_SYMS - 6);
+  RESET_CDF_COUNTER(fc->eob_flag_cdf32, EOB_MAX_SYMS - 5);
+  RESET_CDF_COUNTER(fc->eob_flag_cdf64, EOB_MAX_SYMS - 4);
+  RESET_CDF_COUNTER(fc->eob_flag_cdf128, EOB_MAX_SYMS - 3);
+  RESET_CDF_COUNTER(fc->eob_flag_cdf256, EOB_MAX_SYMS - 2);
+  RESET_CDF_COUNTER(fc->eob_flag_cdf512, EOB_MAX_SYMS - 1);
+  RESET_CDF_COUNTER(fc->eob_flag_cdf1024, EOB_MAX_SYMS);
   RESET_CDF_COUNTER(fc->coeff_base_eob_cdf, 3);
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC_DCTX_ALIGNED
+  RESET_CDF_COUNTER(fc->coeff_base_bob_cdf, 3);
+#endif  // CONFIG_ATC_DCTX_ALIGNED
+#if CONFIG_ATC
   RESET_CDF_COUNTER(fc->coeff_base_lf_cdf, LF_BASE_SYMBOLS);
   RESET_CDF_COUNTER(fc->coeff_base_lf_eob_cdf, LF_BASE_SYMBOLS - 1);
   RESET_CDF_COUNTER(fc->coeff_br_lf_cdf, BR_CDF_SIZE);
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
   RESET_CDF_COUNTER(fc->coeff_base_cdf, 4);
   RESET_CDF_COUNTER(fc->idtx_sign_cdf, 2);
   RESET_CDF_COUNTER(fc->coeff_base_cdf_idtx, 4);
@@ -154,15 +161,19 @@
   RESET_CDF_COUNTER(fc->drl_cdf[0], 2);
   RESET_CDF_COUNTER(fc->drl_cdf[1], 2);
   RESET_CDF_COUNTER(fc->drl_cdf[2], 2);
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
   RESET_CDF_COUNTER(fc->skip_drl_cdf, 2);
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
 #if CONFIG_OPTFLOW_REFINEMENT
   RESET_CDF_COUNTER(fc->use_optflow_cdf, 2);
   RESET_CDF_COUNTER(fc->inter_compound_mode_cdf, INTER_COMPOUND_REF_TYPES);
 #else
   RESET_CDF_COUNTER(fc->inter_compound_mode_cdf, INTER_COMPOUND_MODES);
 #endif  // CONFIG_OPTFLOW_REFINEMENT
+
+#if CONFIG_CWP
+  RESET_CDF_COUNTER(fc->cwp_idx_cdf, 2);
+#endif  // CONFIG_CWP
 #if CONFIG_IMPROVED_JMVD
   RESET_CDF_COUNTER(fc->jmvd_scale_mode_cdf, JOINT_NEWMV_SCALE_FACTOR_CNT);
   RESET_CDF_COUNTER(fc->jmvd_amvd_scale_mode_cdf, JOINT_AMVD_SCALE_FACTOR_CNT);
@@ -180,6 +191,11 @@
   RESET_CDF_COUNTER(fc->interintra_cdf, 2);
   RESET_CDF_COUNTER(fc->wedge_interintra_cdf, 2);
   RESET_CDF_COUNTER(fc->interintra_mode_cdf, INTERINTRA_MODES);
+
+#if CONFIG_REFINEMV
+  RESET_CDF_COUNTER(fc->refinemv_flag_cdf, REFINEMV_NUM_MODES);
+#endif  // CONFIG_REFINEMV
+
 #if CONFIG_EXTENDED_WARP_PREDICTION
   RESET_CDF_COUNTER(fc->obmc_cdf, 2);
   RESET_CDF_COUNTER(fc->warped_causal_cdf, 2);
@@ -191,6 +207,9 @@
   RESET_CDF_COUNTER(fc->warp_ref_idx_cdf[0], 2);
   RESET_CDF_COUNTER(fc->warp_ref_idx_cdf[1], 2);
   RESET_CDF_COUNTER(fc->warp_ref_idx_cdf[2], 2);
+#if CONFIG_CWG_D067_IMPROVED_WARP
+  RESET_CDF_COUNTER(fc->warpmv_with_mvd_flag_cdf, 2);
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
 #endif  // CONFIG_WARP_REF_LIST
   RESET_CDF_COUNTER(fc->warp_delta_param_cdf, WARP_DELTA_NUM_SYMBOLS);
   RESET_CDF_COUNTER(fc->warp_extend_cdf, 2);
@@ -206,10 +225,10 @@
 #endif  // CONFIG_TIP
   RESET_CDF_COUNTER(fc->palette_y_size_cdf, PALETTE_SIZES);
   RESET_CDF_COUNTER(fc->palette_uv_size_cdf, PALETTE_SIZES);
-#if CONFIG_NEW_COLOR_MAP_CODING
+#if CONFIG_PALETTE_IMPROVEMENTS
   RESET_CDF_COUNTER(fc->identity_row_cdf_y, 2);
   RESET_CDF_COUNTER(fc->identity_row_cdf_uv, 2);
-#endif  // CONFIG_NEW_COLOR_MAP_CODING
+#endif  // CONFIG_PALETTE_IMPROVEMENTS
   for (int j = 0; j < PALETTE_SIZES; j++) {
     int nsymbs = j + PALETTE_MIN_SIZE;
     RESET_CDF_COUNTER_STRIDE(fc->palette_y_color_index_cdf[j], nsymbs,
@@ -234,20 +253,20 @@
 #endif  // CONFIG_NEW_TX_PARTITION
   RESET_CDF_COUNTER(fc->comp_group_idx_cdf, 2);
   RESET_CDF_COUNTER(fc->skip_mode_cdfs, 2);
-#if CONFIG_CONTEXT_DERIVATION
+#if CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT
   RESET_CDF_COUNTER(fc->intra_inter_cdf[0], 2);
   RESET_CDF_COUNTER(fc->intra_inter_cdf[1], 2);
 #else
   RESET_CDF_COUNTER(fc->intra_inter_cdf, 2);
-#endif  // CONFIG_CONTEXT_DERIVATION
+#endif  // CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT
   RESET_CDF_COUNTER(fc->skip_txfm_cdfs, 2);
   reset_nmv_counter(&fc->nmvc);
   reset_nmv_counter(&fc->ndvc);
   RESET_CDF_COUNTER(fc->intrabc_cdf, 2);
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
   RESET_CDF_COUNTER(fc->intrabc_mode_cdf, 2);
   RESET_CDF_COUNTER(fc->intrabc_drl_idx_cdf, 2);
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
   RESET_CDF_COUNTER(fc->seg.tree_cdf, MAX_SEGMENTS);
   RESET_CDF_COUNTER(fc->seg.pred_cdf, 2);
   RESET_CDF_COUNTER(fc->seg.spatial_pred_seg_cdf, MAX_SEGMENTS);
@@ -294,6 +313,7 @@
   RESET_CDF_COUNTER_STRIDE(fc->uv_mode_cdf[0], UV_INTRA_MODES - 1,
                            CDF_SIZE(UV_INTRA_MODES));
   RESET_CDF_COUNTER(fc->uv_mode_cdf[1], UV_INTRA_MODES);
+
 #if CONFIG_EXT_RECUR_PARTITIONS
   for (int plane_index = 0; plane_index < PARTITION_STRUCTURE_NUM;
        plane_index++) {
@@ -318,6 +338,13 @@
     for (RECT_PART_TYPE rect = 0; rect < NUM_RECT_PARTS; rect++) {
       for (int i = 0; i < PARTITION_CONTEXTS; i++) {
         RESET_CDF_COUNTER(fc->do_ext_partition_cdf[plane_index][rect][i], 2);
+#if CONFIG_UNEVEN_4WAY
+        RESET_CDF_COUNTER(
+            fc->do_uneven_4way_partition_cdf[plane_index][rect][i], 2);
+        RESET_CDF_COUNTER(
+            fc->uneven_4way_partition_type_cdf[plane_index][rect][i],
+            NUM_UNEVEN_4WAY_PARTS);
+#endif  // CONFIG_UNEVEN_4WAY
       }
     }
   }
@@ -361,10 +388,10 @@
   }
   RESET_CDF_COUNTER_STRIDE(fc->intra_ext_tx_cdf[1], INTRA_TX_SET1,
                            CDF_SIZE(TX_TYPES));
-#if !(CONFIG_ATC_NEWTXSETS && !CONFIG_ATC_REDUCED_TXSET)
+#if !(CONFIG_ATC && !CONFIG_ATC_REDUCED_TXSET)
   RESET_CDF_COUNTER_STRIDE(fc->intra_ext_tx_cdf[2], INTRA_TX_SET2,
                            CDF_SIZE(TX_TYPES));
-#endif  // !(CONFIG_ATC_NEWTXSETS && !CONFIG_ATC_REDUCED_TXSET)
+#endif  // !(CONFIG_ATC && !CONFIG_ATC_REDUCED_TXSET)
   RESET_CDF_COUNTER_STRIDE(fc->inter_ext_tx_cdf[1], 16, CDF_SIZE(TX_TYPES));
   RESET_CDF_COUNTER_STRIDE(fc->inter_ext_tx_cdf[2], 12, CDF_SIZE(TX_TYPES));
   RESET_CDF_COUNTER_STRIDE(fc->inter_ext_tx_cdf[3], 2, CDF_SIZE(TX_TYPES));
diff --git a/av1/common/entropy.h b/av1/common/entropy.h
index e6319aa..8efd218 100644
--- a/av1/common/entropy.h
+++ b/av1/common/entropy.h
@@ -41,13 +41,18 @@
 #define IDTX_LEVEL_CONTEXTS 14
 
 #define EOB_COEF_CONTEXTS 9
+#if CONFIG_ATC_DCTX_ALIGNED
+#define SIG_COEF_CONTEXTS_BOB 3
+#endif  // CONFIG_ATC_DCTX_ALIGNED
+
+#define EOB_MAX_SYMS 11
 
 #if CONFIG_PAR_HIDING
 #define COEFF_BASE_PH_CONTEXTS 5
 #define COEFF_BR_PH_CONTEXTS 7
 #endif  // CONFIG_PAR_HIDING
 
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
 // Number of coefficient coding contexts for the low-frequency region
 // for 2D and 1D transforms
 #define LF_SIG_COEF_CONTEXTS_2D 21
@@ -79,7 +84,7 @@
 #define SIG_COEF_CONTEXTS_1D 16
 #define SIG_COEF_CONTEXTS_EOB 4
 #define SIG_COEF_CONTEXTS (SIG_COEF_CONTEXTS_2D + SIG_COEF_CONTEXTS_1D)
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 
 #define COEFF_BASE_CONTEXTS (SIG_COEF_CONTEXTS)
 #define DC_SIGN_CONTEXTS 3
@@ -132,23 +137,28 @@
   switch (size) {
     case 4: return ctx[0] != 0;
     case 8:
-#if CONFIG_H_PARTITION
+#if CONFIG_EXT_RECUR_PARTITIONS
       return ctx[0] != 0 || ctx[1] != 0;
 #else
       return !!*(const uint16_t *)ctx;
-#endif  // CONFIG_H_PARTITION
+#endif  // CONFIG_EXT_RECUR_PARTITIONS
     case 16:
-#if CONFIG_H_PARTITION
+#if CONFIG_UNEVEN_4WAY
+      return ctx[0] != 0 || ctx[1] != 0 || ctx[2] != 0 || ctx[3] != 0;
+#elif CONFIG_EXT_RECUR_PARTITIONS
       return !!(*(const uint16_t *)ctx | *(const uint16_t *)(ctx + 2));
 #else
       return !!*(const uint32_t *)ctx;
-#endif  // CONFIG_H_PARTITION
+#endif  // CONFIG_UNEVEN_4WAY
     case 32:
-#if CONFIG_H_PARTITION
+#if CONFIG_UNEVEN_4WAY
+      return !!(*(const uint16_t *)ctx | *(const uint16_t *)(ctx + 2) |
+                *(const uint16_t *)(ctx + 4) | *(const uint16_t *)(ctx + 6));
+#elif CONFIG_EXT_RECUR_PARTITIONS
       return !!(*(const uint32_t *)ctx | *(const uint32_t *)(ctx + 4));
 #else
-      return !*(const uint64_t *)ctx;
-#endif  // CONFIG_H_PARTITION
+      return !!*(const uint64_t *)ctx;
+#endif  // CONFIG_UNEVEN_4WAY
     case 64: return !!(*(const uint64_t *)ctx | *(const uint64_t *)(ctx + 8));
     default: assert(0 && "Invalid transform 1d size."); break;
   }
diff --git a/av1/common/entropymode.c b/av1/common/entropymode.c
index abfc9de..693ed11 100644
--- a/av1/common/entropymode.c
+++ b/av1/common/entropymode.c
@@ -77,11 +77,48 @@
     };
 #endif
 
+#if CONFIG_EXT_DIR
+static const aom_cdf_prob
+    default_mrl_index_cdf[MRL_INDEX_CONTEXTS][CDF_SIZE(MRL_LINE_NUMBER)] = {
+      { AOM_CDF4(27852, 29491, 31129) },
+      { AOM_CDF4(23920, 27852, 30474) },
+      { AOM_CDF4(20316, 26542, 29818) },
+    };
+#else
 static const aom_cdf_prob default_mrl_index_cdf[CDF_SIZE(MRL_LINE_NUMBER)] = {
   AOM_CDF4(24756, 29049, 31092)
 };
+#endif  // CONFIG_EXT_DIR
 
 #if CONFIG_NEW_CONTEXT_MODELING
+#if CONFIG_ATC_DCTX_ALIGNED
+static const aom_cdf_prob
+    default_fsc_mode_cdf[FSC_MODE_CONTEXTS][FSC_BSIZE_CONTEXTS]
+                        [CDF_SIZE(FSC_MODES)] = { { { AOM_CDF2(29360) },
+                                                    { AOM_CDF2(31501) },
+                                                    { AOM_CDF2(32278) },
+                                                    { AOM_CDF2(32371) },
+                                                    { AOM_CDF2(32560) },
+                                                    { AOM_CDF2(32531) } },
+                                                  { { AOM_CDF2(24973) },
+                                                    { AOM_CDF2(24385) },
+                                                    { AOM_CDF2(24145) },
+                                                    { AOM_CDF2(26258) },
+                                                    { AOM_CDF2(21038) },
+                                                    { AOM_CDF2(15313) } },
+                                                  { { AOM_CDF2(20868) },
+                                                    { AOM_CDF2(16117) },
+                                                    { AOM_CDF2(12254) },
+                                                    { AOM_CDF2(14424) },
+                                                    { AOM_CDF2(5350) },
+                                                    { AOM_CDF2(2348) } },
+                                                  { { AOM_CDF2(31265) },
+                                                    { AOM_CDF2(31284) },
+                                                    { AOM_CDF2(32247) },
+                                                    { AOM_CDF2(32253) },
+                                                    { AOM_CDF2(32560) },
+                                                    { AOM_CDF2(32533) } } };
+#else
 static const aom_cdf_prob
     default_fsc_mode_cdf[FSC_MODE_CONTEXTS][FSC_BSIZE_CONTEXTS]
                         [CDF_SIZE(FSC_MODES)] = { { { AOM_CDF2(29802) },
@@ -104,6 +141,7 @@
                                                     { AOM_CDF2(32027) },
                                                     { AOM_CDF2(32272) },
                                                     { AOM_CDF2(32317) } } };
+#endif  // CONFIG_ATC_DCTX_ALIGNED
 #else
 static const aom_cdf_prob
     default_fsc_mode_cdf[FSC_MODE_CONTEXTS][FSC_BSIZE_CONTEXTS]
@@ -349,7 +387,6 @@
       }
     };
 
-
 static aom_cdf_prob
     default_rect_type_cdf[PARTITION_STRUCTURE_NUM][PARTITION_CONTEXTS][CDF_SIZE(2)] = {
       // Luma
@@ -440,6 +477,9 @@
       }
     };
 
+// Note: Most entries in the partition CDFs below are unused. An optimized
+// implementation could use smaller arrays holding only the used values, plus
+// some mapping tables.
 static aom_cdf_prob default_do_ext_partition_cdf
     [PARTITION_STRUCTURE_NUM][NUM_RECT_PARTS][PARTITION_CONTEXTS]
     [CDF_SIZE(2)] = {
@@ -623,7 +663,6 @@
       }
     };
 
-
 static aom_cdf_prob
     default_do_square_split_cdf[PARTITION_STRUCTURE_NUM][SQUARE_SPLIT_CONTEXTS][CDF_SIZE(2)] = {
       // Luma
@@ -642,6 +681,373 @@
       },
     };
 
+#if CONFIG_UNEVEN_4WAY
+static aom_cdf_prob default_do_uneven_4way_partition_cdf
+    [PARTITION_STRUCTURE_NUM][NUM_RECT_PARTS][PARTITION_CONTEXTS]
+    [CDF_SIZE(2)] = {
+      // Luma
+      {
+        // HORZ
+        {
+          // BLOCK_4X4, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_4X8, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_8X4, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_8X8, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_8X16, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_16X8, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_16X16, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_16X32,
+          { AOM_CDF2(23888) }, { AOM_CDF2(26675) }, { AOM_CDF2(18213) }, { AOM_CDF2(21839) },
+          // BLOCK_32X16, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_32X32, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_32X64,
+          { AOM_CDF2(18156) }, { AOM_CDF2(22434) }, { AOM_CDF2(17065) }, { AOM_CDF2(23048) },
+          // BLOCK_64X32, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_64X64, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_64X128, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_128X64, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_128X128, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+#if CONFIG_BLOCK_256
+          // BLOCK_128X256, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_256X128, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_256X256, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+#endif  // CONFIG_BLOCK_256
+        },
+        // VERT
+        {
+          // BLOCK_4X4, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_4X8, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_8X4, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_8X8, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_8X16, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_16X8, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_16X16, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_16X32, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_32X16,
+          { AOM_CDF2(18858) }, { AOM_CDF2(14975) }, { AOM_CDF2(21057) }, { AOM_CDF2(19369) },
+          // BLOCK_32X32, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_32X64, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_64X32,
+          { AOM_CDF2(12384) }, { AOM_CDF2(11622) }, { AOM_CDF2(17504) }, { AOM_CDF2(17608) },
+          // BLOCK_64X64, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_64X128, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_128X64, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_128X128, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+#if CONFIG_BLOCK_256
+          // BLOCK_128X256, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_256X128, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_256X256, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+#endif  // CONFIG_BLOCK_256
+        }
+      },
+      // Chroma
+      {
+        // HORZ
+        {
+          // BLOCK_4X4, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_4X8, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_8X4, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_8X8, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_8X16, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_16X8, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_16X16, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_16X32, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_32X16, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_32X32, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_32X64,
+          { AOM_CDF2(16705) }, { AOM_CDF2(20904) }, { AOM_CDF2(18601) }, { AOM_CDF2(22088) },
+          // BLOCK_64X32, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_64X64, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_64X128, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_128X64, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_128X128, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+#if CONFIG_BLOCK_256
+          // BLOCK_128X256, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_256X128, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_256X256, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+#endif  // CONFIG_BLOCK_256
+        },
+        // VERT
+        {
+          // BLOCK_4X4, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_4X8, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_8X4, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_8X8, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_8X16, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_16X8, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_16X16, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_16X32, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_32X16, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_32X32, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_32X64, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_64X32,
+          { AOM_CDF2(15452) }, { AOM_CDF2(15654) }, { AOM_CDF2(20986) }, { AOM_CDF2(20924) },
+          // BLOCK_64X64, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_64X128, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_128X64, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_128X128, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+#if CONFIG_BLOCK_256
+          // BLOCK_128X256, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_256X128, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_256X256, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+#endif  // CONFIG_BLOCK_256
+        }
+      },
+    };
+
+static aom_cdf_prob default_uneven_4way_partition_type_cdf
+    [PARTITION_STRUCTURE_NUM][NUM_RECT_PARTS][PARTITION_CONTEXTS]
+    [CDF_SIZE(NUM_UNEVEN_4WAY_PARTS)] = {
+      // Luma
+      {
+        // HORZ
+        {
+          // BLOCK_4X4, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_4X8, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_8X4, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_8X8, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_8X16, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_16X8, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_16X16, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_16X32,
+          { AOM_CDF2(20372) }, { AOM_CDF2(19885) }, { AOM_CDF2(20532) }, { AOM_CDF2(18382) },
+          // BLOCK_32X16, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_32X32, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_32X64,
+          { AOM_CDF2(20219) }, { AOM_CDF2(19289) }, { AOM_CDF2(18815) }, { AOM_CDF2(21548) },
+          // BLOCK_64X32, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_64X64, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_64X128, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_128X64, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_128X128, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+#if CONFIG_BLOCK_256
+          // BLOCK_128X256, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_256X128, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_256X256, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+#endif  // CONFIG_BLOCK_256
+        },
+        // VERT
+        {
+          // BLOCK_4X4, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_4X8, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_8X4, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_8X8, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_8X16, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_16X8, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_16X16, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_16X32, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_32X16,
+          { AOM_CDF2(18025) }, { AOM_CDF2(18978) }, { AOM_CDF2(18146) }, { AOM_CDF2(20127) },
+          // BLOCK_32X32, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_32X64, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_64X32,
+          { AOM_CDF2(17700) }, { AOM_CDF2(17721) }, { AOM_CDF2(18585) }, { AOM_CDF2(17912) },
+          // BLOCK_64X64, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_64X128, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_128X64, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_128X128, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+#if CONFIG_BLOCK_256
+          // BLOCK_128X256, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_256X128, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_256X256, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+#endif  // CONFIG_BLOCK_256
+        }
+      },
+      // Chroma
+      {
+        // HORZ
+        {
+          // BLOCK_4X4, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_4X8, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_8X4, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_8X8, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_8X16, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_16X8, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_16X16, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_16X32, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_32X16, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_32X32, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_32X64,
+          { AOM_CDF2(17990) }, { AOM_CDF2(23831) }, { AOM_CDF2(17318) }, { AOM_CDF2(18155) },
+          // BLOCK_64X32, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_64X64, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_64X128, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_128X64, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_128X128, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+#if CONFIG_BLOCK_256
+          // BLOCK_128X256, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_256X128, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_256X256, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+#endif  // CONFIG_BLOCK_256
+        },
+        // VERT
+        {
+          // BLOCK_4X4, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_4X8, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_8X4, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_8X8, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_8X16, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_16X8, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_16X16, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_16X32, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_32X16, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_32X32, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_32X64, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_64X32,
+          { AOM_CDF2(15888) }, { AOM_CDF2(18079) }, { AOM_CDF2(21845) }, { AOM_CDF2(18507) },
+          // BLOCK_64X64, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_64X128, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_128X64, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_128X128, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+#if CONFIG_BLOCK_256
+          // BLOCK_128X256, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_256X128, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+          // BLOCK_256X256, unused
+          { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+#endif  // CONFIG_BLOCK_256
+        }
+      },
+    };
+#endif  // CONFIG_UNEVEN_4WAY
 // clang-format on
 #else
 static const aom_cdf_prob
@@ -782,7 +1188,7 @@
               { 0 },
           },
       },
-#if CONFIG_ATC_NEWTXSETS
+#if CONFIG_ATC
       {
           {
               { AOM_CDF7(3368, 14670, 18533, 22660, 26441, 30407) },
@@ -1034,9 +1440,121 @@
               { AOM_CDF4(8192, 16384, 24576) },
           },
       }
-#endif  // CONFIG_ATC_NEWTXSETS
+#endif  // CONFIG_ATC
     };
 
+#if CONFIG_ATC_DCTX_ALIGNED
+static const aom_cdf_prob default_inter_ext_tx_cdf
+    [EXT_TX_SETS_INTER][EOB_TX_CTXS][EXT_TX_SIZES][CDF_SIZE(TX_TYPES)] = {
+      {
+          {
+              { 0 },
+              { 0 },
+              { 0 },
+              { 0 },
+          },
+          {
+              { 0 },
+              { 0 },
+              { 0 },
+              { 0 },
+          },
+          {
+              { 0 },
+              { 0 },
+              { 0 },
+              { 0 },
+          },
+      },
+      {
+          {
+              { AOM_CDF16(10569, 11484, 12610, 14058, 15880, 17184, 18929,
+                          19803, 20702, 21995, 22642, 23795, 26269, 28128,
+                          30321) },
+              { AOM_CDF16(2184, 3028, 4033, 5127, 6410, 7400, 8605, 13222,
+                          15760, 18377, 20510, 22737, 25720, 27841, 30221) },
+              { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
+                          18432, 20480, 22528, 24576, 26624, 28672, 30720) },
+              { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
+                          18432, 20480, 22528, 24576, 26624, 28672, 30720) },
+          },
+          {
+              { AOM_CDF16(3919, 4527, 5261, 6289, 7251, 8118, 9179, 12234,
+                          12471, 12730, 12785, 13079, 18477, 21441, 26844) },
+              { AOM_CDF16(307, 498, 725, 1194, 1577, 1962, 2378, 26001, 26439,
+                          26880, 27109, 27393, 29418, 30271, 31374) },
+              { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
+                          18432, 20480, 22528, 24576, 26624, 28672, 30720) },
+              { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
+                          18432, 20480, 22528, 24576, 26624, 28672, 30720) },
+          },
+          {
+              { AOM_CDF16(18553, 19114, 19866, 21300, 23396, 24613, 26561,
+                          26686, 26933, 27441, 27579, 27906, 29437, 30176,
+                          31237) },
+              { AOM_CDF16(14114, 15409, 17116, 18125, 19579, 20544, 21927,
+                          24115, 25337, 26585, 27781, 28994, 29938, 30846,
+                          31760) },
+              { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
+                          18432, 20480, 22528, 24576, 26624, 28672, 30720) },
+              { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
+                          18432, 20480, 22528, 24576, 26624, 28672, 30720) },
+          },
+      },
+      {
+          {
+              { AOM_CDF12(2731, 5461, 8192, 10923, 13653, 16384, 19115, 21845,
+                          24576, 27307, 30037) },
+              { AOM_CDF12(2731, 5461, 8192, 10923, 13653, 16384, 19115, 21845,
+                          24576, 27307, 30037) },
+              { AOM_CDF12(847, 1837, 2897, 8379, 12029, 15839, 18755, 21734,
+                          25244, 27430, 30001) },
+              { AOM_CDF12(2731, 5461, 8192, 10923, 13653, 16384, 19115, 21845,
+                          24576, 27307, 30037) },
+          },
+          {
+              { AOM_CDF12(2731, 5461, 8192, 10923, 13653, 16384, 19115, 21845,
+                          24576, 27307, 30037) },
+              { AOM_CDF12(2731, 5461, 8192, 10923, 13653, 16384, 19115, 21845,
+                          24576, 27307, 30037) },
+              { AOM_CDF12(56, 370, 765, 27899, 28744, 29465, 30060, 30562,
+                          31471, 31806, 32229) },
+              { AOM_CDF12(2731, 5461, 8192, 10923, 13653, 16384, 19115, 21845,
+                          24576, 27307, 30037) },
+          },
+          {
+              { AOM_CDF12(2731, 5461, 8192, 10923, 13653, 16384, 19115, 21845,
+                          24576, 27307, 30037) },
+              { AOM_CDF12(2731, 5461, 8192, 10923, 13653, 16384, 19115, 21845,
+                          24576, 27307, 30037) },
+              { AOM_CDF12(25781, 26621, 27994, 28993, 29530, 30097, 30597,
+                          31182, 31622, 32019, 32396) },
+              { AOM_CDF12(2731, 5461, 8192, 10923, 13653, 16384, 19115, 21845,
+                          24576, 27307, 30037) },
+          },
+      },
+      {
+          {
+              { AOM_CDF2(16384) },
+              { AOM_CDF2(2100) },
+              { AOM_CDF2(1066) },
+              { AOM_CDF2(938) },
+          },
+          {
+              { AOM_CDF2(16384) },
+              { AOM_CDF2(37) },
+              { AOM_CDF2(15) },
+              { AOM_CDF2(12) },
+          },
+          {
+              { AOM_CDF2(16384) },
+              { AOM_CDF2(29478) },
+              { AOM_CDF2(29184) },
+              { AOM_CDF2(27781) },
+          },
+      },
+    };
+#else
 static const aom_cdf_prob
     default_inter_ext_tx_cdf[EXT_TX_SETS_INTER][EXT_TX_SIZES][CDF_SIZE(
         TX_TYPES)] = {
@@ -1073,6 +1591,7 @@
           { AOM_CDF2(748) },
       },
     };
+#endif  // CONFIG_ATC_DCTX_ALIGNED
 
 #if CONFIG_CROSS_CHROMA_TX
 static const aom_cdf_prob
@@ -1095,7 +1614,17 @@
 static const aom_cdf_prob default_cfl_sign_cdf[CDF_SIZE(CFL_JOINT_SIGNS)] = {
   AOM_CDF8(1418, 2123, 13340, 18405, 26972, 28343, 32294)
 };
-
+#if CONFIG_CFL_IMPROVEMENTS
+static const aom_cdf_prob
+    default_cfl_alpha_cdf[CFL_ALPHA_CONTEXTS][CDF_SIZE(CFL_ALPHABET_SIZE)] = {
+      { AOM_CDF8(7650, 20740, 31430, 32520, 32700, 32730, 32740) },
+      { AOM_CDF8(14400, 23680, 28230, 31270, 32290, 32530, 32640) },
+      { AOM_CDF8(11560, 22430, 28510, 31430, 32430, 32610, 32680) },
+      { AOM_CDF8(27000, 31430, 32310, 32610, 32730, 32740, 32750) },
+      { AOM_CDF8(17320, 26210, 29100, 30820, 31550, 32150, 32430) },
+      { AOM_CDF8(14990, 22180, 26430, 28600, 29820, 31200, 31980) }
+    };
+#else
 static const aom_cdf_prob
     default_cfl_alpha_cdf[CFL_ALPHA_CONTEXTS][CDF_SIZE(CFL_ALPHABET_SIZE)] = {
       { AOM_CDF16(7637, 20719, 31401, 32481, 32657, 32688, 32692, 32696, 32700,
@@ -1111,7 +1640,7 @@
       { AOM_CDF16(14738, 21678, 25779, 27901, 29024, 30302, 30980, 31843, 32144,
                   32413, 32520, 32594, 32622, 32656, 32660) }
     };
-
+#endif  // CONFIG_CFL_IMPROVEMENTS
 static const aom_cdf_prob
     default_switchable_interp_cdf[SWITCHABLE_FILTER_CONTEXTS][CDF_SIZE(
         SWITCHABLE_FILTERS)] = {
@@ -1281,6 +1810,21 @@
   { AOM_CDF2(16618) }, { AOM_CDF2(14980) }, { AOM_CDF2(15963) }
 };
 #endif  // CONFIG_C076_INTER_MOD_CTX
+
+#if CONFIG_CWP
+static const aom_cdf_prob default_cwp_idx_cdf[MAX_CWP_CONTEXTS][MAX_CWP_NUM - 1]
+                                             [CDF_SIZE(2)] = {
+                                               { { AOM_CDF2(16384) },
+                                                 { AOM_CDF2(16384) },
+                                                 { AOM_CDF2(16384) },
+                                                 { AOM_CDF2(16384) } },
+                                               { { AOM_CDF2(16384) },
+                                                 { AOM_CDF2(16384) },
+                                                 { AOM_CDF2(16384) },
+                                                 { AOM_CDF2(16384) } },
+                                             };
+#endif  // CONFIG_CWP
+
 #if CONFIG_IMPROVED_JMVD
 static const aom_cdf_prob
     default_jmvd_scale_mode_cdf[CDF_SIZE(JOINT_NEWMV_SCALE_FACTOR_CNT)] = {
@@ -1292,13 +1836,13 @@
     };
 #endif  // CONFIG_IMPROVED_JMVD
 
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
 static const aom_cdf_prob default_skip_drl_cdf[3][CDF_SIZE(2)] = {
   { AOM_CDF2(24394) },
   { AOM_CDF2(22637) },
   { AOM_CDF2(21474) },
 };
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
 
 #if CONFIG_C076_INTER_MOD_CTX
 #if CONFIG_OPTFLOW_REFINEMENT
@@ -1634,6 +2178,20 @@
     };
 #endif  // CONFIG_WARPMV
 
+#if CONFIG_REFINEMV
+static const aom_cdf_prob default_refinemv_flag_cdf[NUM_REFINEMV_CTX][CDF_SIZE(
+    REFINEMV_NUM_MODES)] = {
+  { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+  { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+  { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+  { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+  { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+  { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+  { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+  { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }
+};
+#endif  // CONFIG_REFINEMV
+
 static const aom_cdf_prob default_warp_delta_cdf[BLOCK_SIZES_ALL][CDF_SIZE(
     2)] = { { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
             { AOM_CDF2(4015) },  { AOM_CDF2(5407) },  { AOM_CDF2(4988) },
@@ -1652,6 +2210,19 @@
     2)] = { { AOM_CDF2(15903) } };
 static const aom_cdf_prob default_warp_ref_idx2_cdf[WARP_REF_CONTEXTS][CDF_SIZE(
     2)] = { { AOM_CDF2(18242) } };
+#if CONFIG_CWG_D067_IMPROVED_WARP
+static const aom_cdf_prob
+    default_warpmv_with_mvd_flag_cdf[BLOCK_SIZES_ALL][CDF_SIZE(2)] = {
+      { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+      { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+      { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+      { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+      { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+      { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+      { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+      { AOM_CDF2(16384) }
+    };
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
 #endif  // CONFIG_WARP_REF_LIST
 static const aom_cdf_prob
     default_warp_delta_param_cdf[2][CDF_SIZE(WARP_DELTA_NUM_SYMBOLS)] = {
@@ -1721,7 +2292,7 @@
 static const aom_cdf_prob default_bawp_cdf[CDF_SIZE(2)] = { AOM_CDF2(23664) };
 #endif  // CONFIG_BAWP
 
-#if CONFIG_CONTEXT_DERIVATION
+#if CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT
 #if CONFIG_NEW_CONTEXT_MODELING
 static const aom_cdf_prob default_intra_inter_cdf
     [INTRA_INTER_SKIP_TXFM_CONTEXTS][INTRA_INTER_CONTEXTS][CDF_SIZE(2)] = {
@@ -1753,7 +2324,7 @@
                                                    { AOM_CDF2(20186) },
                                                    { AOM_CDF2(26538) }
                                                  };
-#endif  // CONFIG_CONTEXT_DERIVATION
+#endif  // CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT
 
 #if CONFIG_TIP
 static const aom_cdf_prob default_tip_cdf[TIP_CONTEXTS][CDF_SIZE(2)] = {
@@ -2047,7 +2618,7 @@
       { AOM_CDF2(32461) }, { AOM_CDF2(21488) }
     };
 
-#if CONFIG_NEW_COLOR_MAP_CODING
+#if CONFIG_PALETTE_IMPROVEMENTS
 static const aom_cdf_prob
     default_identity_row_cdf_y[PALETTE_ROW_FLAG_CONTEXTS][CDF_SIZE(2)] = {
       { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }
@@ -2280,7 +2851,7 @@
           { AOM_CDF8(31190, 31329, 31516, 31679, 31825, 32026, 32322) },
       },
     };
-#endif  // CONFIG_NEW_COLOR_MAP_CODING
+#endif  // CONFIG_PALETTE_IMPROVEMENTS
 
 #if CONFIG_NEW_TX_PARTITION
 static const aom_cdf_prob default_inter_4way_txfm_partition_cdf
@@ -2392,14 +2963,14 @@
     30531) };
 #endif  // CONFIG_NEW_CONTEXT_MODELING
 
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
 static const aom_cdf_prob default_intrabc_mode_cdf[CDF_SIZE(2)] = { AOM_CDF2(
     16384) };
 static const aom_cdf_prob
     default_intrabc_drl_idx_cdf[MAX_REF_BV_STACK_SIZE - 1][CDF_SIZE(2)] = {
       { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }
     };
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
 
 static const aom_cdf_prob default_filter_intra_mode_cdf[CDF_SIZE(
     FILTER_INTRA_MODES)] = { AOM_CDF5(8949, 12776, 17211, 29558) };
@@ -2625,10 +3196,10 @@
 int av1_get_palette_color_index_context(const uint8_t *color_map, int stride,
                                         int r, int c, int palette_size,
                                         uint8_t *color_order, int *color_idx
-#if CONFIG_NEW_COLOR_MAP_CODING
+#if CONFIG_PALETTE_IMPROVEMENTS
                                         ,
                                         int row_flag, int prev_row_flag
-#endif  // CONFIG_NEW_COLOR_MAP_CODING
+#endif  // CONFIG_PALETTE_IMPROVEMENTS
 ) {
   assert(palette_size <= PALETTE_MAX_SIZE);
   assert(r > 0 || c > 0);
@@ -2687,12 +3258,12 @@
   if (color_idx != NULL)
     *color_idx = inverse_color_order[color_map[r * stride + c]];
 
-#if CONFIG_NEW_COLOR_MAP_CODING
+#if CONFIG_PALETTE_IMPROVEMENTS
   // Special context value for the first (and only) index of an identity row and
   // when the previous row is also an identity row.
   if (c == 0 && row_flag && prev_row_flag)
     return PALETTE_COLOR_INDEX_CONTEXTS - 1;
-#endif  // CONFIG_NEW_COLOR_MAP_CODING
+#endif  // CONFIG_PALETTE_IMPROVEMENTS
 
   // Get hash value of context.
   int color_index_ctx_hash = 0;
@@ -2713,10 +3284,10 @@
 
 int av1_fast_palette_color_index_context(const uint8_t *color_map, int stride,
                                          int r, int c, int *color_idx
-#if CONFIG_NEW_COLOR_MAP_CODING
+#if CONFIG_PALETTE_IMPROVEMENTS
                                          ,
                                          int row_flag, int prev_row_flag
-#endif  // CONFIG_NEW_COLOR_MAP_CODING
+#endif  // CONFIG_PALETTE_IMPROVEMENTS
 ) {
   assert(r > 0 || c > 0);
 
@@ -2806,12 +3377,12 @@
     }
   }
 
-#if CONFIG_NEW_COLOR_MAP_CODING
+#if CONFIG_PALETTE_IMPROVEMENTS
   // Special context value for the first (and only) index of an identity row and
   // when the previous row is also an identity row.
   if (c == 0 && row_flag && prev_row_flag)
     return PALETTE_COLOR_INDEX_CONTEXTS - 1;
-#endif  // CONFIG_NEW_COLOR_MAP_CODING
+#endif  // CONFIG_PALETTE_IMPROVEMENTS
 
   // Get hash value of context.
   int color_index_ctx_hash = 0;
@@ -2837,10 +3408,10 @@
   (void)seq_params;
   av1_copy(fc->palette_y_size_cdf, default_palette_y_size_cdf);
   av1_copy(fc->palette_uv_size_cdf, default_palette_uv_size_cdf);
-#if CONFIG_NEW_COLOR_MAP_CODING
+#if CONFIG_PALETTE_IMPROVEMENTS
   av1_copy(fc->identity_row_cdf_y, default_identity_row_cdf_y);
   av1_copy(fc->identity_row_cdf_uv, default_identity_row_cdf_uv);
-#endif  // CONFIG_NEW_COLOR_MAP_CODING
+#endif  // CONFIG_PALETTE_IMPROVEMENTS
   av1_copy(fc->palette_y_color_index_cdf, default_palette_y_color_index_cdf);
   av1_copy(fc->palette_uv_color_index_cdf, default_palette_uv_color_index_cdf);
 #if !CONFIG_AIMC
@@ -2886,6 +3457,10 @@
   av1_copy(fc->drl_cdf[1], default_drl1_cdf);
   av1_copy(fc->drl_cdf[2], default_drl2_cdf);
 #endif  // CONFIG_REF_MV_BANK
+#if CONFIG_REFINEMV
+  av1_copy(fc->refinemv_flag_cdf, default_refinemv_flag_cdf);
+#endif  // CONFIG_REFINEMV
+
 #if CONFIG_EXTENDED_WARP_PREDICTION
   av1_copy(fc->obmc_cdf, default_obmc_cdf);
   av1_copy(fc->warped_causal_cdf, default_warped_causal_cdf);
@@ -2897,6 +3472,9 @@
   av1_copy(fc->warp_ref_idx_cdf[0], default_warp_ref_idx0_cdf);
   av1_copy(fc->warp_ref_idx_cdf[1], default_warp_ref_idx1_cdf);
   av1_copy(fc->warp_ref_idx_cdf[2], default_warp_ref_idx2_cdf);
+#if CONFIG_CWG_D067_IMPROVED_WARP
+  av1_copy(fc->warpmv_with_mvd_flag_cdf, default_warpmv_with_mvd_flag_cdf);
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
 #endif  // CONFIG_WARP_REF_LIST
   av1_copy(fc->warp_delta_param_cdf, default_warp_delta_param_cdf);
   av1_copy(fc->warp_extend_cdf, default_warp_extend_cdf);
@@ -2904,15 +3482,19 @@
   av1_copy(fc->motion_mode_cdf, default_motion_mode_cdf);
   av1_copy(fc->obmc_cdf, default_obmc_cdf);
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
   av1_copy(fc->skip_drl_cdf, default_skip_drl_cdf);
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
 #if CONFIG_BAWP
   av1_copy(fc->bawp_cdf, default_bawp_cdf);
 #endif  // CONFIG_BAWP
 #if CONFIG_OPTFLOW_REFINEMENT
   av1_copy(fc->use_optflow_cdf, default_use_optflow_cdf);
 #endif  // CONFIG_OPTFLOW_REFINEMENT
+
+#if CONFIG_CWP
+  av1_copy(fc->cwp_idx_cdf, default_cwp_idx_cdf);
+#endif  // CONFIG_CWP
 #if CONFIG_IMPROVED_JMVD
   av1_copy(fc->jmvd_scale_mode_cdf, default_jmvd_scale_mode_cdf);
   av1_copy(fc->jmvd_amvd_scale_mode_cdf, default_jmvd_amvd_scale_mode_cdf);
@@ -2980,6 +3562,12 @@
   av1_copy(fc->do_square_split_cdf, default_do_square_split_cdf);
   av1_copy(fc->rect_type_cdf, default_rect_type_cdf);
   av1_copy(fc->do_ext_partition_cdf, default_do_ext_partition_cdf);
+#if CONFIG_UNEVEN_4WAY
+  av1_copy(fc->do_uneven_4way_partition_cdf,
+           default_do_uneven_4way_partition_cdf);
+  av1_copy(fc->uneven_4way_partition_type_cdf,
+           default_uneven_4way_partition_type_cdf);
+#endif  // CONFIG_UNEVEN_4WAY
 #else
   av1_copy(fc->partition_cdf, default_partition_cdf);
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
@@ -2987,12 +3575,12 @@
   av1_copy(fc->inter_ext_tx_cdf, default_inter_ext_tx_cdf);
   av1_copy(fc->skip_mode_cdfs, default_skip_mode_cdfs);
   av1_copy(fc->skip_txfm_cdfs, default_skip_txfm_cdfs);
-#if CONFIG_CONTEXT_DERIVATION
+#if CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT
   av1_copy(fc->intra_inter_cdf[0], default_intra_inter_cdf[0]);
   av1_copy(fc->intra_inter_cdf[1], default_intra_inter_cdf[1]);
 #else
   av1_copy(fc->intra_inter_cdf, default_intra_inter_cdf);
-#endif  // CONFIG_CONTEXT_DERIVATION
+#endif  // CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT
   for (int i = 0; i < SPATIAL_PREDICTION_PROBS; i++)
     av1_copy(fc->seg.spatial_pred_seg_cdf[i],
              default_spatial_pred_seg_tree_cdf[i]);
@@ -3010,10 +3598,10 @@
   av1_copy(fc->cfl_sign_cdf, default_cfl_sign_cdf);
   av1_copy(fc->cfl_alpha_cdf, default_cfl_alpha_cdf);
   av1_copy(fc->intrabc_cdf, default_intrabc_cdf);
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
   av1_copy(fc->intrabc_mode_cdf, default_intrabc_mode_cdf);
   av1_copy(fc->intrabc_drl_idx_cdf, default_intrabc_drl_idx_cdf);
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
   av1_copy(fc->stx_cdf, default_stx_cdf);
 #if CONFIG_FLEX_MVRES
   av1_copy(fc->pb_mv_precision_cdf, default_pb_mv_precision_cdf);
diff --git a/av1/common/entropymode.h b/av1/common/entropymode.h
index 7875226..15fb5d9 100644
--- a/av1/common/entropymode.h
+++ b/av1/common/entropymode.h
@@ -31,7 +31,7 @@
 #define INTER_OFFSET(mode) ((mode)-NEARMV)
 #define INTER_COMPOUND_OFFSET(mode) (uint8_t)((mode)-NEAR_NEARMV)
 // Number of possible contexts for a color index.
-#if CONFIG_NEW_COLOR_MAP_CODING
+#if CONFIG_PALETTE_IMPROVEMENTS
 // As can be seen from av1_get_palette_color_index_context(), the possible
 // contexts are (2,0,0), (2,2,1), (3,2,0), (4,1,0), (5,0,0) pluss one
 // extra case for the first element of an identity row. These are mapped to
@@ -43,7 +43,7 @@
 // contexts are (2,0,0), (2,2,1), (3,2,0), (4,1,0), (5,0,0). These are mapped to
 // a value from 0 to 4 using 'palette_color_index_context_lookup' table.
 #define PALETTE_COLOR_INDEX_CONTEXTS 5
-#endif  // CONFIG_NEW_COLOR_MAP_CODING
+#endif  // CONFIG_PALETTE_IMPROVEMENTS
 
 // Palette Y mode context for a block is determined by number of neighboring
 // blocks (top and/or left) using a palette for Y plane. So, possible Y mode'
@@ -69,7 +69,15 @@
 #define KF_MODE_CONTEXTS 5
 
 #define FSC_MODE_CONTEXTS 4
+#if CONFIG_ATC_DCTX_ALIGNED
+#define FSC_BSIZE_CONTEXTS 6
+#else
 #define FSC_BSIZE_CONTEXTS 5
+#endif  // CONFIG_ATC_DCTX_ALIGNED
+
+#if CONFIG_EXT_DIR
+#define MRL_INDEX_CONTEXTS 3
+#endif  // CONFIG_EXT_DIR
 
 #define COMPREF_BIT_TYPES 2
 #define RANKED_REF0_TO_PRUNE 3
@@ -114,6 +122,11 @@
 #define WARP_EXTEND_CTXS2 5
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
 
+#if CONFIG_REFINEMV
+#define NUM_REFINEMV_CTX 24
+#define REFINEMV_NUM_MODES 2
+#endif  // CONFIG_REFINEMV
+
 struct AV1Common;
 
 typedef struct {
@@ -134,6 +147,16 @@
                             [CDF_SIZE(2)];
   aom_cdf_prob v_ac_sign_cdf[CROSS_COMPONENT_CONTEXTS][CDF_SIZE(2)];
 #endif  // CONFIG_CONTEXT_DERIVATION
+#if CONFIG_ATC_DCTX_ALIGNED
+  aom_cdf_prob coeff_base_bob_cdf[SIG_COEF_CONTEXTS_BOB][CDF_SIZE(3)];
+  aom_cdf_prob eob_flag_cdf16[PLANE_TYPES][CDF_SIZE(EOB_MAX_SYMS - 6)];
+  aom_cdf_prob eob_flag_cdf32[PLANE_TYPES][CDF_SIZE(EOB_MAX_SYMS - 5)];
+  aom_cdf_prob eob_flag_cdf64[PLANE_TYPES][CDF_SIZE(EOB_MAX_SYMS - 4)];
+  aom_cdf_prob eob_flag_cdf128[PLANE_TYPES][CDF_SIZE(EOB_MAX_SYMS - 3)];
+  aom_cdf_prob eob_flag_cdf256[PLANE_TYPES][CDF_SIZE(EOB_MAX_SYMS - 2)];
+  aom_cdf_prob eob_flag_cdf512[PLANE_TYPES][CDF_SIZE(EOB_MAX_SYMS - 1)];
+  aom_cdf_prob eob_flag_cdf1024[PLANE_TYPES][CDF_SIZE(EOB_MAX_SYMS)];
+#else
   aom_cdf_prob eob_flag_cdf16[PLANE_TYPES][2][CDF_SIZE(5)];
   aom_cdf_prob eob_flag_cdf32[PLANE_TYPES][2][CDF_SIZE(6)];
   aom_cdf_prob eob_flag_cdf64[PLANE_TYPES][2][CDF_SIZE(7)];
@@ -141,6 +164,7 @@
   aom_cdf_prob eob_flag_cdf256[PLANE_TYPES][2][CDF_SIZE(9)];
   aom_cdf_prob eob_flag_cdf512[PLANE_TYPES][2][CDF_SIZE(10)];
   aom_cdf_prob eob_flag_cdf1024[PLANE_TYPES][2][CDF_SIZE(11)];
+#endif  // CONFIG_ATC_DCTX_ALIGNED
   aom_cdf_prob coeff_base_eob_cdf[TX_SIZES][PLANE_TYPES][SIG_COEF_CONTEXTS_EOB]
                                  [CDF_SIZE(3)];
   aom_cdf_prob coeff_base_cdf[TX_SIZES][PLANE_TYPES][SIG_COEF_CONTEXTS]
@@ -148,7 +172,7 @@
   aom_cdf_prob idtx_sign_cdf[IDTX_SIGN_CONTEXTS][CDF_SIZE(2)];
   aom_cdf_prob coeff_base_cdf_idtx[IDTX_SIG_COEF_CONTEXTS][CDF_SIZE(4)];
   aom_cdf_prob coeff_br_cdf_idtx[IDTX_LEVEL_CONTEXTS][CDF_SIZE(BR_CDF_SIZE)];
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
   aom_cdf_prob coeff_base_lf_cdf[TX_SIZES][PLANE_TYPES][LF_SIG_COEF_CONTEXTS]
                                 [CDF_SIZE(LF_BASE_SYMBOLS)];
   aom_cdf_prob coeff_base_lf_eob_cdf[TX_SIZES][PLANE_TYPES]
@@ -160,7 +184,7 @@
 #else
   aom_cdf_prob coeff_br_cdf[TX_SIZES][PLANE_TYPES][LEVEL_CONTEXTS]
                            [CDF_SIZE(BR_CDF_SIZE)];
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 #if CONFIG_PAR_HIDING
   aom_cdf_prob coeff_base_ph_cdf[COEFF_BASE_PH_CONTEXTS]
                                 [CDF_SIZE(NUM_BASE_LEVELS + 2)];
@@ -174,9 +198,14 @@
 #endif  // CONFIG_WARPMV
 
   aom_cdf_prob drl_cdf[3][DRL_MODE_CONTEXTS][CDF_SIZE(2)];
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
   aom_cdf_prob skip_drl_cdf[3][CDF_SIZE(2)];
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
+
+#if CONFIG_REFINEMV
+  aom_cdf_prob refinemv_flag_cdf[NUM_REFINEMV_CTX]
+                                [CDF_SIZE(REFINEMV_NUM_MODES)];
+#endif  // CONFIG_REFINEMV
 
 #if CONFIG_OPTFLOW_REFINEMENT
   aom_cdf_prob use_optflow_cdf[INTER_COMPOUND_MODE_CONTEXTS][CDF_SIZE(2)];
@@ -186,6 +215,10 @@
   aom_cdf_prob inter_compound_mode_cdf[INTER_COMPOUND_MODE_CONTEXTS]
                                       [CDF_SIZE(INTER_COMPOUND_MODES)];
 #endif  // CONFIG_OPTFLOW_REFINEMENT
+
+#if CONFIG_CWP
+  aom_cdf_prob cwp_idx_cdf[MAX_CWP_CONTEXTS][MAX_CWP_NUM - 1][CDF_SIZE(2)];
+#endif  // CONFIG_CWP
 #if CONFIG_IMPROVED_JMVD
   aom_cdf_prob jmvd_scale_mode_cdf[CDF_SIZE(JOINT_NEWMV_SCALE_FACTOR_CNT)];
   aom_cdf_prob jmvd_amvd_scale_mode_cdf[CDF_SIZE(JOINT_AMVD_SCALE_FACTOR_CNT)];
@@ -214,6 +247,9 @@
 #endif  // CONFIG_WARPMV
 #if CONFIG_WARP_REF_LIST
   aom_cdf_prob warp_ref_idx_cdf[3][WARP_REF_CONTEXTS][CDF_SIZE(2)];
+#if CONFIG_CWG_D067_IMPROVED_WARP
+  aom_cdf_prob warpmv_with_mvd_flag_cdf[BLOCK_SIZES_ALL][CDF_SIZE(2)];
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
 #endif  // CONFIG_WARP_REF_LIST
   aom_cdf_prob warp_delta_param_cdf[2][CDF_SIZE(WARP_DELTA_NUM_SYMBOLS)];
 
@@ -231,10 +267,10 @@
 #endif  // CONFIG_TIP
   aom_cdf_prob palette_y_size_cdf[PALATTE_BSIZE_CTXS][CDF_SIZE(PALETTE_SIZES)];
   aom_cdf_prob palette_uv_size_cdf[PALATTE_BSIZE_CTXS][CDF_SIZE(PALETTE_SIZES)];
-#if CONFIG_NEW_COLOR_MAP_CODING
+#if CONFIG_PALETTE_IMPROVEMENTS
   aom_cdf_prob identity_row_cdf_y[PALETTE_ROW_FLAG_CONTEXTS][CDF_SIZE(2)];
   aom_cdf_prob identity_row_cdf_uv[PALETTE_ROW_FLAG_CONTEXTS][CDF_SIZE(2)];
-#endif  // CONFIG_NEW_COLOR_MAP_CODING
+#endif  // CONFIG_PALETTE_IMPROVEMENTS
   aom_cdf_prob palette_y_color_index_cdf[PALETTE_SIZES]
                                         [PALETTE_COLOR_INDEX_CONTEXTS]
                                         [CDF_SIZE(PALETTE_COLORS)];
@@ -268,12 +304,12 @@
   aom_cdf_prob comp_group_idx_cdf[COMP_GROUP_IDX_CONTEXTS][CDF_SIZE(2)];
   aom_cdf_prob skip_mode_cdfs[SKIP_MODE_CONTEXTS][CDF_SIZE(2)];
   aom_cdf_prob skip_txfm_cdfs[SKIP_CONTEXTS][CDF_SIZE(2)];
-#if CONFIG_CONTEXT_DERIVATION
+#if CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT
   aom_cdf_prob intra_inter_cdf[INTRA_INTER_SKIP_TXFM_CONTEXTS]
                               [INTRA_INTER_CONTEXTS][CDF_SIZE(2)];
 #else
   aom_cdf_prob intra_inter_cdf[INTRA_INTER_CONTEXTS][CDF_SIZE(2)];
-#endif  // CONFIG_CONTEXT_DERIVATION
+#endif  // CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT
   nmv_context nmvc;
   nmv_context ndvc;
 #if CONFIG_NEW_CONTEXT_MODELING
@@ -281,10 +317,10 @@
 #else
   aom_cdf_prob intrabc_cdf[CDF_SIZE(2)];
 #endif  // CONFIG_NEW_CONTEXT_MODELING
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
   aom_cdf_prob intrabc_mode_cdf[CDF_SIZE(2)];
   aom_cdf_prob intrabc_drl_idx_cdf[MAX_REF_BV_STACK_SIZE - 1][CDF_SIZE(2)];
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
   struct segmentation_probs seg;
   aom_cdf_prob filter_intra_cdfs[BLOCK_SIZES_ALL][CDF_SIZE(2)];
   aom_cdf_prob filter_intra_mode_cdf[CDF_SIZE(FILTER_INTRA_MODES)];
@@ -324,7 +360,11 @@
   aom_cdf_prob uv_mode_cdf[CFL_ALLOWED_TYPES][INTRA_MODES]
                           [CDF_SIZE(UV_INTRA_MODES)];
 #endif  // !CONFIG_AIMC
+#if CONFIG_EXT_DIR
+  aom_cdf_prob mrl_index_cdf[MRL_INDEX_CONTEXTS][CDF_SIZE(MRL_LINE_NUMBER)];
+#else
   aom_cdf_prob mrl_index_cdf[CDF_SIZE(MRL_LINE_NUMBER)];
+#endif  // CONFIG_EXT_DIR
   aom_cdf_prob fsc_mode_cdf[FSC_MODE_CONTEXTS][FSC_BSIZE_CONTEXTS]
                            [CDF_SIZE(FSC_MODES)];
 #if CONFIG_IMPROVED_CFL
@@ -348,7 +388,17 @@
                             [CDF_SIZE(2)];
   aom_cdf_prob do_ext_partition_cdf[PARTITION_STRUCTURE_NUM][NUM_RECT_PARTS]
                                    [PARTITION_CONTEXTS][CDF_SIZE(2)];
+#if CONFIG_UNEVEN_4WAY
+  aom_cdf_prob do_uneven_4way_partition_cdf[PARTITION_STRUCTURE_NUM]
+                                           [NUM_RECT_PARTS][PARTITION_CONTEXTS]
+                                           [CDF_SIZE(2)];
+  aom_cdf_prob uneven_4way_partition_type_cdf[PARTITION_STRUCTURE_NUM]
+                                             [NUM_RECT_PARTS]
+                                             [PARTITION_CONTEXTS]
+                                             [CDF_SIZE(NUM_UNEVEN_4WAY_PARTS)];
+#endif  // CONFIG_UNEVEN_4WAY
 #else
+  // Partition type for a square block, without limitations.
   aom_cdf_prob partition_cdf[PARTITION_STRUCTURE_NUM][PARTITION_CONTEXTS]
                             [CDF_SIZE(EXT_PARTITION_TYPES)];
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
@@ -378,8 +428,13 @@
   aom_cdf_prob delta_lf_cdf[CDF_SIZE(DELTA_LF_PROBS + 1)];
   aom_cdf_prob intra_ext_tx_cdf[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES]
                                [CDF_SIZE(TX_TYPES)];
+#if CONFIG_ATC_DCTX_ALIGNED
+  aom_cdf_prob inter_ext_tx_cdf[EXT_TX_SETS_INTER][EOB_TX_CTXS][EXT_TX_SIZES]
+                               [CDF_SIZE(TX_TYPES)];
+#else
   aom_cdf_prob inter_ext_tx_cdf[EXT_TX_SETS_INTER][EXT_TX_SIZES]
                                [CDF_SIZE(TX_TYPES)];
+#endif  // CONFIG_ATC_DCTX_ALIGNED
   aom_cdf_prob cfl_sign_cdf[CDF_SIZE(CFL_JOINT_SIGNS)];
   aom_cdf_prob cfl_alpha_cdf[CFL_ALPHA_CONTEXTS][CDF_SIZE(CFL_ALPHABET_SIZE)];
   aom_cdf_prob stx_cdf[TX_SIZES][CDF_SIZE(STX_TYPES)];
@@ -432,7 +487,7 @@
   { 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 4, 5, 3, 6, 7, 8 },
 };
 
-#if CONFIG_ATC_NEWTXSETS
+#if CONFIG_ATC
 static const int av1_md_type2idx[EXT_TX_SIZES][INTRA_MODES][TX_TYPES] = {
   {
       { 0, 2, 3, 1, 0, 0, 0, 4, 5, 0, 0, 0, 0, 6, 0, 0 },  // mode_class: 0
@@ -572,7 +627,7 @@
              ? av1_md_idx2type[size_idx][av1_md_class[intra_mode]][tx_idx]
              : av1_ext_tx_inv[tx_set_type][tx_idx];
 }
-#endif  // CONFIG_ATC_NEWTXSETS
+#endif  // CONFIG_ATC
 
 void av1_set_default_ref_deltas(int8_t *ref_deltas);
 void av1_set_default_mode_deltas(int8_t *mode_deltas);
@@ -674,7 +729,7 @@
 int av1_get_palette_color_index_context(const uint8_t *color_map, int stride,
                                         int r, int c, int palette_size,
                                         uint8_t *color_order, int *color_idx
-#if CONFIG_NEW_COLOR_MAP_CODING
+#if CONFIG_PALETTE_IMPROVEMENTS
                                         ,
                                         int row_flag, int prev_row_flag
 #endif
@@ -683,7 +738,7 @@
 // exploiting the fact that the encoder does not need to maintain a color order.
 int av1_fast_palette_color_index_context(const uint8_t *color_map, int stride,
                                          int r, int c, int *color_idx
-#if CONFIG_NEW_COLOR_MAP_CODING
+#if CONFIG_PALETTE_IMPROVEMENTS
                                          ,
                                          int row_flag, int prev_row_flag
 #endif
diff --git a/av1/common/enums.h b/av1/common/enums.h
index 7876d29..a86f03e 100644
--- a/av1/common/enums.h
+++ b/av1/common/enums.h
@@ -28,6 +28,15 @@
 /*!\cond */
 
 #undef MAX_SB_SIZE
+#define BAWP_BUGFIX 1
+
+#if CONFIG_REFINEMV
+#define SINGLE_STEP_SEARCH 0
+#endif  // CONFIG_REFINEMV
+
+#if CONFIG_D071_IMP_MSK_BLD
+#define DEFAULT_IMP_MSK_WT 0  // default implicit masked blending weight
+#endif                        // CONFIG_D071_IMP_MSK_BLD
 
 #if CONFIG_WEDGE_MOD_EXT
 /*WEDGE_0 is defined in the three o'clock direciton, the angles are defined in
@@ -59,17 +68,20 @@
 #define H_WEDGE_ANGLES 10
 #define NUM_WEDGE_DIST 4
 #define MAX_WEDGE_TYPES 68
+#define WEDGE_BLD_SIG 1  // 0 for linear blending, 1 for sigmoid blending
+#define WEDGE_BLD_LUT_SIZE 128
 #endif  // CONFIG_WEDGE_MOD_EXT
 
-#if CONFIG_ADAPTIVE_DS_FILTER
-#define DS_FRAME_LEVEL 1  // Signal at key frame
-#endif
-
-#if CONFIG_WARP_REF_LIST && CONFIG_C043_MVP_IMPROVEMENTS
+#if CONFIG_WARP_REF_LIST && CONFIG_MVP_IMPROVEMENT
 #define WARP_CU_BANK 1
 #else
 #define WARP_CU_BANK 0
-#endif  // CONFIG_WARP_REF_LIST && CONFIG_C043_MVP_IMPROVEMENTS
+#endif  // CONFIG_WARP_REF_LIST && CONFIG_MVP_IMPROVEMENT
+
+#if CONFIG_REFINEMV
+#define REFINEMV_SUBBLOCK_WIDTH 16
+#define REFINEMV_SUBBLOCK_HEIGHT 16
+#endif  // CONFIG_REFINEMV
 
 // Cross-Component Sample Offset (CCSO)
 #if CONFIG_CCSO
@@ -179,14 +191,14 @@
 #define IST_8x8_WIDTH 64
 #define IST_8x8_HEIGHT 32
 
-#if CONFIG_ATC_NEWTXSETS
-// TX sizes used for mode dependent TX sets
-#define MODE_DEPTX_TXSIZES 19
-#endif  // CONFIG_ATC_NEWTXSETS
-
 #define FSC_MODES 2
+#if CONFIG_ATC_DCTX_ALIGNED
+#define FSC_MAXWIDTH 32
+#define FSC_MAXHEIGHT 32
+#else
 #define FSC_MAXWIDTH 16
 #define FSC_MAXHEIGHT 16
+#endif  // CONFIG_ATC_DCTX_ALIGNED
 #define FSC_MINWIDTH 4
 #define FSC_MINHEIGHT 4
 
@@ -303,14 +315,37 @@
 //
 #if CONFIG_EXT_RECUR_PARTITIONS
 //  HORZ_3                 VERT_3
-//  +--------------+       +---+------+---+
-//  |              |       |   |      |   |
-//  +--------------+       |   |      |   |
-//  |              |       |   |      |   |
-//  |              |       |   |      |   |
-//  +--------------+       |   |      |   |
-//  |              |       |   |      |   |
-//  +--------------+       +---+------+---+
+//  +---------------+       +---+------+---+
+//  |               |       |   |      |   |
+//  +---------------+       |   |      |   |
+//  |       |       |       |   |______|   |
+//  |       |       |       |   |      |   |
+//  +---------------+       |   |      |   |
+//  |               |       |   |      |   |
+//  +---------------+       +---+------+---+
+#if CONFIG_UNEVEN_4WAY
+//  HORZ_4A                 HORZ_4B
+//  +---------------+       +---------------+
+//  |               |       |               |
+//  +---------------+       +---------------+
+//  |               |       |               |
+//  |               |       |               |
+//  +---------------+       |               |
+//  |               |       |               |
+//  |               |       +---------------+
+//  |               |       |               |
+//  |               |       |               |
+//  +---------------+       +---------------+
+//  |               |       |               |
+//  +---------------+       +---------------+
+//
+//  VERT_4A                                 VERT_4B
+//  +-------------------------+          +-------------------------+
+//  |   |      |          |   |          |   |          |      |   |
+//  |   |      |          |   |          |   |          |      |   |
+//  |   |      |          |   |          |   |          |      |   |
+//  +-------------------------+          +-------------------------+
+#endif  // CONFIG_UNEVEN_4WAY
 #else
 //  HORZ_A        HORZ_B        VERT_A        VERT_B
 //  +---+---+     +-------+     +---+---+     +---+---+
@@ -332,10 +367,20 @@
   PARTITION_VERT,
   PARTITION_HORZ_3,  // 3 horizontal sub-partitions with ratios 4:1, 2:1 and 4:1
   PARTITION_VERT_3,  // 3 vertical sub-partitions with ratios 4:1, 2:1 and 4:1
+#if CONFIG_UNEVEN_4WAY
+  PARTITION_HORZ_4A,  // 4 horizontal uneven sub-partitions (1:2:4:1).
+  PARTITION_HORZ_4B,  // 4 horizontal uneven sub-partitions (1:4:2:1).
+  PARTITION_VERT_4A,  // 4 vertical uneven sub-partitions (1:2:4:1).
+  PARTITION_VERT_4B,  // 4 vertical uneven sub-partitions (1:4:2:1).
+#endif                // CONFIG_UNEVEN_4WAY
   PARTITION_SPLIT,
+  EXT_PARTITION_TYPES = PARTITION_SPLIT,
+  ALL_PARTITION_TYPES = EXT_PARTITION_TYPES + 1,
   PARTITION_TYPES = PARTITION_VERT + 1,
-  EXT_PARTITION_TYPES = PARTITION_VERT_3 + 1,
-  ALL_PARTITION_TYPES = PARTITION_SPLIT + 1,
+#if !CONFIG_UNEVEN_4WAY
+  LIMITED_PARTITION_TYPES = PARTITION_TYPES - 1,
+  LIMITED_EXT_PARTITION_TYPES = EXT_PARTITION_TYPES - 1,
+#endif  // !CONFIG_UNEVEN_4WAY
   PARTITION_INVALID = 255
 } UENUM1BYTE(PARTITION_TYPE);
 #else   // CONFIG_EXT_RECUR_PARTITIONS
@@ -355,6 +400,7 @@
   PARTITION_INVALID = 255
 } UENUM1BYTE(PARTITION_TYPE);
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
+
 // Rectangular partition types.
 enum {
   HORZ = 0,
@@ -362,6 +408,16 @@
   NUM_RECT_PARTS,
   RECT_INVALID = NUM_RECT_PARTS
 } UENUM1BYTE(RECT_PART_TYPE);
+
+#if CONFIG_UNEVEN_4WAY
+// Uneven 4-way partition types.
+enum {
+  UNEVEN_4A = 0,
+  UNEVEN_4B,
+  NUM_UNEVEN_4WAY_PARTS,
+} UENUM1BYTE(UNEVEN_4WAY_PART_TYPE);
+#endif  // CONFIG_UNEVEN_4WAY
+
 typedef char PARTITION_CONTEXT;
 #define PARTITION_PLOFFSET 4  // number of probability models per block size
 
@@ -568,21 +624,24 @@
   EXT_TX_SET_DTT9_IDTX_1DDCT,
   // Discrete Trig transforms w/ flip (9) + Identity (1) + 1D Hor/Ver (6)
   EXT_TX_SET_ALL16,
-#if CONFIG_ATC_NEWTXSETS
+#if CONFIG_ATC
   EXT_NEW_TX_SET,
-#endif  // CONFIG_ATC_NEWTXSETS
+#endif  // CONFIG_ATC
   EXT_TX_SET_TYPES
 } UENUM1BYTE(TxSetType);
 
+#if CONFIG_ATC_DCTX_ALIGNED
+#define EOB_TX_CTXS 3
+#endif                       // CONFIG_ATC_DCTX_ALIGNED
 #define EXT_TX_SIZES 4       // number of sizes that use extended transforms
 #define EXT_TX_SETS_INTER 4  // Sets of transform selections for INTER
-#if CONFIG_ATC_NEWTXSETS && !CONFIG_ATC_REDUCED_TXSET
+#if CONFIG_ATC && !CONFIG_ATC_REDUCED_TXSET
 #define EXT_TX_SETS_INTRA 2  // Sets of transform selections for INTRA
 #else
 #define EXT_TX_SETS_INTRA 3  // Sets of transform selections for INTRA
-#endif  // CONFIG_ATC_NEWTXSETS && !CONFIG_ATC_REDUCED_TXSET
+#endif                       // CONFIG_ATC && !CONFIG_ATC_REDUCED_TXSET
 
-#if CONFIG_ATC_NEWTXSETS
+#if CONFIG_ATC
 #define INTRA_TX_SET1 7
 #if CONFIG_ATC_REDUCED_TXSET
 #define INTRA_TX_SET2 2
@@ -590,7 +649,7 @@
 #else
 #define INTRA_TX_SET1 6
 #define INTRA_TX_SET2 4
-#endif  // CONFIG_ATC_NEWTXSETS
+#endif  // CONFIG_ATC
 
 enum {
   UNIDIR_COMP_REFERENCE,
@@ -600,7 +659,11 @@
 
 enum { PLANE_TYPE_Y, PLANE_TYPE_UV, PLANE_TYPES } UENUM1BYTE(PLANE_TYPE);
 
+#if CONFIG_CFL_IMPROVEMENTS
+#define CFL_ALPHABET_SIZE_LOG2 3
+#else
 #define CFL_ALPHABET_SIZE_LOG2 4
+#endif  // CONFIG_CFL_IMPROVEMENTS
 #define CFL_ALPHABET_SIZE (1 << CFL_ALPHABET_SIZE_LOG2)
 #define CFL_MAGS_SIZE ((2 << CFL_ALPHABET_SIZE_LOG2) + 1)
 #define CFL_IDX_U(idx) (idx >> CFL_ALPHABET_SIZE_LOG2)
@@ -636,6 +699,10 @@
 #define CFL_CONTEXT_V(js) \
   (CFL_SIGN_V(js) * CFL_SIGNS + CFL_SIGN_U(js) - CFL_SIGNS)
 
+#if CONFIG_SEP_COMP_DRL
+#define SEP_COMP_DRL_SIZE 3
+#endif  // CONFIG_SEP_COMP_DRL
+
 enum {
   PALETTE_MAP,
   COLOR_MAP_TYPES,
@@ -897,9 +964,9 @@
 #define WARPMV_MODE_CONTEXT 10
 #endif  // CONFIG_WARPMV
 
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
 #define MAX_REF_BV_STACK_SIZE 4
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
 
 #define GLOBALMV_OFFSET 3
 #define REFMV_OFFSET 4
@@ -915,6 +982,19 @@
 #define INTER_COMPOUND_MODE_CONTEXTS 8
 #endif  // CONFIG_C076_INTER_MOD_CTX
 
+#if CONFIG_CWP
+// Number of supported factors for compound weighted prediction
+#define MAX_CWP_NUM 5
+// maximum value for the supported factors
+#define CWP_MAX 20
+// minimum value for the supported factors
+#define CWP_MIN -4
+// Weighting factor for simple average prediction
+#define CWP_EQUAL 8
+#define CWP_WEIGHT_BITS 4
+#define MAX_CWP_CONTEXTS 2
+#endif
+
 #define DELTA_Q_SMALL 3
 #define DELTA_Q_PROBS (DELTA_Q_SMALL)
 #define DEFAULT_DELTA_Q_RES_PERCEPTUAL 4
@@ -1089,7 +1169,7 @@
   PROJ_SPATIAL,        /**< Project from spatial neighborhood */
   PROJ_PARAM_BANK,     /**< Project from circular buffer */
   PROJ_DEFAULT,        /**< Default values */
-  WARP_PROJ_TYPES = 5, /**< Num projection types */
+  WARP_PROJ_TYPES = 4, /**< Num projection types */
 } WarpProjectionType;
 #endif  // CONFIG_WARP_REF_LIST
 
diff --git a/av1/common/idct.c b/av1/common/idct.c
index a9a3f1f..4462ef6 100644
--- a/av1/common/idct.c
+++ b/av1/common/idct.c
@@ -318,8 +318,9 @@
 }
 #endif  // CONFIG_CROSS_CHROMA_TX
 
-void av1_inverse_transform_block(const MACROBLOCKD *xd, tran_low_t *dqcoeff,
-                                 int plane, TX_TYPE tx_type, TX_SIZE tx_size,
+void av1_inverse_transform_block(const MACROBLOCKD *xd,
+                                 const tran_low_t *dqcoeff, int plane,
+                                 TX_TYPE tx_type, TX_SIZE tx_size,
                                  uint16_t *dst, int stride, int eob,
                                  int reduced_tx_set) {
   if (!eob) return;
@@ -338,9 +339,14 @@
   assert(((intra_mode >= PAETH_PRED || filter) && txfm_param.sec_tx_type) == 0);
   (void)intra_mode;
   (void)filter;
-  av1_inv_stxfm(dqcoeff, &txfm_param);
 
-  av1_highbd_inv_txfm_add(dqcoeff, dst, stride, &txfm_param);
+  // Work buffer for secondary transform
+  DECLARE_ALIGNED(32, tran_low_t, temp_dqcoeff[MAX_SB_SQUARE]);
+  memcpy(temp_dqcoeff, dqcoeff, sizeof(tran_low_t) * tx_size_2d[tx_size]);
+
+  av1_inv_stxfm(temp_dqcoeff, &txfm_param);
+
+  av1_highbd_inv_txfm_add(temp_dqcoeff, dst, stride, &txfm_param);
 }
 
 // Inverse secondary transform
diff --git a/av1/common/idct.h b/av1/common/idct.h
index b50d972..2652f7c 100644
--- a/av1/common/idct.h
+++ b/av1/common/idct.h
@@ -39,8 +39,9 @@
                                    CctxType cctx_type);
 #endif  // CONFIG_CROSS_CHROMA_TX
 
-void av1_inverse_transform_block(const MACROBLOCKD *xd, tran_low_t *dqcoeff,
-                                 int plane, TX_TYPE tx_type, TX_SIZE tx_size,
+void av1_inverse_transform_block(const MACROBLOCKD *xd,
+                                 const tran_low_t *dqcoeff, int plane,
+                                 TX_TYPE tx_type, TX_SIZE tx_size,
                                  uint16_t *dst, int stride, int eob,
                                  int reduced_tx_set);
 void av1_highbd_iwht4x4_add(const tran_low_t *input, uint16_t *dest, int stride,
diff --git a/av1/common/mv.h b/av1/common/mv.h
index 42cac87..472c9bd 100644
--- a/av1/common/mv.h
+++ b/av1/common/mv.h
@@ -391,7 +391,7 @@
 
 #define SUBEXPFIN_K 3
 
-#if CONFIG_EXTENDED_WARP_PREDICTION
+#if CONFIG_EXTENDED_WARP_PREDICTION || CONFIG_IMPROVED_GLOBAL_MOTION
 #define GM_TRANS_PREC_BITS 3
 #define GM_ABS_TRANS_BITS 14
 #define GM_ABS_TRANS_ONLY_BITS (GM_ABS_TRANS_BITS - GM_TRANS_PREC_BITS + 3)
@@ -401,7 +401,11 @@
 #define GM_TRANS_ONLY_DECODE_FACTOR (1 << GM_TRANS_ONLY_PREC_DIFF)
 
 #define GM_ALPHA_PREC_BITS 10
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+#define GM_ABS_ALPHA_BITS 8
+#else
 #define GM_ABS_ALPHA_BITS 7
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
 #define GM_ALPHA_PREC_DIFF (WARPEDMODEL_PREC_BITS - GM_ALPHA_PREC_BITS)
 #define GM_ALPHA_DECODE_FACTOR (1 << GM_ALPHA_PREC_DIFF)
 #else
@@ -417,7 +421,7 @@
 #define GM_ABS_ALPHA_BITS 12
 #define GM_ALPHA_PREC_DIFF (WARPEDMODEL_PREC_BITS - GM_ALPHA_PREC_BITS)
 #define GM_ALPHA_DECODE_FACTOR (1 << GM_ALPHA_PREC_DIFF)
-#endif  // CONFIG_EXTENDED_WARP_PREDICTION
+#endif  // CONFIG_EXTENDED_WARP_PREDICTION || CONFIG_IMPROVED_GLOBAL_MOTION
 
 #define GM_ROW3HOMO_PREC_BITS 16
 #define GM_ABS_ROW3HOMO_BITS 11
@@ -425,7 +429,11 @@
   (WARPEDMODEL_ROW3HOMO_PREC_BITS - GM_ROW3HOMO_PREC_BITS)
 #define GM_ROW3HOMO_DECODE_FACTOR (1 << GM_ROW3HOMO_PREC_DIFF)
 
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+#define GM_TRANS_MAX ((1 << GM_ABS_TRANS_BITS) - 1)
+#else
 #define GM_TRANS_MAX (1 << GM_ABS_TRANS_BITS)
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
 #define GM_ALPHA_MAX (1 << GM_ABS_ALPHA_BITS)
 #define GM_ROW3HOMO_MAX (1 << GM_ABS_ROW3HOMO_BITS)
 
@@ -533,6 +541,10 @@
   // candidate, and so does not allow WARP_EXTEND
   int row_offset;
   int col_offset;
+#if CONFIG_CWP
+  // Record the cwp index of the neighboring blocks
+  int8_t cwp_idx;
+#endif  // CONFIG_CWP
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
 } CANDIDATE_MV;
 
diff --git a/av1/common/mvref_common.c b/av1/common/mvref_common.c
index 64ca39e..f22423d 100644
--- a/av1/common/mvref_common.c
+++ b/av1/common/mvref_common.c
@@ -20,12 +20,12 @@
 #endif  // CONFIG_TIP
 #include "av1/common/warped_motion.h"
 
-#if CONFIG_SMVP_IMPROVEMENT
+#if CONFIG_MVP_IMPROVEMENT
 typedef struct single_mv_candidate {
   int_mv mv;
   MV_REFERENCE_FRAME ref_frame;
 } SINGLE_MV_CANDIDATE;
-#endif  // CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
 
 #define MFMV_STACK_SIZE 3
 
@@ -145,7 +145,7 @@
       mv->mv.as_int = 0;
 #endif  // CONFIG_TIP
 
-#if CONFIG_TMVP_IMPROVEMENT
+#if CONFIG_MVP_IMPROVEMENT
       if (is_inter_ref_frame(mi->ref_frame[0]) &&
           mi->ref_frame[1] == NONE_FRAME) {
         if ((abs(mi->mv[0].as_mv.row) <= REFMVS_LIMIT) &&
@@ -159,7 +159,7 @@
 #endif  // CONFIG_TIP
         }
       } else {
-#endif  // CONFIG_TMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
         for (int idx = 0; idx < 2; ++idx) {
           MV_REFERENCE_FRAME ref_frame = mi->ref_frame[idx];
           if (is_inter_ref_frame(ref_frame)) {
@@ -177,9 +177,9 @@
 #endif  // CONFIG_TIP
           }
         }
-#if CONFIG_TMVP_IMPROVEMENT
+#if CONFIG_MVP_IMPROVEMENT
       }
-#endif  // CONFIG_TMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
 
       mv++;
     }
@@ -187,25 +187,25 @@
   }
 }
 
-#if CONFIG_SMVP_IMPROVEMENT
+#if CONFIG_MVP_IMPROVEMENT
 // Fetch MVP candidates from derived SMVP into MVP candidate list
 // when there is no enough MVP candidates.
 static AOM_INLINE void fill_mvp_from_derived_smvp(
     const MV_REFERENCE_FRAME rf[2], CANDIDATE_MV *ref_mv_stack,
     uint16_t *ref_mv_weight, uint8_t *refmv_count,
     CANDIDATE_MV *derived_mv_stack, uint8_t derived_mv_count,
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
     const MB_MODE_INFO *mbmi, MV_REFERENCE_FRAME *ref_frame_idx0,
     MV_REFERENCE_FRAME *ref_frame_idx1,
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
     const int max_ref_mv_count) {
   int index = 0;
   int derived_idx = 0;
 
   if (rf[1] == NONE_FRAME) {
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
     assert(!mbmi->skip_mode);
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
 
     for (derived_idx = 0; derived_idx < derived_mv_count; ++derived_idx) {
       for (index = 0; index < *refmv_count; ++index) {
@@ -222,6 +222,9 @@
         ref_mv_stack[index].row_offset = OFFSET_NONSPATIAL;
         ref_mv_stack[index].col_offset = OFFSET_NONSPATIAL;
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
+#if CONFIG_CWP
+        ref_mv_stack[index].cwp_idx = derived_mv_stack[derived_idx].cwp_idx;
+#endif  // CONFIG_CWP
         ref_mv_weight[index] = REF_CAT_LEVEL;
         ++(*refmv_count);
       }
@@ -233,10 +236,10 @@
              derived_mv_stack[derived_idx].this_mv.as_int) &&
             (ref_mv_stack[index].comp_mv.as_int ==
              derived_mv_stack[derived_idx].comp_mv.as_int)) {
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
           if (!mbmi->skip_mode || (ref_frame_idx0[index] == rf[0] &&
                                    ref_frame_idx1[index] == rf[1]))
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
             break;
         }
       }
@@ -249,19 +252,22 @@
         ref_mv_stack[index].row_offset = OFFSET_NONSPATIAL;
         ref_mv_stack[index].col_offset = OFFSET_NONSPATIAL;
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_CWP
+        ref_mv_stack[index].cwp_idx = derived_mv_stack[derived_idx].cwp_idx;
+#endif  // CONFIG_CWP
+#if CONFIG_SKIP_MODE_ENHANCEMENT
         if (mbmi->skip_mode) {
           ref_frame_idx0[index] = rf[0];
           ref_frame_idx1[index] = rf[1];
         }
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
         ref_mv_weight[index] = REF_CAT_LEVEL;
         ++(*refmv_count);
       }
     }
   }
 }
-#endif  // CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
 
 #if CONFIG_TIP
 static AOM_INLINE void derive_ref_mv_candidate_from_tip_mode(
@@ -323,6 +329,9 @@
     ref_mv_stack[index].row_offset = OFFSET_NONSPATIAL;
     ref_mv_stack[index].col_offset = OFFSET_NONSPATIAL;
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
+#if CONFIG_CWP
+    ref_mv_stack[index].cwp_idx = candidate->cwp_idx;
+#endif  // CONFIG_CWP
     ++(*refmv_count);
   }
   if (have_newmv_in_inter_mode(candidate->mode)) ++*newmv_count;
@@ -374,9 +383,9 @@
 
 static AOM_INLINE void add_ref_mv_candidate(
 #if CONFIG_TIP
-#if !CONFIG_SMVP_IMPROVEMENT
+#if !CONFIG_MVP_IMPROVEMENT
     const AV1_COMMON *cm,
-#endif  // !CONFIG_SMVP_IMPROVEMENT
+#endif  // !CONFIG_MVP_IMPROVEMENT
     int mi_row, int mi_col, int mi_row_cand, int mi_col_cand,
 #endif  // CONFIG_TIP
     const MB_MODE_INFO *const candidate,
@@ -387,16 +396,16 @@
     uint8_t *ref_match_count, uint8_t *newmv_count, CANDIDATE_MV *ref_mv_stack,
     uint16_t *ref_mv_weight, int_mv *gm_mv_candidates,
     const WarpedMotionParams *gm_params,
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
     const MB_MODE_INFO *mbmi,
     MV_REFERENCE_FRAME ref_frame_idx0[MAX_REF_MV_STACK_SIZE],
     MV_REFERENCE_FRAME ref_frame_idx1[MAX_REF_MV_STACK_SIZE],
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
-#if CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
+#if CONFIG_MVP_IMPROVEMENT
     const AV1_COMMON *cm, int add_more_mvs, SINGLE_MV_CANDIDATE *single_mv,
     uint8_t *single_mv_count, CANDIDATE_MV *derived_mv_stack,
     uint16_t *derived_mv_weight, uint8_t *derived_mv_count,
-#endif  // CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
 #if CONFIG_IBC_SR_EXT
     uint8_t is_intrabc,
 #endif  // CONFIG_IBC_SR_EXT
@@ -425,7 +434,7 @@
   const TIP *tip_ref = &cm->tip_ref;
 #endif  // CONFIG_TIP
 
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
   if (mbmi->skip_mode) {
 #if CONFIG_TIP
     if (!is_tip_ref_frame(candidate->ref_frame[0]) &&
@@ -464,18 +473,21 @@
         ref_mv_stack[index].comp_mv = this_refmv[1];
         ref_frame_idx0[index] = candidate->ref_frame[0];
         ref_frame_idx1[index] = candidate->ref_frame[1];
+#if CONFIG_CWP
+        ref_mv_stack[index].cwp_idx = candidate->cwp_idx;
+#endif  // CONFIG_CWP
         ref_mv_weight[index] = weight;
         ++(*refmv_count);
       }
     }
     return;
   }
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
 
   if (rf[1] == NONE_FRAME) {
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
     assert(!mbmi->skip_mode);
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
 
     // single reference frame
     for (ref = 0; ref < 2; ++ref) {
@@ -523,13 +535,16 @@
           ref_mv_stack[index].row_offset = row_offset;
           ref_mv_stack[index].col_offset = col_offset;
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
+#if CONFIG_CWP
+          ref_mv_stack[index].cwp_idx = candidate->cwp_idx;
+#endif  // CONFIG_CWP
           ref_mv_weight[index] = weight;
           ++(*refmv_count);
         }
         if (have_newmv_in_inter_mode(candidate->mode)) ++*newmv_count;
         ++*ref_match_count;
       }
-#if CONFIG_SMVP_IMPROVEMENT
+#if CONFIG_MVP_IMPROVEMENT
       else if (add_more_mvs && is_inter_ref_frame(candidate->ref_frame[ref]) &&
 #if CONFIG_IBC_SR_EXT
                rf[0] != INTRA_FRAME &&
@@ -586,11 +601,14 @@
               *derived_mv_count < MAX_REF_MV_STACK_SIZE) {
             derived_mv_stack[index].this_mv = this_refmv;
             derived_mv_weight[index] = weight;
+#if CONFIG_CWP
+            derived_mv_stack[index].cwp_idx = candidate->cwp_idx;
+#endif  // CONFIG_CWP
             ++(*derived_mv_count);
           }
         }
       }
-#endif  // CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
     }
   } else {
 #if CONFIG_TIP
@@ -606,9 +624,9 @@
       // compound reference frame
       if (candidate->ref_frame[0] == rf[0] &&
           candidate->ref_frame[1] == rf[1]) {
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
         if (mbmi->skip_mode) return;
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
 
         int_mv this_refmv[2];
 
@@ -640,25 +658,31 @@
           ref_mv_stack[index].row_offset = OFFSET_NONSPATIAL;
           ref_mv_stack[index].col_offset = OFFSET_NONSPATIAL;
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
+#if CONFIG_CWP
+          ref_mv_stack[index].cwp_idx = candidate->cwp_idx;
+#endif  // CONFIG_CWP
           ++(*refmv_count);
         }
         if (have_newmv_in_inter_mode(candidate->mode)) ++*newmv_count;
         ++*ref_match_count;
       }
-#if CONFIG_SMVP_IMPROVEMENT
+#if CONFIG_MVP_IMPROVEMENT
       else if (add_more_mvs) {
         // Compound reference frame, but only have one reference frame
         // is the same as the reference frame of the neighboring block
         int candidate_ref_idx0 = -1;
         int candidate_ref_idx1 = -1;
+        int which_cand_ref = -1;
         if (candidate->ref_frame[0] == rf[0] ||
             candidate->ref_frame[1] == rf[0]) {
           candidate_ref_idx0 = 0;
           candidate_ref_idx1 = 1;
+          which_cand_ref = (candidate->ref_frame[0] == rf[0]) ? 0 : 1;
         } else if (candidate->ref_frame[0] == rf[1] ||
                    candidate->ref_frame[1] == rf[1]) {
           candidate_ref_idx0 = 1;
           candidate_ref_idx1 = 0;
+          which_cand_ref = (candidate->ref_frame[0] == rf[1]) ? 0 : 1;
         }
 
         if (candidate_ref_idx0 != -1 && candidate_ref_idx1 != -1) {
@@ -671,7 +695,7 @@
 #if CONFIG_C071_SUBBLK_WARPMV
                                          submi,
 #endif  // CONFIG_C071_SUBBLK_WARPMV
-                                         candidate_ref_idx0);
+                                         which_cand_ref);
 
           int cand_idx = 0;
           for (cand_idx = 0; cand_idx < *single_mv_count; ++cand_idx) {
@@ -700,6 +724,9 @@
               derived_mv_stack[index].this_mv = this_refmv[0];
               derived_mv_stack[index].comp_mv = this_refmv[1];
               derived_mv_weight[index] = weight;
+#if CONFIG_CWP
+              derived_mv_stack[index].cwp_idx = candidate->cwp_idx;
+#endif  // CONFIG_CWP
               ++(*derived_mv_count);
             }
           }
@@ -722,7 +749,7 @@
           }
         }
       }
-#endif  // CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
     }
 #if CONFIG_TIP
   }
@@ -805,9 +832,8 @@
   }
 }
 #endif  // CONFIG_WARP_REF_LIST
-// both CONFIG_SMVP_IMPROVEMENT and CONFIG_C043_MVP_IMPROVEMENTS are ture case,
-// scan_row_mbmi does not called
-#if !(CONFIG_SMVP_IMPROVEMENT && CONFIG_C043_MVP_IMPROVEMENTS)
+// When CONFIG_MVP_IMPROVEMENT is true, scan_row_mbmi is not called.
+#if !CONFIG_MVP_IMPROVEMENT
 static AOM_INLINE void scan_row_mbmi(
     const AV1_COMMON *cm, const MACROBLOCKD *xd,
 #if CONFIG_TIP || CONFIG_EXT_RECUR_PARTITIONS
@@ -817,14 +843,14 @@
     CANDIDATE_MV *ref_mv_stack, uint16_t *ref_mv_weight, uint8_t *refmv_count,
     uint8_t *ref_match_count, uint8_t *newmv_count, int_mv *gm_mv_candidates,
     int max_row_offset,
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
     MV_REFERENCE_FRAME *ref_frame_idx0, MV_REFERENCE_FRAME *ref_frame_idx1,
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
-#if CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
+#if CONFIG_MVP_IMPROVEMENT
     int add_more_mvs, SINGLE_MV_CANDIDATE *single_mv, uint8_t *single_mv_count,
     CANDIDATE_MV *derived_mv_stack, uint16_t *derived_mv_weight,
     uint8_t *derived_mv_count,
-#endif  // CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
 #if CONFIG_WARP_REF_LIST
     WARP_CANDIDATE warp_param_stack[MAX_WARP_REF_CANDIDATES],
     int max_num_of_warp_candidates, uint8_t *valid_num_warp_candidates,
@@ -853,6 +879,7 @@
   const int plane_type = (xd->tree_type == CHROMA_PART);
   for (int i = 0; i < end_mi;) {
 #if CONFIG_EXT_RECUR_PARTITIONS
+    if (xd->mi_col + col_offset + i >= cm->mi_params.mi_cols) break;
     const int sb_mi_size = mi_size_wide[cm->sb_size];
     const int mask_row = mi_row & (sb_mi_size - 1);
     const int mask_col = mi_col & (sb_mi_size - 1);
@@ -867,6 +894,7 @@
     }
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
     const MB_MODE_INFO *const candidate = candidate_mi0[col_offset + i];
+    assert(candidate != NULL);
 #if CONFIG_C071_SUBBLK_WARPMV
     const SUBMB_INFO *const submi = submi_mi0[col_offset + i];
 #endif  // CONFIG_C071_SUBBLK_WARPMV
@@ -878,7 +906,7 @@
     else if (abs(row_offset) > 1)
       len = AOMMAX(len, width_8x8);
 
-#if CONFIG_COMPLEXITY_SCALABLE_MVP
+#if CONFIG_MVP_IMPROVEMENT
     // Don't add weight to row_offset < -1 which is in the outer area
     uint16_t weight = row_offset < -1 ? 0 : 2;
 #else
@@ -887,7 +915,7 @@
     if (xd->width >= width_8x8 && xd->width <= n4_w) {
       uint16_t inc = AOMMIN(-max_row_offset + row_offset + 1,
                             mi_size_high[candidate_bsize]);
-#if !CONFIG_COMPLEXITY_SCALABLE_MVP
+#if !CONFIG_MVP_IMPROVEMENT
       // Obtain range used in weight calculation.
       weight = AOMMAX(weight, inc);
 #endif
@@ -911,9 +939,9 @@
 
     add_ref_mv_candidate(
 #if CONFIG_TIP
-#if !CONFIG_SMVP_IMPROVEMENT
+#if !CONFIG_MVP_IMPROVEMENT
         cm,
-#endif  // !CONFIG_SMVP_IMPROVEMENT
+#endif  // !CONFIG_MVP_IMPROVEMENT
         mi_row, mi_col, cand_mi_row, cand_mi_col,
 #endif  // CONFIG_TIP
         candidate,
@@ -922,13 +950,13 @@
 #endif  // CONFIG_C071_SUBBLK_WARPMV
         rf, refmv_count, ref_match_count, newmv_count, ref_mv_stack,
         ref_mv_weight, gm_mv_candidates, cm->global_motion,
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
         xd->mi[0], ref_frame_idx0, ref_frame_idx1,
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
-#if CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
+#if CONFIG_MVP_IMPROVEMENT
         cm, add_more_mvs, single_mv, single_mv_count, derived_mv_stack,
         derived_mv_weight, derived_mv_count,
-#endif  // CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
 #if CONFIG_IBC_SR_EXT
         xd->mi[0]->use_intrabc[xd->tree_type == CHROMA_PART],
 #endif  // CONFIG_IBC_SR_EXT
@@ -945,9 +973,9 @@
     i += len;
   }
 }
-#endif  // !(CONFIG_SMVP_IMPROVEMENT && CONFIG_C043_MVP_IMPROVEMENTS)
+#endif  // !CONFIG_MVP_IMPROVEMENT
 
-#if CONFIG_C043_MVP_IMPROVEMENTS
+#if CONFIG_MVP_IMPROVEMENT
 // update processed_cols variable, when scan_col_mbmi() is not used for adjacent
 // neigbhors
 static AOM_INLINE void update_processed_cols(const MACROBLOCKD *xd, int mi_row,
@@ -971,7 +999,7 @@
     }
   }
 }
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
+#endif  // CONFIG_MVP_IMPROVEMENT
 
 static AOM_INLINE void scan_col_mbmi(
     const AV1_COMMON *cm, const MACROBLOCKD *xd, int mi_row,
@@ -981,14 +1009,14 @@
     const MV_REFERENCE_FRAME rf[2], int col_offset, CANDIDATE_MV *ref_mv_stack,
     uint16_t *ref_mv_weight, uint8_t *refmv_count, uint8_t *ref_match_count,
     uint8_t *newmv_count, int_mv *gm_mv_candidates, int max_col_offset,
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
     MV_REFERENCE_FRAME *ref_frame_idx0, MV_REFERENCE_FRAME *ref_frame_idx1,
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
-#if CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
+#if CONFIG_MVP_IMPROVEMENT
     int add_more_mvs, SINGLE_MV_CANDIDATE *single_mv, uint8_t *single_mv_count,
     CANDIDATE_MV *derived_mv_stack, uint16_t *derived_mv_weight,
     uint8_t *derived_mv_count,
-#endif  // CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
 #if CONFIG_WARP_REF_LIST
     WARP_CANDIDATE warp_param_stack[MAX_WARP_REF_CANDIDATES],
     int max_num_of_warp_candidates, uint8_t *valid_num_warp_candidates,
@@ -1012,6 +1040,7 @@
 
   for (i = 0; i < end_mi;) {
 #if CONFIG_EXT_RECUR_PARTITIONS
+    if (xd->mi_row + row_offset + i >= cm->mi_params.mi_rows) break;
     const int sb_mi_size = mi_size_wide[cm->sb_size];
     const int mask_row = mi_row & (sb_mi_size - 1);
     const int mask_col = mi_col & (sb_mi_size - 1);
@@ -1026,6 +1055,7 @@
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
     const MB_MODE_INFO *const candidate =
         xd->mi[(row_offset + i) * xd->mi_stride + col_offset];
+    assert(candidate != NULL);
 #if CONFIG_C071_SUBBLK_WARPMV
     const SUBMB_INFO *const submi =
         xd->submi[(row_offset + i) * xd->mi_stride + col_offset];
@@ -1039,7 +1069,7 @@
     else if (abs(col_offset) > 1)
       len = AOMMAX(len, n8_h_8);
 
-#if CONFIG_COMPLEXITY_SCALABLE_MVP
+#if CONFIG_MVP_IMPROVEMENT
     // Don't add weight to col_offset < -1 which is in the outer area
     uint16_t weight = col_offset < -1 ? 0 : 2;
 #else
@@ -1048,7 +1078,7 @@
     if (xd->height >= n8_h_8 && xd->height <= n4_h) {
       int inc = AOMMIN(-max_col_offset + col_offset + 1,
                        mi_size_wide[candidate_bsize]);
-#if !CONFIG_COMPLEXITY_SCALABLE_MVP
+#if !CONFIG_MVP_IMPROVEMENT
       // Obtain range used in weight calculation.
       weight = AOMMAX(weight, inc);
 #endif
@@ -1072,9 +1102,9 @@
 
     add_ref_mv_candidate(
 #if CONFIG_TIP
-#if !CONFIG_SMVP_IMPROVEMENT
+#if !CONFIG_MVP_IMPROVEMENT
         cm,
-#endif  // !CONFIG_SMVP_IMPROVEMENT
+#endif  // !CONFIG_MVP_IMPROVEMENT
         mi_row, mi_col, cand_mi_row, cand_mi_col,
 #endif  // CONFIG_TIP
         candidate,
@@ -1083,13 +1113,13 @@
 #endif  // CONFIG_C071_SUBBLK_WARPMV
         rf, refmv_count, ref_match_count, newmv_count, ref_mv_stack,
         ref_mv_weight, gm_mv_candidates, cm->global_motion,
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
         xd->mi[0], ref_frame_idx0, ref_frame_idx1,
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
-#if CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
+#if CONFIG_MVP_IMPROVEMENT
         cm, add_more_mvs, single_mv, single_mv_count, derived_mv_stack,
         derived_mv_weight, derived_mv_count,
-#endif  // CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
 #if CONFIG_IBC_SR_EXT
         xd->mi[0]->use_intrabc[xd->tree_type == CHROMA_PART],
 #endif  // CONFIG_IBC_SR_EXT
@@ -1130,14 +1160,14 @@
     const int mi_col, const MV_REFERENCE_FRAME rf[2], int row_offset,
     int col_offset, CANDIDATE_MV *ref_mv_stack, uint16_t *ref_mv_weight,
     uint8_t *ref_match_count, uint8_t *newmv_count, int_mv *gm_mv_candidates,
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
     MV_REFERENCE_FRAME *ref_frame_idx0, MV_REFERENCE_FRAME *ref_frame_idx1,
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
-#if CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
+#if CONFIG_MVP_IMPROVEMENT
     int add_more_mvs, SINGLE_MV_CANDIDATE *single_mv, uint8_t *single_mv_count,
     CANDIDATE_MV *derived_mv_stack, uint16_t *derived_mv_weight,
     uint8_t *derived_mv_count,
-#endif  // CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
 #if CONFIG_WARP_REF_LIST
     WARP_CANDIDATE warp_param_stack[MAX_WARP_REF_CANDIDATES],
     int max_num_of_warp_candidates, uint8_t *valid_num_warp_candidates,
@@ -1162,7 +1192,7 @@
 #endif  // CONFIG_C071_SUBBLK_WARPMV
     const int len = mi_size_wide[BLOCK_8X8];
 
-#if CONFIG_COMPLEXITY_SCALABLE_MVP
+#if CONFIG_MVP_IMPROVEMENT
     // Don't add weight to (-1,-1) which is in the outer area
     uint16_t weight = row_offset == -1 && col_offset == -1 ? 0 : 2;
 #endif
@@ -1183,9 +1213,9 @@
 
     add_ref_mv_candidate(
 #if CONFIG_TIP
-#if !CONFIG_SMVP_IMPROVEMENT
+#if !CONFIG_MVP_IMPROVEMENT
         cm,
-#endif  // !CONFIG_SMVP_IMPROVEMENT
+#endif  // !CONFIG_MVP_IMPROVEMENT
         mi_row, mi_col, cand_mi_row, cand_mi_col,
 #endif  // CONFIG_TIP
         candidate,
@@ -1194,20 +1224,20 @@
 #endif  // CONFIG_C071_SUBBLK_WARPMV
         rf, refmv_count, ref_match_count, newmv_count, ref_mv_stack,
         ref_mv_weight, gm_mv_candidates, cm->global_motion,
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
         xd->mi[0], ref_frame_idx0, ref_frame_idx1,
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
-#if CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
+#if CONFIG_MVP_IMPROVEMENT
         cm, add_more_mvs, single_mv, single_mv_count, derived_mv_stack,
         derived_mv_weight, derived_mv_count,
-#endif  // CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
 #if CONFIG_IBC_SR_EXT
         xd->mi[0]->use_intrabc[xd->tree_type == CHROMA_PART],
 #endif  // CONFIG_IBC_SR_EXT
 #if CONFIG_EXTENDED_WARP_PREDICTION
         row_offset, col_offset,
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
-#if CONFIG_COMPLEXITY_SCALABLE_MVP
+#if CONFIG_MVP_IMPROVEMENT
         weight * len
 #else
         2 * len
@@ -1254,7 +1284,7 @@
   return has_tr;
 }
 
-#if CONFIG_C043_MVP_IMPROVEMENTS || CONFIG_EXTENDED_WARP_PREDICTION
+#if CONFIG_MVP_IMPROVEMENT || CONFIG_EXTENDED_WARP_PREDICTION
 static int has_bottom_left(const AV1_COMMON *cm, const MACROBLOCKD *xd,
                            int mi_row, int mi_col, int n4_h) {
   const int sb_mi_size = mi_size_wide[cm->sb_size];
@@ -1284,7 +1314,7 @@
     return xd->is_mi_coded[av1_get_sdp_idx(xd->tree_type)][bl_offset];
   }
 }
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS || CONFIG_EXTENDED_WARP_PREDICTION
+#endif  // CONFIG_MVP_IMPROVEMENT || CONFIG_EXTENDED_WARP_PREDICTION
 #else
 static int has_top_right(const AV1_COMMON *cm, const MACROBLOCKD *xd,
                          int mi_row, int mi_col, int bs) {
@@ -1292,9 +1322,9 @@
   const int mask_row = mi_row & (sb_mi_size - 1);
   const int mask_col = mi_col & (sb_mi_size - 1);
 
-#if !CONFIG_C043_MVP_IMPROVEMENTS
+#if !CONFIG_MVP_IMPROVEMENT
   if (bs > mi_size_wide[BLOCK_64X64]) return 0;
-#endif  // !CONFIG_C043_MVP_IMPROVEMENTS
+#endif  // !CONFIG_MVP_IMPROVEMENT
 
   // In a split partition all apart from the bottom right has a top right
   int has_tr = !((mask_row & bs) && (mask_col & bs));
@@ -1340,7 +1370,7 @@
   return has_tr;
 }
 
-#if CONFIG_C043_MVP_IMPROVEMENTS
+#if CONFIG_MVP_IMPROVEMENT
 static int has_bottom_left(const AV1_COMMON *cm, const MACROBLOCKD *xd,
                            int mi_row, int mi_col, int bs) {
   const int sb_mi_size = mi_size_wide[cm->sb_size];
@@ -1396,10 +1426,10 @@
 
   return has_bl;
 }
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
+#endif  // CONFIG_MVP_IMPROVEMENT
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
 
-#if !CONFIG_C063_TMVP_IMPROVEMENT
+#if !CONFIG_MVP_IMPROVEMENT
 static int check_sb_border(const int mi_row, const int mi_col,
                            const int row_offset, const int col_offset) {
   const int sb_mi_size = mi_size_wide[BLOCK_64X64];
@@ -1412,7 +1442,7 @@
 
   return 1;
 }
-#endif  // !CONFIG_C063_TMVP_IMPROVEMENT
+#endif  // !CONFIG_MVP_IMPROVEMENT
 
 static int add_tpl_ref_mv(const AV1_COMMON *cm, const MACROBLOCKD *xd,
                           int mi_row, int mi_col, MV_REFERENCE_FRAME ref_frame,
@@ -1423,16 +1453,16 @@
 #endif  // !CONFIG_C076_INTER_MOD_CTX
                           ,
                           uint8_t *const refmv_count,
-#if CONFIG_C063_TMVP_IMPROVEMENT
+#if CONFIG_MVP_IMPROVEMENT
                           int *added_tmvp_cnt,
-#endif  // CONFIG_C063_TMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
                           CANDIDATE_MV ref_mv_stack[MAX_REF_MV_STACK_SIZE],
                           uint16_t ref_mv_weight[MAX_REF_MV_STACK_SIZE]
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
                           ,
                           MV_REFERENCE_FRAME *ref_frame_idx0,
                           MV_REFERENCE_FRAME *ref_frame_idx1
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
 #if !CONFIG_C076_INTER_MOD_CTX
                           ,
                           int16_t *mode_context
@@ -1469,9 +1499,17 @@
 #endif  // CONFIG_TIP
 
   const uint16_t weight_unit = 1;  // mi_size_wide[BLOCK_8X8];
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+  const int cur_frame_index = cm->cur_frame->display_order_hint;
+#else
   const int cur_frame_index = cm->cur_frame->order_hint;
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
   const RefCntBuffer *const buf_0 = get_ref_frame_buf(cm, rf[0]);
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+  const int frame0_index = buf_0->display_order_hint;
+#else
   const int frame0_index = buf_0->order_hint;
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
   const int cur_offset_0 = get_relative_dist(&cm->seq_params.order_hint_info,
                                              cur_frame_index, frame0_index);
   int idx;
@@ -1505,9 +1543,9 @@
     }
 #endif  // !CONFIG_C076_INTER_MOD_CTX
 
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
     assert(!xd->mi[0]->skip_mode);
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
 
     for (idx = 0; idx < *refmv_count; ++idx)
       if (this_refmv.as_int == ref_mv_stack[idx].this_mv.as_int) break;
@@ -1520,16 +1558,23 @@
       ref_mv_stack[idx].row_offset = OFFSET_NONSPATIAL;
       ref_mv_stack[idx].col_offset = OFFSET_NONSPATIAL;
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
+#if CONFIG_CWP
+      ref_mv_stack[idx].cwp_idx = CWP_EQUAL;
+#endif  // CONFIG_CWP
       ref_mv_weight[idx] = 2 * weight_unit;
       ++(*refmv_count);
-#if CONFIG_C063_TMVP_IMPROVEMENT
+#if CONFIG_MVP_IMPROVEMENT
       ++(*added_tmvp_cnt);
-#endif  // CONFIG_C063_TMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
     }
   } else {
     // Process compound inter mode
     const RefCntBuffer *const buf_1 = get_ref_frame_buf(cm, rf[1]);
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+    const int frame1_index = buf_1->display_order_hint;
+#else
     const int frame1_index = buf_1->order_hint;
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
     const int cur_offset_1 = get_relative_dist(&cm->seq_params.order_hint_info,
                                                cur_frame_index, frame1_index);
     int_mv comp_refmv;
@@ -1554,7 +1599,7 @@
     }
 #endif  // !CONFIG_C076_INTER_MOD_CTX
 
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
     if (xd->mi[0]->skip_mode) {
       for (idx = 0; idx < *refmv_count; ++idx) {
         if (this_refmv.as_int == ref_mv_stack[idx].this_mv.as_int &&
@@ -1568,16 +1613,19 @@
       if (idx == *refmv_count && *refmv_count < MAX_REF_MV_STACK_SIZE) {
         ref_mv_stack[idx].this_mv.as_int = this_refmv.as_int;
         ref_mv_stack[idx].comp_mv.as_int = comp_refmv.as_int;
+#if CONFIG_CWP
+        ref_mv_stack[idx].cwp_idx = CWP_EQUAL;
+#endif  // CONFIG_CWP
         ref_frame_idx0[idx] = rf[0];
         ref_frame_idx1[idx] = rf[1];
         ref_mv_weight[idx] = 2 * weight_unit;
         ++(*refmv_count);
-#if CONFIG_C063_TMVP_IMPROVEMENT
+#if CONFIG_MVP_IMPROVEMENT
         ++(*added_tmvp_cnt);
-#endif  // CONFIG_C063_TMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
       }
     } else {
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
       for (idx = 0; idx < *refmv_count; ++idx) {
         if (this_refmv.as_int == ref_mv_stack[idx].this_mv.as_int &&
             comp_refmv.as_int == ref_mv_stack[idx].comp_mv.as_int)
@@ -1594,15 +1642,18 @@
         ref_mv_stack[idx].row_offset = OFFSET_NONSPATIAL;
         ref_mv_stack[idx].col_offset = OFFSET_NONSPATIAL;
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
+#if CONFIG_CWP
+        ref_mv_stack[idx].cwp_idx = CWP_EQUAL;
+#endif  // CONFIG_CWP
         ref_mv_weight[idx] = 2 * weight_unit;
         ++(*refmv_count);
-#if CONFIG_C063_TMVP_IMPROVEMENT
+#if CONFIG_MVP_IMPROVEMENT
         ++(*added_tmvp_cnt);
-#endif  // CONFIG_C063_TMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
       }
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
     }
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
   }
 
   return 1;
@@ -1691,6 +1742,9 @@
         ref_mv_stack[stack_idx].row_offset = OFFSET_NONSPATIAL;
         ref_mv_stack[stack_idx].col_offset = OFFSET_NONSPATIAL;
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
+#if CONFIG_CWP
+        ref_mv_stack[stack_idx].cwp_idx = candidate->cwp_idx;
+#endif  // CONFIG_CWP
 
         // TODO(jingning): Set an arbitrary small number here. The weight
         // doesn't matter as long as it is properly initialized.
@@ -1736,13 +1790,16 @@
   ref_mv_stack[*refmv_count].row_offset = OFFSET_NONSPATIAL;
   ref_mv_stack[*refmv_count].col_offset = OFFSET_NONSPATIAL;
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
+#if CONFIG_CWP
+  ref_mv_stack[*refmv_count].cwp_idx = cand_mv.cwp_idx;
+#endif  // CONFIG_CWP
   ++*refmv_count;
 
   return true;
 }
 #endif  // CONFIG_REF_MV_BANK
 
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
 // Add a BV candidate to ref MV stack without duplicate check
 static AOM_INLINE bool add_to_ref_bv_list(CANDIDATE_MV cand_mv,
                                           CANDIDATE_MV *ref_mv_stack,
@@ -1750,20 +1807,23 @@
                                           uint8_t *refmv_count) {
   ref_mv_stack[*refmv_count] = cand_mv;
   ref_mv_weight[*refmv_count] = REF_CAT_LEVEL;
+#if CONFIG_CWP
+  ref_mv_stack[*refmv_count].cwp_idx = cand_mv.cwp_idx;
+#endif  // CONFIG_CWP
   ++*refmv_count;
 
   return true;
 }
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
 
 static AOM_INLINE void setup_ref_mv_list(
     const AV1_COMMON *cm, const MACROBLOCKD *xd, MV_REFERENCE_FRAME ref_frame,
     uint8_t *const refmv_count,
     CANDIDATE_MV ref_mv_stack[MAX_REF_MV_STACK_SIZE],
     uint16_t ref_mv_weight[MAX_REF_MV_STACK_SIZE],
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
     MV_REFERENCE_FRAME *ref_frame_idx0, MV_REFERENCE_FRAME *ref_frame_idx1,
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
     int_mv mv_ref_list[MAX_MV_REF_CANDIDATES], int_mv *gm_mv_candidates,
     int mi_row, int mi_col
 #if !CONFIG_C076_INTER_MOD_CTX
@@ -1779,15 +1839,15 @@
 ) {
 #if CONFIG_EXT_RECUR_PARTITIONS
   const int has_tr = has_top_right(cm, xd, mi_row, mi_col, xd->width);
-#if CONFIG_C043_MVP_IMPROVEMENTS
+#if CONFIG_MVP_IMPROVEMENT
   const int has_bl = has_bottom_left(cm, xd, mi_row, mi_col, xd->height);
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
+#endif  // CONFIG_MVP_IMPROVEMENT
 #else
   const int bs = AOMMAX(xd->width, xd->height);
   const int has_tr = has_top_right(cm, xd, mi_row, mi_col, bs);
-#if CONFIG_C043_MVP_IMPROVEMENTS
+#if CONFIG_MVP_IMPROVEMENT
   const int has_bl = has_bottom_left(cm, xd, mi_row, mi_col, bs);
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
+#endif  // CONFIG_MVP_IMPROVEMENT
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
   MV_REFERENCE_FRAME rf[2];
 
@@ -1795,11 +1855,10 @@
   int max_row_offset = 0, max_col_offset = 0;
   const int row_adj = (xd->height < mi_size_high[BLOCK_8X8]) && (mi_row & 0x01);
   const int col_adj = (xd->width < mi_size_wide[BLOCK_8X8]) && (mi_col & 0x01);
-  // both CONFIG_SMVP_IMPROVEMENT and CONFIG_C043_MVP_IMPROVEMENTS are ture
-  // case, processed_rows does not needed
-#if !(CONFIG_SMVP_IMPROVEMENT && CONFIG_C043_MVP_IMPROVEMENTS)
+  // when CONFIG_MVP_IMPROVEMENT is true, processed_rows is not needed
+#if !CONFIG_MVP_IMPROVEMENT
   int processed_rows = 0;
-#endif  // !(CONFIG_SMVP_IMPROVEMENT && CONFIG_C043_MVP_IMPROVEMENTS)
+#endif  // !CONFIG_MVP_IMPROVEMENT
   int processed_cols = 0;
 
   av1_set_ref_frame(rf, ref_frame);
@@ -1812,16 +1871,42 @@
   for (int k = 0; k < MAX_REF_MV_STACK_SIZE; k++) {
     ref_mv_stack[k].row_offset = OFFSET_NONSPATIAL;
     ref_mv_stack[k].col_offset = OFFSET_NONSPATIAL;
+#if CONFIG_CWP
+    ref_mv_stack[k].cwp_idx = CWP_EQUAL;
+#endif  // CONFIG_CWP
   }
 #endif
 
+#if CONFIG_WARP_REF_LIST && CONFIG_CWG_D067_IMPROVED_WARP
+  // derive a warp model from the 3 corner MVs
+  if (warp_param_stack && valid_num_warp_candidates &&
+      *valid_num_warp_candidates < max_num_of_warp_candidates) {
+    int mvs_32[2 * 3];
+    int pts[2 * 3];
+    int np = 0;
+    WarpedMotionParams cand_warp_param = default_warp_params;
+    const int valid_points =
+        generate_points_from_corners(xd, pts, mvs_32, &np, ref_frame);
+    const int valid_model =
+        get_model_from_corner_mvs(&cand_warp_param, pts, valid_points, mvs_32,
+                                  xd->mi[0]->sb_type[PLANE_TYPE_Y]);
+    if (valid_model && !cand_warp_param.invalid &&
+        !is_this_param_already_in_list(*valid_num_warp_candidates,
+                                       warp_param_stack, cand_warp_param)) {
+      insert_neighbor_warp_candidate(warp_param_stack, &cand_warp_param,
+                                     *valid_num_warp_candidates, PROJ_SPATIAL);
+      (*valid_num_warp_candidates)++;
+    }
+  }
+#endif  // CONFIG_WARP_REF_LIST && CONFIG_CWG_D067_IMPROVED_WARP
+
   // Find valid maximum row/col offset.
   if (xd->up_available) {
-#if CONFIG_SMVP_IMPROVEMENT
+#if CONFIG_MVP_IMPROVEMENT
     max_row_offset = -(MVREF_ROWS << 1) + row_adj;
 #else
     max_row_offset = -(MVREF_ROW_COLS << 1) + row_adj;
-#endif  // CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
 
     if (xd->height < mi_size_high[BLOCK_8X8])
       max_row_offset = -(2 << 1) + row_adj;
@@ -1830,11 +1915,11 @@
   }
 
   if (xd->left_available) {
-#if CONFIG_SMVP_IMPROVEMENT
+#if CONFIG_MVP_IMPROVEMENT
     max_col_offset = -(MVREF_COLS << 1) + col_adj;
 #else
     max_col_offset = -(MVREF_ROW_COLS << 1) + col_adj;
-#endif  // CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
 
     if (xd->width < mi_size_wide[BLOCK_8X8])
       max_col_offset = -(2 << 1) + col_adj;
@@ -1846,26 +1931,26 @@
   uint8_t row_match_count = 0;
   uint8_t newmv_count = 0;
 
-#if CONFIG_SMVP_IMPROVEMENT
+#if CONFIG_MVP_IMPROVEMENT
   SINGLE_MV_CANDIDATE single_mv[MAX_REF_MV_STACK_SIZE];
   uint8_t single_mv_count = 0;
   CANDIDATE_MV derived_mv_stack[MAX_REF_MV_STACK_SIZE];
   uint16_t derived_mv_weight[MAX_REF_MV_STACK_SIZE];
   uint8_t derived_mv_count = 0;
-#endif  // CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
 
-#if CONFIG_C043_MVP_IMPROVEMENTS
+#if CONFIG_MVP_IMPROVEMENT
   if (xd->left_available) {
     scan_blk_mbmi(cm, xd, mi_row, mi_col, rf, (xd->height - 1), -1,
                   ref_mv_stack, ref_mv_weight, &col_match_count, &newmv_count,
                   gm_mv_candidates,
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
                   ref_frame_idx0, ref_frame_idx1,
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
-#if CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
+#if CONFIG_MVP_IMPROVEMENT
                   1, single_mv, &single_mv_count, derived_mv_stack,
                   derived_mv_weight, &derived_mv_count,
-#endif  // CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
 #if CONFIG_WARP_REF_LIST
                   warp_param_stack, max_num_of_warp_candidates,
                   valid_num_warp_candidates, ref_frame,
@@ -1878,13 +1963,13 @@
     scan_blk_mbmi(cm, xd, mi_row, mi_col, rf, -1, (xd->width - 1), ref_mv_stack,
                   ref_mv_weight, &row_match_count, &newmv_count,
                   gm_mv_candidates,
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
                   ref_frame_idx0, ref_frame_idx1,
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
-#if CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
+#if CONFIG_MVP_IMPROVEMENT
                   1, single_mv, &single_mv_count, derived_mv_stack,
                   derived_mv_weight, &derived_mv_count,
-#endif  // CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
 #if CONFIG_WARP_REF_LIST
                   warp_param_stack, max_num_of_warp_candidates,
                   valid_num_warp_candidates, ref_frame,
@@ -1895,13 +1980,13 @@
     scan_blk_mbmi(cm, xd, mi_row, mi_col, rf, 0, -1, ref_mv_stack,
                   ref_mv_weight, &col_match_count, &newmv_count,
                   gm_mv_candidates,
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
                   ref_frame_idx0, ref_frame_idx1,
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
-#if CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
+#if CONFIG_MVP_IMPROVEMENT
                   1, single_mv, &single_mv_count, derived_mv_stack,
                   derived_mv_weight, &derived_mv_count,
-#endif  // CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
 #if CONFIG_WARP_REF_LIST
                   warp_param_stack, max_num_of_warp_candidates,
                   valid_num_warp_candidates, ref_frame,
@@ -1914,13 +1999,13 @@
     scan_blk_mbmi(cm, xd, mi_row, mi_col, rf, -1, 0, ref_mv_stack,
                   ref_mv_weight, &row_match_count, &newmv_count,
                   gm_mv_candidates,
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
                   ref_frame_idx0, ref_frame_idx1,
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
-#if CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
+#if CONFIG_MVP_IMPROVEMENT
                   1, single_mv, &single_mv_count, derived_mv_stack,
                   derived_mv_weight, &derived_mv_count,
-#endif  // CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
 #if CONFIG_WARP_REF_LIST
                   warp_param_stack, max_num_of_warp_candidates,
                   valid_num_warp_candidates, ref_frame,
@@ -1931,13 +2016,13 @@
     scan_blk_mbmi(cm, xd, mi_row, mi_col, rf, xd->height, -1, ref_mv_stack,
                   ref_mv_weight, &col_match_count, &newmv_count,
                   gm_mv_candidates,
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
                   ref_frame_idx0, ref_frame_idx1,
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
-#if CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
+#if CONFIG_MVP_IMPROVEMENT
                   1, single_mv, &single_mv_count, derived_mv_stack,
                   derived_mv_weight, &derived_mv_count,
-#endif  // CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
 #if CONFIG_WARP_REF_LIST
                   warp_param_stack, max_num_of_warp_candidates,
                   valid_num_warp_candidates, ref_frame,
@@ -1948,13 +2033,13 @@
     scan_blk_mbmi(cm, xd, mi_row, mi_col, rf, -1, xd->width, ref_mv_stack,
                   ref_mv_weight, &row_match_count, &newmv_count,
                   gm_mv_candidates,
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
                   ref_frame_idx0, ref_frame_idx1,
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
-#if CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
+#if CONFIG_MVP_IMPROVEMENT
                   1, single_mv, &single_mv_count, derived_mv_stack,
                   derived_mv_weight, &derived_mv_count,
-#endif  // CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
 #if CONFIG_WARP_REF_LIST
                   warp_param_stack, max_num_of_warp_candidates,
                   valid_num_warp_candidates, ref_frame,
@@ -1967,13 +2052,13 @@
     scan_blk_mbmi(cm, xd, mi_row, mi_col, rf, -1, -1, ref_mv_stack,
                   ref_mv_weight, &dummy_ref_match_count, &dummy_new_mv_count,
                   gm_mv_candidates,
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
                   ref_frame_idx0, ref_frame_idx1,
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
-#if CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
+#if CONFIG_MVP_IMPROVEMENT
                   1, single_mv, &single_mv_count, derived_mv_stack,
                   derived_mv_weight, &derived_mv_count,
-#endif  // CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
 #if CONFIG_WARP_REF_LIST
                   warp_param_stack, max_num_of_warp_candidates,
                   valid_num_warp_candidates, ref_frame,
@@ -1984,13 +2069,13 @@
     scan_blk_mbmi(cm, xd, mi_row, mi_col, rf, (xd->height >> 1), -1,
                   ref_mv_stack, ref_mv_weight, &col_match_count, &newmv_count,
                   gm_mv_candidates,
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
                   ref_frame_idx0, ref_frame_idx1,
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
-#if CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
+#if CONFIG_MVP_IMPROVEMENT
                   1, single_mv, &single_mv_count, derived_mv_stack,
                   derived_mv_weight, &derived_mv_count,
-#endif  // CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
 #if CONFIG_WARP_REF_LIST
                   warp_param_stack, max_num_of_warp_candidates,
                   valid_num_warp_candidates, ref_frame,
@@ -2003,13 +2088,13 @@
     scan_blk_mbmi(cm, xd, mi_row, mi_col, rf, -1, (xd->width >> 1),
                   ref_mv_stack, ref_mv_weight, &row_match_count, &newmv_count,
                   gm_mv_candidates,
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
                   ref_frame_idx0, ref_frame_idx1,
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
-#if CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
+#if CONFIG_MVP_IMPROVEMENT
                   1, single_mv, &single_mv_count, derived_mv_stack,
                   derived_mv_weight, &derived_mv_count,
-#endif  // CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
 #if CONFIG_WARP_REF_LIST
                   warp_param_stack, max_num_of_warp_candidates,
                   valid_num_warp_candidates, ref_frame,
@@ -2026,13 +2111,13 @@
                   mi_col, rf, -1, ref_mv_stack, ref_mv_weight, refmv_count,
                   &row_match_count, &newmv_count, gm_mv_candidates,
                   max_row_offset,
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
                   ref_frame_idx0, ref_frame_idx1,
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
-#if CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
+#if CONFIG_MVP_IMPROVEMENT
                   1, single_mv, &single_mv_count, derived_mv_stack,
                   derived_mv_weight, &derived_mv_count,
-#endif  // CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
 #if CONFIG_WARP_REF_LIST
                   warp_param_stack, max_num_of_warp_candidates,
                   valid_num_warp_candidates, ref_frame,
@@ -2048,13 +2133,13 @@
                   rf, -1, ref_mv_stack, ref_mv_weight, refmv_count,
                   &col_match_count, &newmv_count, gm_mv_candidates,
                   max_col_offset,
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
                   ref_frame_idx0, ref_frame_idx1,
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
-#if CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
+#if CONFIG_MVP_IMPROVEMENT
                   1, single_mv, &single_mv_count, derived_mv_stack,
                   derived_mv_weight, &derived_mv_count,
-#endif  // CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
 #if CONFIG_WARP_REF_LIST
                   warp_param_stack, max_num_of_warp_candidates,
                   valid_num_warp_candidates, ref_frame,
@@ -2066,19 +2151,19 @@
     scan_blk_mbmi(cm, xd, mi_row, mi_col, rf, -1, xd->width, ref_mv_stack,
                   ref_mv_weight, &row_match_count, &newmv_count,
                   gm_mv_candidates,
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
                   ref_frame_idx0, ref_frame_idx1,
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
-#if CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
+#if CONFIG_MVP_IMPROVEMENT
                   1, single_mv, &single_mv_count, derived_mv_stack,
                   derived_mv_weight, &derived_mv_count,
-#endif  // CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
 #if CONFIG_WARP_REF_LIST
                   warp_param_stack, max_num_of_warp_candidates,
                   valid_num_warp_candidates, ref_frame,
 #endif  // CONFIG_WARP_REF_LIST
                   refmv_count);
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
+#endif  // CONFIG_MVP_IMPROVEMENT
 
 #if !CONFIG_C076_INTER_MOD_CTX
   const uint8_t nearest_match = (row_match_count > 0) + (col_match_count > 0);
@@ -2098,13 +2183,13 @@
 #if !CONFIG_C076_INTER_MOD_CTX
     int is_available = 0;
 #endif  //! CONFIG_C076_INTER_MOD_CTX
-#if !CONFIG_C063_TMVP_IMPROVEMENT
+#if !CONFIG_MVP_IMPROVEMENT
     const int voffset = AOMMAX(mi_size_high[BLOCK_8X8], xd->height);
     const int hoffset = AOMMAX(mi_size_wide[BLOCK_8X8], xd->width);
-#endif  // !CONFIG_C063_TMVP_IMPROVEMENT
+#endif  // !CONFIG_MVP_IMPROVEMENT
     const int blk_row_end = AOMMIN(xd->height, mi_size_high[BLOCK_64X64]);
     const int blk_col_end = AOMMIN(xd->width, mi_size_wide[BLOCK_64X64]);
-#if !CONFIG_C063_TMVP_IMPROVEMENT
+#if !CONFIG_MVP_IMPROVEMENT
     const int tpl_sample_pos[3][2] = {
       { voffset, -2 },
       { voffset, hoffset },
@@ -2114,7 +2199,7 @@
                                 (xd->height < mi_size_high[BLOCK_64X64]) &&
                                 (xd->width >= mi_size_wide[BLOCK_8X8]) &&
                                 (xd->width < mi_size_wide[BLOCK_64X64]);
-#endif  // !CONFIG_C063_TMVP_IMPROVEMENT
+#endif  // !CONFIG_MVP_IMPROVEMENT
 
     const int step_h = (xd->height >= mi_size_high[BLOCK_64X64])
                            ? mi_size_high[BLOCK_16X16]
@@ -2123,11 +2208,11 @@
                            ? mi_size_wide[BLOCK_16X16]
                            : mi_size_wide[BLOCK_8X8];
 
-#if CONFIG_C063_TMVP_IMPROVEMENT
+#if CONFIG_MVP_IMPROVEMENT
     int added_tmvp_cnt = 0;
-#endif  // CONFIG_C063_TMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
 
-#if CONFIG_C063_TMVP_IMPROVEMENT
+#if CONFIG_MVP_IMPROVEMENT
     // Use reversed horizontal scan order to check TMVP candidates
     for (int blk_row = blk_row_end - step_h; blk_row >= 0; blk_row -= step_h) {
       for (int blk_col = blk_col_end - step_w; blk_col >= 0;
@@ -2136,7 +2221,7 @@
 #else
     for (int blk_row = 0; blk_row < blk_row_end; blk_row += step_h) {
       for (int blk_col = 0; blk_col < blk_col_end; blk_col += step_w) {
-#endif  // CONFIG_C063_TMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
 #if !CONFIG_C076_INTER_MOD_CTX
         int ret =
 #endif  //! CONFIG_C076_INTER_MOD_CTX
@@ -2147,25 +2232,25 @@
 #endif  //! CONFIG_C076_INTER_MOD_CTX
                            ,
                            refmv_count,
-#if CONFIG_C063_TMVP_IMPROVEMENT
+#if CONFIG_MVP_IMPROVEMENT
                            &added_tmvp_cnt,
-#endif  // CONFIG_C063_TMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
                            ref_mv_stack, ref_mv_weight
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
                            ,
                            ref_frame_idx0, ref_frame_idx1
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
 #if !CONFIG_C076_INTER_MOD_CTX
                            ,
                            mode_context
 #endif  // !CONFIG_C076_INTER_MOD_CTX
             );
 #if !CONFIG_C076_INTER_MOD_CTX
-#if CONFIG_C063_TMVP_IMPROVEMENT
+#if CONFIG_MVP_IMPROVEMENT
         if (added_tmvp_cnt) is_available = ret;
 #else
         if (blk_row == 0 && blk_col == 0) is_available = ret;
-#endif  // CONFIG_C063_TMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
 #endif  //! CONFIG_C076_INTER_MOD_CTX
       }
     }
@@ -2173,7 +2258,7 @@
 #if !CONFIG_C076_INTER_MOD_CTX
     if (is_available == 0) mode_context[ref_frame] |= (1 << GLOBALMV_OFFSET);
 #endif  //! CONFIG_C076_INTER_MOD_CTX
-#if !CONFIG_C063_TMVP_IMPROVEMENT
+#if !CONFIG_MVP_IMPROVEMENT
     for (int i = 0; i < 3 && allow_extension; ++i) {
       const int blk_row = tpl_sample_pos[i][0];
       const int blk_col = tpl_sample_pos[i][1];
@@ -2184,39 +2269,39 @@
                      gm_mv_candidates,
 #endif  //! CONFIG_C076_INTER_MOD_CTX
                      refmv_count, ref_mv_stack, ref_mv_weight,
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
                      ref_frame_idx0,
                      ref_frame_idx1
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
 #if !CONFIG_C076_INTER_MOD_CTX
                          mode_context
 #endif  //! CONFIG_C076_INTER_MOD_CTX
       );
     }
-#endif  // !CONFIG_C063_TMVP_IMPROVEMENT
+#endif  // !CONFIG_MVP_IMPROVEMENT
   }
 
   uint8_t dummy_newmv_count = 0;
 
-#if !CONFIG_C043_MVP_IMPROVEMENTS
+#if !CONFIG_MVP_IMPROVEMENT
   // Scan the second outer area.
   scan_blk_mbmi(cm, xd, mi_row, mi_col, rf, -1, -1, ref_mv_stack, ref_mv_weight,
                 &row_match_count, &dummy_newmv_count, gm_mv_candidates,
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
                 ref_frame_idx0, ref_frame_idx1,
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
-#if CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
+#if CONFIG_MVP_IMPROVEMENT
                 0, single_mv, &single_mv_count, derived_mv_stack,
                 derived_mv_weight, &derived_mv_count,
-#endif  // CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
 #if CONFIG_WARP_REF_LIST
                 warp_param_stack, max_num_of_warp_candidates,
                 valid_num_warp_candidates, ref_frame,
 #endif  // CONFIG_WARP_REF_LIST
                 refmv_count);
-#endif  // !CONFIG_C043_MVP_IMPROVEMENTS
+#endif  // !CONFIG_MVP_IMPROVEMENT
 
-#if CONFIG_SMVP_IMPROVEMENT
+#if CONFIG_MVP_IMPROVEMENT
   for (int idx = 2; idx <= MVREF_COLS; ++idx) {
     const int col_offset = -(idx << 1) + 1 + col_adj;
     if (abs(col_offset) <= abs(max_col_offset) &&
@@ -2228,9 +2313,9 @@
                     rf, col_offset, ref_mv_stack, ref_mv_weight, refmv_count,
                     &col_match_count, &dummy_newmv_count, gm_mv_candidates,
                     max_col_offset,
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
                     ref_frame_idx0, ref_frame_idx1,
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
                     0, single_mv, &single_mv_count, derived_mv_stack,
                     derived_mv_weight, &derived_mv_count,
 #if CONFIG_WARP_REF_LIST
@@ -2254,9 +2339,9 @@
                     mi_col, rf, row_offset, ref_mv_stack, ref_mv_weight,
                     refmv_count, &row_match_count, &dummy_newmv_count,
                     gm_mv_candidates, max_row_offset,
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
                     ref_frame_idx0, ref_frame_idx1,
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
 #if CONFIG_WARP_REF_LIST
                     warp_param_stack, max_num_of_warp_candidates,
                     valid_num_warp_candidates, ref_frame,
@@ -2273,9 +2358,9 @@
                     rf, col_offset, ref_mv_stack, ref_mv_weight, refmv_count,
                     &col_match_count, &dummy_newmv_count, gm_mv_candidates,
                     max_col_offset,
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
                     ref_frame_idx0, ref_frame_idx1,
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
 #if CONFIG_WARP_REF_LIST
                     warp_param_stack, max_num_of_warp_candidates,
                     valid_num_warp_candidates, ref_frame,
@@ -2283,10 +2368,10 @@
 
                     &processed_cols);
   }
-#endif  // CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
 
 #if !CONFIG_C076_INTER_MOD_CTX
-#if CONFIG_COMPLEXITY_SCALABLE_MVP
+#if CONFIG_MVP_IMPROVEMENT
   // These contexts are independent of the outer area search
   int new_ctx = 2 * nearest_match + (newmv_count > 0);
   int ref_ctx = 2 * nearest_match + (newmv_count < 3);
@@ -2335,7 +2420,7 @@
         ref_mv_stack[idx] = tmp_mv;
         ref_mv_weight[idx - 1] = ref_mv_weight[idx];
         ref_mv_weight[idx] = tmp_ref_mv_weight;
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
         if (xd->mi[0]->skip_mode) {
           const MV_REFERENCE_FRAME temp_ref0 = ref_frame_idx0[idx - 1];
           const MV_REFERENCE_FRAME temp_ref1 = ref_frame_idx1[idx - 1];
@@ -2345,14 +2430,14 @@
           ref_frame_idx1[idx - 1] = ref_frame_idx1[idx];
           ref_frame_idx1[idx] = temp_ref1;
         }
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
         nr_len = idx;
       }
     }
     len = nr_len;
   }
 
-#if !CONFIG_COMPLEXITY_SCALABLE_MVP
+#if !CONFIG_MVP_IMPROVEMENT
   len = *refmv_count;
   while (len > nearest_refmv_count) {
     int nr_len = nearest_refmv_count;
@@ -2371,15 +2456,16 @@
   }
 #endif
 
-#if (CONFIG_REF_MV_BANK && CONFIG_C043_MVP_IMPROVEMENTS)
+#if (CONFIG_REF_MV_BANK && CONFIG_MVP_IMPROVEMENT)
   if (cm->seq_params.enable_refmvbank) {
     const int ref_mv_limit =
         AOMMIN(cm->features.max_drl_bits + 1, MAX_REF_MV_STACK_SIZE);
+
     // If open slots are available, fetch reference MVs from the ref mv banks.
     if (*refmv_count < ref_mv_limit
-#if !CONFIG_BVP_IMPROVEMENT
+#if !CONFIG_IBC_BV_IMPROVEMENT
         && ref_frame != INTRA_FRAME
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
     ) {
       const REF_MV_BANK *ref_mv_bank = &xd->ref_mv_bank;
       const CANDIDATE_MV *queue = ref_mv_bank->rmb_buffer[ref_frame];
@@ -2393,40 +2479,40 @@
            ++idx_bank) {
         const int idx = (start_idx + count - 1 - idx_bank) % REF_MV_BANK_SIZE;
         const CANDIDATE_MV cand_mv = queue[idx];
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
         bool rmb_candi_exist =
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
             check_rmb_cand(cand_mv, ref_mv_stack, ref_mv_weight, refmv_count,
                            is_comp, xd->mi_row, xd->mi_col, block_width,
                            block_height, cm->width, cm->height);
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
         if (xd->mi[0]->skip_mode && rmb_candi_exist) {
           ref_frame_idx0[*refmv_count - 1] = rf[0];
           ref_frame_idx1[*refmv_count - 1] = rf[1];
         }
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
       }
     }
   }
-#endif  // (CONFIG_REF_MV_BANK && CONFIG_C043_MVP_IMPROVEMENTS)
+#endif  // (CONFIG_REF_MV_BANK && CONFIG_MVP_IMPROVEMENT)
 
-#if CONFIG_SMVP_IMPROVEMENT
+#if CONFIG_MVP_IMPROVEMENT
   const int max_ref_mv_count =
       AOMMIN(cm->features.max_drl_bits + 1, MAX_REF_MV_STACK_SIZE);
 
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
   if (xd->mi[0]->skip_mode) derived_mv_count = 0;
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
 
   if (*refmv_count < max_ref_mv_count && derived_mv_count > 0) {
     fill_mvp_from_derived_smvp(rf, ref_mv_stack, ref_mv_weight, refmv_count,
                                derived_mv_stack, derived_mv_count,
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
                                xd->mi[0], ref_frame_idx0, ref_frame_idx1,
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
                                max_ref_mv_count);
   }
-#endif  // CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
 
   int mi_width = AOMMIN(mi_size_wide[BLOCK_64X64], xd->width);
   mi_width = AOMMIN(mi_width, cm->mi_params.mi_cols - mi_col);
@@ -2495,6 +2581,9 @@
           ref_mv_stack[*refmv_count].row_offset = OFFSET_NONSPATIAL;
           ref_mv_stack[*refmv_count].col_offset = OFFSET_NONSPATIAL;
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
+#if CONFIG_CWP
+          ref_mv_stack[*refmv_count].cwp_idx = CWP_EQUAL;
+#endif  // CONFIG_CWP
         } else {
           ref_mv_stack[*refmv_count].this_mv = comp_list[0][0];
           ref_mv_stack[*refmv_count].comp_mv = comp_list[0][1];
@@ -2502,13 +2591,16 @@
           ref_mv_stack[*refmv_count].row_offset = OFFSET_NONSPATIAL;
           ref_mv_stack[*refmv_count].col_offset = OFFSET_NONSPATIAL;
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
+#if CONFIG_CWP
+          ref_mv_stack[*refmv_count].cwp_idx = CWP_EQUAL;
+#endif  // CONFIG_CWP
         }
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
         if (xd->mi[0]->skip_mode) {
           ref_frame_idx0[*refmv_count] = rf[0];
           ref_frame_idx1[*refmv_count] = rf[1];
         }
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
         ref_mv_weight[*refmv_count] = 2;
         ++*refmv_count;
       } else {
@@ -2519,12 +2611,15 @@
           ref_mv_stack[*refmv_count].row_offset = OFFSET_NONSPATIAL;
           ref_mv_stack[*refmv_count].col_offset = OFFSET_NONSPATIAL;
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_CWP
+          ref_mv_stack[*refmv_count].cwp_idx = CWP_EQUAL;
+#endif  // CONFIG_CWP
+#if CONFIG_SKIP_MODE_ENHANCEMENT
           if (xd->mi[0]->skip_mode) {
             ref_frame_idx0[*refmv_count] = rf[0];
             ref_frame_idx1[*refmv_count] = rf[1];
           }
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
           ref_mv_weight[*refmv_count] = 2;
           ++*refmv_count;
         }
@@ -2541,9 +2636,9 @@
     }
   } else {
     // Handle single reference frame extension
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
     assert(!xd->mi[0]->skip_mode);
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
 #if CONFIG_IBC_SR_EXT
     if (!xd->mi[0]->use_intrabc[xd->tree_type == CHROMA_PART]) {
 #endif  // CONFIG_IBC_SR_EXT
@@ -2597,9 +2692,9 @@
     // If there is extra space in the stack, copy the GLOBALMV vector into it.
     // This also guarantees the existence of at least one vector to search.
     if (*refmv_count < MAX_REF_MV_STACK_SIZE
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
         && !xd->mi[0]->use_intrabc[xd->tree_type == CHROMA_PART]
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
     ) {
       int stack_idx;
       for (stack_idx = 0; stack_idx < *refmv_count; ++stack_idx) {
@@ -2613,20 +2708,23 @@
         ref_mv_stack[*refmv_count].row_offset = OFFSET_NONSPATIAL;
         ref_mv_stack[*refmv_count].col_offset = OFFSET_NONSPATIAL;
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
+#if CONFIG_CWP
+        ref_mv_stack[*refmv_count].cwp_idx = CWP_EQUAL;
+#endif  // CONFIG_CWP
         ref_mv_weight[*refmv_count] = REF_CAT_LEVEL;
         (*refmv_count)++;
       }
     }
   }
-#if CONFIG_REF_MV_BANK && !CONFIG_C043_MVP_IMPROVEMENTS
+#if CONFIG_REF_MV_BANK && !CONFIG_MVP_IMPROVEMENT
   if (!cm->seq_params.enable_refmvbank) return;
   const int ref_mv_limit =
       AOMMIN(cm->features.max_drl_bits + 1, MAX_REF_MV_STACK_SIZE);
   // If open slots are available, fetch reference MVs from the ref mv banks.
   if (*refmv_count < ref_mv_limit
-#if !CONFIG_BVP_IMPROVEMENT
+#if !CONFIG_IBC_BV_IMPROVEMENT
       && ref_frame != INTRA_FRAME
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
   ) {
     const REF_MV_BANK *ref_mv_bank = xd->ref_mv_bank_pt;
     const CANDIDATE_MV *queue = ref_mv_bank->rmb_buffer[ref_frame];
@@ -2640,21 +2738,21 @@
          ++idx_bank) {
       const int idx = (start_idx + count - 1 - idx_bank) % REF_MV_BANK_SIZE;
       const CANDIDATE_MV cand_mv = queue[idx];
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
       bool rmb_candi_exist =
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
           check_rmb_cand(cand_mv, ref_mv_stack, ref_mv_weight, refmv_count,
                          is_comp, xd->mi_row, xd->mi_col, block_width,
                          block_height, cm->width, cm->height);
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
       if (xd->mi[0]->skip_mode && rmb_candi_exist) {
         ref_frame_idx0[*refmv_count - 1] = rf[0];
         ref_frame_idx1[*refmv_count - 1] = rf[1];
       }
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
     }
   }
-#endif  // CONFIG_REF_MV_BANK && !CONFIG_C043_MVP_IMPROVEMENTS
+#endif  // CONFIG_REF_MV_BANK && !CONFIG_MVP_IMPROVEMENT
 
 #if CONFIG_WARP_REF_LIST
   if (warp_param_stack && valid_num_warp_candidates &&
@@ -2715,7 +2813,7 @@
 
 #endif  // CONFIG_WARP_REF_LIST
 
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
   // If there are open slots in reference BV candidate list
   // fetch reference BVs from the default BVPs
   if (xd->mi[0]->use_intrabc[xd->tree_type == CHROMA_PART]) {
@@ -2740,7 +2838,7 @@
       add_to_ref_bv_list(tmp_mv, ref_mv_stack, ref_mv_weight, refmv_count);
     }
   }
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
 }
 
 #if CONFIG_WARP_REF_LIST
@@ -2870,6 +2968,10 @@
   bool derive_wrl = (warp_param_stack && valid_num_warp_candidates &&
                      max_num_of_warp_candidates);
   derive_wrl &= (ref_frame < INTER_REFS_PER_FRAME);
+#if CONFIG_SEP_COMP_DRL
+  if (has_second_drl(mi)) derive_wrl = 0;
+#endif  // CONFIG_SEP_COMP_DRL
+
   derive_wrl &= is_motion_variation_allowed_bsize(mi->sb_type[PLANE_TYPE_Y],
                                                   mi_row, mi_col);
   if (derive_wrl && valid_num_warp_candidates) {
@@ -2878,7 +2980,7 @@
   }
 #endif  // CONFIG_WARP_REF_LIST
 
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
   if (mi->skip_mode) {
     SKIP_MODE_MVP_LIST *skip_list =
         (SKIP_MODE_MVP_LIST *)&(xd->skip_mvp_candidate_list);
@@ -2896,6 +2998,84 @@
 #endif  // CONFIG_WARP_REF_LIST
     );
   } else {
+#if CONFIG_SEP_COMP_DRL
+    // With separate compound DRLs (has_second_drl(mi) is true), one candidate
+    // list is built per single reference: first for rf[0], then for rf[1].
+    // Otherwise a single list is built for ref_frame itself.
+    MV_REFERENCE_FRAME rf[2];
+    av1_set_ref_frame(rf, ref_frame);
+    if (!has_second_drl(mi))
+      rf[0] = ref_frame;
+    else {
+      const BLOCK_SIZE bsize = mi->sb_type[PLANE_TYPE_Y];
+      // The first list is keyed by rf[0], so its global MV has to come from
+      // rf[0]'s global motion model in both precision configurations.
+#if CONFIG_FLEX_MVRES
+      const int fr_mv_precision = cm->features.fr_mv_precision;
+      gm_mv[0] = get_warp_motion_vector(xd, &cm->global_motion[rf[0]],
+                                        fr_mv_precision, bsize, mi_col, mi_row);
+#else
+      gm_mv[0] = get_warp_motion_vector(xd, &cm->global_motion[rf[0]],
+                                        allow_high_precision_mv, bsize, mi_col,
+                                        mi_row, force_integer_mv);
+#endif  // CONFIG_FLEX_MVRES
+      gm_mv[1].as_int = 0;
+    }
+    setup_ref_mv_list(cm, xd, rf[0], &ref_mv_count[rf[0]], ref_mv_stack[rf[0]],
+                      ref_mv_weight[rf[0]], NULL, NULL,
+                      mv_ref_list ? mv_ref_list[rf[0]] : NULL, gm_mv, mi_row,
+                      mi_col
+#if !CONFIG_C076_INTER_MOD_CTX
+                      ,
+                      mode_context
+#endif  //! CONFIG_C076_INTER_MOD_CTX
+#if CONFIG_WARP_REF_LIST
+                      ,
+                      derive_wrl ? warp_param_stack[rf[0]] : NULL,
+                      derive_wrl ? max_num_of_warp_candidates : 0,
+                      derive_wrl ? &valid_num_warp_candidates[rf[0]] : NULL
+#endif  // CONFIG_WARP_REF_LIST
+    );
+
+    if (has_second_drl(mi)) {
+      // Second DRL: repeat the list construction for rf[1], using rf[1]'s own
+      // global motion model for the global MV.
+      assert(rf[0] == mi->ref_frame[0]);
+      assert(rf[1] == mi->ref_frame[1]);
+      const BLOCK_SIZE bsize = mi->sb_type[PLANE_TYPE_Y];
+#if CONFIG_FLEX_MVRES
+      const int fr_mv_precision = cm->features.fr_mv_precision;
+      gm_mv[0] = get_warp_motion_vector(xd, &cm->global_motion[rf[1]],
+                                        fr_mv_precision, bsize, mi_col, mi_row);
+#else
+      gm_mv[0] = get_warp_motion_vector(xd, &cm->global_motion[rf[1]],
+                                        allow_high_precision_mv, bsize, mi_col,
+                                        mi_row, force_integer_mv);
+#endif  // CONFIG_FLEX_MVRES
+      gm_mv[1].as_int = 0;
+
+      setup_ref_mv_list(cm, xd, rf[1], &ref_mv_count[rf[1]],
+                        ref_mv_stack[rf[1]], ref_mv_weight[rf[1]], NULL, NULL,
+                        mv_ref_list ? mv_ref_list[rf[1]] : NULL, gm_mv, mi_row,
+                        mi_col
+#if !CONFIG_C076_INTER_MOD_CTX
+                        ,
+                        mode_context
+#endif  //! CONFIG_C076_INTER_MOD_CTX
+#if CONFIG_WARP_REF_LIST
+                        ,
+                        derive_wrl ? warp_param_stack[rf[1]] : NULL,
+                        derive_wrl ? max_num_of_warp_candidates : 0,
+                        derive_wrl ? &valid_num_warp_candidates[rf[1]] : NULL
+#endif  // CONFIG_WARP_REF_LIST
+      );
+    }
+#if CONFIG_WARP_REF_LIST
+    // NOTE(review): derive_wrl looks to be declared only under
+    // CONFIG_WARP_REF_LIST; a single assert() also avoids an empty if-body
+    // when NDEBUG is defined.
+    assert(!derive_wrl || rf[0] == ref_frame);
+#endif  // CONFIG_WARP_REF_LIST
+#else
     setup_ref_mv_list(cm, xd, ref_frame, &ref_mv_count[ref_frame],
                       ref_mv_stack[ref_frame], ref_mv_weight[ref_frame], NULL,
                       NULL, mv_ref_list ? mv_ref_list[ref_frame] : NULL, gm_mv,
@@ -2912,6 +3081,7 @@
 #endif  // CONFIG_WARP_REF_LIST
 
     );
+#endif  // CONFIG_SEP_COMP_DRL
   }
 #else
   setup_ref_mv_list(cm, xd, ref_frame, &ref_mv_count[ref_frame],
@@ -2929,7 +3099,7 @@
                     derive_wrl ? &valid_num_warp_candidates[ref_frame] : NULL
 #endif  // CONFIG_WARP_REF_LIST
   );
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
 }
 
 #if CONFIG_FLEX_MVRES
@@ -3018,6 +3188,59 @@
 }
 #endif  // !CONFIG_TIP
 
+#if CONFIG_MF_IMPROVEMENT
+// Returns the smallest absolute temporal distance from start_frame to one of
+// its own reference frames that has the interpolation property relative to
+// the current frame (start_frame and that reference are on two sides of the
+// current frame). A reference qualifies when its relative distances from both
+// start_frame and the current frame are positive (find_forward_ref == 1) or
+// both negative (find_forward_ref == 0).
+// Returns INT_MAX when start_frame is -1, when is_ref_motion_field_eligible()
+// rejects the start frame's buffer, or when no reference qualifies.
+static INLINE int get_dist_to_closest_interp_ref(const AV1_COMMON *const cm,
+                                                 MV_REFERENCE_FRAME start_frame,
+                                                 const int find_forward_ref) {
+  if (start_frame == -1) return INT_MAX;
+
+  const RefCntBuffer *const start_frame_buf =
+      get_ref_frame_buf(cm, start_frame);
+  if (!is_ref_motion_field_eligible(cm, start_frame_buf)) return INT_MAX;
+
+  const OrderHintInfo *const oh_info = &cm->seq_params.order_hint_info;
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+  const int start_hint = start_frame_buf->display_order_hint;
+  const int cur_hint = cm->cur_frame->display_order_hint;
+  const int *const ref_hints = start_frame_buf->ref_display_order_hint;
+#else
+  const int start_hint = start_frame_buf->order_hint;
+  const int cur_hint = cm->cur_frame->order_hint;
+  const int *const ref_hints = start_frame_buf->ref_order_hints;
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+
+  int closest_dist = INT_MAX;
+  for (MV_REFERENCE_FRAME ref = 0; ref < INTER_REFS_PER_FRAME; ++ref) {
+    const int ref_hint = ref_hints[ref];
+    if (ref_hint == -1) continue;
+
+    const int start_to_ref = get_relative_dist(oh_info, start_hint, ref_hint);
+    const int cur_to_ref = get_relative_dist(oh_info, cur_hint, ref_hint);
+    // Both distances must carry the sign selected by find_forward_ref.
+    int qualifies = 0;
+    if (find_forward_ref == 1) {
+      qualifies = start_to_ref > 0 && cur_to_ref > 0;
+    } else if (find_forward_ref == 0) {
+      qualifies = start_to_ref < 0 && cur_to_ref < 0;
+    }
+    if (!qualifies) continue;
+
+    const int dist = abs(start_to_ref);
+    if (dist < closest_dist) closest_dist = dist;
+  }
+
+  return closest_dist;
+}
+#endif  // CONFIG_MF_IMPROVEMENT
+
 #if CONFIG_TIP
 // Note: motion_filed_projection finds motion vectors of current frame's
 // reference frame, and projects them to current frame. To make it clear,
@@ -3035,8 +3256,13 @@
       get_ref_frame_buf(cm, start_frame);
   if (!is_ref_motion_field_eligible(cm, start_frame_buf)) return 0;
 
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+  const int start_frame_order_hint = start_frame_buf->display_order_hint;
+  const int cur_order_hint = cm->cur_frame->display_order_hint;
+#else
   const int start_frame_order_hint = start_frame_buf->order_hint;
   const int cur_order_hint = cm->cur_frame->order_hint;
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
   int start_to_current_frame_offset = get_relative_dist(
       &cm->seq_params.order_hint_info, start_frame_order_hint, cur_order_hint);
 
@@ -3053,7 +3279,12 @@
   assert(start_frame_buf->width == cm->width &&
          start_frame_buf->height == cm->height);
 
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+  const int *const ref_order_hints =
+      &start_frame_buf->ref_display_order_hint[0];
+#else
   const int *const ref_order_hints = &start_frame_buf->ref_order_hints[0];
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
   for (MV_REFERENCE_FRAME rf = 0; rf < INTER_REFS_PER_FRAME; ++rf) {
     if (ref_order_hints[rf] != -1) {
       ref_offset[rf] =
@@ -3126,8 +3357,13 @@
       get_ref_frame_buf(cm, start_frame);
   if (!is_ref_motion_field_eligible(cm, start_frame_buf)) return 0;
 
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+  const int start_frame_order_hint = start_frame_buf->display_order_hint;
+  const int cur_order_hint = cm->cur_frame->display_order_hint;
+#else
   const int start_frame_order_hint = start_frame_buf->order_hint;
   const int cur_order_hint = cm->cur_frame->order_hint;
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
   int start_to_current_frame_offset = get_relative_dist(
       &cm->seq_params.order_hint_info, start_frame_order_hint, cur_order_hint);
 
@@ -3143,7 +3379,12 @@
   assert(start_frame_buf->width == cm->width &&
          start_frame_buf->height == cm->height);
 
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+  const int *const ref_order_hints =
+      &start_frame_buf->ref_display_order_hint[0];
+#else
   const int *const ref_order_hints = &start_frame_buf->ref_order_hints[0];
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
   for (MV_REFERENCE_FRAME rf = 0; rf < INTER_REFS_PER_FRAME; ++rf) {
     if (ref_order_hints[rf] != -1) {
       ref_offset[rf] =
@@ -3206,7 +3447,7 @@
 // Call Start frame's reference frames as reference frames.
 // Call ref_offset as frame distances between start frame and its reference
 // frames.
-#if CONFIG_TMVP_IMPROVEMENT
+#if CONFIG_MVP_IMPROVEMENT
 static int motion_field_projection_bwd(AV1_COMMON *cm,
                                        MV_REFERENCE_FRAME start_frame, int dir,
                                        int overwrite_mv) {
@@ -3217,15 +3458,25 @@
       get_ref_frame_buf(cm, start_frame);
   if (!is_ref_motion_field_eligible(cm, start_frame_buf)) return 0;
 
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+  const int start_frame_order_hint = start_frame_buf->display_order_hint;
+  const int cur_order_hint = cm->cur_frame->display_order_hint;
+#else
   const int start_frame_order_hint = start_frame_buf->order_hint;
   const int cur_order_hint = cm->cur_frame->order_hint;
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
   int start_to_current_frame_offset = get_relative_dist(
       &cm->seq_params.order_hint_info, start_frame_order_hint, cur_order_hint);
 
   assert(start_frame_buf->width == cm->width &&
          start_frame_buf->height == cm->height);
 
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+  const int *const ref_order_hints =
+      &start_frame_buf->ref_display_order_hint[0];
+#else
   const int *const ref_order_hints = &start_frame_buf->ref_order_hints[0];
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
   for (MV_REFERENCE_FRAME rf = 0; rf < INTER_REFS_PER_FRAME; ++rf) {
     if (ref_order_hints[rf] != -1)
       ref_offset[rf] =
@@ -3280,7 +3531,7 @@
 
   return 1;
 }
-#endif  // CONFIG_TMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
 
 static int motion_field_projection(AV1_COMMON *cm,
                                    MV_REFERENCE_FRAME start_frame, int dir,
@@ -3292,15 +3543,25 @@
       get_ref_frame_buf(cm, start_frame);
   if (!is_ref_motion_field_eligible(cm, start_frame_buf)) return 0;
 
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+  const int start_frame_order_hint = start_frame_buf->display_order_hint;
+  const int cur_order_hint = cm->cur_frame->display_order_hint;
+#else
   const int start_frame_order_hint = start_frame_buf->order_hint;
   const int cur_order_hint = cm->cur_frame->order_hint;
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
   int start_to_current_frame_offset = get_relative_dist(
       &cm->seq_params.order_hint_info, start_frame_order_hint, cur_order_hint);
 
   assert(start_frame_buf->width == cm->width &&
          start_frame_buf->height == cm->height);
 
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+  const int *const ref_order_hints =
+      &start_frame_buf->ref_display_order_hint[0];
+#else
   const int *const ref_order_hints = &start_frame_buf->ref_order_hints[0];
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
   for (MV_REFERENCE_FRAME rf = 0; rf < INTER_REFS_PER_FRAME; ++rf) {
     if (ref_order_hints[rf] != -1)
       ref_offset[rf] =
@@ -3360,6 +3621,16 @@
   if (!order_hint_info->enable_order_hint) return -1;
   const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref);
   if (buf == NULL) return -1;
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+  const int ref_order_hint = buf->display_order_hint;
+  for (int r = 0; r < INTER_REFS_PER_FRAME; ++r) {
+    if (buf->ref_display_order_hint[r] == -1) continue;
+    const int ref_ref_order_hint = buf->ref_display_order_hint[r];
+    if (get_relative_dist(order_hint_info, ref_order_hint,
+                          ref_ref_order_hint) == 0)
+      return 1;
+  }
+#else
   const int ref_order_hint = buf->order_hint;
   for (int r = 0; r < INTER_REFS_PER_FRAME; ++r) {
     if (buf->ref_order_hints[r] == -1) continue;
@@ -3368,6 +3639,7 @@
                           ref_ref_order_hint) == 0)
       return 1;
   }
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
   return 0;
 }
 
@@ -3405,33 +3677,51 @@
     cm->ref_frame_side[ref_frame] = 0;
     const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref_frame);
     ref_buf[ref_frame] = buf;
-#if CONFIG_SMVP_IMPROVEMENT || CONFIG_JOINT_MVD
+#if CONFIG_MVP_IMPROVEMENT || CONFIG_JOINT_MVD
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+    const int relative_dist =
+        get_relative_dist(order_hint_info, buf->display_order_hint,
+                          cm->cur_frame->display_order_hint);
+#else
     const int relative_dist = get_relative_dist(
         order_hint_info, buf->order_hint, cm->cur_frame->order_hint);
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
     cm->ref_frame_relative_dist[ref_frame] = abs(relative_dist);
-#endif  // CONFIG_SMVP_IMPROVEMENT || CONFIG_JOINT_MVD
+#endif  // CONFIG_MVP_IMPROVEMENT || CONFIG_JOINT_MVD
   }
   for (int index = 0; index < cm->ref_frames_info.num_future_refs; index++) {
     const int ref_frame = cm->ref_frames_info.future_refs[index];
     cm->ref_frame_side[ref_frame] = 1;
     const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref_frame);
     ref_buf[ref_frame] = buf;
-#if CONFIG_SMVP_IMPROVEMENT || CONFIG_JOINT_MVD
+#if CONFIG_MVP_IMPROVEMENT || CONFIG_JOINT_MVD
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+    const int relative_dist =
+        get_relative_dist(order_hint_info, buf->display_order_hint,
+                          cm->cur_frame->display_order_hint);
+#else
     const int relative_dist = get_relative_dist(
         order_hint_info, buf->order_hint, cm->cur_frame->order_hint);
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
     cm->ref_frame_relative_dist[ref_frame] = abs(relative_dist);
-#endif  // CONFIG_SMVP_IMPROVEMENT || CONFIG_JOINT_MVD
+#endif  // CONFIG_MVP_IMPROVEMENT || CONFIG_JOINT_MVD
   }
   for (int index = 0; index < cm->ref_frames_info.num_cur_refs; index++) {
     const int ref_frame = cm->ref_frames_info.cur_refs[index];
     cm->ref_frame_side[ref_frame] = -1;
     const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref_frame);
     ref_buf[ref_frame] = buf;
-#if CONFIG_SMVP_IMPROVEMENT || CONFIG_JOINT_MVD
+#if CONFIG_MVP_IMPROVEMENT || CONFIG_JOINT_MVD
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+    const int relative_dist =
+        get_relative_dist(order_hint_info, buf->display_order_hint,
+                          cm->cur_frame->display_order_hint);
+#else
     const int relative_dist = get_relative_dist(
         order_hint_info, buf->order_hint, cm->cur_frame->order_hint);
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
     cm->ref_frame_relative_dist[ref_frame] = abs(relative_dist);
-#endif  // CONFIG_SMVP_IMPROVEMENT || CONFIG_JOINT_MVD
+#endif  // CONFIG_MVP_IMPROVEMENT || CONFIG_JOINT_MVD
   }
 
 #if CONFIG_TIP
@@ -3468,7 +3758,51 @@
       closest_ref[dir][1] = ref_frame;
     }
   }
-#if CONFIG_TMVP_IMPROVEMENT || CONFIG_TIP
+#if CONFIG_MVP_IMPROVEMENT || CONFIG_TIP
+#if CONFIG_MF_IMPROVEMENT
+  // Do projection on group 0 (closest past (backward MV), closest future),
+  // group 1 (second closest future, second closest past (backward MV)),
+  // closest past (forward MV), and then second closest past (forward MVs),
+  // without overwriting the MVs.
+  // Within each group, the ref frame whose distance to its own closest ref
+  // frame with the interpolation property (relative to the current frame) is
+  // smaller is projected first; on a tie the past ref goes first.
+  // Interpolation means two frames are on two sides of the current frame.
+  for (int group_idx = 0; group_idx < 2; ++group_idx) {
+    const int past_ref_to_its_ref_dist =
+        get_dist_to_closest_interp_ref(cm, closest_ref[0][group_idx], 0);
+    const int future_ref_to_its_ref_dist =
+        get_dist_to_closest_interp_ref(cm, closest_ref[1][group_idx], 1);
+    if (future_ref_to_its_ref_dist < past_ref_to_its_ref_dist) {
+      if (closest_ref[1][group_idx] != -1 && n_refs_used < MFMV_STACK_SIZE) {
+        n_refs_used +=
+            motion_field_projection(cm, closest_ref[1][group_idx], 0, 0);
+      }
+
+      if (closest_ref[0][group_idx] != -1 && n_refs_used < MFMV_STACK_SIZE) {
+        n_refs_used +=
+            motion_field_projection_bwd(cm, closest_ref[0][group_idx], 2, 0);
+      }
+    } else {
+      if (closest_ref[0][group_idx] != -1 && n_refs_used < MFMV_STACK_SIZE) {
+        n_refs_used +=
+            motion_field_projection_bwd(cm, closest_ref[0][group_idx], 2, 0);
+      }
+      if (closest_ref[1][group_idx] != -1 && n_refs_used < MFMV_STACK_SIZE) {
+        n_refs_used +=
+            motion_field_projection(cm, closest_ref[1][group_idx], 0, 0);
+      }
+    }
+  }
+
+  if (closest_ref[0][0] != -1 && n_refs_used < MFMV_STACK_SIZE) {
+    n_refs_used += motion_field_projection(cm, closest_ref[0][0], 2, 0);
+  }
+
+  if (closest_ref[0][1] != -1 && n_refs_used < MFMV_STACK_SIZE) {
+    motion_field_projection(cm, closest_ref[0][1], 2, 0);
+  }
+#else
   // Do projection on closest past (backward MV), closest future, second
   // closest future, second closest past (backward MV), closest path (forward
   // MV), and then second closest past (forward MVs), without overwriting
@@ -3496,6 +3830,7 @@
   if (closest_ref[0][1] != -1 && n_refs_used < MFMV_STACK_SIZE) {
     motion_field_projection(cm, closest_ref[0][1], 2, 0);
   }
+#endif  // CONFIG_MF_IMPROVEMENT
 #else
   // Do projection on closest past and future refs if they exist
   if (closest_ref[0][0] != -1) {
@@ -3516,24 +3851,32 @@
     const int ret = motion_field_projection(cm, closest_ref[0][1], 2, 1);
     n_refs_used += ret;
   }
-#endif  // CONFIG_TMVP_IMPROVEMENT || CONFIG_TIP
+#endif  // CONFIG_MVP_IMPROVEMENT || CONFIG_TIP
 }
 
-#if CONFIG_SMVP_IMPROVEMENT || CONFIG_JOINT_MVD
+#if CONFIG_MVP_IMPROVEMENT || CONFIG_JOINT_MVD
 void av1_setup_ref_frame_sides(AV1_COMMON *cm) {
   const OrderHintInfo *const order_hint_info = &cm->seq_params.order_hint_info;
 
   memset(cm->ref_frame_side, 0, sizeof(cm->ref_frame_side));
   if (!order_hint_info->enable_order_hint) return;
 
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+  const int cur_order_hint = cm->cur_frame->display_order_hint;
+#else
   const int cur_order_hint = cm->cur_frame->order_hint;
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
 
   for (int ref_frame = 0; ref_frame < cm->ref_frames_info.num_total_refs;
        ref_frame++) {
     const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref_frame);
     int order_hint = 0;
 
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+    if (buf != NULL) order_hint = buf->display_order_hint;
+#else
     if (buf != NULL) order_hint = buf->order_hint;
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
     const int relative_dist =
         get_relative_dist(order_hint_info, order_hint, cur_order_hint);
     if (relative_dist > 0) {
@@ -3544,7 +3887,7 @@
     cm->ref_frame_relative_dist[ref_frame] = abs(relative_dist);
   }
 }
-#endif  // CONFIG_SMVP_IMPROVEMENT || CONFIG_JOINT_MVD
+#endif  // CONFIG_MVP_IMPROVEMENT || CONFIG_JOINT_MVD
 
 static INLINE void record_samples(const MB_MODE_INFO *mbmi,
 #if CONFIG_COMPOUND_WARP_SAMPLES
@@ -3847,7 +4190,11 @@
     skip_mode_info->ref_frame_idx_0 = 0;
   }
 #else
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+  const int cur_order_hint = cm->current_frame.display_order_hint;
+#else
   const int cur_order_hint = cm->current_frame.order_hint;
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
   int ref_order_hints[2] = { -1, INT_MAX };
   int ref_idx[2] = { INVALID_IDX, INVALID_IDX };
 
@@ -3856,7 +4203,11 @@
     const RefCntBuffer *const buf = get_ref_frame_buf(cm, i);
     if (buf == NULL) continue;
 
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+    const int ref_order_hint = buf->display_order_hint;
+#else
     const int ref_order_hint = buf->order_hint;
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
     if (get_relative_dist(order_hint_info, ref_order_hint, cur_order_hint) <
         0) {
       // Forward reference
@@ -3896,7 +4247,11 @@
       const RefCntBuffer *const buf = get_ref_frame_buf(cm, i);
       if (buf == NULL) continue;
 
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+      const int ref_order_hint = buf->display_order_hint;
+#else
       const int ref_order_hint = buf->order_hint;
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
       if ((ref_order_hints[0] != -1 &&
            get_relative_dist(order_hint_info, ref_order_hint,
                              ref_order_hints[0]) < 0) &&
@@ -3966,6 +4321,9 @@
   const int idx = (start_idx + count) % REF_MV_BANK_SIZE;
   queue[idx].this_mv = mbmi->mv[0];
   if (is_comp) queue[idx].comp_mv = mbmi->mv[1];
+#if CONFIG_CWP
+  queue[idx].cwp_idx = mbmi->cwp_idx;
+#endif  // CONFIG_CWP
   if (count < REF_MV_BANK_SIZE) {
     ++ref_mv_bank->rmb_count[ref_frame];
   } else {
@@ -4011,8 +4369,8 @@
           clamp(mv_row, MV_LOW + 1, MV_UPP - 1);
       submi[mi_y * mi_stride + mi_x]->mv[0].as_mv.col =
           clamp(mv_col, MV_LOW + 1, MV_UPP - 1);
-      span_submv(cm, (submi + mi_y * mi_stride + mi_x), mi_row, mi_col,
-                 BLOCK_8X8);
+      span_submv(cm, (submi + mi_y * mi_stride + mi_x), mi_row + mi_y,
+                 mi_col + mi_x, BLOCK_8X8);
     }
   }
 }
@@ -4136,7 +4494,14 @@
          num_wrl_cand * sizeof(wrl_list[0]));
   if (p_valid_num_candidates) {
     // for NEARMV mode, the maximum number of candidates is 1
-    *p_valid_num_candidates = (mbmi->mode == NEARMV) ? 1 : num_wrl_cand;
+    *p_valid_num_candidates = (mbmi->mode == NEARMV
+#if CONFIG_CWG_D067_IMPROVED_WARP
+                               || mbmi->mode == AMVDNEWMV
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+
+                               )
+                                  ? 1
+                                  : num_wrl_cand;
   }
 }
 
@@ -4472,3 +4837,99 @@
   return ctx;
 }
 #endif  // CONFIG_WARPMV
+
+#if CONFIG_CWG_D067_IMPROVED_WARP
+// Appends the sample for corner position (pos_col, pos_row) to pts/mvs using
+// neighbor_mi, and increments *n_points. For a neighbor in a warp motion mode
+// the MV is obtained from get_warp_motion_vector_xy_pos() on its wm_params[0]
+// at that position; otherwise the neighbor's mv[0] is used.
+// Returns 1 if a sample was added. Returns 0 without touching the outputs if
+// a coordinate is negative, or if the neighbor's first reference frame is not
+// an inter reference or differs from this_ref.
+static int fill_warp_corner_projected_point(const MB_MODE_INFO *neighbor_mi,
+                                            MV_REFERENCE_FRAME this_ref,
+                                            const int pos_col,
+                                            const int pos_row, int *pts,
+                                            int *mvs, int *n_points) {
+  if (pos_col < 0 || pos_row < 0) return 0;
+
+  const MV_REFERENCE_FRAME neighbor_ref = neighbor_mi->ref_frame[0];
+  if (!is_inter_ref_frame(neighbor_ref) || neighbor_ref != this_ref) return 0;
+
+  int mv_col;
+  int mv_row;
+  if (!is_warp_mode(neighbor_mi->motion_mode)) {
+    mv_col = neighbor_mi->mv[0].as_mv.col;
+    mv_row = neighbor_mi->mv[0].as_mv.row;
+  } else {
+    const int_mv warp_mv =
+        get_warp_motion_vector_xy_pos(&neighbor_mi->wm_params[0], pos_col,
+                                      pos_row, MV_PRECISION_ONE_EIGHTH_PEL);
+    mv_col = warp_mv.as_mv.col;
+    mv_row = warp_mv.as_mv.row;
+  }
+
+  // Samples are stored as interleaved (col, row) pairs.
+  const int base = 2 * (*n_points);
+  pts[base] = pos_col;
+  pts[base + 1] = pos_row;
+  mvs[base] = mv_col;
+  mvs[base + 1] = mv_row;
+  ++(*n_points);
+  return 1;
+}
+
+// Tries to add the corner sample at position (pos_col, pos_row) from the
+// neighbor found at xd->mi offset (mi_row_off, mi_col_off) from the current
+// block. The neighbor is consulted only when is_inside() accepts the offset
+// and neighbor_available is non-zero.
+// Returns 1 if a sample was added, 0 otherwise.
+static int add_corner_point(const MACROBLOCKD *xd, int mi_row_off,
+                            int mi_col_off, int neighbor_available,
+                            MV_REFERENCE_FRAME this_ref, int pos_col,
+                            int pos_row, int *pts, int *mvs, int *np) {
+  POSITION mi_pos;
+  mi_pos.row = mi_row_off;
+  mi_pos.col = mi_col_off;
+  if (!is_inside(&xd->tile, xd->mi_col, xd->mi_row, &mi_pos)) return 0;
+  if (!neighbor_available) return 0;
+
+  const MB_MODE_INFO *const neighbor_mi =
+      xd->mi[mi_pos.row * xd->mi_stride + mi_pos.col];
+  return fill_warp_corner_projected_point(neighbor_mi, this_ref, pos_col,
+                                          pos_row, pts, mvs, np);
+}
+
+// Check all 3 neighbors to generate projected points: the top-left, top-right
+// and bottom-left neighbors each contribute at most one sample, appended to
+// pts/mvs with *np advanced accordingly. Only neighbors whose first reference
+// equals rf[0] of av1_set_ref_frame(rf, ref_frame) are used.
+// Returns the number of samples added by this call (0..3).
+int generate_points_from_corners(const MACROBLOCKD *xd, int *pts, int *mvs,
+                                 int *np, MV_REFERENCE_FRAME ref_frame) {
+  MV_REFERENCE_FRAME rf[2];
+  av1_set_ref_frame(rf, ref_frame);
+  const MV_REFERENCE_FRAME this_ref = rf[0];
+
+  // Block edge coordinates: mi position/size multiplied by MI_SIZE.
+  const int left = xd->mi_col * MI_SIZE;
+  const int top = xd->mi_row * MI_SIZE;
+  const int right = left + xd->width * MI_SIZE;
+  const int bottom = top + xd->height * MI_SIZE;
+
+  int valid_points = 0;
+  // top-left: requires both the above and the left neighbor
+  valid_points +=
+      add_corner_point(xd, -1, -1, xd->up_available && xd->left_available,
+                       this_ref, left, top, pts, mvs, np);
+  // top-right
+  valid_points += add_corner_point(xd, -1, xd->width - 1, xd->up_available,
+                                   this_ref, right, top, pts, mvs, np);
+  // bottom-left
+  valid_points += add_corner_point(xd, xd->height - 1, -1, xd->left_available,
+                                   this_ref, left, bottom, pts, mvs, np);
+
+  assert(valid_points <= 3);
+  return valid_points;
+}
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
diff --git a/av1/common/mvref_common.h b/av1/common/mvref_common.h
index 831869a..41f3452 100644
--- a/av1/common/mvref_common.h
+++ b/av1/common/mvref_common.h
@@ -22,12 +22,12 @@
 extern "C" {
 #endif
 
-#if CONFIG_SMVP_IMPROVEMENT
+#if CONFIG_MVP_IMPROVEMENT
 #define MVREF_ROWS 1
 #define MVREF_COLS 3
 #else
 #define MVREF_ROW_COLS 3
-#endif  // CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
 
 // Set the upper limit of the motion vector component magnitude.
 // This would make a motion vector fit in 26 bits. Plus 3 bits for the
@@ -48,14 +48,26 @@
 static AOM_INLINE int get_block_position(AV1_COMMON *cm, int *mi_r, int *mi_c,
                                          int blk_row, int blk_col, MV mv,
                                          int sign_bias) {
+#if CONFIG_MF_IMPROVEMENT
+  const SequenceHeader *const seq_params = &cm->seq_params;
+  const int sb_size = block_size_high[seq_params->sb_size];
+  const int mf_sb_size_log2 = (sb_size <= 64 ? mi_size_high_log2[BLOCK_64X64]
+                                             : seq_params->mib_size_log2) +
+                              MI_SIZE_LOG2;
+  const int mf_sb_size = (1 << mf_sb_size_log2);
+  const int sb_tmvp_size = (mf_sb_size >> TMVP_MI_SZ_LOG2);
+  const int sb_tmvp_size_log2 = mf_sb_size_log2 - TMVP_MI_SZ_LOG2;
+  const int base_blk_row = (blk_row >> sb_tmvp_size_log2) << sb_tmvp_size_log2;
+  const int base_blk_col = (blk_col >> sb_tmvp_size_log2) << sb_tmvp_size_log2;
+#else
   const int base_blk_row = (blk_row >> TMVP_MI_SZ_LOG2) << TMVP_MI_SZ_LOG2;
   const int base_blk_col = (blk_col >> TMVP_MI_SZ_LOG2) << TMVP_MI_SZ_LOG2;
+#endif  // CONFIG_MF_IMPROVEMENT
 
   // The motion vector in units of 1/8-pel
   const int shift = (3 + TMVP_MI_SZ_LOG2);
   const int row_offset =
       (mv.row >= 0) ? (mv.row >> shift) : -((-mv.row) >> shift);
-
   const int col_offset =
       (mv.col >= 0) ? (mv.col >> shift) : -((-mv.col) >> shift);
 
@@ -68,10 +80,17 @@
       col >= (cm->mi_params.mi_cols >> TMVP_SHIFT_BITS))
     return 0;
 
+#if CONFIG_MF_IMPROVEMENT
+  if (row < base_blk_row - MAX_OFFSET_HEIGHT_LOG2 ||
+      row >= base_blk_row + sb_tmvp_size + MAX_OFFSET_HEIGHT_LOG2 ||
+      col < base_blk_col - sb_tmvp_size ||
+      col >= base_blk_col + (sb_tmvp_size << 1))
+#else
   if (row < base_blk_row - MAX_OFFSET_HEIGHT_LOG2 ||
       row >= base_blk_row + TMVP_MI_SIZE + MAX_OFFSET_HEIGHT_LOG2 ||
       col < base_blk_col - MAX_OFFSET_WIDTH_LOG2 ||
       col >= base_blk_col + TMVP_MI_SIZE + MAX_OFFSET_WIDTH_LOG2)
+#endif  // CONFIG_MF_IMPROVEMENT
     return 0;
 
   *mi_r = row;
@@ -84,15 +103,24 @@
 // clamp_mv_ref
 #define MV_BORDER (16 << 3)  // Allow 16 pels in 1/8th pel units
 
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+#define DISPLAY_ORDER_HINT_BITS 31
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+
 static INLINE int get_relative_dist(const OrderHintInfo *oh, int a, int b) {
   if (!oh->enable_order_hint) return 0;
 
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+  assert(a >= 0);
+  assert(b >= 0);
+  const int bits = DISPLAY_ORDER_HINT_BITS;
+#else
   const int bits = oh->order_hint_bits_minus_1 + 1;
 
   assert(bits >= 1);
   assert(a >= 0 && a < (1 << bits));
   assert(b >= 0 && b < (1 << bits));
-
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
   int diff = a - b;
   const int m = 1 << (bits - 1);
   diff = (diff & (m - 1)) - (diff & m);
@@ -399,6 +427,31 @@
   }
 }
 
+#if CONFIG_SEP_COMP_DRL
+/*!\brief Return the ref_mv_idx_type of the current coding block, i.e. the
+ * two ref_mv_idx values combined into one value when there are two DRLs */
+static INLINE int av1_ref_mv_idx_type(const MB_MODE_INFO *mbmi,
+                                      const int *ref_mv_idx) {
+  assert(ref_mv_idx[0] < MAX_REF_MV_STACK_SIZE);
+  assert(ref_mv_idx[1] < MAX_REF_MV_STACK_SIZE);
+  if (has_second_drl(mbmi)) {
+    return ref_mv_idx[1] * MAX_REF_MV_STACK_SIZE + ref_mv_idx[0];
+  } else {
+    assert(0 == ref_mv_idx[1]);
+    return ref_mv_idx[0];
+  }
+}
+
+/*!\brief Reset ref_mv_idx(s) based on the ref_mv_idx_type value */
+static INLINE void av1_set_ref_mv_idx(int *ref_mv_idx, int ref_mv_idx_type) {
+  assert(ref_mv_idx_type >= 0 &&
+         ref_mv_idx_type < MAX_REF_MV_STACK_SIZE * MAX_REF_MV_STACK_SIZE);
+  ref_mv_idx[1] = ref_mv_idx_type / MAX_REF_MV_STACK_SIZE;
+  ref_mv_idx[0] = ref_mv_idx_type - ref_mv_idx[1] * MAX_REF_MV_STACK_SIZE;
+  return;
+}
+#endif  // CONFIG_SEP_COMP_DRL
+
 static INLINE void av1_set_ref_frame(MV_REFERENCE_FRAME *rf,
                                      MV_REFERENCE_FRAME ref_frame_type) {
   if (ref_frame_type == INTRA_FRAME ||
@@ -496,9 +549,9 @@
 void av1_setup_frame_sign_bias(AV1_COMMON *cm);
 void av1_setup_skip_mode_allowed(AV1_COMMON *cm);
 void av1_setup_motion_field(AV1_COMMON *cm);
-#if CONFIG_SMVP_IMPROVEMENT
+#if CONFIG_MVP_IMPROVEMENT
 void av1_setup_ref_frame_sides(AV1_COMMON *cm);
-#endif  // CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
 
 static INLINE void av1_collect_neighbors_ref_counts(MACROBLOCKD *const xd) {
   av1_zero(xd->neighbors_ref_counts);
@@ -760,12 +813,32 @@
 
   // Special case for sub 8x8 chroma cases, to prevent referring to chroma
   // pixels outside current tile.
-  if (xd->is_chroma_ref && av1_num_planes(cm) > 1) {
-    const struct macroblockd_plane *const pd = &xd->plane[1];
-    if (bw < 8 && pd->subsampling_x)
-      if (src_left_edge < tile_left_edge + 4 * SCALE_PX_TO_MV) return 0;
-    if (bh < 8 && pd->subsampling_y)
-      if (src_top_edge < tile_top_edge + 4 * SCALE_PX_TO_MV) return 0;
+  if (!cm->seq_params.enable_sdp || !frame_is_intra_only(cm)) {
+    if (xd->is_chroma_ref && av1_num_planes(cm) > 1) {
+      const struct macroblockd_plane *const pd = &xd->plane[1];
+#if CONFIG_EXT_RECUR_PARTITIONS
+      if (xd->mi && xd->mi[0]) {
+        const CHROMA_REF_INFO *chroma_ref_info = &xd->mi[0]->chroma_ref_info;
+        const int src_left_edge_chroma =
+            chroma_ref_info->mi_col_chroma_base * MI_SIZE * SCALE_PX_TO_MV +
+            dv.col;
+        const int src_top_edge_chroma =
+            chroma_ref_info->mi_row_chroma_base * MI_SIZE * SCALE_PX_TO_MV +
+            dv.row;
+        if (bw < 8 && pd->subsampling_x)
+          if (src_left_edge_chroma < tile_left_edge) return 0;
+        if (bh < 8 && pd->subsampling_y)
+          if (src_top_edge_chroma < tile_top_edge) return 0;
+      } else {
+#endif
+        if (bw < 8 && pd->subsampling_x)
+          if (src_left_edge < tile_left_edge + 4 * SCALE_PX_TO_MV) return 0;
+        if (bh < 8 && pd->subsampling_y)
+          if (src_top_edge < tile_top_edge + 4 * SCALE_PX_TO_MV) return 0;
+#if CONFIG_EXT_RECUR_PARTITIONS
+      }
+#endif
+    }
   }
 
 #if CONFIG_IBC_SR_EXT
@@ -1134,6 +1207,13 @@
   return 1;
 }
 
+#if CONFIG_CWG_D067_IMPROVED_WARP
+// Check all 3 neighbors to generate projected points
+int generate_points_from_corners(const MACROBLOCKD *xd, int *pts, int *mvs,
+                                 int *np, MV_REFERENCE_FRAME ref_frame);
+
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/av1/common/obmc.h b/av1/common/obmc.h
index a51820b..dfa5357 100644
--- a/av1/common/obmc.h
+++ b/av1/common/obmc.h
@@ -21,12 +21,14 @@
 static INLINE void foreach_overlappable_nb_above(const AV1_COMMON *cm,
                                                  MACROBLOCKD *xd, int nb_max,
                                                  overlappable_nb_visitor_t fun,
-                                                 void *fun_ctxt) {
+                                                 void *fun_ctxt,
+                                                 bool count_only) {
   if (!xd->up_available) return;
 
   const int num_planes = av1_num_planes(cm);
   int nb_count = 0;
   const int mi_col = xd->mi_col;
+
   // prev_row_mi points into the mi array, starting at the beginning of the
   // previous row.
   MB_MODE_INFO **prev_row_mi = xd->mi - mi_col - 1 * xd->mi_stride;
@@ -35,8 +37,41 @@
   for (int above_mi_col = mi_col; above_mi_col < end_col && nb_count < nb_max;
        above_mi_col += mi_step) {
     MB_MODE_INFO **above_mi = prev_row_mi + above_mi_col;
-    mi_step = AOMMIN(mi_size_wide[above_mi[0]->sb_type[PLANE_TYPE_Y]],
-                     mi_size_wide[BLOCK_64X64]);
+    mi_step = mi_size_wide[above_mi[0]->sb_type[PLANE_TYPE_Y]];
+#if CONFIG_UNEVEN_4WAY
+    if (count_only) {
+      // In this case, we may only be parsing without decoding (e.g. in case of
+      // row-based multi-threading). Hence, we do not have access to variables
+      // `above_mi[0]->chroma_ref_info` and `above_mi[0]->mi_col_start`.
+      // Also, if mi_step = 1, it must be a non-chroma ref block. So, we use
+      // mi_step = 2.
+      if (mi_step == 1) {
+        mi_step = 2;
+      }
+    } else {
+      // If we're considering a block that is NOT a chroma ref:
+      // - Move above_mi_col back to the base mi col,
+      // - Set above_mi to point at the block with chroma information, and
+      // - Set mi_step to step over all blocks that the chroma block covers.
+      const CHROMA_REF_INFO *chroma_ref_info = &above_mi[0]->chroma_ref_info;
+      if (!chroma_ref_info->is_chroma_ref) {
+        above_mi_col = chroma_ref_info->mi_col_chroma_base;
+        mi_step = mi_size_wide[chroma_ref_info->bsize_base];
+        if (above_mi_col < mi_col) continue;
+        above_mi = prev_row_mi + above_mi_col;
+        assert(above_mi[0]->chroma_ref_info.bsize_base ==
+               chroma_ref_info->bsize_base);
+      }
+      // If above block's left boundary is to the left of current block's left
+      // boundary, we need to find the common overlap.
+      if (above_mi[0]->mi_col_start < above_mi_col) {
+        const int extra_cols = above_mi_col - above_mi[0]->mi_col_start;
+        mi_step -= extra_cols;
+        assert(mi_step > 0);
+      }
+    }
+#else
+    (void)count_only;
     // If we're considering a block with width 4, it should be treated as
     // half of a pair of blocks with chroma information in the second. Move
     // above_mi_col back to the start of the pair if needed, set above_mbmi
@@ -47,11 +82,25 @@
       above_mi = prev_row_mi + above_mi_col + 1;
       mi_step = 2;
     }
+#endif  // CONFIG_UNEVEN_4WAY
 
+    mi_step = AOMMIN(mi_step, mi_size_wide[BLOCK_64X64]);
+    int overlapped_mi_width = AOMMIN(xd->width, mi_step);
+#if CONFIG_UNEVEN_4WAY
+    if (!IS_POWER_OF_TWO(overlapped_mi_width)) {
+      assert(!IS_POWER_OF_TWO(mi_step));
+      const int mi_step_pow2 = 1 << get_msb(mi_step);
+      above_mi_col += (mi_step - mi_step_pow2);
+      mi_step = mi_step_pow2;
+      overlapped_mi_width = AOMMIN(xd->width, mi_step);
+    }
+#endif  // CONFIG_UNEVEN_4WAY
+    assert(IS_POWER_OF_TWO(overlapped_mi_width));
     if (is_neighbor_overlappable(*above_mi, xd->tree_type)) {
       ++nb_count;
-      fun(xd, 0, above_mi_col - mi_col, AOMMIN(xd->width, mi_step), 0,
-          *above_mi, fun_ctxt, num_planes);
+      assert(above_mi_col >= mi_col);
+      fun(xd, 0, above_mi_col - mi_col, overlapped_mi_width, 0, *above_mi,
+          fun_ctxt, num_planes);
     }
   }
 }
@@ -73,17 +122,52 @@
   for (int left_mi_row = mi_row; left_mi_row < end_row && nb_count < nb_max;
        left_mi_row += mi_step) {
     MB_MODE_INFO **left_mi = prev_col_mi + left_mi_row * xd->mi_stride;
-    mi_step = AOMMIN(mi_size_high[left_mi[0]->sb_type[PLANE_TYPE_Y]],
-                     mi_size_high[BLOCK_64X64]);
+    mi_step = mi_size_high[left_mi[0]->sb_type[PLANE_TYPE_Y]];
+#if CONFIG_UNEVEN_4WAY
+    // If we're considering a block that is NOT a chroma ref:
+    // - Move left_mi_row back to the base mi row,
+    // - Set left_mi to point at the block with chroma information, and
+    // - Set mi_step to step over all blocks that the chroma block covers.
+    const CHROMA_REF_INFO *chroma_ref_info = &left_mi[0]->chroma_ref_info;
+    if (!chroma_ref_info->is_chroma_ref) {
+      left_mi_row = chroma_ref_info->mi_row_chroma_base;
+      mi_step = mi_size_high[chroma_ref_info->bsize_base];
+      if (left_mi_row < mi_row) continue;
+      left_mi = prev_col_mi + left_mi_row * xd->mi_stride;
+      assert(left_mi[0]->chroma_ref_info.bsize_base ==
+             chroma_ref_info->bsize_base);
+    }
+    // If left block's top boundary is above current block's top boundary, we
+    // need to find the common overlap.
+    if (left_mi[0]->mi_row_start < left_mi_row) {
+      const int extra_cols = left_mi_row - left_mi[0]->mi_row_start;
+      mi_step -= extra_cols;
+      assert(mi_step > 0);
+    }
+#else
     if (mi_step == 1) {
       left_mi_row &= ~1;
       left_mi = prev_col_mi + (left_mi_row + 1) * xd->mi_stride;
       mi_step = 2;
     }
+#endif  // CONFIG_UNEVEN_4WAY
 
+    mi_step = AOMMIN(mi_step, mi_size_high[BLOCK_64X64]);
+    int overlapped_mi_height = AOMMIN(xd->height, mi_step);
+#if CONFIG_UNEVEN_4WAY
+    if (!IS_POWER_OF_TWO(overlapped_mi_height)) {
+      assert(!IS_POWER_OF_TWO(mi_step));
+      const int mi_step_pow2 = 1 << get_msb(mi_step);
+      left_mi_row += (mi_step - mi_step_pow2);
+      mi_step = mi_step_pow2;
+      overlapped_mi_height = AOMMIN(xd->height, mi_step);
+    }
+#endif  // CONFIG_UNEVEN_4WAY
+    assert(IS_POWER_OF_TWO(overlapped_mi_height));
     if (is_neighbor_overlappable(*left_mi, xd->tree_type)) {
       ++nb_count;
-      fun(xd, left_mi_row - mi_row, 0, AOMMIN(xd->height, mi_step), 1, *left_mi,
+      assert(left_mi_row >= mi_row);
+      fun(xd, left_mi_row - mi_row, 0, overlapped_mi_height, 1, *left_mi,
           fun_ctxt, num_planes);
     }
   }
diff --git a/av1/common/pef.c b/av1/common/pef.c
index b84222c..c5587dd 100644
--- a/av1/common/pef.c
+++ b/av1/common/pef.c
@@ -169,6 +169,9 @@
 // setup PEF input structure
 void setup_pef_input(MACROBLOCKD *xd, int pef_mode, int plane, uint16_t *dst,
                      int dst_stride, int bw, int bh, int_mv *mv_refined,
+#if CONFIG_REFINEMV
+                     REFINEMV_SUBMB_INFO *refinemv_subinfo,
+#endif  // CONFIG_REFINEMV
                      PefFuncInput *pef_input) {
   pef_input->pef_mode = pef_mode;
   pef_input->plane = plane;
@@ -180,6 +183,9 @@
   pef_input->dst = dst;
   pef_input->dst_stride = dst_stride;
   pef_input->mv_refined = mv_refined;
+#if CONFIG_REFINEMV
+  pef_input->refinemv_subinfo = refinemv_subinfo;
+#endif  // CONFIG_REFINEMV
 }
 
 #if CONFIG_OPTFLOW_REFINEMENT
@@ -191,8 +197,17 @@
 // check if the neighboring mvs are the same
 void check_mv(bool *diff_mv, int pef_mode, int mv_rows, int mv_cols,
               int mvs_stride, const TPL_MV_REF *tpl_mvs, int tip_step,
-              int n_blocks, int_mv *mv_refined, int opfl_step) {
+              int n_blocks, int_mv *mv_refined, int opfl_step
+#if CONFIG_REFINEMV
+              ,
+              REFINEMV_SUBMB_INFO *refinemv_subinfo, int refinemv_step
+#endif  // CONFIG_REFINEMV
+) {
+#if CONFIG_REFINEMV
+  if (pef_mode < 0 || pef_mode > 3) return;
+#else
   if (pef_mode < 0 || pef_mode > 2) return;
+#endif                  // CONFIG_REFINEMV
   if (pef_mode == 0) {  // opfl mv
     const int_mv *cur_mv_refined_ref0 = &mv_refined[n_blocks * 2 + 0];
     const int_mv *cur_mv_refined_ref1 = &mv_refined[n_blocks * 2 + 1];
@@ -200,6 +215,17 @@
         cur_mv_refined_ref0[0].as_int != cur_mv_refined_ref0[-opfl_step].as_int;
     *diff_mv |=
         cur_mv_refined_ref1[0].as_int != cur_mv_refined_ref1[-opfl_step].as_int;
+#if CONFIG_REFINEMV
+  } else if (pef_mode == 3) {  // refinemv mv
+    const int_mv *cur_mv_refined_ref0 = &refinemv_subinfo->refinemv[0];
+    const int_mv *cur_mv_refined_ref1 = &refinemv_subinfo->refinemv[1];
+    const int_mv *prev_mv_refined_ref0 =
+        &refinemv_subinfo[-refinemv_step].refinemv[0];
+    const int_mv *prev_mv_refined_ref1 =
+        &refinemv_subinfo[-refinemv_step].refinemv[1];
+    *diff_mv = cur_mv_refined_ref0[0].as_int != prev_mv_refined_ref0[0].as_int;
+    *diff_mv |= cur_mv_refined_ref1[0].as_int != prev_mv_refined_ref1[0].as_int;
+#endif      // CONFIG_REFINEMV
   } else {  // tip mv
     const TPL_MV_REF *cur_tpl_mv = tpl_mvs + mv_rows * mvs_stride + mv_cols;
     const TPL_MV_REF *prev_tpl_mv = cur_tpl_mv - tip_step;
@@ -344,7 +370,16 @@
           AOMMIN(prev_x_step, x_step) >= filt_len) {
         bool diff_mv = 0;
         check_mv(&diff_mv, pef_mode, mv_rows, mv_cols, mvs_stride, tpl_mvs, 1,
-                 n_blocks, pef_input->mv_refined, 2);
+                 n_blocks, pef_input->mv_refined, 2
+#if CONFIG_REFINEMV
+                 ,
+                 (pef_mode == 3) ? (pef_input->refinemv_subinfo +
+                                    (j >> MI_SIZE_LOG2) * MAX_MIB_SIZE +
+                                    (i >> MI_SIZE_LOG2))
+                                 : NULL,
+                 1
+#endif  // CONFIG_REFINEMV
+        );
         if (diff_mv) {
           filt_func filt_vert_func =
               (y_step == PEF_MCU_SZ && x_step == PEF_MCU_SZ)
@@ -359,7 +394,16 @@
           AOMMIN(prev_y_step, y_step) >= filt_len) {
         bool diff_mv = 0;
         check_mv(&diff_mv, pef_mode, mv_rows, mv_cols, mvs_stride, tpl_mvs,
-                 mvs_stride, n_blocks, pef_input->mv_refined, wn);
+                 mvs_stride, n_blocks, pef_input->mv_refined, wn
+#if CONFIG_REFINEMV
+                 ,
+                 (pef_mode == 3) ? (pef_input->refinemv_subinfo +
+                                    (j >> MI_SIZE_LOG2) * MAX_MIB_SIZE +
+                                    (i >> MI_SIZE_LOG2))
+                                 : NULL,
+                 MAX_MIB_SIZE
+#endif  // CONFIG_REFINEMV
+        );
         if (diff_mv) {
           filt_func filt_horz_func = x_step == PEF_MCU_SZ
                                          ? highbd_filt_horz_pred
@@ -425,7 +469,12 @@
     const int dst_stride = dst_buf->stride;
     PefFuncInput pef_input;
     setup_pef_input(xd, 2, plane, dst, dst_stride, dst_buf->width,
-                    dst_buf->height, NULL, &pef_input);
+                    dst_buf->height, NULL,
+#if CONFIG_REFINEMV
+
+                    NULL,
+#endif  // CONFIG_REFINEMV
+                    &pef_input);
     enhance_sub_prediction_blocks(cm, xd, &pef_input);
   }
 }
@@ -437,6 +486,10 @@
                         ,
                         int_mv *const mv_refined, int use_opfl
 #endif  // CONFIG_OPTFLOW_REFINEMENT
+#if CONFIG_REFINEMV
+                        ,
+                        int use_refinemv, REFINEMV_SUBMB_INFO *refinemv_subinfo
+#endif  // CONFIG_REFINEMV
 ) {
   if (!cm->seq_params.enable_pef) return;
   if (!cm->features.allow_pef) return;
@@ -445,7 +498,11 @@
   const int use_tip = is_tip_ref_frame(mbmi->ref_frame[0]);
   if (use_tip) {
     PefFuncInput pef_input;
-    setup_pef_input(xd, 1, plane, dst, dst_stride, bw, bh, NULL, &pef_input);
+    setup_pef_input(xd, 1, plane, dst, dst_stride, bw, bh, NULL,
+#if CONFIG_REFINEMV
+                    NULL,
+#endif  // CONFIG_REFINEMV
+                    &pef_input);
     enhance_sub_prediction_blocks(cm, xd, &pef_input);
     return;
   }
@@ -455,10 +512,22 @@
   if (use_opfl) {
     PefFuncInput pef_input;
     setup_pef_input(xd, 0, plane, dst, dst_stride, bw, bh, mv_refined,
+#if CONFIG_REFINEMV
+                    NULL,
+#endif  // CONFIG_REFINEMV
                     &pef_input);
     enhance_sub_prediction_blocks(cm, xd, &pef_input);
     return;
   }
 #endif  // CONFIG_OPTFLOW_REFINEMENT
+#if CONFIG_REFINEMV
+  if (use_refinemv) {
+    PefFuncInput pef_input;
+    setup_pef_input(xd, 3, plane, dst, dst_stride, bw, bh, mv_refined,
+                    refinemv_subinfo, &pef_input);
+    enhance_sub_prediction_blocks(cm, xd, &pef_input);
+    return;
+  }
+#endif  // CONFIG_REFINEMV
   return;
 }
diff --git a/av1/common/pef.h b/av1/common/pef.h
index 1363c0f..1258970 100644
--- a/av1/common/pef.h
+++ b/av1/common/pef.h
@@ -53,7 +53,8 @@
 
 // Structure for PEF function input
 typedef struct {
-  // 0 for OPFL prediciton, 1 for TIP prediciton, 2 for TIP frame
+  // 0 for OPFL prediction, 1 for TIP prediction, 2 for TIP frame, 3 for
+  // refinemv prediction
   int pef_mode;
   int plane;
   int bw;
@@ -64,6 +65,9 @@
   uint16_t *dst;
   int dst_stride;
   int_mv *mv_refined;
+#if CONFIG_REFINEMV
+  REFINEMV_SUBMB_INFO *refinemv_subinfo;
+#endif  // CONFIG_REFINEMV
 } PefFuncInput;
 
 typedef void (*filt_func)(uint16_t *s, int stride, int bd, uint16_t q_thresh,
@@ -84,6 +88,10 @@
                         ,
                         int_mv *const mv_refined, int use_opfl
 #endif  // CONFIG_OPTFLOW_REFINEMENT
+#if CONFIG_REFINEMV
+                        ,
+                        int use_refinemv, REFINEMV_SUBMB_INFO *refinemv_subinfo
+#endif  // CONFIG_REFINEMV
 );
 
 #ifdef __cplusplus
diff --git a/av1/common/pred_common.c b/av1/common/pred_common.c
index 39f5548..f90946b 100644
--- a/av1/common/pred_common.c
+++ b/av1/common/pred_common.c
@@ -256,9 +256,9 @@
 
 static void palette_add_to_cache(uint16_t *cache, int *n, uint16_t val) {
   // Do not add an already existing value
-#if !CONFIG_INDEP_PALETTE_PARSING
+#if !CONFIG_PALETTE_IMPROVEMENTS
   if (*n > 0 && val == cache[*n - 1]) return;
-#endif  //! CONFIG_INDEP_PALETTE_PARSING
+#endif  //! CONFIG_PALETTE_IMPROVEMENTS
 
   cache[(*n)++] = val;
 }
@@ -286,7 +286,7 @@
   while (above_n > 0 && left_n > 0) {
     uint16_t v_above = above_colors[above_idx];
     uint16_t v_left = left_colors[left_idx];
-#if CONFIG_INDEP_PALETTE_PARSING
+#if CONFIG_PALETTE_IMPROVEMENTS
     palette_add_to_cache(cache, &n, v_above);
     ++above_idx, --above_n;
     palette_add_to_cache(cache, &n, v_left);
@@ -300,7 +300,7 @@
       ++above_idx, --above_n;
       if (v_left == v_above) ++left_idx, --left_n;
     }
-#endif  // CONFIG_INDEP_PALETTE_PARSING
+#endif  // CONFIG_PALETTE_IMPROVEMENTS
   }
   while (above_n-- > 0) {
     uint16_t val = above_colors[above_idx++];
diff --git a/av1/common/pred_common.h b/av1/common/pred_common.h
index dc4eebb..3917992 100644
--- a/av1/common/pred_common.h
+++ b/av1/common/pred_common.h
@@ -283,11 +283,17 @@
   const RefCntBuffer *const bck_buf = get_ref_frame_buf(cm, mbmi->ref_frame[0]);
   const RefCntBuffer *const fwd_buf = get_ref_frame_buf(cm, mbmi->ref_frame[1]);
   int bck_frame_index = 0, fwd_frame_index = 0;
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+  int cur_frame_index = cm->cur_frame->display_order_hint;
+
+  if (bck_buf != NULL) bck_frame_index = bck_buf->display_order_hint;
+  if (fwd_buf != NULL) fwd_frame_index = fwd_buf->display_order_hint;
+#else
   int cur_frame_index = cm->cur_frame->order_hint;
 
   if (bck_buf != NULL) bck_frame_index = bck_buf->order_hint;
   if (fwd_buf != NULL) fwd_frame_index = fwd_buf->order_hint;
-
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
   int fwd = abs(get_relative_dist(&cm->seq_params.order_hint_info,
                                   fwd_frame_index, cur_frame_index));
   int bck = abs(get_relative_dist(&cm->seq_params.order_hint_info,
@@ -517,31 +523,34 @@
 // The prediction flags in these dummy entries are initialized to 0.
 static INLINE int get_tx_size_context(const MACROBLOCKD *xd) {
   const MB_MODE_INFO *mbmi = xd->mi[0];
+  const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+  const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
   const TX_SIZE max_tx_size =
       max_txsize_rect_lookup[mbmi->sb_type[PLANE_TYPE_Y]];
   const int max_tx_wide = tx_size_wide[max_tx_size];
   const int max_tx_high = tx_size_high[max_tx_size];
-  const int default_ctx[MAX_NUM_NEIGHBORS] = {
-    xd->above_txfm_context[0] >= max_tx_wide,
-    xd->left_txfm_context[0] >= max_tx_high
-  };
+  const int has_above = xd->up_available;
+  const int has_left = xd->left_available;
 
-  const int max_tx_threshold[MAX_NUM_NEIGHBORS] = { max_tx_wide, max_tx_high };
+  int above = xd->above_txfm_context[0] >= max_tx_wide;
+  int left = xd->left_txfm_context[0] >= max_tx_high;
 
-  int ctx = 0;
-  for (int i = 0; i < MAX_NUM_NEIGHBORS; ++i) {
-    const MB_MODE_INFO *const neighbor = xd->neighbors[i];
-    if (neighbor != NULL) {
-      if (is_inter_block(neighbor, xd->tree_type)) {
-        const int block_size = neighbor->sb_type[PLANE_TYPE_Y];
-        ctx += (block_size_wide[block_size] >= max_tx_threshold[i]);
-      } else {
-        ctx += default_ctx[i];
-      }
-    }
-  }
+  if (has_above)
+    if (is_inter_block(above_mbmi, xd->tree_type))
+      above = block_size_wide[above_mbmi->sb_type[PLANE_TYPE_Y]] >= max_tx_wide;
 
-  return ctx;
+  if (has_left)
+    if (is_inter_block(left_mbmi, xd->tree_type))
+      left = block_size_high[left_mbmi->sb_type[PLANE_TYPE_Y]] >= max_tx_high;
+
+  if (has_above && has_left)
+    return (above + left);
+  else if (has_above)
+    return above;
+  else if (has_left)
+    return left;
+  else
+    return 0;
 }
 
 #ifdef __cplusplus
diff --git a/av1/common/reconinter.c b/av1/common/reconinter.c
index 2825186..99759b2 100644
--- a/av1/common/reconinter.c
+++ b/av1/common/reconinter.c
@@ -81,6 +81,12 @@
   inter_pred_params->orig_block_width = block_width;
   inter_pred_params->orig_block_height = block_height;
 #endif  // CONFIG_OPTFLOW_REFINEMENT
+
+#if CONFIG_REFINEMV
+  inter_pred_params->original_pu_width = block_width;
+  inter_pred_params->original_pu_height = block_height;
+#endif  // CONFIG_REFINEMV
+
   inter_pred_params->pix_row = pix_row;
   inter_pred_params->pix_col = pix_col;
   inter_pred_params->subsampling_x = subsampling_x;
@@ -92,6 +98,16 @@
   inter_pred_params->mode = TRANSLATION_PRED;
   inter_pred_params->comp_mode = UNIFORM_SINGLE;
 
+#if CONFIG_REFINEMV
+  inter_pred_params->use_ref_padding = 0;
+  inter_pred_params->ref_area = NULL;
+#endif  // CONFIG_REFINEMV
+
+#if CONFIG_D071_IMP_MSK_BLD
+  inter_pred_params->border_data.enable_bacp = 0;
+  inter_pred_params->border_data.bacp_block_data = NULL;
+#endif  // CONFIG_D071_IMP_MSK_BLD
+
   if (is_intrabc) {
     inter_pred_params->interp_filter_params[0] = &av1_intrabc_filter_params;
     inter_pred_params->interp_filter_params[1] = &av1_intrabc_filter_params;
@@ -119,6 +135,12 @@
   if (is_tip_ref_frame(mi->ref_frame[ref])) return;
 #endif  // CONFIG_TIP
 
+#if CONFIG_REFINEMV
+  // We do not do refineMV for warp blocks
+  // We may need to return from here.
+  if (mi->refinemv_flag) return;
+#endif  // CONFIG_REFINEMV
+
   if (xd->cur_frame_force_integer_mv) return;
 
   if (av1_allow_warp(mi, warp_types, &xd->global_motion[mi->ref_frame[ref]],
@@ -179,6 +201,68 @@
 };
 #else
 /* clang-format off */
+#if WEDGE_BLD_SIG
+// rounded cosine and sine look-up tables given by round(32*cos(i))
+static const int8_t wedge_cos_lut[WEDGE_ANGLES] = {
+  //  0,  1,  2,  4,  6,
+     32, 31, 29, 23, 14,
+  //  8, 10, 12, 14, 15,
+      0,-14,-23,-29,-31,
+  // 16, 17, 18, 20, 22,
+    -32,-31,-29,-23,-14,
+  // 24, 26, 28, 30, 31
+      0, 14, 23, 29, 31
+};
+static const int8_t wedge_sin_lut[WEDGE_ANGLES] = {
+  //  0,  1,  2,  4,  6,
+      0, -8,-14,-23,-29,
+  //  8, 10, 12, 14, 15,
+    -32,-29,-23,-14, -8,
+  // 16, 17, 18, 20, 22,
+      0,  8, 14, 23, 29,
+  // 24, 26, 28, 30, 31
+     32, 29, 23, 14,  8
+};
+
+// rounded sigmoid function look-up table given by round(1/(1+exp(-x)))
+static const int8_t pos_dist_2_bld_weight[WEDGE_BLD_LUT_SIZE]={
+  32, 32, 33, 33, 34, 34, 35, 35,
+  36, 36, 37, 37, 38, 38, 39, 39,
+  40, 40, 41, 41, 42, 42, 43, 43,
+  43, 44, 44, 45, 45, 46, 46, 46,
+  47, 47, 48, 48, 48, 49, 49, 49,
+  50, 50, 50, 51, 51, 51, 52, 52,
+  52, 53, 53, 53, 53, 54, 54, 54,
+  55, 55, 55, 55, 55, 56, 56, 56,
+  56, 57, 57, 57, 57, 57, 58, 58,
+  58, 58, 58, 58, 59, 59, 59, 59,
+  59, 59, 59, 60, 60, 60, 60, 60,
+  60, 60, 60, 60, 61, 61, 61, 61,
+  61, 61, 61, 61, 61, 61, 61, 62,
+  62, 62, 62, 62, 62, 62, 62, 62,
+  62, 62, 62, 62, 62, 62, 62, 62,
+  63, 63, 63, 63, 63, 63, 63, 64
+};
+
+static const int8_t neg_dist_2_bld_weight[WEDGE_BLD_LUT_SIZE]={
+  32, 32, 31, 31, 30, 30, 29, 29,
+  28, 28, 27, 27, 26, 26, 25, 25,
+  24, 24, 23, 23, 22, 22, 21, 21,
+  21, 20, 20, 19, 19, 18, 18, 18,
+  17, 17, 16, 16, 16, 15, 15, 15,
+  14, 14, 14, 13, 13, 13, 12, 12,
+  12, 11, 11, 11, 11, 10, 10, 10,
+   9,  9,  9,  9,  9,  8,  8,  8,
+   8,  7,  7,  7,  7,  7,  6,  6,
+   6,  6,  6,  6,  5,  5,  5,  5,
+   5,  5,  5,  4,  4,  4,  4,  4,
+   4,  4,  4,  4,  3,  3,  3,  3,
+   3,  3,  3,  3,  3,  3,  3,  2,
+   2,  2,  2,  2,  2,  2,  2,  2,
+   2,  2,  2,  2,  2,  2,  2,  2,
+   1,  1,  1,  1,  1,  1,  1,  0
+};
+#else
 static const int8_t wedge_cos_lut[WEDGE_ANGLES] = {
   //  0,  1,  2,  4,  6,
       8,  8,  8,  4,  4,
@@ -199,6 +283,7 @@
   // 24, 26, 28, 30, 31
       8,  8,  4,  4,  2
 };
+#endif
 /* clang-format on */
 #endif  // !CONFIG_WEDGE_MOD_EXT
 
@@ -273,6 +358,10 @@
                 smooth_interintra_mask_buf[INTERINTRA_MODES][BLOCK_SIZES_ALL]
                                           [MAX_WEDGE_SQUARE]);
 
+#if CONFIG_CWP
+DECLARE_ALIGNED(16, static int8_t, cwp_mask[2][MAX_CWP_NUM][MAX_SB_SQUARE]);
+#endif  // CONFIG_CWP
+
 static wedge_masks_type wedge_masks[BLOCK_SIZES_ALL][2];
 
 #if CONFIG_WEDGE_MOD_EXT
@@ -403,6 +492,34 @@
 };
 #endif
 
+#if CONFIG_CWP
+// Init the cwp masks, called by init_cwp_masks
+static AOM_INLINE void build_cwp_mask(int8_t *mask, int stride,
+                                      BLOCK_SIZE plane_bsize, int8_t w) {
+  const int bw = block_size_wide[plane_bsize];
+  const int bh = block_size_high[plane_bsize];
+  for (int i = 0; i < bh; ++i) {
+    for (int j = 0; j < bw; ++j) mask[j] = w;
+    mask += stride;
+  }
+}
+// Init the cwp masks
+void init_cwp_masks() {
+  const int bs = BLOCK_128X128;
+  const int bw = block_size_wide[bs];
+  for (int list_idx = 0; list_idx < 2; ++list_idx) {
+    for (int idx = 0; idx < MAX_CWP_NUM; ++idx) {
+      int8_t weight = cwp_weighting_factor[list_idx][idx] * 4;
+      build_cwp_mask(cwp_mask[list_idx][idx], bw, bs, weight);
+    }
+  }
+}
+// Return the associated cwp mask
+const int8_t *av1_get_cwp_mask(int list_idx, int idx) {
+  return cwp_mask[list_idx][idx];
+}
+#endif  // CONFIG_CWP
+
 static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg,
                                              BLOCK_SIZE sb_type) {
   const uint8_t *master;
@@ -433,12 +550,17 @@
 
 const uint8_t *av1_get_compound_type_mask(
     const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type) {
+#if !CONFIG_D071_IMP_MSK_BLD
   assert(is_masked_compound_type(comp_data->type));
+#endif  // !CONFIG_D071_IMP_MSK_BLD
   (void)sb_type;
   switch (comp_data->type) {
     case COMPOUND_WEDGE:
       return av1_get_contiguous_soft_mask(comp_data->wedge_index,
                                           comp_data->wedge_sign, sb_type);
+#if CONFIG_D071_IMP_MSK_BLD
+    case COMPOUND_AVERAGE:
+#endif  // CONFIG_D071_IMP_MSK_BLD
     case COMPOUND_DIFFWTD: return comp_data->seg_mask;
     default: assert(0); return NULL;
   }
@@ -569,7 +691,14 @@
       int y = ((n << 1) - h + 1) * wedge_sin_lut[angle];
       for (int m = 0; m < w; m++, idx++) {
         int d = ((m << 1) - w + 1) * wedge_cos_lut[angle] + y;
+#if WEDGE_BLD_SIG
+        const int clamp_d = clamp(d, -127, 127);
+        wedge_master_mask[0][angle][idx] =
+            clamp_d >= 0 ? pos_dist_2_bld_weight[clamp_d]
+                         : neg_dist_2_bld_weight[-clamp_d];
+#else
         wedge_master_mask[0][angle][idx] = clamp((d + 32), 0, 64);
+#endif
         wedge_master_mask[1][angle][idx] =
             64 - wedge_master_mask[0][angle][idx];
       }
@@ -722,6 +851,14 @@
   }
 }
 
+#if CONFIG_REFINEMV
+// Compute the SAD values for refineMV modes
+int get_refinemv_sad(uint16_t *src1, uint16_t *src2, int width, int height,
+                     int bd) {
+  return get_highbd_sad(src1, width, src2, width, bd, width, height);
+}
+#endif  // CONFIG_REFINEMV
+
 #if CONFIG_OPTFLOW_REFINEMENT
 // Restrict MV delta to 1 or 2 pixels. This restriction would reduce complexity
 // in hardware.
@@ -745,7 +882,12 @@
     const AV1_COMMON *cm, MACROBLOCKD *xd, int plane, const MB_MODE_INFO *mi,
     int bw, int bh, int mi_x, int mi_y, uint16_t **mc_buf,
     InterPredParams *inter_pred_params,
-    CalcSubpelParamsFunc calc_subpel_params_func, int ref, uint16_t *pred_dst) {
+    CalcSubpelParamsFunc calc_subpel_params_func, int ref, uint16_t *pred_dst
+#if CONFIG_REFINEMV
+    ,
+    const MV *const src_mv, int pu_width, int pu_height
+#endif  // CONFIG_REFINEMV
+) {
   assert(cm->seq_params.order_hint_info.enable_order_hint);
   const int is_intrabc = is_intrabc_block(mi, xd->tree_type);
 #if CONFIG_OPTFLOW_ON_TIP
@@ -769,11 +911,18 @@
   const struct scale_factors *const sf =
       is_intrabc ? &cm->sf_identity : xd->block_ref_scale_factors[ref];
 #endif  // CONFIG_OPTFLOW_ON_TIP
-  const BLOCK_SIZE bsize = mi->sb_type[PLANE_TYPE_Y];
+
   const int ss_x = pd->subsampling_x;
   const int ss_y = pd->subsampling_y;
+#if CONFIG_REFINEMV
+  const int row_start = (bw == 4) && ss_y ? -1 : 0;
+  const int col_start = (bh == 4) && ss_x ? -1 : 0;
+#else
+  const BLOCK_SIZE bsize = mi->sb_type[PLANE_TYPE_Y];
   const int row_start = (block_size_high[bsize] == 4) && ss_y ? -1 : 0;
   const int col_start = (block_size_wide[bsize] == 4) && ss_x ? -1 : 0;
+#endif  // CONFIG_REFINEMV
+
   const int pre_x = (mi_x + MI_SIZE * col_start) >> ss_x;
   const int pre_y = (mi_y + MI_SIZE * row_start) >> ss_y;
 
@@ -788,6 +937,10 @@
   av1_init_inter_params(inter_pred_params, bw, bh, pre_y, pre_x,
                         pd->subsampling_x, pd->subsampling_y, xd->bd,
                         mi->use_intrabc[0], sf, pre_buf, mi->interp_fltr);
+#if CONFIG_REFINEMV
+  inter_pred_params->original_pu_width = pu_width;
+  inter_pred_params->original_pu_height = pu_height;
+#endif  // CONFIG_REFINEMV
 
 #if CONFIG_TIP
   const int width = (cm->mi_params.mi_cols << MI_SIZE_LOG2);
@@ -805,7 +958,13 @@
   if (inter_pred_params->mode == WARP_PRED) return;
 
   assert(mi->interinter_comp.type == COMPOUND_AVERAGE);
-  av1_build_one_inter_predictor(pred_dst, bw, &mi->mv[ref].as_mv,
+
+  av1_build_one_inter_predictor(pred_dst, bw,
+#if CONFIG_REFINEMV
+                                src_mv,
+#else
+                                &mi->mv[ref].as_mv,
+#endif  // CONFIG_REFINEMV
                                 inter_pred_params, xd, mi_x, mi_y, ref, mc_buf,
                                 calc_subpel_params_func);
 }
@@ -1222,6 +1381,10 @@
     ,
     int do_pred, int use_4x4
 #endif  // CONFIG_OPTFLOW_ON_TIP
+#if CONFIG_REFINEMV
+    ,
+    MV *best_mv_ref, int pu_width, int pu_height
+#endif  // CONFIG_REFINEMV
 ) {
   const int target_prec = MV_REFINE_PREC_BITS;
   const int n = opfl_get_subblock_size(bw, bh, plane
@@ -1252,10 +1415,19 @@
         get_ref_frame_buf(cm, mbmi->ref_frame[0]);
     const RefCntBuffer *const r1_buf =
         get_ref_frame_buf(cm, mbmi->ref_frame[1]);
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
     d0 = get_relative_dist(&cm->seq_params.order_hint_info,
-                           cm->cur_frame->order_hint, r0_buf->order_hint);
+                           cm->cur_frame->display_order_hint,
+                           r0_buf->display_order_hint);
     d1 = get_relative_dist(&cm->seq_params.order_hint_info,
-                           cm->cur_frame->order_hint, r1_buf->order_hint);
+                           cm->cur_frame->display_order_hint,
+                           r1_buf->display_order_hint);
+#else
+  d0 = get_relative_dist(&cm->seq_params.order_hint_info,
+                         cm->cur_frame->order_hint, r0_buf->order_hint);
+  d1 = get_relative_dist(&cm->seq_params.order_hint_info,
+                         cm->cur_frame->order_hint, r1_buf->order_hint);
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
 #if CONFIG_OPTFLOW_ON_TIP
   }
 #endif  // CONFIG_OPTFLOW_ON_TIP
@@ -1268,10 +1440,20 @@
     InterPredParams params0, params1;
     av1_opfl_build_inter_predictor(cm, xd, plane, mbmi, bw, bh, mi_x, mi_y,
                                    mc_buf, &params0, calc_subpel_params_func, 0,
-                                   dst0);
+                                   dst0
+#if CONFIG_REFINEMV
+                                   ,
+                                   &best_mv_ref[0], pu_width, pu_height
+#endif  // CONFIG_REFINEMV
+    );
     av1_opfl_build_inter_predictor(cm, xd, plane, mbmi, bw, bh, mi_x, mi_y,
                                    mc_buf, &params1, calc_subpel_params_func, 1,
-                                   dst1);
+                                   dst1
+#if CONFIG_REFINEMV
+                                   ,
+                                   &best_mv_ref[1], pu_width, pu_height
+#endif  // CONFIG_REFINEMV
+    );
 #if CONFIG_OPTFLOW_ON_TIP
   }
 #endif  // CONFIG_OPTFLOW_ON_TIP
@@ -1356,7 +1538,26 @@
 
   return target_prec;
 }
+#if CONFIG_D071_IMP_MSK_BLD
+int is_out_of_frame_block(const InterPredParams *inter_pred_params,
+                          int frame_width, int frame_height, int sub_block_id) {
+  for (int ref = 0; ref < 2; ref++) {  // inspect both reference blocks
+    const BacpBlockData *const b_data =
+        &inter_pred_params->border_data.bacp_block_data[2 * sub_block_id + ref];
+    if (b_data->x0 < 0 || b_data->x0 > frame_width - 1 || b_data->x1 < 0 ||
+        b_data->x1 > frame_width
 
+        || b_data->y0 < 0 || b_data->y0 > frame_height - 1 || b_data->y1 < 0 ||
+        b_data->y1 > frame_height) {
+      return 1;  // x0/y0 or x1/y1 of this reference fails a bounds check
+    }
+  }
+  return 0;  // every coordinate of both references is within bounds
+}
+
+#endif  // CONFIG_D071_IMP_MSK_BLD
+
+#if !CONFIG_D071_IMP_MSK_BLD
 // Makes the interpredictor for the region by dividing it up into nxn blocks
 // and running the interpredictor code on each one.
 void make_inter_pred_of_nxn(uint16_t *dst, int dst_stride,
@@ -1383,6 +1584,7 @@
       calc_subpel_params_func(&(mv_refined[n_blocks * 2 + ref].as_mv),
                               inter_pred_params, xd, mi_x + i, mi_y + j, ref, 1,
                               mc_buf, &pre, subpel_params, &src_stride);
+
       av1_make_inter_predictor(pre, src_stride, dst, dst_stride,
                                inter_pred_params, subpel_params);
       n_blocks++;
@@ -1427,6 +1629,7 @@
                          &subpel_params);
 }
 #endif  // CONFIG_OPTFLOW_REFINEMENT
+#endif  // !CONFIG_D071_IMP_MSK_BLD
 
 // Equation of line: f(x, y) = a[0]*(x - a[2]*w/8) + a[1]*(y - a[3]*h/8) = 0
 void av1_init_wedge_masks() {
@@ -1440,8 +1643,19 @@
     const CONV_BUF_TYPE *src1, int src1_stride,
     const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
     int w, InterPredParams *inter_pred_params) {
+#if CONFIG_D071_IMP_MSK_BLD
+  const int ssy = (inter_pred_params->conv_params.plane &&
+                   comp_data->type == COMPOUND_AVERAGE)
+                      ? 0
+                      : inter_pred_params->subsampling_y;
+  const int ssx = (inter_pred_params->conv_params.plane &&
+                   comp_data->type == COMPOUND_AVERAGE)
+                      ? 0
+                      : inter_pred_params->subsampling_x;
+#else
   const int ssy = inter_pred_params->subsampling_y;
   const int ssx = inter_pred_params->subsampling_x;
+#endif  // CONFIG_D071_IMP_MSK_BLD
   const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
   const int mask_stride = block_size_wide[sb_type];
   aom_highbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
@@ -1449,11 +1663,19 @@
                                 &inter_pred_params->conv_params,
                                 inter_pred_params->bit_depth);
 }
-
-static void make_masked_inter_predictor(const uint16_t *pre, int pre_stride,
-                                        uint16_t *dst, int dst_stride,
-                                        InterPredParams *inter_pred_params,
-                                        const SubpelParams *subpel_params) {
+#if !CONFIG_D071_IMP_MSK_BLD
+static
+#endif
+    void
+    make_masked_inter_predictor(const uint16_t *pre, int pre_stride,
+                                uint16_t *dst, int dst_stride,
+                                InterPredParams *inter_pred_params,
+                                const SubpelParams *subpel_params
+#if CONFIG_D071_IMP_MSK_BLD
+                                ,
+                                int use_bacp, int sub_block_id
+#endif  // CONFIG_D071_IMP_MSK_BLD
+    ) {
   const INTERINTER_COMPOUND_DATA *comp_data = &inter_pred_params->mask_comp;
   BLOCK_SIZE sb_type = inter_pred_params->sb_type;
 
@@ -1482,12 +1704,183 @@
         inter_pred_params->block_width, &inter_pred_params->conv_params,
         inter_pred_params->bit_depth);
   }
+
+#if CONFIG_D071_IMP_MSK_BLD
+  // Mask is generated from luma and reuse for chroma
+  const int generate_mask_for_this_plane =
+      (!inter_pred_params->conv_params.plane ||
+       comp_data->type == COMPOUND_AVERAGE);
+  if (use_bacp && generate_mask_for_this_plane) {
+    uint8_t *mask = comp_data->seg_mask;
+    int mask_stride = block_size_wide[sb_type];
+    BacpBlockData *b_data_0 =
+        &inter_pred_params->border_data.bacp_block_data[2 * sub_block_id + 0];
+    BacpBlockData *b_data_1 =
+        &inter_pred_params->border_data.bacp_block_data[2 * sub_block_id + 1];
+
+    for (int i = 0; i < inter_pred_params->block_height; ++i) {
+      for (int j = 0; j < inter_pred_params->block_width; ++j) {
+        int x = b_data_0->x0 + j;
+        int y = b_data_0->y0 + i;
+
+        int p0_available =
+            (x >= 0 && x < inter_pred_params->ref_frame_buf.width && y >= 0 &&
+             y < inter_pred_params->ref_frame_buf.height);
+
+        x = b_data_1->x0 + j;
+        y = b_data_1->y0 + i;
+        int p1_available =
+            (x >= 0 && x < inter_pred_params->ref_frame_buf.width && y >= 0 &&
+             y < inter_pred_params->ref_frame_buf.height);
+
+        if (p0_available && !p1_available) {
+          mask[j] = AOM_BLEND_A64_MAX_ALPHA - DEFAULT_IMP_MSK_WT;
+        } else if (!p0_available && p1_available) {
+          mask[j] = DEFAULT_IMP_MSK_WT;
+        } else if (comp_data->type == COMPOUND_AVERAGE) {
+          mask[j] = AOM_BLEND_A64_MAX_ALPHA >> 1;
+        }
+      }
+      mask += mask_stride;
+    }
+  }
+#endif  // CONFIG_D071_IMP_MSK_BLD
+
   build_masked_compound_no_round(
       dst, dst_stride, org_dst, org_dst_stride, tmp_buf16, tmp_buf_stride,
       comp_data, sb_type, inter_pred_params->block_height,
       inter_pred_params->block_width, inter_pred_params);
+
+#if CONFIG_D071_IMP_MSK_BLD
+  // restore to previous state
+  inter_pred_params->conv_params.dst = org_dst;
+  inter_pred_params->conv_params.dst_stride = org_dst_stride;
+#endif  // CONFIG_D071_IMP_MSK_BLD
 }
 
+#if CONFIG_D071_IMP_MSK_BLD && CONFIG_OPTFLOW_REFINEMENT
+// Builds the inter predictor of one reference by splitting the block into nxn
+// sub-blocks and predicting each with its own refined MV from mv_refined.
+void make_inter_pred_of_nxn(uint16_t *dst, int dst_stride,
+                            int_mv *const mv_refined,
+                            InterPredParams *inter_pred_params, MACROBLOCKD *xd,
+                            int mi_x, int mi_y, int ref, uint16_t **mc_buf,
+                            CalcSubpelParamsFunc calc_subpel_params_func, int n,
+                            SubpelParams *subpel_params) {
+  int n_blocks = 0;
+  int w = inter_pred_params->orig_block_width;
+  int h = inter_pred_params->orig_block_height;
+  assert(w % n == 0);
+  assert(h % n == 0);
+  CONV_BUF_TYPE *orig_conv_dst = inter_pred_params->conv_params.dst;
+  inter_pred_params->block_width = n;
+  inter_pred_params->block_height = n;
+
+  uint16_t *pre;
+  int src_stride = 0;
+
+  // Process whole nxn blocks in raster order.
+  for (int j = 0; j <= h - n; j += n) {
+    for (int i = 0; i <= w - n; i += n) {
+      calc_subpel_params_func(&(mv_refined[n_blocks * 2 + ref].as_mv),
+                              inter_pred_params, xd, mi_x + i, mi_y + j, ref, 1,
+                              mc_buf, &pre, subpel_params, &src_stride);
+
+#if CONFIG_D071_IMP_MSK_BLD
+      int use_bacp = 0;
+      assert(inter_pred_params->mask_comp.type == COMPOUND_AVERAGE);
+      assert(inter_pred_params->comp_mode == UNIFORM_COMP);
+      int stored_do_average = inter_pred_params->conv_params.do_average;
+      InterCompMode stored_comp_mode = inter_pred_params->comp_mode;
+      uint8_t *stored_seg_mask = inter_pred_params->mask_comp.seg_mask;
+
+      if (inter_pred_params->border_data.enable_bacp) {
+        inter_pred_params->border_data.bacp_block_data[n_blocks * 2 + ref].x0 =
+            subpel_params->x0;
+        inter_pred_params->border_data.bacp_block_data[n_blocks * 2 + ref].x1 =
+            subpel_params->x1;
+        inter_pred_params->border_data.bacp_block_data[n_blocks * 2 + ref].y0 =
+            subpel_params->y0;
+        inter_pred_params->border_data.bacp_block_data[n_blocks * 2 + ref].y1 =
+            subpel_params->y1;
+        if (ref == 1) {
+          use_bacp = is_out_of_frame_block(
+              inter_pred_params, inter_pred_params->ref_frame_buf.width,
+              inter_pred_params->ref_frame_buf.height, n_blocks);
+
+          if (use_bacp &&
+              inter_pred_params->mask_comp.type == COMPOUND_AVERAGE) {
+            inter_pred_params->conv_params.do_average = 0;
+            inter_pred_params->comp_mode = MASK_COMP;
+            inter_pred_params->mask_comp.seg_mask = xd->seg_mask;
+          }
+        }
+      }
+
+      assert(IMPLIES(ref == 0, !use_bacp));
+      if (use_bacp) {
+        assert(inter_pred_params->comp_mode == MASK_COMP);
+        make_masked_inter_predictor(pre, src_stride, dst, dst_stride,
+                                    inter_pred_params, subpel_params, use_bacp,
+                                    n_blocks);
+
+      } else {
+#endif  // CONFIG_D071_IMP_MSK_BLD
+
+        av1_make_inter_predictor(pre, src_stride, dst, dst_stride,
+                                 inter_pred_params, subpel_params);
+#if CONFIG_D071_IMP_MSK_BLD
+      }
+
+      // Restore the inter_pred_params fields overridden for the masked blend.
+      if (use_bacp && inter_pred_params->mask_comp.type == COMPOUND_AVERAGE) {
+        inter_pred_params->conv_params.do_average = stored_do_average;
+        inter_pred_params->comp_mode = stored_comp_mode;
+        inter_pred_params->mask_comp.seg_mask = stored_seg_mask;
+      }
+#endif  // CONFIG_D071_IMP_MSK_BLD
+      n_blocks++;
+      dst += n;
+      inter_pred_params->conv_params.dst += n;
+      inter_pred_params->pix_col += n;
+    }
+    dst -= w;
+    inter_pred_params->conv_params.dst -= w;
+    inter_pred_params->pix_col -= w;
+
+    dst += n * dst_stride;
+    inter_pred_params->conv_params.dst +=
+        n * inter_pred_params->conv_params.dst_stride;
+    inter_pred_params->pix_row += n;
+  }
+
+  inter_pred_params->conv_params.dst = orig_conv_dst;
+}
+// Second motion-compensation pass: rebuild the predictor per nxn sub-block.
+void av1_opfl_rebuild_inter_predictor(
+    uint16_t *dst, int dst_stride, int plane, int_mv *const mv_refined,
+    InterPredParams *inter_pred_params, MACROBLOCKD *xd, int mi_x, int mi_y,
+    int ref, uint16_t **mc_buf, CalcSubpelParamsFunc calc_subpel_params_func
+#if CONFIG_OPTFLOW_ON_TIP
+    ,
+    int use_4x4
+#endif  // CONFIG_OPTFLOW_ON_TIP
+) {
+  SubpelParams subpel_params;
+  int w = inter_pred_params->block_width;
+  int h = inter_pred_params->block_height;
+  int n = opfl_get_subblock_size(w, h, plane
+#if CONFIG_OPTFLOW_ON_TIP
+                                 ,
+                                 use_4x4
+#endif  // CONFIG_OPTFLOW_ON_TIP
+  );
+  make_inter_pred_of_nxn(dst, dst_stride, mv_refined, inter_pred_params, xd,
+                         mi_x, mi_y, ref, mc_buf, calc_subpel_params_func, n,
+                         &subpel_params);
+}
+#endif  // CONFIG_D071_IMP_MSK_BLD && CONFIG_OPTFLOW_REFINEMENT
+
 void av1_build_one_inter_predictor(
     uint16_t *dst, int dst_stride, const MV *const src_mv,
     InterPredParams *inter_pred_params, MACROBLOCKD *xd, int mi_x, int mi_y,
@@ -1501,13 +1894,52 @@
 #endif                       // CONFIG_OPTFLOW_REFINEMENT
                           mc_buf, &src, &subpel_params, &src_stride);
 
+#if CONFIG_D071_IMP_MSK_BLD
+  int use_bacp = 0;
+  int sub_block_id = 0;
+  if (inter_pred_params->border_data.enable_bacp) {
+    inter_pred_params->border_data.bacp_block_data[2 * sub_block_id + ref].x0 =
+        subpel_params.x0;
+    inter_pred_params->border_data.bacp_block_data[2 * sub_block_id + ref].x1 =
+        subpel_params.x1;
+    inter_pred_params->border_data.bacp_block_data[2 * sub_block_id + ref].y0 =
+        subpel_params.y0;
+    inter_pred_params->border_data.bacp_block_data[2 * sub_block_id + ref].y1 =
+        subpel_params.y1;
+    if (ref == 1) {
+      use_bacp = is_out_of_frame_block(
+          inter_pred_params, inter_pred_params->ref_frame_buf.width,
+          inter_pred_params->ref_frame_buf.height, sub_block_id);
+      if (use_bacp && inter_pred_params->mask_comp.type == COMPOUND_AVERAGE) {
+        inter_pred_params->conv_params.do_average = 0;
+        inter_pred_params->comp_mode = MASK_COMP;
+        inter_pred_params->mask_comp.seg_mask = xd->seg_mask;
+      }
+    }
+  }
+
+  assert(IMPLIES(ref == 0, !use_bacp));
+#endif  // CONFIG_D071_IMP_MSK_BLD
+
   if (inter_pred_params->comp_mode == UNIFORM_SINGLE ||
       inter_pred_params->comp_mode == UNIFORM_COMP) {
     av1_make_inter_predictor(src, src_stride, dst, dst_stride,
                              inter_pred_params, &subpel_params);
+#if CONFIG_D071_IMP_MSK_BLD
+    assert(IMPLIES(use_bacp, ref == 0));
+    assert(use_bacp == 0);
+#endif  // CONFIG_D071_IMP_MSK_BLD
   } else {
     make_masked_inter_predictor(src, src_stride, dst, dst_stride,
-                                inter_pred_params, &subpel_params);
+                                inter_pred_params, &subpel_params
+#if CONFIG_D071_IMP_MSK_BLD
+                                ,
+                                use_bacp, 0
+#endif  // CONFIG_D071_IMP_MSK_BLD
+    );
+#if CONFIG_D071_IMP_MSK_BLD
+    assert(IMPLIES(inter_pred_params->border_data.enable_bacp, ref == 1));
+#endif  // CONFIG_D071_IMP_MSK_BLD
   }
 }
 
@@ -1595,7 +2027,12 @@
                              inter_pred_params, &subpel_params);
   } else {
     make_masked_inter_predictor(src, src_stride, dst, dst_stride,
-                                inter_pred_params, &subpel_params);
+                                inter_pred_params, &subpel_params
+#if CONFIG_D071_IMP_MSK_BLD
+                                ,
+                                0, 0
+#endif  // CONFIG_D071_IMP_MSK_BLD
+    );
   }
 
   int shift = 8;
@@ -1754,13 +2191,1088 @@
   }
 }
 
+#if CONFIG_REFINEMV
+// Builds a padded b_w x b_h copy of the reference block at (x0, y0) in dst.
+static void refinemv_highbd_pad_mc_border(const uint16_t *src, int src_stride,
+                                          uint16_t *dst, int dst_stride, int x0,
+                                          int y0, int b_w, int b_h,
+                                          const ReferenceArea *ref_area) {
+  // Rewind src by (x0, y0). NOTE(review): left pad reads ref_row[0]; confirm.
+  const uint16_t *ref_row = src - x0 - y0 * src_stride;
+
+  if (y0 >= ref_area->pad_block.y1)
+    ref_row += (ref_area->pad_block.y1 - 1) * src_stride;
+  else if (y0 >= ref_area->pad_block.y0)
+    ref_row += y0 * src_stride;
+  else
+    ref_row += ref_area->pad_block.y0 * src_stride;
+
+  do {
+    int right = 0, copy;
+    int left = x0 < ref_area->pad_block.x0 ? ref_area->pad_block.x0 - x0 : 0;
+
+    if (left > b_w) left = b_w;
+
+    if (x0 + b_w > ref_area->pad_block.x1)
+      right = x0 + b_w - ref_area->pad_block.x1;
+
+    if (right > b_w) right = b_w;
+
+    copy = b_w - left - right;
+
+    if (left) aom_memset16(dst, ref_row[0], left);
+
+    if (copy) memcpy(dst + left, ref_row + x0 + left, copy * sizeof(uint16_t));
+
+    if (right)
+      aom_memset16(dst + left + copy, ref_row[ref_area->pad_block.x1 - 1],
+                   right);
+
+    dst += dst_stride;
+    ++y0;
+
+    if (y0 > ref_area->pad_block.y0 && y0 < ref_area->pad_block.y1)
+      ref_row += src_stride;
+  } while (--b_h);
+}
+// Extends *block by the interpolation filter taps (setting *x_pad / *y_pad)
+// and returns 1 if the extended block leaves the frame or ref_area, i.e.
+// padding is required during motion compensation; returns 0 otherwise.
+int update_extend_mc_border_params(const struct scale_factors *const sf,
+                                   struct buf_2d *const pre_buf, MV32 scaled_mv,
+                                   PadBlock *block, int subpel_x_mv,
+                                   int subpel_y_mv, int do_warp, int is_intrabc,
+                                   int *x_pad, int *y_pad,
+                                   const ReferenceArea *ref_area) {
+  // Get reference width and height.
+  int frame_width = pre_buf->width;
+  int frame_height = pre_buf->height;
+
+  // Do border extension if there is motion or
+  // width/height is not a multiple of 8 pixels.
+#if CONFIG_OPTFLOW_REFINEMENT || CONFIG_TIP
+  // Extension is needed in optical flow refinement to obtain MV offsets
+  (void)scaled_mv;
+  if (!is_intrabc && !do_warp) {
+#else
+  const int is_scaled = av1_is_scaled(sf);
+  if ((!is_intrabc) && (!do_warp) &&
+      (is_scaled || scaled_mv.col || scaled_mv.row || (frame_width & 0x7) ||
+       (frame_height & 0x7))) {
+#endif  // CONFIG_OPTFLOW_REFINEMENT || CONFIG_TIP
+    if (subpel_x_mv || (sf->x_step_q4 != SUBPEL_SHIFTS)) {
+      block->x0 -= AOM_INTERP_EXTEND - 1;
+      block->x1 += AOM_INTERP_EXTEND;
+      *x_pad = 1;
+    }
+
+    if (subpel_y_mv || (sf->y_step_q4 != SUBPEL_SHIFTS)) {
+      block->y0 -= AOM_INTERP_EXTEND - 1;
+      block->y1 += AOM_INTERP_EXTEND;
+      *y_pad = 1;
+    }
+
+    // Border extension is needed if the block is not inside the frame.
+    if (block->x0 < 0 || block->x1 > frame_width - 1 || block->y0 < 0 ||
+        block->y1 > frame_height - 1) {
+      return 1;
+    }
+
+    if (ref_area) {
+      // Border extension is needed if the block leaves the reference area.
+      if (block->x0 < ref_area->pad_block.x0 ||
+          block->x1 > ref_area->pad_block.x1 ||
+          block->y0 < ref_area->pad_block.y0 ||
+          block->y1 > ref_area->pad_block.y1) {
+        return 1;
+      }
+    }
+  }
+  return 0;
+}
+
+// Performs padding of the motion compensated block if required.
+// When the block is partially outside the frame or the reference area, a
+// padded copy is built in paded_ref_buf and *pre / *src_stride point to it.
+static void refinemv_extend_mc_border(
+    const struct scale_factors *const sf, struct buf_2d *const pre_buf,
+    MV32 scaled_mv, PadBlock block, int subpel_x_mv, int subpel_y_mv,
+    int do_warp, int is_intrabc, uint16_t *paded_ref_buf,
+    int paded_ref_buf_stride, uint16_t **pre, int *src_stride,
+    const ReferenceArea *ref_area) {
+  int x_pad = 0, y_pad = 0;
+  if (update_extend_mc_border_params(sf, pre_buf, scaled_mv, &block,
+                                     subpel_x_mv, subpel_y_mv, do_warp,
+                                     is_intrabc, &x_pad, &y_pad, ref_area)) {
+    // Out of border. NOTE(review): the pad helper dereferences ref_area.
+    // Get reference block pointer.
+    const uint16_t *const buf_ptr =
+        pre_buf->buf0 + block.y0 * pre_buf->stride + block.x0;
+    int buf_stride = pre_buf->stride;
+    const int b_w = block.x1 - block.x0;
+    const int b_h = block.y1 - block.y0;
+
+    refinemv_highbd_pad_mc_border(buf_ptr, buf_stride, paded_ref_buf,
+                                  paded_ref_buf_stride, block.x0, block.y0, b_w,
+                                  b_h, ref_area);
+    *src_stride = paded_ref_buf_stride;
+    *pre = paded_ref_buf +
+           y_pad * (AOM_INTERP_EXTEND - 1) * paded_ref_buf_stride +
+           x_pad * (AOM_INTERP_EXTEND - 1);
+  }
+}
+
+#if CONFIG_TIP
+// Derives the sub-pixel related parameters of TIP blocks.
+// Outputs: "subpel_params", the reference block rectangle "block", the
+// source pointer and stride, and the clamped/scaled MV with sub-pel parts.
+void tip_dec_calc_subpel_params(const MV *const src_mv,
+                                InterPredParams *const inter_pred_params,
+                                int mi_x, int mi_y, uint16_t **pre,
+                                SubpelParams *subpel_params, int *src_stride,
+                                PadBlock *block,
+#if CONFIG_OPTFLOW_REFINEMENT
+                                int use_optflow_refinement,
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+                                MV32 *scaled_mv, int *subpel_x_mv,
+                                int *subpel_y_mv) {
+  const struct scale_factors *sf = inter_pred_params->scale_factors;
+  struct buf_2d *pre_buf = &inter_pred_params->ref_frame_buf;
+
+#if CONFIG_REFINEMV
+  const int bw = inter_pred_params->original_pu_width;
+  const int bh = inter_pred_params->original_pu_height;
+#else
+#if CONFIG_OPTFLOW_REFINEMENT
+  // Use original block size to clamp MV and to extend block boundary
+  const int bw = use_optflow_refinement ? inter_pred_params->orig_block_width
+                                        : inter_pred_params->block_width;
+  const int bh = use_optflow_refinement ? inter_pred_params->orig_block_height
+                                        : inter_pred_params->block_height;
+#else
+  const int bw = inter_pred_params->block_width;
+  const int bh = inter_pred_params->block_height;
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+#endif  // CONFIG_REFINEMV
+
+  const int is_scaled = av1_is_scaled(sf);
+  if (is_scaled) {
+    const int ssx = inter_pred_params->subsampling_x;
+    const int ssy = inter_pred_params->subsampling_y;
+    int orig_pos_y = inter_pred_params->pix_row << SUBPEL_BITS;
+    int orig_pos_x = inter_pred_params->pix_col << SUBPEL_BITS;
+#if CONFIG_OPTFLOW_REFINEMENT
+    if (use_optflow_refinement) {
+      orig_pos_y += ROUND_POWER_OF_TWO_SIGNED(src_mv->row * (1 << SUBPEL_BITS),
+                                              MV_REFINE_PREC_BITS + ssy);
+      orig_pos_x += ROUND_POWER_OF_TWO_SIGNED(src_mv->col * (1 << SUBPEL_BITS),
+                                              MV_REFINE_PREC_BITS + ssx);
+    } else {
+      orig_pos_y += src_mv->row * (1 << (1 - ssy));
+      orig_pos_x += src_mv->col * (1 << (1 - ssx));
+    }
+#else
+    orig_pos_y += src_mv->row * (1 << (1 - ssy));
+    orig_pos_x += src_mv->col * (1 << (1 - ssx));
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+    int pos_y = sf->scale_value_y(orig_pos_y, sf);
+    int pos_x = sf->scale_value_x(orig_pos_x, sf);
+    pos_x += SCALE_EXTRA_OFF;
+    pos_y += SCALE_EXTRA_OFF;
+
+    const int top = -AOM_LEFT_TOP_MARGIN_SCALED(ssy);
+    const int left = -AOM_LEFT_TOP_MARGIN_SCALED(ssx);
+    const int bottom = (pre_buf->height + AOM_INTERP_EXTEND)
+                       << SCALE_SUBPEL_BITS;
+    const int right = (pre_buf->width + AOM_INTERP_EXTEND) << SCALE_SUBPEL_BITS;
+    pos_y = clamp(pos_y, top, bottom);
+    pos_x = clamp(pos_x, left, right);
+
+    subpel_params->subpel_x = pos_x & SCALE_SUBPEL_MASK;
+    subpel_params->subpel_y = pos_y & SCALE_SUBPEL_MASK;
+    subpel_params->xs = sf->x_step_q4;
+    subpel_params->ys = sf->y_step_q4;
+
+    // Get reference block top left coordinate.
+    block->x0 = pos_x >> SCALE_SUBPEL_BITS;
+    block->y0 = pos_y >> SCALE_SUBPEL_BITS;
+
+#if CONFIG_D071_IMP_MSK_BLD
+    block->x1 =
+        ((pos_x + (inter_pred_params->block_width - 1) * subpel_params->xs) >>
+         SCALE_SUBPEL_BITS) +
+        1;
+    block->y1 =
+        ((pos_y + (inter_pred_params->block_height - 1) * subpel_params->ys) >>
+         SCALE_SUBPEL_BITS) +
+        1;
+#else
+    // Get reference block bottom right coordinate.
+    block->x1 =
+        ((pos_x + (bw - 1) * subpel_params->xs) >> SCALE_SUBPEL_BITS) + 1;
+    block->y1 =
+        ((pos_y + (bh - 1) * subpel_params->ys) >> SCALE_SUBPEL_BITS) + 1;
+#endif  // CONFIG_D071_IMP_MSK_BLD
+
+    MV temp_mv;
+    temp_mv = tip_clamp_mv_to_umv_border_sb(inter_pred_params, src_mv, bw, bh,
+#if CONFIG_OPTFLOW_REFINEMENT
+                                            use_optflow_refinement,
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+                                            inter_pred_params->subsampling_x,
+                                            inter_pred_params->subsampling_y);
+    *scaled_mv = av1_scale_mv(&temp_mv, mi_x, mi_y, sf);
+    scaled_mv->row += SCALE_EXTRA_OFF;
+    scaled_mv->col += SCALE_EXTRA_OFF;
+
+    *subpel_x_mv = scaled_mv->col & SCALE_SUBPEL_MASK;
+    *subpel_y_mv = scaled_mv->row & SCALE_SUBPEL_MASK;
+  } else {
+    // Get block position in current frame.
+    int pos_x = inter_pred_params->pix_col << SUBPEL_BITS;
+    int pos_y = inter_pred_params->pix_row << SUBPEL_BITS;
+
+    const MV mv_q4 = tip_clamp_mv_to_umv_border_sb(
+        inter_pred_params, src_mv, bw, bh,
+#if CONFIG_OPTFLOW_REFINEMENT
+        use_optflow_refinement,
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+        inter_pred_params->subsampling_x, inter_pred_params->subsampling_y);
+    subpel_params->xs = subpel_params->ys = SCALE_SUBPEL_SHIFTS;
+    subpel_params->subpel_x = (mv_q4.col & SUBPEL_MASK) << SCALE_EXTRA_BITS;
+    subpel_params->subpel_y = (mv_q4.row & SUBPEL_MASK) << SCALE_EXTRA_BITS;
+
+    // Get reference block top left coordinate.
+    pos_x += mv_q4.col;
+    pos_y += mv_q4.row;
+    pos_x = (pos_x >> SUBPEL_BITS);
+    pos_y = (pos_y >> SUBPEL_BITS);
+    block->x0 = pos_x;
+    block->y0 = pos_y;
+
+    // Get reference block bottom right coordinate.
+#if CONFIG_D071_IMP_MSK_BLD
+    block->x1 = pos_x + inter_pred_params->block_width;
+    block->y1 = pos_y + inter_pred_params->block_height;
+#else
+    block->x1 = pos_x + bw;
+    block->y1 = pos_y + bh;
+#endif  // CONFIG_D071_IMP_MSK_BLD
+
+    scaled_mv->row = mv_q4.row;
+    scaled_mv->col = mv_q4.col;
+    *subpel_x_mv = scaled_mv->col & SUBPEL_MASK;
+    *subpel_y_mv = scaled_mv->row & SUBPEL_MASK;
+  }
+  *pre = pre_buf->buf0 + block->y0 * pre_buf->stride + block->x0;
+  *src_stride = pre_buf->stride;
+#if CONFIG_D071_IMP_MSK_BLD
+  if (inter_pred_params->border_data.enable_bacp) {
+    subpel_params->x0 = block->x0;
+    subpel_params->x1 = block->x1;
+    subpel_params->y0 = block->y0;
+    subpel_params->y1 = block->y1;
+  }
+#endif  // CONFIG_D071_IMP_MSK_BLD
+}
+
+void tip_common_calc_subpel_params_and_extend(
+    const MV *const src_mv, InterPredParams *const inter_pred_params,
+    MACROBLOCKD *const xd, int mi_x, int mi_y, int ref,
+#if CONFIG_OPTFLOW_REFINEMENT
+    int use_optflow_refinement,
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+    uint16_t **mc_buf, uint16_t **pre, SubpelParams *subpel_params,
+    int *src_stride) {
+  (void)ref;
+  (void)mc_buf;
+  (void)xd;
+
+  PadBlock block;  // reference block rectangle, filled in below
+  MV32 scaled_mv;
+  int subpel_x_mv, subpel_y_mv;
+  assert(inter_pred_params->use_ref_padding);
+
+  tip_dec_calc_subpel_params(src_mv, inter_pred_params, mi_x, mi_y, pre,
+                             subpel_params, src_stride, &block,
+#if CONFIG_OPTFLOW_REFINEMENT
+                             use_optflow_refinement,
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+                             &scaled_mv, &subpel_x_mv, &subpel_y_mv);
+
+  const int paded_ref_buf_stride =
+      inter_pred_params->ref_area->paded_ref_buf_stride;
+  refinemv_extend_mc_border(
+      inter_pred_params->scale_factors, &inter_pred_params->ref_frame_buf,
+      scaled_mv, block, subpel_x_mv, subpel_y_mv,
+      inter_pred_params->mode == WARP_PRED, inter_pred_params->is_intrabc,
+      &inter_pred_params->ref_area->paded_ref_buf[0], paded_ref_buf_stride, pre,
+      src_stride, inter_pred_params->ref_area);
+}
+#endif  // CONFIG_TIP
+
+void dec_calc_subpel_params(const MV *const src_mv,
+                            InterPredParams *const inter_pred_params,
+                            const MACROBLOCKD *const xd, int mi_x, int mi_y,
+                            uint16_t **pre, SubpelParams *subpel_params,
+                            int *src_stride, PadBlock *block,
+#if CONFIG_OPTFLOW_REFINEMENT
+                            int use_optflow_refinement,
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+                            MV32 *scaled_mv, int *subpel_x_mv,
+                            int *subpel_y_mv) {
+  const struct scale_factors *sf = inter_pred_params->scale_factors;
+  struct buf_2d *pre_buf = &inter_pred_params->ref_frame_buf;
+
+#if CONFIG_REFINEMV
+  const int bw = inter_pred_params->original_pu_width;
+  const int bh = inter_pred_params->original_pu_height;
+#else
+
+#if CONFIG_OPTFLOW_REFINEMENT
+  // Use original block size to clamp MV and to extend block boundary
+  const int bw = use_optflow_refinement ? inter_pred_params->orig_block_width
+                                        : inter_pred_params->block_width;
+  const int bh = use_optflow_refinement ? inter_pred_params->orig_block_height
+                                        : inter_pred_params->block_height;
+#else
+  const int bw = inter_pred_params->block_width;
+  const int bh = inter_pred_params->block_height;
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+#endif  // CONFIG_REFINEMV
+
+  const int is_scaled = av1_is_scaled(sf);
+  if (is_scaled) {
+    int ssx = inter_pred_params->subsampling_x;
+    int ssy = inter_pred_params->subsampling_y;
+    int orig_pos_y = inter_pred_params->pix_row << SUBPEL_BITS;
+    int orig_pos_x = inter_pred_params->pix_col << SUBPEL_BITS;
+#if CONFIG_OPTFLOW_REFINEMENT
+    if (use_optflow_refinement) {
+      orig_pos_y += ROUND_POWER_OF_TWO_SIGNED(src_mv->row * (1 << SUBPEL_BITS),
+                                              MV_REFINE_PREC_BITS + ssy);
+      orig_pos_x += ROUND_POWER_OF_TWO_SIGNED(src_mv->col * (1 << SUBPEL_BITS),
+                                              MV_REFINE_PREC_BITS + ssx);
+    } else {
+      orig_pos_y += src_mv->row * (1 << (1 - ssy));
+      orig_pos_x += src_mv->col * (1 << (1 - ssx));
+    }
+#else
+    orig_pos_y += src_mv->row * (1 << (1 - ssy));
+    orig_pos_x += src_mv->col * (1 << (1 - ssx));
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+    int pos_y = sf->scale_value_y(orig_pos_y, sf);
+    int pos_x = sf->scale_value_x(orig_pos_x, sf);
+    pos_x += SCALE_EXTRA_OFF;
+    pos_y += SCALE_EXTRA_OFF;
+
+    const int top = -AOM_LEFT_TOP_MARGIN_SCALED(ssy);
+    const int left = -AOM_LEFT_TOP_MARGIN_SCALED(ssx);
+    const int bottom = (pre_buf->height + AOM_INTERP_EXTEND)
+                       << SCALE_SUBPEL_BITS;
+    const int right = (pre_buf->width + AOM_INTERP_EXTEND) << SCALE_SUBPEL_BITS;
+    pos_y = clamp(pos_y, top, bottom);
+    pos_x = clamp(pos_x, left, right);
+
+    subpel_params->subpel_x = pos_x & SCALE_SUBPEL_MASK;
+    subpel_params->subpel_y = pos_y & SCALE_SUBPEL_MASK;
+    subpel_params->xs = sf->x_step_q4;
+    subpel_params->ys = sf->y_step_q4;
+
+    // Get reference block top left coordinate.
+    block->x0 = pos_x >> SCALE_SUBPEL_BITS;
+    block->y0 = pos_y >> SCALE_SUBPEL_BITS;
+
+    // Get reference block bottom right coordinate.
+    block->x1 =
+        ((pos_x + (inter_pred_params->block_width - 1) * subpel_params->xs) >>
+         SCALE_SUBPEL_BITS) +
+        1;
+    block->y1 =
+        ((pos_y + (inter_pred_params->block_height - 1) * subpel_params->ys) >>
+         SCALE_SUBPEL_BITS) +
+        1;
+
+    MV temp_mv;
+    temp_mv = clamp_mv_to_umv_border_sb(xd, src_mv, bw, bh,
+#if CONFIG_OPTFLOW_REFINEMENT
+                                        use_optflow_refinement,
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+                                        inter_pred_params->subsampling_x,
+                                        inter_pred_params->subsampling_y);
+    *scaled_mv = av1_scale_mv(&temp_mv, mi_x, mi_y, sf);
+    scaled_mv->row += SCALE_EXTRA_OFF;
+    scaled_mv->col += SCALE_EXTRA_OFF;
+
+    *subpel_x_mv = scaled_mv->col & SCALE_SUBPEL_MASK;
+    *subpel_y_mv = scaled_mv->row & SCALE_SUBPEL_MASK;
+  } else {
+    // Get block position in current frame.
+    int pos_x = inter_pred_params->pix_col << SUBPEL_BITS;
+    int pos_y = inter_pred_params->pix_row << SUBPEL_BITS;
+
+    const MV mv_q4 = clamp_mv_to_umv_border_sb(
+        xd, src_mv, bw, bh,
+#if CONFIG_OPTFLOW_REFINEMENT
+        use_optflow_refinement,
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+        inter_pred_params->subsampling_x, inter_pred_params->subsampling_y);
+    subpel_params->xs = subpel_params->ys = SCALE_SUBPEL_SHIFTS;
+    subpel_params->subpel_x = (mv_q4.col & SUBPEL_MASK) << SCALE_EXTRA_BITS;
+    subpel_params->subpel_y = (mv_q4.row & SUBPEL_MASK) << SCALE_EXTRA_BITS;
+
+    // Get reference block top left coordinate.
+    pos_x += mv_q4.col;
+    pos_y += mv_q4.row;
+    block->x0 = pos_x >> SUBPEL_BITS;
+    block->y0 = pos_y >> SUBPEL_BITS;
+
+    // Get reference block bottom right coordinate.
+    block->x1 =
+        (pos_x >> SUBPEL_BITS) + (inter_pred_params->block_width - 1) + 1;
+    block->y1 =
+        (pos_y >> SUBPEL_BITS) + (inter_pred_params->block_height - 1) + 1;
+
+    scaled_mv->row = mv_q4.row;
+    scaled_mv->col = mv_q4.col;
+    *subpel_x_mv = scaled_mv->col & SUBPEL_MASK;
+    *subpel_y_mv = scaled_mv->row & SUBPEL_MASK;
+  }
+  *pre = pre_buf->buf0 + block->y0 * pre_buf->stride + block->x0;
+  *src_stride = pre_buf->stride;
+
+#if CONFIG_D071_IMP_MSK_BLD
+  if (inter_pred_params->border_data.enable_bacp) {
+    subpel_params->x0 = block->x0;
+    subpel_params->x1 = block->x1;
+    subpel_params->y0 = block->y0;
+    subpel_params->y1 = block->y1;
+  }
+#endif  // CONFIG_D071_IMP_MSK_BLD
+}
+// Computes subpel params, then calls refinemv_extend_mc_border() on the block.
+void common_calc_subpel_params_and_extend(
+    const MV *const src_mv, InterPredParams *const inter_pred_params,
+    MACROBLOCKD *const xd, int mi_x, int mi_y, int ref,
+#if CONFIG_OPTFLOW_REFINEMENT
+    int use_optflow_refinement,
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+    uint16_t **mc_buf, uint16_t **pre, SubpelParams *subpel_params,
+    int *src_stride) {
+  (void)ref;
+  (void)mc_buf;
+  // Only valid when the caller enabled reference padding (asserted below).
+  PadBlock block;
+  MV32 scaled_mv;
+  int subpel_x_mv, subpel_y_mv;
+  assert(inter_pred_params->use_ref_padding);
+  dec_calc_subpel_params(src_mv, inter_pred_params, xd, mi_x, mi_y, pre,
+                         subpel_params, src_stride, &block,
+#if CONFIG_OPTFLOW_REFINEMENT
+                         use_optflow_refinement,
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+                         &scaled_mv, &subpel_x_mv, &subpel_y_mv);
+
+  // Pass the padded reference buffer and its stride to the border extension.
+  const int paded_ref_buf_stride =
+      inter_pred_params->ref_area->paded_ref_buf_stride;
+  refinemv_extend_mc_border(
+      inter_pred_params->scale_factors, &inter_pred_params->ref_frame_buf,
+      scaled_mv, block, subpel_x_mv, subpel_y_mv,
+      inter_pred_params->mode == WARP_PRED, inter_pred_params->is_intrabc,
+      &inter_pred_params->ref_area->paded_ref_buf[0], paded_ref_buf_stride, pre,
+      src_stride, inter_pred_params->ref_area);
+}
+// Derives the padded reference block for one reference, clipped to the frame.
+static void get_ref_area_info(const MV *const src_mv,
+                              InterPredParams *const inter_pred_params,
+                              MACROBLOCKD *const xd, int mi_x, int mi_y,
+#if CONFIG_OPTFLOW_REFINEMENT
+                              int use_optflow_refinement,
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+                              uint16_t **pre, SubpelParams *subpel_params,
+                              int *src_stride, ReferenceArea *ref_area,
+                              int is_tip) {
+  PadBlock block;
+  MV32 scaled_mv;
+  int subpel_x_mv, subpel_y_mv;
+  // TIP blocks use the TIP-specific subpel calculation (it takes no xd).
+  if (is_tip) {
+    tip_dec_calc_subpel_params(src_mv, inter_pred_params, mi_x, mi_y, pre,
+                               subpel_params, src_stride, &block,
+#if CONFIG_OPTFLOW_REFINEMENT
+                               use_optflow_refinement,
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+                               &scaled_mv, &subpel_x_mv, &subpel_y_mv);
+
+  } else {
+    dec_calc_subpel_params(src_mv, inter_pred_params, xd, mi_x, mi_y, pre,
+                           subpel_params, src_stride, &block,
+#if CONFIG_OPTFLOW_REFINEMENT
+                           use_optflow_refinement,
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+                           &scaled_mv, &subpel_x_mv, &subpel_y_mv);
+  }
+  // Grow the reference block by the REF_*_BORDER margins on each side.
+  struct buf_2d *const pre_buf = &inter_pred_params->ref_frame_buf;
+  int frame_height = pre_buf->height;
+  int frame_width = pre_buf->width;
+  block.x0 -= REF_LEFT_BORDER;
+  block.x1 += REF_RIGHT_BORDER;
+  block.y0 -= REF_TOP_BORDER;
+  block.y1 += REF_BOTTOM_BORDER;
+  // Top-left uses an upper limit of dim - 1; bottom-right may reach dim.
+  ref_area->pad_block.x0 = CLIP(block.x0, 0, frame_width - 1);
+  ref_area->pad_block.y0 = CLIP(block.y0, 0, frame_height - 1);
+  ref_area->pad_block.x1 = CLIP(block.x1, 0, frame_width);
+  ref_area->pad_block.y1 = CLIP(block.y1, 0, frame_height);
+}
+// Fills ref_area[0..1] for a block; TIP blocks use comp_pixel_x/y as position.
+void av1_get_reference_area_with_padding(const AV1_COMMON *cm, MACROBLOCKD *xd,
+                                         int plane, MB_MODE_INFO *mi, int bw,
+                                         int bh, int mi_x, int mi_y,
+                                         ReferenceArea ref_area[2],
+                                         const int comp_pixel_x,
+                                         const int comp_pixel_y) {
+  const int is_tip = mi->ref_frame[0] == TIP_FRAME;
+  assert(IMPLIES(!is_tip, has_second_ref(mi)));
+  assert(!is_intrabc_block(mi, xd->tree_type));
+  struct macroblockd_plane *const pd = &xd->plane[plane];
+  // Non-zero planes start at the chroma reference base; luma uses no offset.
+  int row_start = 0;
+  int col_start = 0;
+  const int mi_row = -xd->mb_to_top_edge >> MI_SUBPEL_SIZE_LOG2;
+  const int mi_col = -xd->mb_to_left_edge >> MI_SUBPEL_SIZE_LOG2;
+  row_start = plane ? (mi->chroma_ref_info.mi_row_chroma_base - mi_row) : 0;
+  col_start = plane ? (mi->chroma_ref_info.mi_col_chroma_base - mi_col) : 0;
+  // Block position passed to av1_init_inter_params(); TIP supplies it directly.
+  const int pre_x = is_tip
+                        ? comp_pixel_x
+                        : ((mi_x + MI_SIZE * col_start) >> pd->subsampling_x);
+  const int pre_y = is_tip
+                        ? comp_pixel_y
+                        : ((mi_y + MI_SIZE * row_start) >> pd->subsampling_y);
+
+  for (int ref = 0; ref < 2; ++ref) {
+    const struct scale_factors *const sf =
+        is_tip ? cm->tip_ref.ref_scale_factor[ref]
+               : xd->block_ref_scale_factors[ref];
+    const struct buf_2d *const pre_buf =
+        is_tip ? &cm->tip_ref.tip_plane[plane].pred[ref] : &pd->pre[ref];
+
+    // Default to the whole frame; get_ref_area_info() overwrites pad_block.
+    ref_area[ref].pad_block.x0 = 0;
+    ref_area[ref].pad_block.y0 = 0;
+    ref_area[ref].pad_block.x1 = cm->width;
+    ref_area[ref].pad_block.y1 = cm->height;
+    ref_area[ref].paded_ref_buf_stride = REF_BUFFER_WIDTH;
+
+    InterPredParams inter_pred_params;
+    av1_init_inter_params(&inter_pred_params, bw, bh, pre_y, pre_x,
+                          pd->subsampling_x, pd->subsampling_y, xd->bd,
+                          mi->use_intrabc[0], sf, pre_buf,
+                          is_tip ? MULTITAP_SHARP : mi->interp_fltr);
+
+    inter_pred_params.original_pu_width = bw;
+    inter_pred_params.original_pu_height = bh;
+
+#if CONFIG_TIP
+    const int width = (cm->mi_params.mi_cols << MI_SIZE_LOG2);
+    const int height = (cm->mi_params.mi_rows << MI_SIZE_LOG2);
+    inter_pred_params.dist_to_top_edge = -GET_MV_SUBPEL(pre_y);
+    inter_pred_params.dist_to_bottom_edge = GET_MV_SUBPEL(height - bh - pre_y);
+    inter_pred_params.dist_to_left_edge = -GET_MV_SUBPEL(pre_x);
+    inter_pred_params.dist_to_right_edge = GET_MV_SUBPEL(width - bw - pre_x);
+#endif
+
+    SubpelParams subpel_params;
+    uint16_t *src;
+    int src_stride;
+    // av1_init_inter_params() is expected to leave ref padding disabled.
+    assert(!inter_pred_params.use_ref_padding);
+    // The area is derived from the block's stored MV for this reference.
+    MV *src_mv = ref == 0 ? &mi->mv[0].as_mv : &mi->mv[1].as_mv;
+    get_ref_area_info(src_mv, &inter_pred_params, xd, mi_x, mi_y,
+#if CONFIG_OPTFLOW_REFINEMENT
+                      0, /* use_optflow_refinement */
+#endif                   // CONFIG_OPTFLOW_REFINEMENT
+                      &src, &subpel_params, &src_stride, &ref_area[ref],
+                      is_tip);
+  }
+}
+// Builds both predictions for mv0/mv1 and returns get_refinemv_sad() of them.
+int av1_refinemv_build_predictors_and_get_sad(
+    MACROBLOCKD *xd, int bw, int bh, int mi_x, int mi_y, uint16_t **mc_buf,
+    CalcSubpelParamsFunc calc_subpel_params_func, uint16_t *dst_ref0,
+    uint16_t *dst_ref1, MV mv0, MV mv1, InterPredParams *inter_pred_params) {
+  for (int ref = 0; ref < 2; ref++) {
+    SubpelParams subpel_params;
+    uint16_t *src;
+    int src_stride;
+    uint16_t *dst_ref = ref == 0 ? dst_ref0 : dst_ref1;
+    MV *src_mv = ref == 0 ? &mv0 : &mv1;
+    calc_subpel_params_func(src_mv, &inter_pred_params[ref], xd, mi_x, mi_y,
+                            ref,
+#if CONFIG_OPTFLOW_REFINEMENT
+                            0, /* use_optflow_refinement */
+#endif                         // CONFIG_OPTFLOW_REFINEMENT
+                            mc_buf, &src, &subpel_params, &src_stride);
+    assert(inter_pred_params[ref].comp_mode == UNIFORM_SINGLE ||
+           inter_pred_params[ref].comp_mode == UNIFORM_COMP);
+    av1_make_inter_predictor(src, src_stride, dst_ref, bw,
+                             &inter_pred_params[ref], &subpel_params);
+  }
+  // The cost compares the two predictions against each other.
+  return get_refinemv_sad(dst_ref0, dst_ref1, bw, bh, xd->bd);
+}
+void apply_mv_refinement(const AV1_COMMON *cm, MACROBLOCKD *xd, int plane,
+                         MB_MODE_INFO *mi, int bw, int bh, int mi_x, int mi_y,
+                         uint16_t **mc_buf,
+                         CalcSubpelParamsFunc calc_subpel_params_func,
+                         int pre_x, int pre_y, uint16_t *dst_ref0,
+                         uint16_t *dst_ref1, MV *best_mv_ref, int pu_width,
+                         int pu_height) {
+  // Refines mi->mv[0]/[1] as a mirrored pair; start from the unrefined MVs.
+  best_mv_ref[0] = mi->mv[0].as_mv;
+  best_mv_ref[1] = mi->mv[1].as_mv;
+
+  const MV center_mvs[2] = { best_mv_ref[0], best_mv_ref[1] };
+  assert(mi->refinemv_flag < REFINEMV_NUM_MODES);
+  assert(cm->seq_params.enable_refinemv);
+
+  // Generate MV independent inter_pred_params for both references
+  InterPredParams inter_pred_params[2];
+  for (int ref = 0; ref < 2; ref++) {
+    const int is_compound = 0;
+    const int is_intrabc = is_intrabc_block(mi, xd->tree_type);
+    const int is_tip = mi->ref_frame[0] == TIP_FRAME;
+
+    assert(is_intrabc == 0);
+    assert(plane == 0);
+    struct macroblockd_plane *const pd = &xd->plane[plane];
+    struct buf_2d *const dst_buf = &pd->dst;
+
+    const struct scale_factors *const sf =
+        is_tip ? cm->tip_ref.ref_scale_factor[ref]
+               : (is_intrabc ? &cm->sf_identity
+                             : xd->block_ref_scale_factors[ref]);
+    const struct buf_2d *const pre_buf =
+        is_tip ? &cm->tip_ref.tip_plane[plane].pred[ref]
+               : (is_intrabc ? dst_buf : &pd->pre[ref]);
+    // BILINEAR is used for the search predictions, not mi->interp_fltr.
+    av1_init_inter_params(&inter_pred_params[ref], bw, bh, pre_y, pre_x,
+                          pd->subsampling_x, pd->subsampling_y, xd->bd,
+                          mi->use_intrabc[0], sf, pre_buf, BILINEAR);
+
+#if CONFIG_REFINEMV
+    inter_pred_params[ref].original_pu_width = pu_width;
+    inter_pred_params[ref].original_pu_height = pu_height;
+#endif  // CONFIG_REFINEMV
+
+#if CONFIG_TIP
+    const int width = (cm->mi_params.mi_cols << MI_SIZE_LOG2);
+    const int height = (cm->mi_params.mi_rows << MI_SIZE_LOG2);
+    inter_pred_params[ref].dist_to_top_edge = -GET_MV_SUBPEL(pre_y);
+    inter_pred_params[ref].dist_to_bottom_edge =
+        GET_MV_SUBPEL(height - bh - pre_y);
+    inter_pred_params[ref].dist_to_left_edge = -GET_MV_SUBPEL(pre_x);
+    inter_pred_params[ref].dist_to_right_edge =
+        GET_MV_SUBPEL(width - bw - pre_x);
+#endif
+    // Search predictions are built as single (non-compound) references.
+    inter_pred_params[ref].conv_params = get_conv_params_no_round(
+        0, plane, xd->tmp_conv_dst, MAX_SB_SIZE, is_compound, xd->bd);
+
+    assert(inter_pred_params[ref].mode == TRANSLATION_PRED);
+    assert(inter_pred_params[ref].comp_mode == UNIFORM_SINGLE);
+    assert(inter_pred_params[ref].conv_params.is_compound == 0);
+    assert(inter_pred_params[ref].conv_params.do_average == 0);
+    assert(mi->interinter_comp.type == COMPOUND_AVERAGE);
+  }
+
+#if !SINGLE_STEP_SEARCH
+  // Search integer-delta values
+  int search_range = 2;
+#endif
+
+  int switchable_refinemv_flags =
+      (mi->ref_frame[0] != TIP_FRAME) && switchable_refinemv_flag(cm, mi);
+  assert(mi->refinemv_flag);
+
+  // When the refinemv flag is switchable, the center point must not win,
+  // so sad0 is set to a large value instead of being computed.
+  int sad0 = switchable_refinemv_flags
+                 ? (INT32_MAX >> 1)
+                 : av1_refinemv_build_predictors_and_get_sad(
+                       xd, bw, bh, mi_x, mi_y, mc_buf, calc_subpel_params_func,
+                       dst_ref0, dst_ref1, center_mvs[0], center_mvs[1],
+                       inter_pred_params);
+  // TIP blocks (always 8x8, see assert) stop early on a small center SAD.
+  assert(IMPLIES(mi->ref_frame[0] == TIP_FRAME, bw == 8 && bh == 8));
+  if (mi->ref_frame[0] == TIP_FRAME) {
+    const int tip_sad_thres = bw * bh;
+    if (!switchable_refinemv_flags && sad0 < tip_sad_thres) return;
+  }
+  // Favor the center: its SAD is reduced by 1/8 before any comparison.
+  if (!switchable_refinemv_flags) {
+    int shift = 3;
+    int th = (bw * bh) << 1;
+    sad0 -= (sad0 >> shift);
+    assert(sad0 >= 0);
+    if (sad0 < th) return;
+  }
+
+  int min_sad = sad0;
+  MV refined_mv0, refined_mv1;
+  refined_mv0 = center_mvs[0];
+  refined_mv1 = center_mvs[1];
+  int et_sad_th = (bw * bh) << 1;
+
+#if !SINGLE_STEP_SEARCH
+  uint8_t already_searched[5][5];
+  for (int i = 0; i < 5; i++) {
+    for (int j = 0; j < 5; j++) {
+      already_searched[i][j] = 0;
+    }
+  }
+#endif
+
+  MV best_offset = { 0, 0 };
+
+#if SINGLE_STEP_SEARCH
+  const int num_neighbors = 24;
+  static const MV neighbors[24] = {
+    { -1, -1 }, { -1, 0 }, { -1, 1 }, { 0, 1 },   { 1, 1 },   { 1, 0 },
+    { 1, -1 },  { 0, -1 }, { 0, -2 }, { -1, -2 }, { -2, -2 }, { -2, -1 },
+    { -2, 0 },  { -2, 1 }, { -2, 2 }, { -1, 2 },  { 0, 2 },   { 1, 2 },
+    { 2, 2 },   { 2, 1 },  { 2, 0 },  { 2, -1 },  { 2, -2 },  { 1, -2 }
+
+  };
+
+#else
+  const int num_neighbors = 8;
+  // Apply two-step full pel refinement
+  static const MV neighbors[8] = { { 0, -1 }, { 1, 0 }, { 0, 1 },   { -1, 0 },
+                                   { 1, -1 }, { 1, 1 }, { -1, -1 }, { -1, 1 } };
+
+  const int num_iterations = search_range;
+  already_searched[0 + search_range][0 + search_range] =
+      1;  // center point is already searched before
+  for (int ite = 0; ite < num_iterations; ++ite) {
+#endif  // SINGLE_STEP_SEARCH
+  // -1 means no neighbor improved on the current best offset.
+  int best_idx = -1;
+
+  for (int idx = 0; idx < num_neighbors; ++idx) {
+    MV offset = { best_offset.row + neighbors[idx].row,
+                  best_offset.col + neighbors[idx].col };
+#if !SINGLE_STEP_SEARCH
+    if (already_searched[offset.row + search_range][offset.col + search_range])
+      continue;
+#endif
+    refined_mv0.row = center_mvs[0].row + 8 * offset.row;
+    refined_mv0.col = center_mvs[0].col + 8 * offset.col;
+    refined_mv1.row = center_mvs[1].row - 8 * offset.row;
+    refined_mv1.col = center_mvs[1].col - 8 * offset.col;
+    // Cost of this candidate pair (the ref1 offset mirrors the ref0 offset).
+    int this_sad = av1_refinemv_build_predictors_and_get_sad(
+        xd, bw, bh, mi_x, mi_y, mc_buf, calc_subpel_params_func, dst_ref0,
+        dst_ref1, refined_mv0, refined_mv1, inter_pred_params);
+
+#if !SINGLE_STEP_SEARCH
+    already_searched[offset.row + search_range][offset.col + search_range] = 1;
+#endif
+
+    if (this_sad < min_sad) {
+      min_sad = this_sad;
+      best_idx = idx;
+      // if the SAD is less than predefined threshold consider this candidate
+      // as good enough to skip rest of the search.
+      if (min_sad < et_sad_th) {
+        best_mv_ref[0] = refined_mv0;
+        best_mv_ref[1] = refined_mv1;
+        return;
+      }
+    }
+  }
+
+  // if the center is best, skip rest of the search.
+  if (best_idx == -1) {
+    best_mv_ref[0].row = center_mvs[0].row + 8 * best_offset.row;
+    best_mv_ref[0].col = center_mvs[0].col + 8 * best_offset.col;
+    best_mv_ref[1].row = center_mvs[1].row - 8 * best_offset.row;
+    best_mv_ref[1].col = center_mvs[1].col - 8 * best_offset.col;
+
+    return;
+  }
+  // Move the search center to the best neighbor of this pass.
+  if (best_idx >= 0) {
+    best_offset.row += neighbors[best_idx].row;
+    best_offset.col += neighbors[best_idx].col;
+  }
+#if !SINGLE_STEP_SEARCH
+}
+#endif
+// Final refined pair: ref0 gets +8 * best_offset, ref1 gets the negation.
+best_mv_ref[0].row = center_mvs[0].row + 8 * best_offset.row;
+best_mv_ref[0].col = center_mvs[0].col + 8 * best_offset.col;
+best_mv_ref[1].row = center_mvs[1].row - 8 * best_offset.row;
+best_mv_ref[1].col = center_mvs[1].col - 8 * best_offset.col;
+
+assert(min_sad <= sad0);
+// With a switchable flag the result must differ from the unrefined MVs.
+assert(IMPLIES(switchable_refinemv_flags,
+               !(best_mv_ref[0].row == center_mvs[0].row &&
+                 best_mv_ref[0].col == center_mvs[0].col &&
+                 best_mv_ref[1].row == center_mvs[1].row &&
+                 best_mv_ref[1].col == center_mvs[1].col)));
+}
+// Builds the inter prediction of one refinemv sub-block for the given plane.
+static void build_inter_predictors_8x8_and_bigger_refinemv(
+    const AV1_COMMON *cm, MACROBLOCKD *xd, int plane, MB_MODE_INFO *mi,
+    int build_for_obmc, int bw, int bh, int mi_x, int mi_y, uint16_t **mc_buf,
+    CalcSubpelParamsFunc calc_subpel_params_func, uint16_t *dst, int dst_stride,
+    int pu_width, int pu_height, uint16_t *dst0_16_refinemv,
+    uint16_t *dst1_16_refinemv, int16_t *opt_gx0, int16_t *opt_gx1,
+    int row_start, int col_start, MV *sb_refined_mv, MV *chroma_refined_mv,
+    int build_for_refine_mv_only, ReferenceArea ref_area[2]) {
+  const int is_compound = has_second_ref(mi);
+  struct macroblockd_plane *const pd = &xd->plane[plane];
+  assert(!is_intrabc_block(mi, xd->tree_type));
+  assert(is_compound);
+  assert(!mi->bawp_flag);
+  assert(!build_for_obmc);
+  assert(!is_masked_compound_type(mi->interinter_comp.type));
+  assert(!is_tip_ref_frame(mi->ref_frame[0]));
+
+#if CONFIG_CWP
+  assert(mi->cwp_idx == CWP_EQUAL);
+#endif
+
+  int is_global[2] = { 0, 0 };
+  for (int ref = 0; ref < 1 + is_compound; ++ref) {
+#if CONFIG_TIP
+    if (!is_tip_ref_frame(mi->ref_frame[ref])) {
+#endif  // CONFIG_TIP
+      const WarpedMotionParams *const wm =
+          &xd->global_motion[mi->ref_frame[ref]];
+      is_global[ref] = is_global_mv_block(mi, wm->wmtype);
+#if CONFIG_TIP
+    }
+#endif  // CONFIG_TIP
+  }
+
+  assert(!is_global[0] && !is_global[1]);
+
+  const int pre_x = (mi_x + MI_SIZE * col_start) >> pd->subsampling_x;
+  const int pre_y = (mi_y + MI_SIZE * row_start) >> pd->subsampling_y;
+  // MV search runs on luma only; chroma reuses MVs from chroma_refined_mv.
+  int apply_refinemv = (plane == 0);
+
+  MV best_mv_ref[2] = { { mi->mv[0].as_mv.row, mi->mv[0].as_mv.col },
+                        { mi->mv[1].as_mv.row, mi->mv[1].as_mv.col } };
+  if (apply_refinemv) {
+    uint16_t *dst_ref0 = NULL, *dst_ref1 = NULL;
+    dst_ref0 = &dst0_16_refinemv[0];
+    dst_ref1 = &dst1_16_refinemv[0];
+
+    assert(IMPLIES(!mi->skip_mode,
+                   is_refinemv_allowed(cm, mi, mi->sb_type[PLANE_TYPE_Y])));
+    assert(IMPLIES(mi->skip_mode, is_refinemv_allowed_skip_mode(cm, mi)));
+    apply_mv_refinement(cm, xd, plane, mi, bw, bh, mi_x, mi_y, mc_buf,
+                        calc_subpel_params_func, pre_x, pre_y, dst_ref0,
+                        dst_ref1, best_mv_ref, pu_width, pu_height);
+    if (sb_refined_mv) {
+      // store the DMVR refined MV so that chroma can use it
+      sb_refined_mv[0] = best_mv_ref[0];
+      sb_refined_mv[1] = best_mv_ref[1];
+    }
+    assert(IMPLIES(plane, !build_for_refine_mv_only));
+    // If build_for_refine_mv_only is non-zero, the caller only needs the
+    // refined MVs; the actual prediction values are not necessary.
+    if (build_for_refine_mv_only) {
+      return;
+    }
+  } else {
+    best_mv_ref[0] = chroma_refined_mv[0];
+    best_mv_ref[1] = chroma_refined_mv[1];
+  }
+
+#if CONFIG_OPTFLOW_REFINEMENT
+  int_mv mv_refined[2 * N_OF_OFFSETS];
+  const int use_optflow_refinement =
+      (mi->mode >= NEAR_NEARMV_OPTFLOW ||
+       (cm->features.opfl_refine_type == REFINE_ALL &&
+        mi->mode != GLOBAL_GLOBALMV &&
+        mi->interinter_comp.type == COMPOUND_AVERAGE)) &&
+      is_compound && is_opfl_refine_allowed(cm, mi);
+  assert(IMPLIES(use_optflow_refinement,
+                 cm->features.opfl_refine_type != REFINE_NONE));
+  assert(IMPLIES(use_optflow_refinement, !build_for_obmc));
+
+  // Optical flow refinement with masked comp types or with non-sharp
+  // interpolation filter should only exist in REFINE_ALL.
+  assert(IMPLIES(
+      use_optflow_refinement && mi->interinter_comp.type != COMPOUND_AVERAGE,
+      cm->features.opfl_refine_type == REFINE_ALL));
+  assert(IMPLIES(use_optflow_refinement && mi->interp_fltr != MULTITAP_SHARP,
+                 cm->features.opfl_refine_type == REFINE_ALL));
+
+  // Arrays to hold optical flow offsets.
+  int vx0[N_OF_OFFSETS] = { 0 };
+  int vx1[N_OF_OFFSETS] = { 0 };
+  int vy0[N_OF_OFFSETS] = { 0 };
+  int vy1[N_OF_OFFSETS] = { 0 };
+
+  // Pointers to gradient and dst buffers
+  int16_t *gx0, *gy0, *gx1, *gy1;
+  uint16_t *dst0 = NULL, *dst1 = NULL;
+
+  if (use_optflow_refinement && plane == 0) {
+    // Gradient buffers are supplied by the caller through opt_gx0/opt_gx1;
+    // each holds the x gradients followed by the y gradients, with
+    // REFINEMV_SUBBLOCK_WIDTH * REFINEMV_SUBBLOCK_HEIGHT entries apiece.
+    gx0 = &opt_gx0[0];
+    gx1 = &opt_gx1[0];
+    gy0 = gx0 + (REFINEMV_SUBBLOCK_WIDTH * REFINEMV_SUBBLOCK_HEIGHT);
+    gy1 = gx1 + (REFINEMV_SUBBLOCK_WIDTH * REFINEMV_SUBBLOCK_HEIGHT);
+
+    // Initialize refined mv
+    const MV mv0 = best_mv_ref[0];
+    const MV mv1 = best_mv_ref[1];
+
+    for (int mvi = 0; mvi < N_OF_OFFSETS; mvi++) {
+      mv_refined[mvi * 2].as_mv = mv0;
+      mv_refined[mvi * 2 + 1].as_mv = mv1;
+    }
+    // Refine MV using optical flow. The final output MV will be in 1/16
+    // precision.
+    dst0 = &dst0_16_refinemv[0];
+    dst1 = &dst1_16_refinemv[0];
+    // dst0/dst1 reuse the caller-provided refinemv prediction buffers, so
+    // no heap allocation is needed here.
+
+    av1_get_optflow_based_mv_highbd(cm, xd, plane, mi, mv_refined, bw, bh, mi_x,
+                                    mi_y, mc_buf, calc_subpel_params_func, gx0,
+                                    gy0, gx1, gy1, vx0, vy0, vx1, vy1, dst0,
+                                    dst1
+#if CONFIG_OPTFLOW_ON_TIP
+                                    ,
+                                    1, 1
+#endif  // CONFIG_OPTFLOW_ON_TIP
+                                    ,
+                                    best_mv_ref, pu_width, pu_height);
+  }
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+
+#if CONFIG_D071_IMP_MSK_BLD
+  BacpBlockData bacp_block_data[2 * N_OF_OFFSETS];
+  uint8_t use_bacp = !build_for_obmc && use_border_aware_compound(cm, mi) &&
+                     mi->cwp_idx == CWP_EQUAL &&
+                     cm->features.enable_imp_msk_bld;
+#endif  // CONFIG_D071_IMP_MSK_BLD
+
+  for (int ref = 0; ref < 1 + is_compound; ++ref) {
+    const struct scale_factors *const sf = xd->block_ref_scale_factors[ref];
+    struct buf_2d *const pre_buf = &pd->pre[ref];
+
+    const MV mv = best_mv_ref[ref];
+    const WarpTypesAllowed warp_types = { is_global[ref],
+                                          is_warp_mode(mi->motion_mode) };
+    InterPredParams inter_pred_params;
+    av1_init_inter_params(&inter_pred_params, bw, bh, pre_y, pre_x,
+                          pd->subsampling_x, pd->subsampling_y, xd->bd,
+                          mi->use_intrabc[0], sf, pre_buf, mi->interp_fltr);
+    // Enable reference padding and attach this reference's padded area.
+#if CONFIG_REFINEMV
+    inter_pred_params.use_ref_padding = 1;
+    inter_pred_params.ref_area = &ref_area[ref];
+#endif  // CONFIG_REFINEMV
+
+    inter_pred_params.original_pu_width = pu_width;
+    inter_pred_params.original_pu_height = pu_height;
+
+    if (is_compound) av1_init_comp_mode(&inter_pred_params);
+#if CONFIG_D071_IMP_MSK_BLD
+    inter_pred_params.border_data.enable_bacp = use_bacp;
+    inter_pred_params.border_data.bacp_block_data =
+        &bacp_block_data[0];  // Always point to the first ref
+#endif                        // CONFIG_D071_IMP_MSK_BLD
+    inter_pred_params.conv_params = get_conv_params_no_round(
+        ref, plane, xd->tmp_conv_dst, MAX_SB_SIZE, is_compound, xd->bd);
+
+    if (!build_for_obmc)
+      av1_init_warp_params(&inter_pred_params, &warp_types, ref, xd, mi);
+
+#if CONFIG_D071_IMP_MSK_BLD
+    if (is_compound) {
+      inter_pred_params.sb_type = mi->sb_type[PLANE_TYPE_Y];
+      inter_pred_params.mask_comp = mi->interinter_comp;
+    }
+#endif  // CONFIG_D071_IMP_MSK_BLD
+
+#if CONFIG_OPTFLOW_REFINEMENT
+    if (use_optflow_refinement && plane == 0) {
+      int n = opfl_get_subblock_size(bw, bh, plane
+#if CONFIG_OPTFLOW_ON_TIP
+                                     ,
+                                     1
+#endif  // CONFIG_OPTFLOW_ON_TIP
+      );
+      inter_pred_params.interp_filter_params[0] =
+          av1_get_interp_filter_params_with_block_size(mi->interp_fltr, n);
+
+      inter_pred_params.interp_filter_params[1] =
+          av1_get_interp_filter_params_with_block_size(mi->interp_fltr, n);
+      // Rebuild this reference with the optical-flow refined MVs and move on.
+      av1_opfl_rebuild_inter_predictor(dst, dst_stride, plane, mv_refined,
+                                       &inter_pred_params, xd, mi_x, mi_y, ref,
+                                       mc_buf, calc_subpel_params_func
+#if CONFIG_OPTFLOW_ON_TIP
+                                       ,
+                                       1
+#endif  // CONFIG_OPTFLOW_ON_TIP
+      );
+      continue;
+    }
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+    // Otherwise predict directly with best_mv_ref[ref].
+    av1_build_one_inter_predictor(dst, dst_stride, &mv, &inter_pred_params, xd,
+                                  mi_x, mi_y, ref, mc_buf,
+                                  calc_subpel_params_func);
+  }
+  // NOTE(review): the flag tested below needs CONFIG_OPTFLOW_REFINEMENT.
+#if CONFIG_PEF
+  if (use_optflow_refinement && plane == 0) {
+    enhance_prediction(cm, xd, plane, dst, dst_stride, bw, bh
+#if CONFIG_OPTFLOW_REFINEMENT
+                       ,
+                       mv_refined, use_optflow_refinement
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+
+#if CONFIG_REFINEMV
+                       ,
+                       0, NULL
+#endif  // CONFIG_REFINEMV
+    );
+  }
+#endif  // CONFIG_PEF
+}
+
+#endif  // CONFIG_REFINEMV
+
 static void build_inter_predictors_8x8_and_bigger(
     const AV1_COMMON *cm, MACROBLOCKD *xd, int plane, MB_MODE_INFO *mi,
 #if CONFIG_BAWP
     const BUFFER_SET *dst_orig,
 #endif  // CONFIG_BAWP
     int build_for_obmc, int bw, int bh, int mi_x, int mi_y, uint16_t **mc_buf,
-    CalcSubpelParamsFunc calc_subpel_params_func) {
+    CalcSubpelParamsFunc calc_subpel_params_func
+#if CONFIG_REFINEMV
+    ,
+    int build_for_refine_mv_only
+#endif  // CONFIG_REFINEMV
+) {
   const int is_compound = has_second_ref(mi);
   const int is_intrabc = is_intrabc_block(mi, xd->tree_type);
   assert(IMPLIES(is_intrabc, !is_compound));
@@ -1768,6 +3280,115 @@
   struct buf_2d *const dst_buf = &pd->dst;
   uint16_t *const dst = dst_buf->buf;
 
+#if CONFIG_REFINEMV
+  assert(IMPLIES(mi->refinemv_flag, !is_intrabc));
+  assert(IMPLIES(mi->refinemv_flag && !build_for_obmc, is_compound));
+  assert(IMPLIES(
+      !build_for_obmc && mi->refinemv_flag && switchable_refinemv_flag(cm, mi),
+      mi->interinter_comp.type == COMPOUND_AVERAGE));
+  assert(IMPLIES(mi->refinemv_flag, mi->bawp_flag == 0));
+  assert(IMPLIES(mi->refinemv_flag, mi->interp_fltr == MULTITAP_SHARP));
+
+  int apply_sub_block_refinemv = mi->refinemv_flag && (!build_for_obmc) &&
+                                 !is_tip_ref_frame(mi->ref_frame[0]);
+
+  if (apply_sub_block_refinemv && default_refinemv_modes(mi))
+    apply_sub_block_refinemv &= (mi->comp_group_idx == 0 &&
+                                 mi->interinter_comp.type == COMPOUND_AVERAGE);
+
+  if (apply_sub_block_refinemv) {
+#if CONFIG_CWP
+    assert(IMPLIES(mi->refinemv_flag, mi->cwp_idx == CWP_EQUAL));
+#endif
+    int refinemv_sb_size_width =
+        AOMMIN((REFINEMV_SUBBLOCK_WIDTH >> pd->subsampling_x), bw);
+    int refinemv_sb_size_height =
+        AOMMIN(REFINEMV_SUBBLOCK_HEIGHT >> pd->subsampling_y, bh);
+    uint16_t
+        dst0_16_refinemv[REFINEMV_SUBBLOCK_WIDTH * REFINEMV_SUBBLOCK_HEIGHT];
+    uint16_t
+        dst1_16_refinemv[REFINEMV_SUBBLOCK_WIDTH * REFINEMV_SUBBLOCK_HEIGHT];
+    DECLARE_ALIGNED(
+        32, int16_t,
+        opt_gx0[2 * REFINEMV_SUBBLOCK_WIDTH * REFINEMV_SUBBLOCK_HEIGHT]);
+    DECLARE_ALIGNED(
+        32, int16_t,
+        opt_gx1[2 * REFINEMV_SUBBLOCK_WIDTH * REFINEMV_SUBBLOCK_HEIGHT]);
+
+    ReferenceArea ref_area[2];
+    av1_get_reference_area_with_padding(cm, xd, plane, mi, bw, bh, mi_x, mi_y,
+                                        ref_area, 0, 0);
+
+    int dst_stride = dst_buf->stride;
+    CONV_BUF_TYPE *tmp_conv_dst = xd->tmp_conv_dst;
+    assert(bw % refinemv_sb_size_width == 0);
+    assert(bh % refinemv_sb_size_height == 0);
+    for (int h = 0; h < bh; h += refinemv_sb_size_height) {
+      for (int w = 0; w < bw; w += refinemv_sb_size_width) {
+        dst_buf->buf = dst + h * dst_stride + w;
+        xd->tmp_conv_dst = tmp_conv_dst + h * MAX_SB_SIZE + w;
+
+        const int mi_row = -xd->mb_to_top_edge >> MI_SUBPEL_SIZE_LOG2;
+        const int mi_col = -xd->mb_to_left_edge >> MI_SUBPEL_SIZE_LOG2;
+        int row_start =
+            plane ? (mi->chroma_ref_info.mi_row_chroma_base - mi_row) : 0;
+        int col_start =
+            plane ? (mi->chroma_ref_info.mi_col_chroma_base - mi_col) : 0;
+        MV luma_refined_mv[2] = { { mi->mv[0].as_mv.row, mi->mv[0].as_mv.col },
+                                  { mi->mv[1].as_mv.row,
+                                    mi->mv[1].as_mv.col } };
+
+        MV chroma_refined_mv[2] = {
+          { mi->mv[0].as_mv.row, mi->mv[0].as_mv.col },
+          { mi->mv[1].as_mv.row, mi->mv[1].as_mv.col }
+        };
+
+        if (plane != 0) {
+          int luma_h = (h << pd->subsampling_y);
+          int luma_w = (w << pd->subsampling_x);
+          REFINEMV_SUBMB_INFO *refinemv_subinfo =
+              &xd->refinemv_subinfo[(luma_h >> MI_SIZE_LOG2) * MAX_MIB_SIZE +
+                                    (luma_w >> MI_SIZE_LOG2)];
+          chroma_refined_mv[0] = refinemv_subinfo->refinemv[0].as_mv;
+          chroma_refined_mv[1] = refinemv_subinfo->refinemv[1].as_mv;
+        }
+        // mi_x, and mi_y are the top-left position of the luma samples of the
+        // sub-block
+        build_inter_predictors_8x8_and_bigger_refinemv(
+            cm, xd, plane, mi, build_for_obmc, refinemv_sb_size_width,
+            refinemv_sb_size_height, mi_x + w * (1 << pd->subsampling_x),
+            mi_y + h * (1 << pd->subsampling_y), mc_buf,
+            calc_subpel_params_func, dst_buf->buf, dst_stride, bw, bh,
+            dst0_16_refinemv, dst1_16_refinemv, opt_gx0, opt_gx1, row_start,
+            col_start, plane == 0 ? luma_refined_mv : NULL, chroma_refined_mv,
+            build_for_refine_mv_only, ref_area);
+
+        if (plane == 0) {
+          REFINEMV_SUBMB_INFO *refinemv_subinfo =
+              &xd->refinemv_subinfo[(h >> MI_SIZE_LOG2) * MAX_MIB_SIZE +
+                                    (w >> MI_SIZE_LOG2)];
+          fill_subblock_refine_mv(refinemv_subinfo, refinemv_sb_size_width,
+                                  refinemv_sb_size_height, luma_refined_mv[0],
+                                  luma_refined_mv[1]);
+        }
+      }
+    }
+
+#if CONFIG_PEF
+    enhance_prediction(cm, xd, plane, dst, dst_stride, bw, bh
+#if CONFIG_OPTFLOW_REFINEMENT
+                       ,
+                       NULL, 0
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+                       ,
+                       apply_sub_block_refinemv, &xd->refinemv_subinfo[0]);
+#endif  // CONFIG_PEF
+    dst_buf->buf = dst;
+    xd->tmp_conv_dst = tmp_conv_dst;
+    return;
+  }
+#endif  // CONFIG_REFINEMV
+
   int is_global[2] = { 0, 0 };
   for (int ref = 0; ref < 1 + is_compound; ++ref) {
 #if CONFIG_TIP
@@ -1791,17 +3412,23 @@
   }
   const int pre_x = (mi_x + MI_SIZE * col_start) >> pd->subsampling_x;
   const int pre_y = (mi_y + MI_SIZE * row_start) >> pd->subsampling_y;
-
+#if CONFIG_REFINEMV
+  MV best_mv_ref[2] = { { mi->mv[0].as_mv.row, mi->mv[0].as_mv.col },
+                        { mi->mv[1].as_mv.row, mi->mv[1].as_mv.col } };
+#endif  // CONFIG_REFINEMV
 #if CONFIG_OPTFLOW_REFINEMENT
   int_mv mv_refined[2 * N_OF_OFFSETS];
   const int use_optflow_refinement =
       (mi->mode >= NEAR_NEARMV_OPTFLOW ||
        (cm->features.opfl_refine_type == REFINE_ALL &&
         mi->mode != GLOBAL_GLOBALMV &&
+#if CONFIG_CWP
+        mi->cwp_idx == CWP_EQUAL &&
+#endif  // CONFIG_CWP
         mi->interinter_comp.type == COMPOUND_AVERAGE)) &&
       is_compound && is_opfl_refine_allowed(cm, mi);
   assert(IMPLIES(use_optflow_refinement,
-                 cm->features.opfl_refine_type == REFINE_SWITCHABLE));
+                 cm->features.opfl_refine_type != REFINE_NONE));
   assert(IMPLIES(use_optflow_refinement, !build_for_obmc));
 
   // Optical flow refinement with masked comp types or with non-sharp
@@ -1838,8 +3465,13 @@
     gy1 = g1_buf + MAX_SB_SQUARE;
 
     // Initialize refined mv
-    const MV mv0 = mi->mv[0].as_mv;
-    const MV mv1 = mi->mv[1].as_mv;
+#if CONFIG_REFINEMV
+    const MV mv0 = best_mv_ref[0];
+    const MV mv1 = best_mv_ref[1];
+#else
+      const MV mv0 = mi->mv[0].as_mv;
+      const MV mv1 = mi->mv[1].as_mv;
+#endif  // CONFIG_REFINEMV
     for (int mvi = 0; mvi < n_blocks; mvi++) {
       mv_refined[mvi * 2].as_mv = mv0;
       mv_refined[mvi * 2 + 1].as_mv = mv1;
@@ -1855,10 +3487,21 @@
                                     ,
                                     1, 1
 #endif  // CONFIG_OPTFLOW_ON_TIP
+#if CONFIG_REFINEMV
+                                    ,
+                                    best_mv_ref, bw, bh
+#endif  // CONFIG_REFINEMV
     );
   }
 #endif  // CONFIG_OPTFLOW_REFINEMENT
 
+#if CONFIG_D071_IMP_MSK_BLD
+  BacpBlockData bacp_block_data[2 * N_OF_OFFSETS];
+  uint8_t use_bacp = !build_for_obmc && use_border_aware_compound(cm, mi) &&
+                     mi->cwp_idx == CWP_EQUAL &&
+                     cm->features.enable_imp_msk_bld;
+#endif  // CONFIG_D071_IMP_MSK_BLD
+
   for (int ref = 0; ref < 1 + is_compound; ++ref) {
     const struct scale_factors *const sf =
         is_intrabc ? &cm->sf_identity : xd->block_ref_scale_factors[ref];
@@ -1872,15 +3515,31 @@
                           pd->subsampling_x, pd->subsampling_y, xd->bd,
                           mi->use_intrabc[0], sf, pre_buf, mi->interp_fltr);
     if (is_compound) av1_init_comp_mode(&inter_pred_params);
+#if CONFIG_D071_IMP_MSK_BLD
+    inter_pred_params.border_data.enable_bacp = use_bacp;
+    inter_pred_params.border_data.bacp_block_data =
+        &bacp_block_data[0];  // Always point to the first ref
+#endif                        // CONFIG_D071_IMP_MSK_BLD
+
     inter_pred_params.conv_params = get_conv_params_no_round(
         ref, plane, xd->tmp_conv_dst, MAX_SB_SIZE, is_compound, xd->bd);
 
     if (!build_for_obmc)
       av1_init_warp_params(&inter_pred_params, &warp_types, ref, xd, mi);
 
-    if (is_masked_compound_type(mi->interinter_comp.type)) {
+#if CONFIG_D071_IMP_MSK_BLD
+    if (is_compound) {
       inter_pred_params.sb_type = mi->sb_type[PLANE_TYPE_Y];
       inter_pred_params.mask_comp = mi->interinter_comp;
+    }
+#endif  // CONFIG_D071_IMP_MSK_BLD
+
+    if (is_masked_compound_type(mi->interinter_comp.type)) {
+#if !CONFIG_D071_IMP_MSK_BLD
+      inter_pred_params.sb_type = mi->sb_type[PLANE_TYPE_Y];
+      inter_pred_params.mask_comp = mi->interinter_comp;
+#endif  // !CONFIG_D071_IMP_MSK_BLD
+
       if (ref == 1) {
         inter_pred_params.conv_params.do_average = 0;
         inter_pred_params.comp_mode = MASK_COMP;
@@ -1889,6 +3548,18 @@
       inter_pred_params.mask_comp.seg_mask = xd->seg_mask;
     }
 
+#if CONFIG_CWP
+    if (ref == 1 && inter_pred_params.conv_params.do_average == 1) {
+      if (get_cwp_idx(mi) != CWP_EQUAL) {
+        int8_t weight = get_cwp_idx(mi);
+        assert(mi->cwp_idx >= CWP_MIN && mi->cwp_idx <= CWP_MAX);
+        inter_pred_params.conv_params.fwd_offset = weight;
+        inter_pred_params.conv_params.bck_offset =
+            (1 << CWP_WEIGHT_BITS) - weight;
+      }
+    }
+#endif  // CONFIG_CWP
+
 #if CONFIG_OPTFLOW_REFINEMENT
     if (use_optflow_refinement && plane == 0) {
       const int n = opfl_get_subblock_size(bw, bh, plane
@@ -1930,6 +3601,10 @@
                      ,
                      mv_refined, use_optflow_refinement
 #endif  // CONFIG_OPTFLOW_REFINEMENT
+#if CONFIG_REFINEMV
+                     ,
+                     0, NULL
+#endif  // CONFIG_REFINEMV
   );
 #endif  // CONFIG_PEF
 }
@@ -1939,6 +3614,9 @@
 #if CONFIG_BAWP
                                 const BUFFER_SET *dst_orig,
 #endif
+#if CONFIG_REFINEMV
+                                int build_for_refine_mv_only,
+#endif  // CONFIG_REFINEMV
                                 int build_for_obmc, int bw, int bh, int mi_x,
                                 int mi_y, uint16_t **mc_buf,
                                 CalcSubpelParamsFunc calc_subpel_params_func) {
@@ -1946,7 +3624,12 @@
   // just for debugging purpose
   // Can be removed later on
   if (mi->mode == WARPMV) {
-    assert(mi->ref_mv_idx == 0);
+#if CONFIG_SEP_COMP_DRL
+    assert(mi->ref_mv_idx[0] == 0);
+    assert(mi->ref_mv_idx[1] == 0);
+#else
+      assert(mi->ref_mv_idx == 0);
+#endif  // CONFIG_SEP_COMP_DRL
     assert(mi->motion_mode == WARP_DELTA || mi->motion_mode == WARPED_CAUSAL);
   }
 #endif  // CONFIG_WARPMV
@@ -1963,7 +3646,12 @@
                                           dst_orig,
 #endif
                                           build_for_obmc, bw, bh, mi_x, mi_y,
-                                          mc_buf, calc_subpel_params_func);
+                                          mc_buf, calc_subpel_params_func
+#if CONFIG_REFINEMV
+                                          ,
+                                          build_for_refine_mv_only
+#endif  // CONFIG_REFINEMV
+    );
   }
 }
 
@@ -2061,7 +3749,7 @@
     return;
 
   foreach_overlappable_nb_above(cm, xd, INT_MAX, increment_uint8_t_ptr,
-                                &mbmi->overlappable_neighbors[0]);
+                                &mbmi->overlappable_neighbors[0], true);
   if (mbmi->overlappable_neighbors[0]) return;
   foreach_overlappable_nb_left(cm, xd, INT_MAX, increment_uint8_t_ptr,
                                &mbmi->overlappable_neighbors[1]);
@@ -2174,9 +3862,9 @@
 
   // handle above row
   struct obmc_inter_pred_ctxt ctxt_above = { above, above_stride };
-  foreach_overlappable_nb_above(cm, xd,
-                                max_neighbor_obmc[mi_size_wide_log2[bsize]],
-                                build_obmc_inter_pred_above, &ctxt_above);
+  foreach_overlappable_nb_above(
+      cm, xd, max_neighbor_obmc[mi_size_wide_log2[bsize]],
+      build_obmc_inter_pred_above, &ctxt_above, false);
 
   // handle left column
   struct obmc_inter_pred_ctxt ctxt_left = { left, left_stride };
@@ -2387,6 +4075,18 @@
   return (above_mpp_flag + left_mpp_flag);
 }
 
+#if CONFIG_REFINEMV
+// Derive the context index used to code the refinemv flag of xd->mi[0]
+int av1_get_refinemv_context(const AV1_COMMON *cm, const MACROBLOCKD *xd,
+                             BLOCK_SIZE bsize) {
+  (void)cm;
+  (void)bsize;
+  const MB_MODE_INFO *const cur_mbmi = xd->mi[0];
+  // Skip mode uses context 0; otherwise 1 + the mode's offset from NEAR_NEARMV
+  return cur_mbmi->skip_mode ? 0 : (1 + (cur_mbmi->mode - NEAR_NEARMV));
+}
+#endif  // CONFIG_REFINEMV
+
 int av1_get_pb_mv_precision_down_context(const AV1_COMMON *cm,
                                          const MACROBLOCKD *xd) {
   (void)cm;
@@ -2480,7 +4180,11 @@
 }
 void set_precision_set(const AV1_COMMON *const cm, MACROBLOCKD *const xd,
                        MB_MODE_INFO *mbmi, const BLOCK_SIZE bsize,
-                       uint8_t ref_mv_idx) {
+#if CONFIG_SEP_COMP_DRL
+                       int *ref_mv_idx) {
+#else
+                         uint8_t ref_mv_idx) {
+#endif  // CONFIG_SEP_COMP_DRL
   (void)bsize;
   (void)cm;
   (void)xd;
@@ -2527,5 +4231,22 @@
          cm->features.use_pb_mv_precision &&
          have_newmv_in_inter_mode(mbmi->mode);
 }
-
 #endif
+
+#if CONFIG_REFINEMV
+// Copy mv0 and mv1 to every MI-sized unit covered by the sub-block.
+// refinemv_subinfo points to the top-left unit of the sub-block to fill;
+// consecutive rows of units are MAX_MIB_SIZE entries apart.
+// bw and bh are the sub-block width and height in units of pixels.
+void fill_subblock_refine_mv(REFINEMV_SUBMB_INFO *refinemv_subinfo, int bw,
+                             int bh, MV mv0, MV mv1) {
+  const int mi_rows = bh >> MI_SIZE_LOG2;
+  const int mi_cols = bw >> MI_SIZE_LOG2;
+  for (int r = 0; r < mi_rows; r++, refinemv_subinfo += MAX_MIB_SIZE) {
+    for (int c = 0; c < mi_cols; c++) {
+      refinemv_subinfo[c].refinemv[0].as_mv = mv0;
+      refinemv_subinfo[c].refinemv[1].as_mv = mv1;
+    }
+  }
+}
+#endif  // CONFIG_REFINEMV
diff --git a/av1/common/reconinter.h b/av1/common/reconinter.h
index 18ea840..5a5a146 100644
--- a/av1/common/reconinter.h
+++ b/av1/common/reconinter.h
@@ -167,6 +167,13 @@
   int ys;
   int subpel_x;
   int subpel_y;
+#if CONFIG_D071_IMP_MSK_BLD
+  int x0;  // top left sample horizontal coord.
+  int y0;  // top left sample vertical coord.
+  int x1;  // x0 + bw
+  int y1;  // y0 + bh
+#endif     // CONFIG_D071_IMP_MSK_BLD
+
 } SubpelParams;
 
 struct build_prediction_ctxt {
@@ -179,6 +186,14 @@
   void *dcb;  // Decoder-only coding block.
 };
 
+#if CONFIG_REFINEMV
+#define REFINE_MV_MAX_OFFSET 1
+#define REF_TOP_BORDER (AOM_INTERP_EXTEND - 1 + REFINE_MV_MAX_OFFSET)
+#define REF_LEFT_BORDER (AOM_INTERP_EXTEND - 1 + REFINE_MV_MAX_OFFSET)
+#define REF_RIGHT_BORDER (AOM_INTERP_EXTEND + REFINE_MV_MAX_OFFSET)
+#define REF_BOTTOM_BORDER (AOM_INTERP_EXTEND + REFINE_MV_MAX_OFFSET)
+#endif  // CONFIG_REFINEMV
+
 typedef enum InterPredMode {
   TRANSLATION_PRED,
   WARP_PRED,
@@ -206,6 +221,15 @@
   int orig_block_width;
   int orig_block_height;
 #endif  // CONFIG_OPTFLOW_REFINEMENT
+
+#if CONFIG_REFINEMV
+  // In refinemv, the prediction is generated on a sub-block basis (at most
+  // 16x16). original_pu_width and original_pu_height represent the width and
+  // height of the original block.
+  int original_pu_width;
+  int original_pu_height;
+#endif  // CONFIG_REFINEMV
+
   int pix_row;
   int pix_col;
   struct buf_2d ref_frame_buf;
@@ -226,6 +250,15 @@
   int dist_to_top_edge;    /*!< Distance from top edge */
   int dist_to_bottom_edge; /*!< Distance from bottom edge */
 #endif                     // CONFIG_TIP
+
+#if CONFIG_REFINEMV
+  int use_ref_padding;
+  ReferenceArea *ref_area;
+#endif  // CONFIG_REFINEMV
+
+#if CONFIG_D071_IMP_MSK_BLD
+  INTERINTER_COMPOUND_BORDER_DATA border_data;
+#endif  // CONFIG_D071_IMP_MSK_BLD
 } InterPredParams;
 
 #if CONFIG_OPTFLOW_REFINEMENT
@@ -300,15 +333,49 @@
 
 #if CONFIG_WARP_REF_LIST
 // Check if the signaling of the warp delta parameters are allowed
-static INLINE int allow_warp_parameter_signaling(const MB_MODE_INFO *mbmi) {
+static INLINE int allow_warp_parameter_signaling(
+#if CONFIG_CWG_D067_IMPROVED_WARP
+    const AV1_COMMON *const cm,
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+    const MB_MODE_INFO *mbmi) {
   return (
 #if CONFIG_WARPMV
       mbmi->mode != WARPMV &&
 #endif  // CONFIG_WARPMV
+#if CONFIG_CWG_D067_IMPROVED_WARP
+      cm->features.allow_warpmv_mode &&
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
       mbmi->motion_mode == WARP_DELTA && mbmi->warp_ref_idx == 1);
 }
 #endif  // CONFIG_WARP_REF_LIST
 
+#if CONFIG_CWP
+// Map between a CWP weighting factor and its coding index (both directions)
+static INLINE int get_cwp_coding_idx(int val, int encode,
+                                     const AV1_COMMON *const cm,
+                                     const MB_MODE_INFO *const mbmi) {
+  int is_same_side = 0;
+  int cur_ref_side = 0;
+  int other_ref_side = 0;
+  if (has_second_ref(mbmi)) {
+    cur_ref_side = cm->ref_frame_side[mbmi->ref_frame[0]];
+    other_ref_side = cm->ref_frame_side[mbmi->ref_frame[1]];
+
+    is_same_side = (cur_ref_side > 0 && other_ref_side > 0) ||
+                   (cur_ref_side == 0 && other_ref_side == 0);
+  }
+
+  if (encode) {  // val is a weighting factor: look up its index in the table
+    for (int i = 0; i < MAX_CWP_NUM; i++) {
+      if (cwp_weighting_factor[is_same_side][i] == val) return i;
+    }
+    return 0;
+  } else {  // val is a coding index: return the weighting factor it selects
+    return cwp_weighting_factor[is_same_side][val];
+  }
+}
+#endif  // CONFIG_CWP
+
 #if CONFIG_ADAPTIVE_MVD
 static INLINE int enable_adaptive_mvd_resolution(const AV1_COMMON *const cm,
                                                  const MB_MODE_INFO *mbmi) {
@@ -474,10 +541,316 @@
 #if CONFIG_BAWP
                                 const BUFFER_SET *dst_orig,
 #endif
+#if CONFIG_REFINEMV
+                                int build_for_refine_mv_only,
+#endif  // CONFIG_REFINEMV
                                 int build_for_obmc, int bw, int bh, int mi_x,
                                 int mi_y, uint16_t **mc_buf,
                                 CalcSubpelParamsFunc calc_subpel_params_func);
 
+#if CONFIG_REFINEMV
+// Generate one prediction signal for a TIP block
+void tip_build_one_inter_predictor(
+    uint16_t *dst, int dst_stride, const MV *const src_mv,
+    InterPredParams *inter_pred_params, MACROBLOCKD *xd, int mi_x, int mi_y,
+    int ref, uint16_t **mc_buf, CalcSubpelParamsFunc calc_subpel_params_func);
+
+// Compute the SAD between the two predictors when refinemv is ON
+int get_refinemv_sad(uint16_t *src1, uint16_t *src2, int width, int height,
+                     int bd);
+// Generate two prediction signals and compute the SAD for a given mv0 and mv1
+int av1_refinemv_build_predictors_and_get_sad(
+    MACROBLOCKD *xd, int bw, int bh, int mi_x, int mi_y, uint16_t **mc_buf,
+    CalcSubpelParamsFunc calc_subpel_params_func, uint16_t *dst_ref0,
+    uint16_t *dst_ref1, MV mv0, MV mv1, InterPredParams *inter_pred_params);
+
+// Get the context index to code refinemv flag
+int av1_get_refinemv_context(const AV1_COMMON *cm, const MACROBLOCKD *xd,
+                             BLOCK_SIZE bsize);
+
+// The refined MVs of the full block are stored on a 4x4 grid so that the MVs
+// can be reused for chroma
+void fill_subblock_refine_mv(REFINEMV_SUBMB_INFO *refinemv_subinfo, int bw,
+                             int bh, MV mv0, MV mv1);
+
+// Generate the reference area (bounding box) based on the signaled MV
+void av1_get_reference_area_with_padding(const AV1_COMMON *cm, MACROBLOCKD *xd,
+                                         int plane, MB_MODE_INFO *mi, int bw,
+                                         int bh, int mi_x, int mi_y,
+                                         ReferenceArea ref_area[2],
+                                         const int comp_pixel_x,
+                                         const int comp_pixel_y);
+
+// Derive the sub-pixel related parameters of TIP blocks
+// Sub-pel related parameters are stored in the structures pointed by
+// "subpel_params" and "block"
+void tip_dec_calc_subpel_params(const MV *const src_mv,
+                                InterPredParams *const inter_pred_params,
+                                int mi_x, int mi_y, uint16_t **pre,
+                                SubpelParams *subpel_params, int *src_stride,
+                                PadBlock *block,
+#if CONFIG_OPTFLOW_REFINEMENT
+                                int use_optflow_refinement,
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+                                MV32 *scaled_mv, int *subpel_x_mv,
+                                int *subpel_y_mv);
+
+// Derive the sub-pixel related parameters of non-TIP blocks
+// Sub-pel related parameters are stored in the structures pointed by
+// "subpel_params" and "block"
+void dec_calc_subpel_params(const MV *const src_mv,
+                            InterPredParams *const inter_pred_params,
+                            const MACROBLOCKD *const xd, int mi_x, int mi_y,
+                            uint16_t **pre, SubpelParams *subpel_params,
+                            int *src_stride, PadBlock *block,
+#if CONFIG_OPTFLOW_REFINEMENT
+                            int use_optflow_refinement,
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+                            MV32 *scaled_mv, int *subpel_x_mv,
+                            int *subpel_y_mv);
+
+// Check if refinemv is allowed for a block size (width or height >= 16)
+static INLINE int is_refinemv_allowed_bsize(BLOCK_SIZE bsize) {
+  assert(bsize < BLOCK_SIZES_ALL);
+  return (block_size_wide[bsize] >= 16 || block_size_high[bsize] >= 16);
+}
+
+// Check if refinemv is allowed for a given mode (precision is unused)
+static INLINE int is_refinemv_allowed_mode_precision(
+    PREDICTION_MODE mode, MvSubpelPrecision precision,
+    const AV1_COMMON *const cm) {
+  (void)precision;
+  if (mode == GLOBAL_GLOBALMV) return 0;
+  if (cm->features.opfl_refine_type == REFINE_SWITCHABLE &&
+      (mode == JOINT_NEWMV || mode == JOINT_AMVDNEWMV || mode == NEAR_NEWMV ||
+       mode == NEW_NEARMV || mode == NEW_NEWMV))
+    return 0;
+  return (mode >= NEAR_NEARMV && mode <= JOINT_AMVDNEWMV_OPTFLOW);
+}
+// Check if the block's prediction mode infers refinemv to always be 1.
+static INLINE int default_refinemv_modes(const MB_MODE_INFO *mbmi) {
+  return (mbmi->skip_mode || mbmi->mode == NEAR_NEARMV ||
+          mbmi->mode == NEAR_NEARMV_OPTFLOW ||
+          mbmi->mode == JOINT_NEWMV_OPTFLOW);
+}
+// Check if the block uses two references on opposite sides at equal distance
+static INLINE int is_refinemv_allowed_reference(const AV1_COMMON *cm,
+                                                const MB_MODE_INFO *mbmi) {
+  if (!cm->seq_params.enable_refinemv) return 0;
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+  const unsigned int cur_index = cm->cur_frame->display_order_hint;
+#else
+  const unsigned int cur_index = cm->cur_frame->order_hint;
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+  int d0, d1;
+  int is_tip = (mbmi->ref_frame[0] == TIP_FRAME);
+
+  if (is_tip) {
+    d0 = cm->tip_ref.ref_offset[0];
+    d1 = cm->tip_ref.ref_offset[1];
+  } else {
+    if (!mbmi->ref_frame[1]) return 0;
+    const RefCntBuffer *const ref0 = get_ref_frame_buf(cm, mbmi->ref_frame[0]);
+    const RefCntBuffer *const ref1 = get_ref_frame_buf(cm, mbmi->ref_frame[1]);
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+    d0 = get_relative_dist(&cm->seq_params.order_hint_info, cur_index,
+                           ref0->display_order_hint);
+    d1 = get_relative_dist(&cm->seq_params.order_hint_info, cur_index,
+                           ref1->display_order_hint);
+#else
+    d0 = (int)cur_index - (int)ref0->order_hint;
+    d1 = (int)cur_index - (int)ref1->order_hint;
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+  }
+
+  // reference frames have to be on both sides to apply dmvr
+  if (!((d0 <= 0) ^ (d1 <= 0))) return 0;
+
+  // Current implementation only supports references at the same distance
+  if (abs(d0) != abs(d1)) return 0;
+
+  return 1;
+}
+
+// check if the refinemv mode is allowed for a given block
+static INLINE int is_refinemv_allowed(const AV1_COMMON *const cm,
+                                      const MB_MODE_INFO *mbmi,
+                                      BLOCK_SIZE bsize) {
+  if (!cm->seq_params.enable_refinemv ||
+      cm->superres_scale_denominator != SCALE_NUMERATOR)
+    return 0;
+  int is_tip = is_tip_ref_frame(mbmi->ref_frame[0]);
+  if (is_tip) return 0;
+  assert(!mbmi->skip_mode);
+  int is_compound = has_second_ref(mbmi);
+  return is_compound && is_refinemv_allowed_bsize(bsize) &&
+         is_refinemv_allowed_mode_precision(mbmi->mode, mbmi->pb_mv_precision,
+                                            cm) &&
+         is_refinemv_allowed_reference(cm, mbmi);
+}
+
+// check if the refinemv mode is allowed for a given block for TIP mode
+static INLINE int is_refinemv_allowed_tip_blocks(const AV1_COMMON *const cm,
+                                                 const MB_MODE_INFO *mbmi) {
+  assert(is_tip_ref_frame(mbmi->ref_frame[0]));
+  return cm->seq_params.enable_refinemv &&
+         cm->superres_scale_denominator == SCALE_NUMERATOR &&
+         is_refinemv_allowed_reference(cm, mbmi);
+}
+
+// check if the refinemv mode is allowed for a given block for skip mode
+static INLINE int is_refinemv_allowed_skip_mode(const AV1_COMMON *const cm,
+                                                const MB_MODE_INFO *mbmi) {
+  assert(mbmi->skip_mode);
+  return cm->seq_params.enable_refinemv &&
+         cm->superres_scale_denominator == SCALE_NUMERATOR &&
+         is_refinemv_allowed_bsize(mbmi->sb_type[PLANE_TYPE_Y]) &&
+         is_refinemv_allowed_reference(cm, mbmi);
+}
+static INLINE int get_default_refinemv_flag(const AV1_COMMON *const cm,
+                                            const MB_MODE_INFO *mbmi) {
+  if (!cm->seq_params.enable_refinemv ||
+      cm->superres_scale_denominator != SCALE_NUMERATOR)
+    return 0;
+  int is_refinemv =
+      (mbmi->skip_mode
+           ? is_refinemv_allowed_skip_mode(cm, mbmi)
+           : is_refinemv_allowed(cm, mbmi, mbmi->sb_type[PLANE_TYPE_Y]));
+  if (is_refinemv) {
+    if (default_refinemv_modes(mbmi)) return 1;
+  }
+  return 0;
+}
+
+// check if the refinemv mode is switchable for a given block
+static INLINE int switchable_refinemv_flag(const AV1_COMMON *const cm,
+                                           const MB_MODE_INFO *mbmi) {
+  if (!cm->seq_params.enable_refinemv) return 0;
+  int is_refinemv =
+      (mbmi->skip_mode
+           ? is_refinemv_allowed_skip_mode(cm, mbmi)
+           : is_refinemv_allowed(cm, mbmi, mbmi->sb_type[PLANE_TYPE_Y]));
+  if (is_refinemv && !is_tip_ref_frame(mbmi->ref_frame[0])) {
+    if (default_refinemv_modes(mbmi)) return 0;
+    return 1;
+  }
+
+  return 0;
+}
+
+// Precision of refined MV returned, 0 being integer pel. For now, only 1/8 or
+// 1/16-pel can be used.
+#define MV_REFINE_PREC_BITS 4  // (1/16-pel)
+
+// Clamp MV to UMV border based on its distance to left/right/top/bottom edge
+static AOM_INLINE MV tip_clamp_mv_to_umv_border_sb(
+    InterPredParams *const inter_pred_params, const MV *src_mv, int bw, int bh,
+#if CONFIG_OPTFLOW_REFINEMENT
+    int use_optflow_refinement,
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+    int ss_x, int ss_y) {
+  // If the MV points so far into the UMV border that no visible pixels
+  // are used for reconstruction, the subpel part of the MV can be
+  // discarded and the MV limited to 16 pixels with equivalent results.
+  const int spel_left = (AOM_INTERP_EXTEND + bw) << SUBPEL_BITS;
+  const int spel_right = spel_left - SUBPEL_SHIFTS;
+  const int spel_top = (AOM_INTERP_EXTEND + bh) << SUBPEL_BITS;
+  const int spel_bottom = spel_top - SUBPEL_SHIFTS;
+#if CONFIG_OPTFLOW_REFINEMENT
+  MV clamped_mv;
+  if (use_optflow_refinement) {
+    // optflow refinement always returns MVs with 1/16 precision so it is not
+    // necessary to shift the MV before clamping
+    // Here it should be:
+    // clamped_mv.row = (int16_t)ROUND_POWER_OF_TWO_SIGNED(
+    //     src_mv->row * (1 << SUBPEL_BITS), MV_REFINE_PREC_BITS + ss_y);
+    // But currently SUBPEL_BITS == MV_REFINE_PREC_BITS
+    assert(SUBPEL_BITS == MV_REFINE_PREC_BITS);
+
+    if (ss_y || ss_x) {
+      clamped_mv.row = (int16_t)ROUND_POWER_OF_TWO_SIGNED(
+          src_mv->row * (1 << SUBPEL_BITS), MV_REFINE_PREC_BITS + ss_y);
+      clamped_mv.col = (int16_t)ROUND_POWER_OF_TWO_SIGNED(
+          src_mv->col * (1 << SUBPEL_BITS), MV_REFINE_PREC_BITS + ss_x);
+    } else {
+      clamped_mv = *src_mv;
+    }
+  } else {
+    clamped_mv.row = (int16_t)(src_mv->row * (1 << (1 - ss_y)));
+    clamped_mv.col = (int16_t)(src_mv->col * (1 << (1 - ss_x)));
+  }
+#else
+  MV clamped_mv = { (int16_t)(src_mv->row * (1 << (1 - ss_y))),
+                    (int16_t)(src_mv->col * (1 << (1 - ss_x))) };
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+  assert(ss_x <= 1);
+  assert(ss_y <= 1);
+  const SubpelMvLimits mv_limits = {
+    inter_pred_params->dist_to_left_edge * (1 << (1 - ss_x)) - spel_left,
+    inter_pred_params->dist_to_right_edge * (1 << (1 - ss_x)) + spel_right,
+    inter_pred_params->dist_to_top_edge * (1 << (1 - ss_y)) - spel_top,
+    inter_pred_params->dist_to_bottom_edge * (1 << (1 - ss_y)) + spel_bottom
+  };
+
+  clamp_mv(&clamped_mv, &mv_limits);
+
+  return clamped_mv;
+}
+
+// This function conducts the SAD search between two predictors and finds the
+// best MVs
+void apply_mv_refinement(const AV1_COMMON *cm, MACROBLOCKD *xd, int plane,
+                         MB_MODE_INFO *mi, int bw, int bh, int mi_x, int mi_y,
+                         uint16_t **mc_buf,
+                         CalcSubpelParamsFunc calc_subpel_params_func,
+                         int pre_x, int pre_y, uint16_t *dst_ref0,
+                         uint16_t *dst_ref1, MV *best_mv_ref, int pu_width,
+                         int pu_height);
+
+// Check if padding is required during motion compensation.
+// Returns 1 if a reference pixel is outside of the reference range and padding
+// is required; returns 0 if no padding is needed.
+int update_extend_mc_border_params(const struct scale_factors *const sf,
+                                   struct buf_2d *const pre_buf, MV32 scaled_mv,
+                                   PadBlock *block, int subpel_x_mv,
+                                   int subpel_y_mv, int do_warp, int is_intrabc,
+                                   int *x_pad, int *y_pad,
+                                   const ReferenceArea *ref_area);
+
+// Derive the sub-pixel related parameters of refinemv non-TIP blocks.
+// Sub-pel related parameters are stored in the structure pointed to by
+// "subpel_params". Also does padding if required. This function is used by
+// both the encoder and the decoder.
+void common_calc_subpel_params_and_extend(
+    const MV *const src_mv, InterPredParams *const inter_pred_params,
+    MACROBLOCKD *const xd, int mi_x, int mi_y, int ref,
+#if CONFIG_OPTFLOW_REFINEMENT
+    int use_optflow_refinement,
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+    uint16_t **mc_buf, uint16_t **pre, SubpelParams *subpel_params,
+    int *src_stride);
+
+// Derive the sub-pixel related parameters of refinemv TIP blocks.
+// Sub-pel related parameters are stored in the structure pointed to by
+// "subpel_params". Also does padding if required. This function is used by
+// both the encoder and the decoder.
+void tip_common_calc_subpel_params_and_extend(
+    const MV *const src_mv, InterPredParams *const inter_pred_params,
+    MACROBLOCKD *const xd, int mi_x, int mi_y, int ref,
+#if CONFIG_OPTFLOW_REFINEMENT
+    int use_optflow_refinement,
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+    uint16_t **mc_buf, uint16_t **pre, SubpelParams *subpel_params,
+    int *src_stride);
+#endif  // CONFIG_REFINEMV
+
+#if CONFIG_REFINEMV || CONFIG_OPTFLOW_ON_TIP
+
+unsigned int get_highbd_sad(const uint16_t *src_ptr, int source_stride,
+                            const uint16_t *ref_ptr, int ref_stride, int bd,
+                            int bw, int bh);
+#endif  // CONFIG_REFINEMV || CONFIG_OPTFLOW_ON_TIP
+
 #if CONFIG_OPTFLOW_REFINEMENT
 // This parameter k=OPFL_DIST_RATIO_THR is used to prune MV refinement for the
 // case where d0 and d1 are very different. Assuming a = max(|d0|, |d1|) and
@@ -497,9 +870,11 @@
 #define OPFL_COV_CLAMP_BITS 28
 #define OPFL_COV_CLAMP_VAL (1 << OPFL_COV_CLAMP_BITS)
 
+#if !CONFIG_REFINEMV
 // Precision of refined MV returned, 0 being integer pel. For now, only 1/8 or
 // 1/16-pel can be used.
 #define MV_REFINE_PREC_BITS 4  // (1/16-pel)
+#endif                         //! CONFIG_REFINEMV
 void av1_opfl_mv_refinement_highbd(const uint16_t *p0, int pstride0,
                                    const uint16_t *p1, int pstride1,
                                    const int16_t *gx0, const int16_t *gy0,
@@ -512,14 +887,23 @@
     const AV1_COMMON *cm, MACROBLOCKD *xd, int plane, const MB_MODE_INFO *mi,
     int bw, int bh, int mi_x, int mi_y, uint16_t **mc_buf,
     InterPredParams *inter_pred_params,
-    CalcSubpelParamsFunc calc_subpel_params_func, int ref, uint16_t *pred_dst);
+    CalcSubpelParamsFunc calc_subpel_params_func, int ref, uint16_t *pred_dst
+#if CONFIG_REFINEMV
+    ,
+    const MV *const src_mv, int pu_width, int pu_height
+#endif  // CONFIG_REFINEMV
+);
 
 static INLINE int is_opfl_refine_allowed(const AV1_COMMON *cm,
                                          const MB_MODE_INFO *mbmi) {
   if (cm->seq_params.enable_opfl_refine == AOM_OPFL_REFINE_NONE ||
       cm->features.opfl_refine_type == REFINE_NONE)
     return 0;
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+  const unsigned int cur_index = cm->cur_frame->display_order_hint;
+#else
   const unsigned int cur_index = cm->cur_frame->order_hint;
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
   int d0, d1;
 #if CONFIG_OPTFLOW_ON_TIP
   if (mbmi->ref_frame[0] == TIP_FRAME) {
@@ -530,8 +914,15 @@
     if (!mbmi->ref_frame[1]) return 0;
     const RefCntBuffer *const ref0 = get_ref_frame_buf(cm, mbmi->ref_frame[0]);
     const RefCntBuffer *const ref1 = get_ref_frame_buf(cm, mbmi->ref_frame[1]);
-    d0 = (int)cur_index - (int)ref0->order_hint;
-    d1 = (int)cur_index - (int)ref1->order_hint;
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+    d0 = get_relative_dist(&cm->seq_params.order_hint_info, cur_index,
+                           ref0->display_order_hint);
+    d1 = get_relative_dist(&cm->seq_params.order_hint_info, cur_index,
+                           ref1->display_order_hint);
+#else
+  d0 = (int)cur_index - (int)ref0->order_hint;
+  d1 = (int)cur_index - (int)ref1->order_hint;
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
 #if CONFIG_OPTFLOW_ON_TIP
   }
 #endif  // CONFIG_OPTFLOW_ON_TIP
@@ -553,6 +944,10 @@
     ,
     int do_pred, int use_4x4
 #endif  // CONFIG_OPTFLOW_ON_TIP
+#if CONFIG_REFINEMV
+    ,
+    MV *best_mv_ref, int pu_width, int pu_height
+#endif  // CONFIG_REFINEMV
 );
 
 // With the refined MVs, generate the inter prediction for the block.
@@ -669,6 +1064,30 @@
   return clamped_mv;
 }
 
+#if CONFIG_D071_IMP_MSK_BLD
+void make_masked_inter_predictor(const uint16_t *pre, int pre_stride,
+                                 uint16_t *dst, int dst_stride,
+                                 InterPredParams *inter_pred_params,
+                                 const SubpelParams *subpel_params,
+                                 int use_bacp, int sub_block_id);
+
+static INLINE int use_border_aware_compound(const AV1_COMMON *cm,
+                                            const MB_MODE_INFO *mbmi) {
+  if (is_masked_compound_type(mbmi->interinter_comp.type) ||
+      mbmi->mode == GLOBAL_GLOBALMV)
+    return 0;
+
+  (void)cm;
+  return has_second_ref(mbmi) &&
+         (mbmi->mode >= COMP_INTER_MODE_START &&
+          mbmi->mode < COMP_INTER_MODE_END) &&
+         (mbmi->interinter_comp.type == COMPOUND_DIFFWTD ||
+          mbmi->interinter_comp.type == COMPOUND_AVERAGE);
+}
+int is_out_of_frame_block(InterPredParams const *inter_pred_params,
+                          int frame_width, int frame_height, int sub_block_id);
+#endif  // CONFIG_D071_IMP_MSK_BLD
+
 static INLINE int64_t scaled_buffer_offset(int x_offset, int y_offset,
                                            int stride,
                                            const struct scale_factors *sf) {
@@ -725,7 +1144,11 @@
 #endif  // CONFIG_SKIP_MODE_ENHANCEMENT
 #if CONFIG_OPTFLOW_REFINEMENT
   mbmi->interp_fltr =
-      (mbmi->mode >= NEAR_NEARMV_OPTFLOW || use_opfl_refine_all(cm, mbmi))
+      (mbmi->mode >= NEAR_NEARMV_OPTFLOW || use_opfl_refine_all(cm, mbmi)
+#if CONFIG_REFINEMV
+       || mbmi->refinemv_flag
+#endif  // CONFIG_REFINEMV
+       )
           ? MULTITAP_SHARP
           : av1_unswitchable_filter(frame_interp_filter);
 #else
@@ -748,6 +1171,12 @@
   if (mbmi->mode >= NEAR_NEARMV_OPTFLOW || use_opfl_refine_all(cm, mbmi))
     return 0;
 #endif  // CONFIG_OPTFLOW_REFINEMENT
+
+#if CONFIG_REFINEMV
+  // No interpolation filter search when MV refinement is used.
+  if (mbmi->refinemv_flag) return 0;
+#endif  // CONFIG_REFINEMV
+
   if (is_warp_mode(mbmi->motion_mode)) return 0;
   if (is_nontrans_global_motion(xd, xd->mi[0])) return 0;
   return 1;
@@ -790,6 +1219,13 @@
 const uint8_t *av1_get_compound_type_mask(
     const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type);
 
+#if CONFIG_CWP
+// Init the masks for compound weighted prediction
+void init_cwp_masks();
+// Get the mask for compound weighted prediction
+const int8_t *av1_get_cwp_mask(int list_idx, int idx);
+#endif  // CONFIG_CWP
+
 // build interintra_predictors for one plane
 void av1_build_interintra_predictor(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                     uint16_t *pred, int stride,
@@ -852,7 +1288,11 @@
 // Set the precision set of the block. Currently, the value is 0.
 void set_precision_set(const AV1_COMMON *const cm, MACROBLOCKD *const xd,
                        MB_MODE_INFO *mbmi, const BLOCK_SIZE bsize,
+#if CONFIG_SEP_COMP_DRL
+                       int *ref_mv_idx);
+#else
                        uint8_t ref_mv_idx);
+#endif  // CONFIG_SEP_COMP_DRL
 // Get the index of the precision
 // this index is signalled when precision is not same as the most probable
 // precision
@@ -874,7 +1314,6 @@
 // check if pb_mv_precision is allowed or not
 int is_pb_mv_precision_active(const AV1_COMMON *const cm,
                               const MB_MODE_INFO *mbmi, const BLOCK_SIZE bsize);
-
 #endif
 
 #if CONFIG_WARPMV
@@ -888,17 +1327,31 @@
 static INLINE int is_warpmv_mode_allowed(const AV1_COMMON *const cm,
                                          const MB_MODE_INFO *mbmi,
                                          BLOCK_SIZE bsize) {
-  if (has_second_ref(mbmi) || !cm->features.enabled_motion_modes
+  int frame_warp_delta_allowed =
+      (cm->features.enabled_motion_modes & (1 << WARP_DELTA)) != 0;
+
+  if (has_second_ref(mbmi) || !frame_warp_delta_allowed
 #if CONFIG_TIP
       || is_tip_ref_frame(mbmi->ref_frame[0])
 #endif  // CONFIG_TIP
+#if CONFIG_CWG_D067_IMPROVED_WARP
+      || !cm->features.allow_warpmv_mode
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
   )
     return 0;
 
-  int frame_warp_delta_allowed =
-      cm->features.enabled_motion_modes & (1 << WARP_DELTA);
   return frame_warp_delta_allowed && is_warpmv_allowed_bsize(bsize);
 }
+
+#if CONFIG_CWG_D067_IMPROVED_WARP
+// Allow MVD coding for WARPMV: frame flag set, WARPMV mode, warp_ref_idx < 2.
+static INLINE int allow_warpmv_with_mvd_coding(const AV1_COMMON *const cm,
+                                               const MB_MODE_INFO *mbmi) {
+  if (!cm->features.allow_warpmv_mode) return 0;
+  return (mbmi->mode == WARPMV && mbmi->warp_ref_idx < 2);
+}
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+
 #endif  // CONFIG_WARPMV
 
 #ifdef __cplusplus
diff --git a/av1/common/reconintra.c b/av1/common/reconintra.c
index 6f4b4b0..36206f3 100644
--- a/av1/common/reconintra.c
+++ b/av1/common/reconintra.c
@@ -56,9 +56,20 @@
 #if CONFIG_ORIP
       | NEED_ABOVELEFT
 #endif
-  ,                                         // SMOOTH
-  NEED_LEFT | NEED_ABOVE,                   // SMOOTH_V
-  NEED_LEFT | NEED_ABOVE,                   // SMOOTH_H
+#if CONFIG_BLEND_MODE
+      | NEED_ABOVERIGHT | NEED_BOTTOMLEFT
+#endif  // CONFIG_BLEND_MODE
+  ,     // SMOOTH
+  NEED_LEFT | NEED_ABOVE
+#if CONFIG_BLEND_MODE
+      | NEED_BOTTOMLEFT
+#endif  // CONFIG_BLEND_MODE
+  ,     // SMOOTH_V
+  NEED_LEFT | NEED_ABOVE
+#if CONFIG_BLEND_MODE
+      | NEED_ABOVERIGHT
+#endif                                      // CONFIG_BLEND_MODE
+  ,                                         // SMOOTH_H
   NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT,  // PAETH
 };
 
@@ -793,6 +804,210 @@
   }
 }
 
+#if CONFIG_IDIF
+// Directional prediction, zone 1: 0 < angle < 90, 4-tap IDIF filter
+void av1_highbd_dr_prediction_z1_idif_c(uint16_t *dst, ptrdiff_t stride, int bw,
+                                        int bh, const uint16_t *above,
+                                        const uint16_t *left, int dx, int dy,
+                                        int bd, int mrl_index) {
+  int r, c, x, base, shift, val;
+
+  uint16_t ref[4] = { 0 };
+
+  // Zone 1 never reads left[]; dy is only consulted by the assert below.
+  (void)left;
+  (void)dy;
+  assert(dy == 1);
+  assert(dx > 0);
+  // Samples at or beyond max_base_x are replaced by above[max_base_x].
+  const int max_base_x = (bw + bh) - 1 + (mrl_index << 1);
+  const int frac_bits = 6;
+  const int base_inc = 1;
+  // x walks along above[] in 1/64-sample steps (frac_bits fractional bits).
+  x = dx * (1 + mrl_index);
+  for (r = 0; r < bh; ++r, dst += stride, x += dx) {
+    base = x >> frac_bits;
+    shift = (x & 0x3F) >> 1;  // filter phase, 0..31
+    // Row starts past the edge: pad it and every remaining row, then stop.
+    if (base >= max_base_x) {
+      for (int i = r; i < bh; ++i) {
+        aom_memset16(dst, above[max_base_x], bw);
+        dst += stride;
+      }
+      return;
+    }
+
+    for (c = 0; c < bw; ++c, base += base_inc) {
+      if (base < max_base_x) {
+        // 4-tap filter over above[base - 1 .. base + 2]
+        ref[0] = above[base - 1];
+        ref[1] = above[base];
+        ref[2] = above[base + 1];
+        ref[3] = above[base + 2];
+
+        val = av1_dr_interp_filter[shift][0] * ref[0] +
+              av1_dr_interp_filter[shift][1] * ref[1] +
+              av1_dr_interp_filter[shift][2] * ref[2] +
+              av1_dr_interp_filter[shift][3] * ref[3];
+
+        dst[c] = clip_pixel_highbd(
+            ROUND_POWER_OF_TWO(val, POWER_DR_INTERP_FILTER), bd);
+      } else {
+        dst[c] = above[max_base_x];
+      }
+    }
+  }
+}
+
+// Directional prediction, zone 2: 90 < angle < 180, 4-tap IDIF filter
+void av1_highbd_dr_prediction_z2_idif_c(uint16_t *dst, ptrdiff_t stride, int bw,
+                                        int bh, const uint16_t *above,
+                                        const uint16_t *left, int dx, int dy,
+                                        int bd, int mrl_index) {
+  // Each pixel projects onto above[] when it can, otherwise onto left[].
+  assert(dx > 0);
+  assert(dy > 0);
+
+  const int min_base_x = -1 - mrl_index;
+  const int min_base_y = -1 - mrl_index;
+  // min_base_y is only consulted by an assert further down.
+  (void)min_base_y;
+  const int frac_bits_x = 6;
+  const int frac_bits_y = 6;
+
+  uint16_t ref[4] = { 0 };
+
+  for (int r = 0; r < bh; ++r) {
+    for (int c = 0; c < bw; ++c) {
+      int val;
+      int y = r + 1;
+      int x = (c << 6) - (y + mrl_index) * dx;  // 1/64-sample units
+      const int base_x = x >> frac_bits_x;
+      if (base_x >= min_base_x) {
+        const int shift = (x & 0x3F) >> 1;
+        // 4-tap filter over above[base_x - 1 .. base_x + 2]
+        ref[0] = above[base_x - 1];
+        ref[1] = above[base_x];
+        ref[2] = above[base_x + 1];
+        ref[3] = above[base_x + 2];
+
+        val = av1_dr_interp_filter[shift][0] * ref[0] +
+              av1_dr_interp_filter[shift][1] * ref[1] +
+              av1_dr_interp_filter[shift][2] * ref[2] +
+              av1_dr_interp_filter[shift][3] * ref[3];
+
+        val = clip_pixel_highbd(ROUND_POWER_OF_TWO(val, POWER_DR_INTERP_FILTER),
+                                bd);
+      } else {
+        x = c + 1;
+        y = (r << 6) - (x + mrl_index) * dy;  // 1/64-sample units
+        const int base_y = y >> frac_bits_y;
+        assert(base_y >= min_base_y);
+        const int shift = (y & 0x3F) >> 1;
+        // 4-tap filter over left[base_y - 1 .. base_y + 2]
+        ref[0] = left[base_y - 1];
+        ref[1] = left[base_y];
+        ref[2] = left[base_y + 1];
+        ref[3] = left[base_y + 2];
+
+        val = av1_dr_interp_filter[shift][0] * ref[0] +
+              av1_dr_interp_filter[shift][1] * ref[1] +
+              av1_dr_interp_filter[shift][2] * ref[2] +
+              av1_dr_interp_filter[shift][3] * ref[3];
+
+        val = clip_pixel_highbd(ROUND_POWER_OF_TWO(val, POWER_DR_INTERP_FILTER),
+                                bd);
+      }
+      dst[c] = val;
+    }
+    dst += stride;
+  }
+}
+
+// Directional prediction, zone 3: 180 < angle < 270, 4-tap IDIF filter
+void av1_highbd_dr_prediction_z3_idif_c(uint16_t *dst, ptrdiff_t stride, int bw,
+                                        int bh, const uint16_t *above,
+                                        const uint16_t *left, int dx, int dy,
+                                        int bd, int mrl_index) {
+  int r, c, y, base, shift, val;
+
+  // Zone 3 never reads above[]; dx is only consulted by the assert below.
+  (void)above;
+  (void)dx;
+  assert(dx == 1);
+  assert(dy > 0);
+
+  uint16_t ref[4] = { 0 };
+
+  const int max_base_y = (bw + bh) - 1 + (mrl_index << 1);
+  const int frac_bits = 6;
+  const int base_inc = 1;
+  // y walks along left[] in 1/64-sample steps (frac_bits fractional bits).
+  y = dy * (1 + mrl_index);
+  for (c = 0; c < bw; ++c, y += dy) {
+    base = y >> frac_bits;
+    shift = (y & 0x3F) >> 1;  // filter phase, 0..31
+    // Fill column c top to bottom; base advances one left[] sample per row.
+    for (r = 0; r < bh; ++r, base += base_inc) {
+      if (base < max_base_y) {
+        // 4-tap filter over left[base - 1 .. base + 2]
+        ref[0] = left[base - 1];
+        ref[1] = left[base];
+        ref[2] = left[base + 1];
+        ref[3] = left[base + 2];
+
+        val = av1_dr_interp_filter[shift][0] * ref[0] +
+              av1_dr_interp_filter[shift][1] * ref[1] +
+              av1_dr_interp_filter[shift][2] * ref[2] +
+              av1_dr_interp_filter[shift][3] * ref[3];
+
+        dst[r * stride + c] = clip_pixel_highbd(
+            ROUND_POWER_OF_TWO(val, POWER_DR_INTERP_FILTER), bd);
+      } else {  // ran past the edge: pad the rest of column c
+        for (; r < bh; ++r) dst[r * stride + c] = left[max_base_y];
+        break;
+      }
+    }
+  }
+}
+
+static void highbd_dr_predictor_idif(uint16_t *dst, ptrdiff_t stride,
+                                     TX_SIZE tx_size, uint16_t *above,
+                                     uint16_t *left, int angle, int bd,
+                                     int mrl_index) {
+  const int dx = av1_get_dx(angle);
+  const int dy = av1_get_dy(angle);
+  const int bw = tx_size_wide[tx_size];
+  const int bh = tx_size_high[tx_size];
+  assert(angle > 0 && angle < 270);
+  // Outermost edge indices; the sample just past each is padded below.
+  const int min_base = -((1 + mrl_index));
+  const int max_base = ((bw + bh) - 1 + (mrl_index << 1));
+  // above/left are writable because the padding is stored in place.
+  if (angle > 0 && angle < 90) {
+    above[max_base + 1] = above[max_base];
+    av1_highbd_dr_prediction_z1_idif(dst, stride, bw, bh, above, left, dx, dy,
+                                     bd, mrl_index);
+
+  } else if (angle > 90 && angle < 180) {
+    above[min_base - 1] = above[min_base];
+    left[min_base - 1] = left[min_base];
+    av1_highbd_dr_prediction_z2_idif(dst, stride, bw, bh, above, left, dx, dy,
+                                     bd, mrl_index);
+
+  } else if (angle > 180 && angle < 270) {
+    left[max_base + 1] = left[max_base];
+    av1_highbd_dr_prediction_z3_idif(dst, stride, bw, bh, above, left, dx, dy,
+                                     bd, mrl_index);
+
+  } else if (angle == 90) {  // exactly vertical: no interpolation
+    pred_high[V_PRED][tx_size](dst, stride, above, left, bd);
+  } else if (angle == 180) {  // exactly horizontal: no interpolation
+    pred_high[H_PRED][tx_size](dst, stride, above, left, bd);
+  }
+}
+#endif  // CONFIG_IDIF
+
 static void highbd_dr_predictor(uint16_t *dst, ptrdiff_t stride,
                                 TX_SIZE tx_size, const uint16_t *above,
                                 const uint16_t *left, int upsample_above,
@@ -830,18 +1045,60 @@
   const int bh = tx_size_high[tx_size];
 
   if (angle > 0 && angle < 90) {
+#if CONFIG_EXT_DIR
+    int dy = dr_intra_derivative[90 - angle];
+#else
     int dy = second_dr_intra_derivative[angle];
+#endif  // CONFIG_EXT_DIR
     int dx = 1;
     av1_highbd_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left,
                                 dx, dy, bd, 0);
   } else if (angle > 180 && angle < 270) {
+#if CONFIG_EXT_DIR
+    int dx = dr_intra_derivative[angle - 180];
+#else
     int dx = second_dr_intra_derivative[270 - angle];
+#endif  // CONFIG_EXT_DIR
     int dy = 1;
     av1_highbd_dr_prediction_z1(dst, stride, bw, bh, above, left,
                                 upsample_above, dx, dy, bd, 0);
   }
 }
 
+#if CONFIG_IDIF
+// IBP second predictor via IDIF: angles < 90 use z3, angles > 180 use z1.
+static void highbd_second_dr_predictor_idif(uint16_t *dst, ptrdiff_t stride,
+                                            TX_SIZE tx_size, uint16_t *above,
+                                            uint16_t *left, int angle, int bd) {
+  const int bw = tx_size_wide[tx_size];
+  const int bh = tx_size_high[tx_size];
+  // mrl_index is fixed at 0 here, so the edge index carries no MRL offset.
+  const int max_base = ((bw + bh) - 1);
+
+  if (angle > 0 && angle < 90) {
+#if CONFIG_EXT_DIR
+    int dy = dr_intra_derivative[90 - angle];
+#else
+    int dy = second_dr_intra_derivative[angle];
+#endif  // CONFIG_EXT_DIR
+    int dx = 1;
+    left[max_base + 1] = left[max_base];  // pad for the 4-tap window
+    av1_highbd_dr_prediction_z3_idif(dst, stride, bw, bh, above, left, dx, dy,
+                                     bd, 0);
+  } else if (angle > 180 && angle < 270) {
+#if CONFIG_EXT_DIR
+    int dx = dr_intra_derivative[angle - 180];
+#else
+    int dx = second_dr_intra_derivative[270 - angle];
+#endif  // CONFIG_EXT_DIR
+    int dy = 1;
+    above[max_base + 1] = above[max_base];  // pad for the 4-tap window
+    av1_highbd_dr_prediction_z1_idif(dst, stride, bw, bh, above, left, dx, dy,
+                                     bd, 0);
+  }
+}
+#endif  // CONFIG_IDIF
+
 DECLARE_ALIGNED(16, const int8_t,
                 av1_filter_intra_taps[FILTER_INTRA_MODES][8][8]) = {
   {
@@ -1174,7 +1431,12 @@
 #endif
     ,
     const int seq_ibp_flag,
-    uint8_t *const ibp_weights[TX_SIZES_ALL][DIR_MODES_0_90]) {
+    uint8_t *const ibp_weights[TX_SIZES_ALL][DIR_MODES_0_90]
+#if CONFIG_IDIF
+    ,
+    const int enable_idif
+#endif  // CONFIG_IDIF
+) {
   int i;
   DECLARE_ALIGNED(16, uint16_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
   DECLARE_ALIGNED(16, uint16_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]);
@@ -1219,6 +1481,11 @@
 
   if (is_dr_mode) {
     p_angle = mode_to_angle_map[mode] + angle_delta;
+#if CONFIG_EXT_DIR
+    const int mrl_index_to_delta[4] = { 0, 1, -1, 0 };
+    p_angle += mrl_index_to_delta[mrl_index];
+    assert(p_angle > 0 && p_angle < 270);
+#endif  // CONFIG_EXT_DIR
     if (p_angle <= 90)
       need_above = 1, need_left = 0, need_above_left = 1;
     else if (p_angle < 180)
@@ -1266,9 +1533,16 @@
     if (is_dr_mode)
       need_bottom =
           seq_ibp_flag ? (p_angle < 90) || (p_angle > 180) : p_angle > 180;
-
+#if CONFIG_IDIF
+    int num_left_pixels_needed =
+        txhpx + (need_bottom ? txwpx : 3) + (mrl_index << 1) + 1;
+    if (enable_idif && (p_angle > 90 && p_angle < 180)) {
+      num_left_pixels_needed += 1;
+    }
+#else
     const int num_left_pixels_needed =
         txhpx + (need_bottom ? txwpx : 3) + (mrl_index << 1);
+#endif  // CONFIG_IDIF
     i = 0;
     if (n_left_px > 0) {
       for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride];
@@ -1291,9 +1565,16 @@
     if (is_dr_mode)
       need_right =
           seq_ibp_flag ? (p_angle < 90) || (p_angle > 180) : p_angle < 90;
-
+#if CONFIG_IDIF
+    int num_top_pixels_needed =
+        txwpx + (need_right ? txhpx : 0) + (mrl_index << 1);
+    if (enable_idif && (p_angle > 90 && p_angle < 180)) {
+      num_top_pixels_needed += 1;
+    }
+#else
     const int num_top_pixels_needed =
         txwpx + (need_right ? txhpx : 0) + (mrl_index << 1);
+#endif  // CONFIG_IDIF
     if (n_top_px > 0) {
       memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0]));
       i = n_top_px;
@@ -1377,31 +1658,63 @@
           av1_filter_intra_edge_high(left_col - ab_le, n_px, strength);
         }
       }
-      upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, angle_above,
-                                                   filt_type_above);
-      if (need_above && upsample_above) {
-        const int n_px = txwpx + (need_right ? txhpx : 0);
-        av1_upsample_intra_edge_high(above_row, n_px, xd->bd);
+#if CONFIG_IDIF
+      if (!enable_idif) {
+#endif  // CONFIG_IDIF
+        upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, angle_above,
+                                                     filt_type_above);
+        if (need_above && upsample_above) {
+          const int n_px = txwpx + (need_right ? txhpx : 0);
+          av1_upsample_intra_edge_high(above_row, n_px, xd->bd);
+        }
+        upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, angle_left,
+                                                    filt_type_left);
+        if (need_left && upsample_left) {
+          const int n_px = txhpx + (need_bottom ? txwpx : 0);
+          av1_upsample_intra_edge_high(left_col, n_px, xd->bd);
+        }
+#if CONFIG_IDIF
       }
-      upsample_left =
-          av1_use_intra_edge_upsample(txhpx, txwpx, angle_left, filt_type_left);
-      if (need_left && upsample_left) {
-        const int n_px = txhpx + (need_bottom ? txwpx : 0);
-        av1_upsample_intra_edge_high(left_col, n_px, xd->bd);
-      }
+#endif  // CONFIG_IDIF
     }
+#if CONFIG_IDIF
+    if (enable_idif) {
+      highbd_dr_predictor_idif(dst, dst_stride, tx_size, above_row, left_col,
+                               p_angle, xd->bd, mrl_index);
+    } else {
+      highbd_dr_predictor(dst, dst_stride, tx_size, above_row, left_col,
+                          upsample_above, upsample_left, p_angle, xd->bd,
+                          mrl_index);
+    }
+#else
     highbd_dr_predictor(dst, dst_stride, tx_size, above_row, left_col,
                         upsample_above, upsample_left, p_angle, xd->bd,
                         mrl_index);
-
+#endif  // CONFIG_IDIF
     if (seq_ibp_flag) {
-      if (mrl_index == 0) {
+      if (mrl_index == 0
+#if CONFIG_IMPROVED_ANGULAR_INTRA
+          && (angle_delta % 2 == 0)
+#endif  // CONFIG_IMPROVED_ANGULAR_INTRA
+      ) {
         if (p_angle > 0 && p_angle < 90) {
           int mode_index = angle_to_mode_index[p_angle];
           uint8_t *weights = ibp_weights[tx_size][mode_index];
+#if CONFIG_IDIF
+          if (enable_idif) {
+            highbd_second_dr_predictor_idif(second_pred, txwpx, tx_size,
+                                            above_row, left_col, p_angle,
+                                            xd->bd);
+          } else {
+            highbd_second_dr_predictor(second_pred, txwpx, tx_size, above_row,
+                                       left_col, upsample_above, upsample_left,
+                                       p_angle, xd->bd);
+          }
+#else
           highbd_second_dr_predictor(second_pred, txwpx, tx_size, above_row,
                                      left_col, upsample_above, upsample_left,
                                      p_angle, xd->bd);
+#endif  // CONFIG_IDIF
           av1_highbd_ibp_dr_prediction_z1_c(weights, dst, dst_stride,
                                             second_pred, txwpx, txwpx, txhpx);
         }
@@ -1409,9 +1722,21 @@
           int mode_index = angle_to_mode_index[270 - p_angle];
           int transpose_tsize = transpose_tx_size[tx_size];
           uint8_t *weights = ibp_weights[transpose_tsize][mode_index];
+#if CONFIG_IDIF
+          if (enable_idif) {
+            highbd_second_dr_predictor_idif(second_pred, txwpx, tx_size,
+                                            above_row, left_col, p_angle,
+                                            xd->bd);
+          } else {
+            highbd_second_dr_predictor(second_pred, txwpx, tx_size, above_row,
+                                       left_col, upsample_above, upsample_left,
+                                       p_angle, xd->bd);
+          }
+#else
           highbd_second_dr_predictor(second_pred, txwpx, tx_size, above_row,
                                      left_col, upsample_above, upsample_left,
                                      p_angle, xd->bd);
+#endif  // CONFIG_IDIF
           av1_highbd_ibp_dr_prediction_z3_c(weights, dst, dst_stride,
                                             second_pred, txwpx, txwpx, txhpx);
         }
@@ -1551,6 +1876,9 @@
       row_off, col_off, ss_x, ss_y, yd, &px_bottom_left, bsize != init_bsize);
 
   const int disable_edge_filter = !cm->seq_params.enable_intra_edge_filter;
+#if CONFIG_IDIF
+  const int enable_idif = cm->seq_params.enable_idif;
+#endif  // CONFIG_IDIF
 
   const int is_sb_boundary =
       (mi_row % cm->mib_size == 0 && row_off == 0) ? 1 : 0;
@@ -1571,7 +1899,12 @@
       cm->seq_params.enable_orip
 #endif
       ,
-      cm->seq_params.enable_ibp, cm->ibp_directional_weights);
+      cm->seq_params.enable_ibp, cm->ibp_directional_weights
+#if CONFIG_IDIF
+      ,
+      enable_idif
+#endif  // CONFIG_IDIF
+  );
   return;
 }
 
@@ -1636,11 +1969,7 @@
           av1_get_max_uv_txsize(mbmi->sb_type[PLANE_TYPE_UV], 0, 0);
 #if CONFIG_ADAPTIVE_DS_FILTER
       cfl_store_tx(xd, blk_row, blk_col, luma_tx_size,
-#if DS_FRAME_LEVEL
-                   cm->features.ds_filter_type);
-#else
                    cm->seq_params.enable_cfl_ds_filter);
-#endif  // DS_FRAME_LEVEL
 #else
       cfl_store_tx(xd, blk_row, blk_col, luma_tx_size);
 #endif  // CONFIG_ADAPTIVE_DS_FILTER
diff --git a/av1/common/reconintra.h b/av1/common/reconintra.h
index efd6533..0df6103 100644
--- a/av1/common/reconintra.h
+++ b/av1/common/reconintra.h
@@ -14,6 +14,7 @@
 #define AOM_AV1_COMMON_RECONINTRA_H_
 
 #include <stdlib.h>
+#include <math.h>
 
 #include "aom/aom_integer.h"
 #include "av1/common/av1_common_int.h"
@@ -124,7 +125,9 @@
                                 PLANE_TYPE plane_type, TX_TYPE tx_type,
                                 int is_inter) {
   bool allow_fsc = cm->seq_params.enable_fsc &&
+#if !CONFIG_ATC_DCTX_ALIGNED
                    cm->features.allow_screen_content_tools &&
+#endif  // !CONFIG_ATC_DCTX_ALIGNED
                    plane_type == PLANE_TYPE_Y && is_inter && tx_type == IDTX;
   return allow_fsc;
 }
@@ -213,6 +216,44 @@
 
 extern const int8_t av1_filter_intra_taps[FILTER_INTRA_MODES][8][8];
 
+#if CONFIG_EXT_DIR
+// moved to av1_common_int.h
+#elif CONFIG_IMPROVED_ANGULAR_INTRA
+static const int16_t dr_intra_derivative[90] = {
+  // Angles are dense around vertical and horizontal directions, and coarse
+  // close to
+  // diagonal directions.
+  //                    Approx angle
+  0,    0, 0,        //
+  2048, 0, 0,        // 3, ...
+  1024, 0, 0,        // 6, ...
+  512,  0, 0, 0, 0,  // 9, ...
+  340,  0, 0,        // 14, ...
+  256,  0, 0,        // 17, ...
+  204,  0, 0,        // 20, ...
+  170,  0, 0,        // 23, ... (113 & 203 are base angles)
+  146,  0, 0,        // 26, ...
+  128,  0, 0,        // 29, ...
+  106,  0, 0, 0,     // 32, ...
+  92,   0, 0,        // 36, ...
+  82,   0, 0,        // 39, ...
+  72,   0, 0,        // 42, ...
+  64,   0, 0,        // 45, ... (45 & 135 are base angles)
+  56,   0, 0,        // 48, ...
+  50,   0, 0,        // 51, ...
+  44,   0, 0, 0,     // 54, ...
+  38,   0, 0,        // 58, ...
+  32,   0, 0,        // 61, ...
+  28,   0, 0,        // 64, ...
+  24,   0, 0,        // 67, ... (67 & 157 are base angles)
+  20,   0, 0,        // 70, ...
+  16,   0, 0,        // 73, ...
+  12,   0, 0, 0, 0,  // 76, ...
+  8,    0, 0,        // 81, ...
+  4,    0, 0,        // 84, ...
+  2,    0, 0,        // 87, ...
+};
+#else
 static const int16_t dr_intra_derivative[90] = {
   // More evenly spread out angles and limited to 10-bit
   // Values that are 0 will never be used
@@ -246,6 +287,7 @@
   7,    0, 0,        // 84, ...
   3,    0, 0,        // 87, ...
 };
+#endif  // CONFIG_EXT_DIR
 
 // Get the shift (up-scaled by 256) in X w.r.t a unit change in Y.
 // If angle > 0 && angle < 90, dx = -((int)(256 / t));
@@ -301,6 +343,24 @@
 }
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
 
+#if CONFIG_IDIF
+#define POWER_DR_INTERP_FILTER 7
+// 32 phases x 4 taps; each row sums to 1 << POWER_DR_INTERP_FILTER (128).
+DECLARE_ALIGNED(16, static const int16_t, av1_dr_interp_filter[32][4]) = {
+  { 0, 128, 0, 0 },     { -2, 127, 4, -1 },   { -3, 125, 8, -2 },
+  { -5, 123, 13, -3 },  { -6, 121, 17, -4 },  { -7, 118, 22, -5 },
+  { -9, 116, 27, -6 },  { -9, 112, 32, -7 },  { -10, 109, 37, -8 },
+  { -11, 106, 41, -8 }, { -11, 102, 46, -9 }, { -12, 98, 52, -10 },
+  { -12, 94, 56, -10 }, { -12, 90, 61, -11 }, { -12, 85, 66, -11 },
+  { -12, 81, 71, -12 }, { -12, 76, 76, -12 }, { -12, 71, 81, -12 },
+  { -11, 66, 85, -12 }, { -11, 61, 90, -12 }, { -10, 56, 94, -12 },
+  { -10, 52, 98, -12 }, { -9, 46, 102, -11 }, { -8, 41, 106, -11 },
+  { -8, 37, 109, -10 }, { -7, 32, 112, -9 },  { -6, 27, 116, -9 },
+  { -5, 22, 118, -7 },  { -4, 17, 121, -6 },  { -3, 13, 123, -5 },
+  { -2, 8, 125, -3 },   { -1, 4, 127, -2 }
+};
+#endif  // CONFIG_IDIF
+
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/av1/common/restoration.c b/av1/common/restoration.c
index 219b507..c307424 100644
--- a/av1/common/restoration.c
+++ b/av1/common/restoration.c
@@ -91,6 +91,15 @@
 #endif  // CONFIG_WIENER_NONSEP_CROSS_FILT
 };
 
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+// Filter configuration of the cross-component Wiener filter
+const int wienerns_config_uv_from_y_cross[][3] = {
+  { 1, 0, 0 }, { -1, 0, 0 },  { 0, 1, 1 },  { 0, -1, 1 },
+  { 1, 1, 2 }, { -1, -1, 2 }, { -1, 1, 3 }, { 1, -1, 3 },
+  { 2, 0, 4 }, { -2, 0, 4 },  { 0, 2, 5 },  { 0, -2, 5 },
+};
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+
 #define WIENERNS_PREC_BITS_Y 7
 const int wienerns_coeff_y[][WIENERNS_COEFCFG_LEN] = {
 #if ENABLE_LR_4PART_CODE
@@ -163,14 +172,43 @@
 #endif  // ENABLE_LR_4PART_CODE
 };
 
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+const int wienerns_coeff_uv_from_y[][WIENERNS_COEFCFG_LEN] = {
+#if ENABLE_LR_4PART_CODE
+  AOM_WIENERNS_COEFF(WIENERNS_PREC_BITS_UV, 5, -12, 0),
+  AOM_WIENERNS_COEFF(WIENERNS_PREC_BITS_UV, 5, -12, 0),
+  AOM_WIENERNS_COEFF(WIENERNS_PREC_BITS_UV, 4, -7, 1),
+  AOM_WIENERNS_COEFF(WIENERNS_PREC_BITS_UV, 4, -7, 1),
+  AOM_WIENERNS_COEFF(WIENERNS_PREC_BITS_UV, 4, -8, 1),
+  AOM_WIENERNS_COEFF(WIENERNS_PREC_BITS_UV, 4, -8, 1),
+#else
+  AOM_WIENERNS_COEFF(WIENERNS_PREC_BITS_UV, 5, -12, 3),
+  AOM_WIENERNS_COEFF(WIENERNS_PREC_BITS_UV, 5, -12, 3),
+  AOM_WIENERNS_COEFF(WIENERNS_PREC_BITS_UV, 4, -7, 3),
+  AOM_WIENERNS_COEFF(WIENERNS_PREC_BITS_UV, 4, -7, 3),
+  AOM_WIENERNS_COEFF(WIENERNS_PREC_BITS_UV, 4, -8, 3),
+  AOM_WIENERNS_COEFF(WIENERNS_PREC_BITS_UV, 4, -8, 3),
+#endif  // ENABLE_LR_4PART_CODE
+};
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+
 const WienernsFilterParameters wienerns_filter_y = AOM_MAKE_WIENERNS_CONFIG(
     WIENERNS_PREC_BITS_Y, wienerns_config_y, wienerns_coeff_y);
 const WienernsFilterParameters wienerns_filter_uv =
     AOM_MAKE_WIENERNS_CONFIG2(WIENERNS_PREC_BITS_UV, wienerns_config_uv_from_uv,
                               wienerns_config_uv_from_y, wienerns_coeff_uv);
-
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+const WienernsFilterParameters wienerns_cross_filter_uv =
+    AOM_MAKE_WIENERNS_CONFIG(WIENERNS_PREC_BITS_UV,
+                             wienerns_config_uv_from_y_cross,
+                             wienerns_coeff_uv_from_y);
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
 const WienernsFilterPairParameters wienerns_filters_midqp = {
   &wienerns_filter_y, &wienerns_filter_uv
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+  ,
+  &wienerns_cross_filter_uv
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
 };
 
 // Configs for the first set of filters for the case without subtract center.
@@ -263,6 +301,10 @@
 
 const WienernsFilterPairParameters wienerns_filters_highqp = {
   &wienerns_filter_y2, &wienerns_filter_uv
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+  ,
+  &wienerns_cross_filter_uv
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
 };
 
 ///////////////////////////////////////////////////////////////////////////
@@ -343,6 +385,10 @@
 
 const WienernsFilterPairParameters wienerns_filters_lowqp = {
   &wienerns_filter_y3, &wienerns_filter_uv
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+  ,
+  &wienerns_cross_filter_uv
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
 };
 
 #endif  // CONFIG_WIENER_NONSEP
@@ -496,6 +542,36 @@
 #endif
 }
 
+#if CONFIG_FLEXIBLE_RU_SIZE
+// Sets the minimum and maximum restoration unit (RU) sizes for rst[0..2].
+// As normative regulation, the plane-0 minimum is RESTORATION_UNITSIZE_MAX >> 2
+// and the plane-0 maximum is RESTORATION_UNITSIZE_MAX; planes 1 and 2 use the
+// plane-0 sizes shifted right by min(sx, sy). restoration_unit_size starts at
+// each plane's minimum. These bounds are also used by the encoder search.
+void set_restoration_unit_size(int width, int height, int sx, int sy,
+                               RestorationInfo *rst) {
+  const int s = AOMMIN(sx, sy);
+
+  rst[0].max_restoration_unit_size = RESTORATION_UNITSIZE_MAX >> 0;
+  rst[0].min_restoration_unit_size = RESTORATION_UNITSIZE_MAX >> 2;
+
+  // Large frames raise the minimum to RESTORATION_UNITSIZE_MAX >> 1 to reduce
+  // encode complexity; the area is formed in 64 bits so it cannot overflow.
+  if ((int64_t)width * height > (int64_t)1920 * 1080 * 2)
+    rst[0].min_restoration_unit_size = RESTORATION_UNITSIZE_MAX >> 1;
+
+  rst[1].max_restoration_unit_size = rst[0].max_restoration_unit_size >> s;
+  rst[1].min_restoration_unit_size = rst[0].min_restoration_unit_size >> s;
+
+  rst[2].max_restoration_unit_size = rst[1].max_restoration_unit_size;
+  rst[2].min_restoration_unit_size = rst[1].min_restoration_unit_size;
+
+  rst[0].restoration_unit_size = rst[0].min_restoration_unit_size;
+  rst[1].restoration_unit_size = rst[1].min_restoration_unit_size;
+  rst[2].restoration_unit_size = rst[2].min_restoration_unit_size;
+}
+#endif  // CONFIG_FLEXIBLE_RU_SIZE
+
 static void extend_frame_highbd(uint16_t *data, int width, int height,
                                 int stride, int border_horz, int border_vert) {
   uint16_t *data_p;
@@ -527,8 +603,12 @@
   extend_frame_highbd(data, width, height, stride, border_horz, border_vert);
 }
 
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+void copy_tile(int width, int height, const uint16_t *src,
+#else
 static void copy_tile(int width, int height, const uint16_t *src,
-                      int src_stride, uint16_t *dst, int dst_stride) {
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+               int src_stride, uint16_t *dst, int dst_stride) {
   copy_tile_highbd(width, height, src, src_stride, dst, dst_stride);
 }
 
@@ -1822,6 +1902,7 @@
 #endif  // CONFIG_WIENER_NONSEP_CROSS_FILT
 
   const int block_size = 4;
+
   for (int r = 0; r < height; r += block_size) {
     const int h = AOMMIN(block_size, height - r);
     const uint16_t *dgd_row = dgd + r * stride;
@@ -1852,7 +1933,11 @@
 
   int is_uv = rui->plane != AOM_PLANE_Y;
   const NonsepFilterConfig *orig_config =
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+      get_wienerns_config(rui->base_qindex, is_uv, 0);
+#else
       get_wienerns_config(rui->base_qindex, is_uv);
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
 #if ADD_CENTER_TAP_TO_WIENERNS
   NonsepFilterConfig adjusted_config;
   WienerNonsepInfo adjusted_info;
@@ -1890,12 +1975,111 @@
   }
 }
 
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+// Convolving process of cross-component wiener filter for a 4x4 unit
+void av1_convolve_nonsep_cross_highbd_c(const uint16_t *dgd, int width,
+                                        int height, int stride,
+                                        const uint16_t *dgd2, int stride2,
+                                        const NonsepFilterConfig *config,
+                                        const int16_t *filter, uint16_t *dst,
+                                        int dst_stride, int bit_depth) {
+  (void)dgd;
+  (void)stride;
+  for (int i = 0; i < height; ++i) {
+    for (int j = 0; j < width; ++j) {
+      const int dgd2_id = i * stride2 + j;
+      const int dst_id = i * dst_stride + j;
+      int32_t tmp = (int32_t)dst[dst_id] * (1 << config->prec_bits);
+
+      for (int k = 0; k < config->num_pixels; ++k) {
+        const int pos = config->config[k][NONSEP_BUF_POS];
+        const int r = config->config[k][NONSEP_ROW_ID];
+        const int c = config->config[k][NONSEP_COL_ID];
+        const int ir = config->strict_bounds
+                           ? AOMMAX(AOMMIN(i + r, height - 1), 0)
+                           : i + r;
+        const int jc =
+            config->strict_bounds ? AOMMAX(AOMMIN(j + c, width - 1), 0) : j + c;
+        int16_t diff =
+            clip_base(dgd2[(ir)*stride2 + (jc)] - dgd2[dgd2_id], bit_depth);
+        diff = k % 2 ? -diff : diff;
+        tmp += filter[pos] * diff;
+      }
+      tmp = ROUND_POWER_OF_TWO_SIGNED(tmp, config->prec_bits);
+      dst[dst_id] = clip_pixel_highbd(tmp, bit_depth);
+    }
+  }
+}
+
+// Cross-component wiener filtering for a process unit
+void apply_cross_wienerns_class_id_highbd(
+    const uint16_t *dgd, int width, int height, int stride,
+    const WienerNonsepInfo *wienerns_info,
+    const NonsepFilterConfig *nsfilter_config, uint16_t *dst, int dst_stride,
+    int plane, const uint16_t *luma, int luma_stride, int bit_depth) {
+  assert(plane != AOM_PLANE_Y);
+  (void)plane;
+  assert(nsfilter_config->num_pixels2 == 0);
+  assert(wienerns_info->num_classes == 1);
+
+  const int16_t *filter = const_nsfilter_taps(wienerns_info, 0);
+
+  const int block_size = 4;
+  for (int r = 0; r < height; r += block_size) {
+    const int h = AOMMIN(block_size, height - r);
+    const uint16_t *dgd_row = dgd + r * stride;
+    const uint16_t *luma_row = luma + r * luma_stride;
+    uint16_t *dst_row = dst + r * dst_stride;
+
+    for (int c = 0; c < width; c += block_size) {
+      const int w = AOMMIN(block_size, width - c);
+      av1_convolve_nonsep_cross_highbd_c(
+          dgd_row + c, w, h, stride, luma_row + c, luma_stride, nsfilter_config,
+          filter, dst_row + c, dst_stride, bit_depth);
+    }
+  }
+}
+
+// Cross-component wiener filtering for a stripe of process units
+static void wiener_ns_cross_filter_stripe_highbd(
+    const RestorationUnitInfo *rui, int stripe_width, int stripe_height,
+    int procunit_width, const uint16_t *src, int src_stride, uint16_t *dst,
+    int dst_stride, int32_t *tmpbuf, int bit_depth) {
+  (void)tmpbuf;
+  (void)bit_depth;
+
+  assert(rui->wienerns_cross_info.num_classes == 1);
+
+  int is_uv = rui->plane != AOM_PLANE_Y;
+
+  assert(is_uv);
+  const NonsepFilterConfig *orig_config =
+      get_wienerns_config(rui->base_qindex, is_uv, 1);
+
+  const NonsepFilterConfig *nsfilter_config = orig_config;
+  const WienerNonsepInfo *nsfilter_info = &rui->wienerns_cross_info;
+
+  for (int j = 0; j < stripe_width; j += procunit_width) {
+    int w = AOMMIN(procunit_width, stripe_width - j);
+    apply_cross_wienerns_class_id_highbd(
+        src + j, w, stripe_height, src_stride, nsfilter_info, nsfilter_config,
+        dst + j, dst_stride, rui->plane, rui->luma + j, rui->luma_stride,
+        bit_depth);
+  }
+}
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+
 #if CONFIG_WIENER_NONSEP_CROSS_FILT
 uint16_t *wienerns_copy_luma_highbd(const uint16_t *dgd, int height_y,
                                     int width_y, int in_stride,
                                     uint16_t **luma_hbd, int height_uv,
                                     int width_uv, int border, int out_stride,
-                                    int bd) {
+                                    int bd
+#if WIENERNS_CROSS_FILT_LUMA_TYPE == 2
+                                    ,
+                                    int ds_type
+#endif
+) {
   (void)bd;
   uint16_t *aug_luma = (uint16_t *)malloc(
       sizeof(uint16_t) * (width_uv + 2 * border) * (height_uv + 2 * border));
@@ -1973,6 +2157,25 @@
   } else {
     assert(0 && "Invalid dimensions");
   }
+#elif WIENERNS_CROSS_FILT_LUMA_TYPE == 2
+  const int ss_x = (((width_y + 1) >> 1) == width_uv);
+  const int ss_y = (((height_y + 1) >> 1) == height_uv);
+  if (ss_x && ss_y && ds_type == 1) {
+    for (int r = 0; r < height_uv; ++r) {
+      for (int c = 0; c < width_uv; ++c) {
+        (*luma)[r * out_stride + c] = (dgd[2 * r * in_stride + 2 * c] +
+                                       dgd[(2 * r + 1) * in_stride + 2 * c]) /
+                                      2;
+      }
+    }
+  } else {
+    for (int r = 0; r < height_uv; ++r) {
+      for (int c = 0; c < width_uv; ++c) {
+        (*luma)[r * out_stride + c] =
+            dgd[(1 + ss_y) * r * in_stride + (1 + ss_x) * c];
+      }
+    }
+  }
 #else
   av1_highbd_resize_plane(dgd, height_y, width_y, in_stride, *luma, height_uv,
                           width_uv, out_stride, bd);
@@ -2054,16 +2257,18 @@
 #elif CONFIG_PC_WIENER
 #define NUM_STRIPE_FILTERS 3
 
-static const stripe_filter_fun stripe_filters[NUM_STRIPE_FILTERS] = {
-  wiener_filter_stripe_highbd,
-  sgrproj_filter_stripe_highbd,
-  pc_wiener_stripe_highbd,
-};
+               static const stripe_filter_fun
+                   stripe_filters[NUM_STRIPE_FILTERS] = {
+                     wiener_filter_stripe_highbd,
+                     sgrproj_filter_stripe_highbd,
+                     pc_wiener_stripe_highbd,
+                   };
 #else
 #define NUM_STRIPE_FILTERS 2
-static const stripe_filter_fun stripe_filters[NUM_STRIPE_FILTERS] = {
-  wiener_filter_stripe_highbd, sgrproj_filter_stripe_highbd
-};
+               static const stripe_filter_fun
+                   stripe_filters[NUM_STRIPE_FILTERS] = {
+                     wiener_filter_stripe_highbd, sgrproj_filter_stripe_highbd
+                   };
 #endif  // CONFIG_WIENER_NONSEP && CONFIG_PC_WIENER
 
 // Filter one restoration unit
@@ -2104,6 +2309,7 @@
   const uint16_t *luma_in_ru = NULL;
   const int enable_cross_buffers =
       unit_rtype == RESTORE_WIENER_NONSEP && rui->plane != AOM_PLANE_Y;
+
   if (enable_cross_buffers)
     luma_in_ru =
         rui->luma + limits->v_start * rui->luma_stride + limits->h_start;
@@ -2195,6 +2401,86 @@
 #endif  // CONFIG_PC_WIENER
 }
 
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+// Cross-component filtering for one restoration unit
+void av1_wiener_ns_cross_filter_unit(
+    const RestorationTileLimits *limits, const RestorationUnitInfo *rui,
+    const RestorationStripeBoundaries *rsb, RestorationLineBuffers *rlbs,
+    const AV1PixelRect *tile_rect, int tile_stripe0, int ss_x, int ss_y,
+    int bit_depth, uint16_t *data, int stride, uint16_t *dst, int dst_stride,
+    int32_t *tmpbuf, int optimized_lr) {
+  (void)rsb;
+  (void)rlbs;
+  (void)optimized_lr;
+  (void)tile_stripe0;
+
+  RestorationType unit_cross_rtype = rui->cross_restoration_type;
+
+  const int unit_h = limits->v_end - limits->v_start;
+  const int unit_w = limits->h_end - limits->h_start;
+  uint16_t *data_tl = data + limits->v_start * stride + limits->h_start;
+  uint16_t *dst_tl = dst + limits->v_start * dst_stride + limits->h_start;
+
+  if (unit_cross_rtype == RESTORE_NONE) {
+    return;
+  }
+
+  assert(unit_cross_rtype == RESTORE_WIENER_NONSEP);
+
+  const stripe_filter_fun stripe_filter = wiener_ns_cross_filter_stripe_highbd;
+
+  const int procunit_width = RESTORATION_PROC_UNIT_SIZE >> ss_x;
+
+  // rui is a pointer to a const but we modify its contents when calling
+  // stripe_filter(). Use a temporary for now and refactor the datastructure
+  // later.
+  RestorationUnitInfo rui_contents = *rui;
+  RestorationUnitInfo *tmp_rui = &rui_contents;
+
+  const uint16_t *luma_in_plane = rui->luma;
+  const uint16_t *luma_in_ru =
+      luma_in_plane + limits->v_start * rui->luma_stride + limits->h_start;
+
+  // Convolve the whole tile one stripe at a time
+  RestorationTileLimits remaining_stripes = *limits;
+  int i = 0;
+  while (i < unit_h) {
+    int copy_above, copy_below;
+    remaining_stripes.v_start = limits->v_start + i;
+
+    get_stripe_boundary_info(&remaining_stripes, tile_rect, ss_y, &copy_above,
+                             &copy_below);
+
+    const int full_stripe_height = RESTORATION_PROC_UNIT_SIZE >> ss_y;
+    const int runit_offset = RESTORATION_UNIT_OFFSET >> ss_y;
+
+    // Work out where this stripe's boundaries are within
+    // rsb->stripe_boundary_{above,below}
+    const int tile_stripe =
+        (remaining_stripes.v_start - tile_rect->top + runit_offset) /
+        full_stripe_height;
+    //    const int frame_stripe = tile_stripe0 + tile_stripe;
+    //    const int rsb_row = RESTORATION_CTX_VERT * frame_stripe;
+
+    // Calculate this stripe's height, based on two rules:
+    // * The topmost stripe in each tile is 8 luma pixels shorter than usual.
+    // * We can't extend past the end of the current restoration unit
+    const int nominal_stripe_height =
+        full_stripe_height - ((tile_stripe == 0) ? runit_offset : 0);
+    const int h = AOMMIN(nominal_stripe_height,
+                         remaining_stripes.v_end - remaining_stripes.v_start);
+
+    tmp_rui->luma = luma_in_ru + i * rui->luma_stride;
+
+    stripe_filter(tmp_rui, unit_w, h, procunit_width, data_tl + i * stride,
+                  stride, dst_tl + i * dst_stride, dst_stride, tmpbuf,
+                  bit_depth);
+
+    i += h;
+  }
+}
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+
 static void filter_frame_on_unit(const RestorationTileLimits *limits,
                                  const AV1PixelRect *tile_rect,
                                  int rest_unit_idx, int rest_unit_idx_seq,
@@ -2227,6 +2513,15 @@
       ctxt->tile_stripe0, ctxt->ss_x, ctxt->ss_y, ctxt->bit_depth, ctxt->data8,
       ctxt->data_stride, ctxt->dst8, ctxt->dst_stride, tmpbuf,
       rsi->optimized_lr);
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+  const int is_uv = (ctxt->plane != AOM_PLANE_Y);
+  if (is_uv)
+    av1_wiener_ns_cross_filter_unit(
+        limits, &rsi->unit_info[rest_unit_idx], &rsi->boundaries, rlbs,
+        tile_rect, ctxt->tile_stripe0, ctxt->ss_x, ctxt->ss_y, ctxt->bit_depth,
+        ctxt->data8, ctxt->data_stride, ctxt->dst8, ctxt->dst_stride, tmpbuf,
+        rsi->optimized_lr);
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
 }
 
 void av1_loop_restoration_filter_frame_init(AV1LrStruct *lr_ctxt,
@@ -2251,9 +2546,16 @@
   for (int plane = 0; plane < num_planes; ++plane) {
     RestorationInfo *rsi = &cm->rst_info[plane];
     RestorationType rtype = rsi->frame_restoration_type;
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+    RestorationType cross_rtype = rsi->frame_cross_restoration_type;
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
     rsi->optimized_lr = optimized_lr;
 
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+    if (rtype == RESTORE_NONE && cross_rtype == RESTORE_NONE) {
+#else
     if (rtype == RESTORE_NONE) {
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
       continue;
     }
 
@@ -2289,7 +2591,13 @@
                                          aom_yv12_partial_coloc_copy_v };
   assert(num_planes <= 3);
   for (int plane = 0; plane < num_planes; ++plane) {
-    if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE) continue;
+    if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+        && cm->rst_info[plane].frame_cross_restoration_type == RESTORE_NONE
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+    )
+      continue;
+
     AV1PixelRect tile_rect = loop_rest_ctxt->ctxt[plane].tile_rect;
     copy_funs[plane](loop_rest_ctxt->dst, loop_rest_ctxt->frame, tile_rect.left,
                      tile_rect.right, tile_rect.top, tile_rect.bottom);
@@ -2308,14 +2616,22 @@
       dgd->buffers[AOM_PLANE_Y], dgd->crop_heights[AOM_PLANE_Y],
       dgd->crop_widths[AOM_PLANE_Y], dgd->strides[AOM_PLANE_Y], &luma,
       dgd->crop_heights[1], dgd->crop_widths[1], WIENERNS_UV_BRD, luma_stride,
-      cm->seq_params.bit_depth);
+      cm->seq_params.bit_depth
+#if WIENERNS_CROSS_FILT_LUMA_TYPE == 2
+      ,
+      cm->seq_params.enable_cfl_ds_filter == 1
+#endif
+  );
   assert(luma_buf != NULL);
 #endif  // CONFIG_WIENER_NONSEP_CROSS_FILT
 
   for (int plane = 0; plane < num_planes; ++plane) {
-    if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE) {
+    if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+        && cm->rst_info[plane].frame_cross_restoration_type == RESTORE_NONE
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+    )
       continue;
-    }
 
 #if CONFIG_WIENER_NONSEP || CONFIG_PC_WIENER
     ctxt[plane].plane = plane;
diff --git a/av1/common/restoration.h b/av1/common/restoration.h
index 8091597..f66987d 100644
--- a/av1/common/restoration.h
+++ b/av1/common/restoration.h
@@ -82,7 +82,11 @@
    (RESTORATION_PROC_UNIT_SIZE + RESTORATION_BORDER_VERT * 2 + \
     RESTORATION_PADDING))
 
+#if CONFIG_FLEXIBLE_RU_SIZE
+#define RESTORATION_UNITSIZE_MAX 512
+#else
 #define RESTORATION_UNITSIZE_MAX 256
+#endif  // CONFIG_FLEXIBLE_RU_SIZE
 #define RESTORATION_UNITPELS_HORZ_MAX \
   (RESTORATION_UNITSIZE_MAX * 3 / 2 + 2 * RESTORATION_BORDER_HORZ + 16)
 #define RESTORATION_UNITPELS_VERT_MAX                                \
@@ -203,6 +207,9 @@
 typedef struct {
   const WienernsFilterParameters *y;
   const WienernsFilterParameters *uv;
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+  const WienernsFilterParameters *uv_cross;
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
 } WienernsFilterPairParameters;
 
 extern const WienernsFilterPairParameters wienerns_filters_lowqp;
@@ -212,17 +219,33 @@
 #define USE_CENTER_WIENER_NONSEP 0
 
 static INLINE const WienernsFilterParameters *get_wienerns_parameters(
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+    int qindex, int is_uv, int is_cross) {
+#else
     int qindex, int is_uv) {
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
   const WienernsFilterPairParameters *pair_nsfilter_params = NULL;
   (void)qindex;
   pair_nsfilter_params = &wienerns_filters_midqp;
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+  if (is_cross) return pair_nsfilter_params->uv_cross;
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
   return is_uv ? pair_nsfilter_params->uv : pair_nsfilter_params->y;
 }
 
 static INLINE const NonsepFilterConfig *get_wienerns_config(int qindex,
-                                                            int is_uv) {
+                                                            int is_uv
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+                                                            ,
+                                                            int is_cross
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+) {
   const WienernsFilterParameters *base_nsfilter_params =
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+      get_wienerns_parameters(qindex, is_uv, is_cross);
+#else
       get_wienerns_parameters(qindex, is_uv);
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
   return &base_nsfilter_params->nsfilter_config;
 }
 #endif  // CONFIG_WIENER_NONSEP
@@ -258,6 +281,11 @@
    */
   RestorationType restoration_type;
 
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+  /*! Cross restoration type*/
+  RestorationType cross_restoration_type;
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+
   /*!
    * Wiener filter parameters if restoration_type indicates Wiener
    */
@@ -324,6 +352,17 @@
    */
   PcwienerBuffers *pcwiener_buffers;
 #endif  // CONFIG_PC_WIENER
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+  /*!
+   * Nonseparable Wiener cross filter information.
+   */
+  WienerNonsepInfo wienerns_cross_info;
+
+  /*!
+   * wienerns cross filter idx of the current RU
+   */
+  int wienerns_cross_filter_idx;
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
 } RestorationUnitInfo;
 
 /*!\cond */
@@ -382,6 +421,16 @@
    */
   int restoration_unit_size;
 
+#if CONFIG_FLEXIBLE_RU_SIZE
+  /*!
+   * Maximum restoration unit size
+   */
+  int max_restoration_unit_size;
+  /*!
+   * Minimum restoration unit size
+   */
+  int min_restoration_unit_size;
+#endif  // CONFIG_FLEXIBLE_RU_SIZE
   /**
    * \name Fields allocated and initialised by av1_alloc_restoration_struct.
    * (horz_)units_per_tile give the number of restoration units in
@@ -413,6 +462,11 @@
    */
   RestorationUnitInfo *unit_info;
 
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+  /*! Cross restoration type for frame*/
+  RestorationType frame_cross_restoration_type;
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+
   /*!
    * Restoration Stripe boundary info
    */
@@ -446,24 +500,33 @@
   sgrproj_info->xqd[1] = (SGRPROJ_PRJ_MIN1 + SGRPROJ_PRJ_MAX1) / 2;
 }
 
-static INLINE void set_default_wiener(WienerInfo *wiener_info) {
-  wiener_info->vfilter[0] = wiener_info->hfilter[0] = WIENER_FILT_TAP0_MIDV;
+static INLINE void set_default_wiener(WienerInfo *wiener_info, int chroma) {
+  const int wiener_filt_tap0_midv = chroma ? 0 : WIENER_FILT_TAP0_MIDV;
+  wiener_info->vfilter[0] = wiener_info->hfilter[0] = wiener_filt_tap0_midv;
   wiener_info->vfilter[1] = wiener_info->hfilter[1] = WIENER_FILT_TAP1_MIDV;
   wiener_info->vfilter[2] = wiener_info->hfilter[2] = WIENER_FILT_TAP2_MIDV;
   wiener_info->vfilter[WIENER_HALFWIN] = wiener_info->hfilter[WIENER_HALFWIN] =
       -2 *
-      (WIENER_FILT_TAP2_MIDV + WIENER_FILT_TAP1_MIDV + WIENER_FILT_TAP0_MIDV);
+      (WIENER_FILT_TAP2_MIDV + WIENER_FILT_TAP1_MIDV + wiener_filt_tap0_midv);
   wiener_info->vfilter[4] = wiener_info->hfilter[4] = WIENER_FILT_TAP2_MIDV;
   wiener_info->vfilter[5] = wiener_info->hfilter[5] = WIENER_FILT_TAP1_MIDV;
-  wiener_info->vfilter[6] = wiener_info->hfilter[6] = WIENER_FILT_TAP0_MIDV;
+  wiener_info->vfilter[6] = wiener_info->hfilter[6] = wiener_filt_tap0_midv;
 }
 
 #if CONFIG_WIENER_NONSEP
 static INLINE void set_default_wienerns(WienerNonsepInfo *wienerns_info,
-                                        int qindex, int num_classes,
-                                        int chroma) {
+                                        int qindex, int num_classes, int chroma
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+                                        ,
+                                        int is_cross
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+) {
   const WienernsFilterParameters *nsfilter_params =
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+      get_wienerns_parameters(qindex, chroma, is_cross);
+#else
       get_wienerns_parameters(qindex, chroma);
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
   wienerns_info->num_classes = num_classes;
   for (int c_id = 0; c_id < wienerns_info->num_classes; ++c_id) {
 #if CONFIG_LR_MERGE_COEFFS
@@ -482,13 +545,24 @@
 
 // 0: Skip luma pixels to scale down to chroma (simplest)
 // 1: Average 4 or 2 luma pixels to scale down to chroma
-// 2: Use 8-tap downsampling filter
+// 2: Average 2 (top and bottom) luma pixels to scale down to chroma for 420,
+//    could be based on the luma downsampling type from CFL tool
+// 3: Use 8-tap downsampling filter
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+#define WIENERNS_CROSS_FILT_LUMA_TYPE 2
+#else
 #define WIENERNS_CROSS_FILT_LUMA_TYPE 0
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
 
 uint16_t *wienerns_copy_luma_highbd(const uint16_t *dgd, int height_y,
                                     int width_y, int in_stride, uint16_t **luma,
                                     int height_uv, int width_uv, int border,
-                                    int out_stride, int bd);
+                                    int out_stride, int bd
+#if WIENERNS_CROSS_FILT_LUMA_TYPE == 2
+                                    ,
+                                    int ds_type
+#endif
+);
 #endif  // CONFIG_WIENER_NONSEP_CROSS_FILT
 
 #endif  // CONFIG_WIENER_NONSEP
@@ -533,6 +607,9 @@
   FilterFrameCtxt ctxt[MAX_MB_PLANE];
   YV12_BUFFER_CONFIG *frame;
   YV12_BUFFER_CONFIG *dst;
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+  YV12_BUFFER_CONFIG *pre_filter_frame;
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
 } AV1LrStruct;
 
 extern const sgr_params_type av1_sgr_params[SGRPROJ_PARAMS];
@@ -587,6 +664,18 @@
     int bit_depth, uint16_t *data, int stride, uint16_t *dst, int dst_stride,
     int32_t *tmpbuf, int optimized_lr);
 
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+/*!\brief Function for applying cross wienerns filter to a single unit.
+ * The inputs are the same as those of av1_loop_restoration_filter_unit.
+ */
+void av1_wiener_ns_cross_filter_unit(
+    const RestorationTileLimits *limits, const RestorationUnitInfo *rui,
+    const RestorationStripeBoundaries *rsb, RestorationLineBuffers *rlbs,
+    const AV1PixelRect *tile_rect, int tile_stripe0, int ss_x, int ss_y,
+    int bit_depth, uint16_t *data, int stride, uint16_t *dst, int dst_stride,
+    int32_t *tmpbuf, int optimized_lr);
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+
 /*!\brief Function for applying loop restoration filter to a frame
  *
  * \ingroup in_loop_restoration
@@ -687,7 +776,15 @@
 void av1_lr_sync_read_dummy(void *const lr_sync, int r, int c, int plane);
 void av1_lr_sync_write_dummy(void *const lr_sync, int r, int c,
                              const int sb_cols, int plane);
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+void copy_tile(int width, int height, const uint16_t *src, int src_stride,
+               uint16_t *dst, int dst_stride);
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
 
+#if CONFIG_FLEXIBLE_RU_SIZE
+void set_restoration_unit_size(int width, int height, int sx, int sy,
+                               RestorationInfo *rst);
+#endif  // CONFIG_FLEXIBLE_RU_SIZE
 /*!\endcond */
 
 #ifdef __cplusplus
diff --git a/av1/common/scan.c b/av1/common/scan.c
index af2d30e..96bae5e 100644
--- a/av1/common/scan.c
+++ b/av1/common/scan.c
@@ -15,7 +15,7 @@
 #include "av1/common/common_data.h"
 #include "av1/common/scan.h"
 
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
 DECLARE_ALIGNED(16, static const int16_t, default_scan_4x4[16]) = {
   0, 4, 1, 8, 5, 2, 12, 9, 6, 3, 13, 10, 7, 14, 11, 15,
 };
@@ -23,7 +23,7 @@
 DECLARE_ALIGNED(16, static const int16_t,
                 default_scan_4x4[16]) = { 0, 1,  4,  8,  5, 2,  3,  6,
                                           9, 12, 13, 10, 7, 11, 14, 15 };
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 
 DECLARE_ALIGNED(16, static const int16_t, mcol_scan_4x4[16]) = {
   0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15,
@@ -33,7 +33,7 @@
   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
 };
 
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
 DECLARE_ALIGNED(16, static const int16_t, default_scan_4x8[32]) = {
   0,  4,  1,  8,  5,  2,  12, 9,  6,  3,  16, 13, 10, 7,  20, 17,
   14, 11, 24, 21, 18, 15, 28, 25, 22, 19, 29, 26, 23, 30, 27, 31,
@@ -43,7 +43,7 @@
   0,  1,  4,  2,  5,  8,  3,  6,  9,  12, 7,  10, 13, 16, 11, 14,
   17, 20, 15, 18, 21, 24, 19, 22, 25, 28, 23, 26, 29, 27, 30, 31,
 };
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 
 DECLARE_ALIGNED(16, static const int16_t, mcol_scan_4x8[32]) = {
   0, 4, 8,  12, 16, 20, 24, 28, 1, 5, 9,  13, 17, 21, 25, 29,
@@ -70,7 +70,7 @@
   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
 };
 
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
 DECLARE_ALIGNED(16, static const int16_t, default_scan_4x16[64]) = {
   0,  4,  1,  8,  5,  2,  12, 9,  6,  3,  16, 13, 10, 7,  20, 17,
   14, 11, 24, 21, 18, 15, 28, 25, 22, 19, 32, 29, 26, 23, 36, 33,
@@ -84,7 +84,7 @@
   33, 36, 31, 34, 37, 40, 35, 38, 41, 44, 39, 42, 45, 48, 43, 46,
   49, 52, 47, 50, 53, 56, 51, 54, 57, 60, 55, 58, 61, 59, 62, 63,
 };
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 
 DECLARE_ALIGNED(16, static const int16_t, default_scan_16x4[64]) = {
   0,  16, 1,  32, 17, 2,  48, 33, 18, 3,  49, 34, 19, 4,  50, 35,
@@ -121,7 +121,7 @@
   12, 28, 44, 60, 13, 29, 45, 61, 14, 30, 46, 62, 15, 31, 47, 63,
 };
 
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
 DECLARE_ALIGNED(16, static const int16_t, default_scan_8x32[256]) = {
   0,   8,   1,   16,  9,   2,   24,  17,  10,  3,   32,  25,  18,  11,  4,
   40,  33,  26,  19,  12,  5,   48,  41,  34,  27,  20,  13,  6,   56,  49,
@@ -163,7 +163,7 @@
   250, 223, 230, 237, 244, 251, 231, 238, 245, 252, 239, 246, 253, 247, 254,
   255,
 };
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 
 DECLARE_ALIGNED(16, static const int16_t, default_scan_32x8[256]) = {
   0,   32,  1,   64,  33,  2,   96,  65,  34,  3,   128, 97,  66,  35,  4,
@@ -268,7 +268,7 @@
   30, 62, 94, 126, 158, 190, 222, 254, 31, 63, 95, 127, 159, 191, 223, 255,
 };
 
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
 DECLARE_ALIGNED(16, static const int16_t, default_scan_8x8[64]) = {
   0,  8,  1,  16, 9,  2,  24, 17, 10, 3,  32, 25, 18, 11, 4,  40,
   33, 26, 19, 12, 5,  48, 41, 34, 27, 20, 13, 6,  56, 49, 42, 35,
@@ -282,7 +282,7 @@
   35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
   58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63
 };
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 
 DECLARE_ALIGNED(16, static const int16_t, mcol_scan_8x8[64]) = {
   0, 8,  16, 24, 32, 40, 48, 56, 1, 9,  17, 25, 33, 41, 49, 57,
@@ -298,7 +298,7 @@
   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
 };
 
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
 DECLARE_ALIGNED(16, static const int16_t, default_scan_8x16[128]) = {
   0,  8,   1,   16,  9,   2,   24,  17,  10,  3,   32,  25,  18,  11,  4,   40,
   33, 26,  19,  12,  5,   48,  41,  34,  27,  20,  13,  6,   56,  49,  42,  35,
@@ -321,7 +321,7 @@
   114, 121, 87,  94,  101, 108, 115, 122, 95,  102, 109, 116, 123, 103, 110,
   117, 124, 111, 118, 125, 119, 126, 127,
 };
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 
 DECLARE_ALIGNED(16, static const int16_t, default_scan_16x8[128]) = {
   0,  16,  1,   32, 17,  2,   48,  33,  18, 3,  64,  49,  34,  19,  4,   80,
@@ -380,7 +380,7 @@
   120, 121, 122, 123, 124, 125, 126, 127,
 };
 
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
 DECLARE_ALIGNED(16, static const int16_t, default_scan_16x32[512]) = {
   0,   16,  1,   32,  17,  2,   48,  33,  18,  3,   64,  49,  34,  19,  4,
   80,  65,  50,  35,  20,  5,   96,  81,  66,  51,  36,  21,  6,   112, 97,
@@ -456,7 +456,7 @@
   491, 506, 447, 462, 477, 492, 507, 463, 478, 493, 508, 479, 494, 509, 495,
   510, 511,
 };
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 
 DECLARE_ALIGNED(16, static const int16_t, default_scan_32x16[512]) = {
   0,   32,  1,   64,  33,  2,   96,  65,  34,  3,   128, 97,  66,  35,  4,
@@ -645,7 +645,7 @@
   510, 511,
 };
 
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
 DECLARE_ALIGNED(16, static const int16_t, default_scan_16x16[256]) = {
   0,   16,  1,   32,  17,  2,   48,  33,  18,  3,   64,  49,  34,  19,  4,
   80,  65,  50,  35,  20,  5,   96,  81,  66,  51,  36,  21,  6,   112, 97,
@@ -687,7 +687,7 @@
   250, 251, 236, 221, 206, 191, 207, 222, 237, 252, 253, 238, 223, 239, 254,
   255
 };
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 
 DECLARE_ALIGNED(16, static const int16_t, mcol_scan_16x16[256]) = {
   0,  16, 32, 48, 64, 80, 96,  112, 128, 144, 160, 176, 192, 208, 224, 240,
@@ -888,7 +888,7 @@
   1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023,
 };
 
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
 DECLARE_ALIGNED(16, static const int16_t, default_scan_32x32[1024]) = {
   0,   32,   1,    64,   33,  2,   96,   65,  34,   3,    128,  97,  66,  35,
   4,   160,  129,  98,   67,  36,  5,    192, 161,  130,  99,   68,  37,  6,
@@ -1047,9 +1047,9 @@
   862,  831,  863,  894,  925,  956, 987,  1018, 1019, 988,  957,  926, 895,
   927,  958,  989,  1020, 1021, 990, 959,  991,  1022, 1023
 };
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
 DECLARE_ALIGNED(16, static const int16_t,
                 av1_default_iscan_4x4[16]) = { 0, 2, 5,  9,  1, 4,  8,  12,
                                                3, 7, 11, 14, 6, 10, 13, 15 };
@@ -1057,7 +1057,7 @@
 DECLARE_ALIGNED(16, static const int16_t,
                 av1_default_iscan_4x4[16]) = { 0, 1, 5,  6,  2, 4,  7,  12,
                                                3, 8, 11, 13, 9, 10, 14, 15 };
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 
 DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_4x4[16]) = {
   0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15,
@@ -1067,7 +1067,7 @@
   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
 };
 
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
 DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_4x8[32]) = {
   0,  2,  5,  9,  1,  4,  8,  13, 3,  7,  12, 17, 6,  11, 16, 21,
   10, 15, 20, 25, 14, 19, 24, 28, 18, 23, 27, 30, 22, 26, 29, 31
@@ -1077,7 +1077,7 @@
   0,  1,  3,  6,  2,  4,  7,  10, 5,  8,  11, 14, 9,  12, 15, 18,
   13, 16, 19, 22, 17, 20, 23, 26, 21, 24, 27, 29, 25, 28, 30, 31,
 };
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 
 DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_4x8[32]) = {
   0, 8,  16, 24, 1, 9,  17, 25, 2, 10, 18, 26, 3, 11, 19, 27,
@@ -1104,7 +1104,7 @@
   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
 };
 
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
 DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_4x16[64]) = {
   0,  2,  5,  9,  1,  4,  8,  13, 3,  7,  12, 17, 6,  11, 16, 21,
   10, 15, 20, 25, 14, 19, 24, 29, 18, 23, 28, 33, 22, 27, 32, 37,
@@ -1118,7 +1118,7 @@
   29, 32, 35, 38, 33, 36, 39, 42, 37, 40, 43, 46, 41, 44, 47, 50,
   45, 48, 51, 54, 49, 52, 55, 58, 53, 56, 59, 61, 57, 60, 62, 63,
 };
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 
 DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_16x4[64]) = {
   0, 2,  5,  9,  13, 17, 21, 25, 29, 33, 37, 41, 45, 49, 53, 57,
@@ -1155,7 +1155,7 @@
   3, 7, 11, 15, 19, 23, 27, 31, 35, 39, 43, 47, 51, 55, 59, 63,
 };
 
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
 DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_8x32[256]) = {
   0,   2,   5,   9,   14,  20,  27,  35,  1,   4,   8,   13,  19,  26,  34,
   43,  3,   7,   12,  18,  25,  33,  42,  51,  6,   11,  17,  24,  32,  41,
@@ -1197,7 +1197,7 @@
   219, 226, 233, 239, 244, 248, 251, 253, 227, 234, 240, 245, 249, 252, 254,
   255,
 };
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 
 DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_32x8[256]) = {
   0,   2,   5,   9,   14,  20,  27,  35,  43,  51,  59,  67,  75,  83,  91,
@@ -1316,7 +1316,7 @@
   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
 };
 
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
 DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_8x8[64]) = {
   0,  2,  5,  9,  14, 20, 27, 35, 1,  4,  8,  13, 19, 26, 34, 42,
   3,  7,  12, 18, 25, 33, 41, 48, 6,  11, 17, 24, 32, 40, 47, 53,
@@ -1330,9 +1330,9 @@
   10, 19, 23, 32, 39, 45, 52, 54, 20, 22, 33, 38, 46, 51, 55, 60,
   21, 34, 37, 47, 50, 56, 59, 61, 35, 36, 48, 49, 57, 58, 62, 63
 };
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
 DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_8x16[128]) = {
   0,  2,  5,   9,   14,  20,  27,  35,  1,  4,   8,   13,  19,  26,  34,  43,
   3,  7,  12,  18,  25,  33,  42,  51,  6,  11,  17,  24,  32,  41,  50,  59,
@@ -1354,7 +1354,7 @@
   75, 82, 89,  96,  103, 109, 114, 118, 83, 90,  97,  104, 110, 115, 119, 122,
   91, 98, 105, 111, 116, 120, 123, 125, 99, 106, 112, 117, 121, 124, 126, 127,
 };
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 
 DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_16x8[128]) = {
   0,  2,  5,  9,  14, 20, 27, 35, 43, 51,  59,  67,  75,  83,  91,  99,
@@ -1413,7 +1413,7 @@
   120, 121, 122, 123, 124, 125, 126, 127,
 };
 
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
 DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_16x32[512]) = {
   0,   2,   5,   9,   14,  20,  27,  35,  44,  54,  65,  77,  90,  104, 119,
   135, 1,   4,   8,   13,  19,  26,  34,  43,  53,  64,  76,  89,  103, 118,
@@ -1489,7 +1489,7 @@
   509, 391, 406, 420, 433, 445, 456, 466, 475, 483, 490, 496, 501, 505, 508,
   510, 511,
 };
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 
 DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_32x16[512]) = {
   0,   2,   5,   9,   14,  20,  27,  35,  44,  54,  65,  77,  90,  104, 119,
@@ -1718,7 +1718,7 @@
   255,
 };
 
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
 DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_16x16[256]) = {
   0,   2,   5,   9,   14,  20,  27,  35,  44,  54,  65,  77,  90,  104, 119,
   135, 1,   4,   8,   13,  19,  26,  34,  43,  53,  64,  76,  89,  103, 118,
@@ -1760,7 +1760,7 @@
   135, 136, 164, 165, 189, 190, 210, 211, 227, 228, 240, 241, 249, 250, 254,
   255
 };
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 
 DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_32x32[1024]) = {
   0,   32,   64,  96,   128, 160,  192, 224,  256, 288,  320, 352,  384, 416,
@@ -1921,7 +1921,7 @@
   1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023,
 };
 
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
 DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_32x32[1024]) = {
   0,    2,    5,    9,    14,   20,   27,   35,   44,   54,   65,   77,   90,
   104,  119,  135,  152,  170,  189,  209,  230,  252,  275,  299,  324,  350,
@@ -2085,7 +2085,7 @@
   748,  792,  793,  833,  834,  870,  871,  903,  904,  932,  933,  957,  958,
   978,  979,  995,  996,  1008, 1009, 1017, 1018, 1022, 1023
 };
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 
 const SCAN_ORDER av1_default_scan_orders[TX_SIZES] = {
   { default_scan_4x4, av1_default_iscan_4x4 },
diff --git a/av1/common/thread_common.c b/av1/common/thread_common.c
index 31e1c30..9be67cb 100644
--- a/av1/common/thread_common.c
+++ b/av1/common/thread_common.c
@@ -708,7 +708,13 @@
   lr_sync->jobs_dequeued = 0;
 
   for (int plane = 0; plane < num_planes; plane++) {
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+    if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE &&
+        cm->rst_info[plane].frame_cross_restoration_type == RESTORE_NONE)
+      continue;
+#else
     if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE) continue;
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
     num_even_lr_jobs =
         num_even_lr_jobs + ((ctxt[plane].rsi->vert_units_per_tile + 1) >> 1);
   }
@@ -716,7 +722,12 @@
   lr_job_counter[1] = num_even_lr_jobs;
 
   for (int plane = 0; plane < num_planes; plane++) {
-    if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE) continue;
+    if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+        && cm->rst_info[plane].frame_cross_restoration_type == RESTORE_NONE
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+    )
+      continue;
     const int is_uv = plane > 0;
     const int ss_y = is_uv && cm->seq_params.subsampling_y;
 
@@ -865,7 +876,12 @@
       dgd->buffers[AOM_PLANE_Y], dgd->crop_heights[AOM_PLANE_Y],
       dgd->crop_widths[AOM_PLANE_Y], dgd->strides[AOM_PLANE_Y], &luma,
       dgd->crop_heights[1], dgd->crop_widths[1], WIENERNS_UV_BRD, luma_stride,
-      cm->seq_params.bit_depth);
+      cm->seq_params.bit_depth
+#if WIENERNS_CROSS_FILT_LUMA_TYPE == 2
+      ,
+      cm->seq_params.enable_cfl_ds_filter == 1
+#endif  // WIENERNS_CROSS_FILT_LUMA_TYPE == 2
+  );
   assert(luma_buf != NULL);
 #endif  // CONFIG_WIENER_NONSEP_CROSS_FILT
 
@@ -875,7 +891,12 @@
   int num_rows_lr = 0;
 
   for (int plane = 0; plane < num_planes; plane++) {
-    if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE) continue;
+    if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+        && cm->rst_info[plane].frame_cross_restoration_type == RESTORE_NONE
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+    )
+      continue;
 
 #if CONFIG_WIENER_NONSEP || CONFIG_PC_WIENER
     ctxt[plane].plane = plane;
diff --git a/av1/common/tip.c b/av1/common/tip.c
index 6e175d6..c427a4e 100644
--- a/av1/common/tip.c
+++ b/av1/common/tip.c
@@ -37,14 +37,22 @@
 
   if (!order_hint_info->enable_order_hint || frame_is_intra_only(cm)) return;
 
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+  const int cur_order_hint = cm->current_frame.display_order_hint;
+#else
   const int cur_order_hint = cm->current_frame.order_hint;
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
 
   // Identify the nearest forward and backward references.
   for (int i = 0; i < INTER_REFS_PER_FRAME; i++) {
     const RefCntBuffer *const buf = get_ref_frame_buf(cm, i);
     if (buf == NULL) continue;
 
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+    const int ref_order_hint = buf->display_order_hint;
+#else
     const int ref_order_hint = buf->order_hint;
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
     const int ref_to_cur_dist =
         get_relative_dist(order_hint_info, ref_order_hint, cur_order_hint);
     if (ref_to_cur_dist < 0) {
@@ -72,7 +80,12 @@
   const RefCntBuffer *const start_frame_buf =
       get_ref_frame_buf(cm, start_frame);
 
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+  const int *const ref_order_hints =
+      &start_frame_buf->ref_display_order_hint[0];
+#else
   const int *const ref_order_hints = &start_frame_buf->ref_order_hints[0];
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
   for (MV_REFERENCE_FRAME rf = 0; rf < INTER_REFS_PER_FRAME; ++rf) {
     if (ref_order_hints[rf] == target_frame_order) {
       return 1;
@@ -111,13 +124,21 @@
       get_ref_frame_buf(cm, start_frame);
   if (!is_ref_motion_field_eligible(cm, start_frame_buf)) return 0;
 
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+  const int start_frame_order_hint = start_frame_buf->display_order_hint;
+#else
   const int start_frame_order_hint = start_frame_buf->order_hint;
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
 
   assert(start_frame_buf->width == cm->width &&
          start_frame_buf->height == cm->height);
-
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+  const int *const ref_order_hints = start_frame_buf->ref_display_order_hint;
+  const int cur_order_hint = cm->cur_frame->display_order_hint;
+#else
   const int *const ref_order_hints = start_frame_buf->ref_order_hints;
   const int cur_order_hint = cm->cur_frame->order_hint;
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
   int start_to_current_frame_offset = get_relative_dist(
       order_hint_info, start_frame_order_hint, cur_order_hint);
 
@@ -461,7 +482,11 @@
   }
 
   const OrderHintInfo *const order_hint_info = &cm->seq_params.order_hint_info;
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+  const int cur_order_hint = cm->cur_frame->display_order_hint;
+#else
   const int cur_order_hint = cm->cur_frame->order_hint;
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
 
   MV_REFERENCE_FRAME nearest_rf[2] = { tip_ref->ref_frame[0],
                                        tip_ref->ref_frame[1] };
@@ -477,12 +502,20 @@
     if (cm->features.tip_frame_mode) {
       cm->features.allow_tip_hole_fill = cm->seq_params.enable_tip_hole_fill;
       RefCntBuffer *ref0_frame_buf = get_ref_frame_buf(cm, nearest_rf[0]);
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+      const int ref0_frame_order_hint = ref0_frame_buf->display_order_hint;
+#else
       const int ref0_frame_order_hint = ref0_frame_buf->order_hint;
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
       const int cur_to_ref0_offset = get_relative_dist(
           order_hint_info, cur_order_hint, ref0_frame_order_hint);
 
       RefCntBuffer *ref1_frame_buf = get_ref_frame_buf(cm, nearest_rf[1]);
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+      const int ref1_frame_order_hint = ref1_frame_buf->display_order_hint;
+#else
       const int ref1_frame_order_hint = ref1_frame_buf->order_hint;
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
       const int cur_to_ref1_offset = get_relative_dist(
           order_hint_info, cur_order_hint, ref1_frame_order_hint);
 
@@ -569,10 +602,15 @@
   }
 }
 
-static AOM_INLINE void tip_build_one_inter_predictor(
-    uint16_t *dst, int dst_stride, const MV *const src_mv,
-    InterPredParams *inter_pred_params, MACROBLOCKD *xd, int mi_x, int mi_y,
-    int ref, uint16_t **mc_buf, CalcSubpelParamsFunc calc_subpel_params_func) {
+#if !CONFIG_REFINEMV
+static AOM_INLINE
+#endif  // !CONFIG_REFINEMV
+    void
+    tip_build_one_inter_predictor(
+        uint16_t *dst, int dst_stride, const MV *const src_mv,
+        InterPredParams *inter_pred_params, MACROBLOCKD *xd, int mi_x, int mi_y,
+        int ref, uint16_t **mc_buf,
+        CalcSubpelParamsFunc calc_subpel_params_func) {
   SubpelParams subpel_params;
   uint16_t *src;
   int src_stride;
@@ -582,15 +620,55 @@
 #endif  // CONFIG_OPTFLOW_REFINEMENT
                           mc_buf, &src, &subpel_params, &src_stride);
 
-  tip_highbd_inter_predictor(
-      src, src_stride, dst, dst_stride, &subpel_params,
-      inter_pred_params->block_width, inter_pred_params->block_height,
-      &inter_pred_params->conv_params, inter_pred_params->interp_filter_params,
-      inter_pred_params->bit_depth);
+#if CONFIG_D071_IMP_MSK_BLD
+  int use_bacp = 0;
+  int n_blocks = 0;
+  if (inter_pred_params->border_data.enable_bacp) {
+    const int sub_blk_idx = n_blocks * 2 + ref;
+    inter_pred_params->border_data.bacp_block_data[sub_blk_idx].x0 =
+        subpel_params.x0;
+    inter_pred_params->border_data.bacp_block_data[sub_blk_idx].x1 =
+        subpel_params.x1;
+    inter_pred_params->border_data.bacp_block_data[sub_blk_idx].y0 =
+        subpel_params.y0;
+    inter_pred_params->border_data.bacp_block_data[sub_blk_idx].y1 =
+        subpel_params.y1;
+    if (ref == 1) {
+      use_bacp = is_out_of_frame_block(
+          inter_pred_params, inter_pred_params->ref_frame_buf.width,
+          inter_pred_params->ref_frame_buf.height, n_blocks);
+
+      if (use_bacp && inter_pred_params->mask_comp.type == COMPOUND_AVERAGE) {
+        inter_pred_params->conv_params.do_average = 0;
+        inter_pred_params->comp_mode = MASK_COMP;
+        inter_pred_params->mask_comp.seg_mask = xd->seg_mask;
+      }
+    }
+  }
+
+  assert(IMPLIES(ref == 0, !use_bacp));
+
+  if (use_bacp) {
+    assert(inter_pred_params->comp_mode == MASK_COMP);
+    make_masked_inter_predictor(src, src_stride, dst, dst_stride,
+                                inter_pred_params, &subpel_params, use_bacp,
+                                n_blocks);
+
+  } else {
+#endif  // CONFIG_D071_IMP_MSK_BLD
+
+    tip_highbd_inter_predictor(
+        src, src_stride, dst, dst_stride, &subpel_params,
+        inter_pred_params->block_width, inter_pred_params->block_height,
+        &inter_pred_params->conv_params,
+        inter_pred_params->interp_filter_params, inter_pred_params->bit_depth);
+#if CONFIG_D071_IMP_MSK_BLD
+  }
+#endif  // CONFIG_D071_IMP_MSK_BLD
 }
 
-#if CONFIG_OPTFLOW_ON_TIP
-#define MAKE_BFP_SAD_WRAPPER_COMMON(fnname)                                   \
+#if CONFIG_OPTFLOW_ON_TIP || CONFIG_REFINEMV
+#define MAKE_BFP_SAD_WRAPPER_COMMON8x8(fnname)                                \
   static unsigned int fnname##_8(const uint16_t *src_ptr, int source_stride,  \
                                  const uint16_t *ref_ptr, int ref_stride) {   \
     return fnname(src_ptr, source_stride, ref_ptr, ref_stride);               \
@@ -604,31 +682,119 @@
     return fnname(src_ptr, source_stride, ref_ptr, ref_stride) >> 4;          \
   }
 
-MAKE_BFP_SAD_WRAPPER_COMMON(aom_highbd_sad8x8)
+MAKE_BFP_SAD_WRAPPER_COMMON8x8(aom_highbd_sad8x8)
+#define MAKE_BFP_SAD_WRAPPER_COMMON16x8(fnname)                               \
+  static unsigned int fnname##_8(const uint16_t *src_ptr, int source_stride,  \
+                                 const uint16_t *ref_ptr, int ref_stride) {   \
+    return fnname(src_ptr, source_stride, ref_ptr, ref_stride);               \
+  }                                                                           \
+  static unsigned int fnname##_10(const uint16_t *src_ptr, int source_stride, \
+                                  const uint16_t *ref_ptr, int ref_stride) {  \
+    return fnname(src_ptr, source_stride, ref_ptr, ref_stride) >> 2;          \
+  }                                                                           \
+  static unsigned int fnname##_12(const uint16_t *src_ptr, int source_stride, \
+                                  const uint16_t *ref_ptr, int ref_stride) {  \
+    return fnname(src_ptr, source_stride, ref_ptr, ref_stride) >> 4;          \
+  }
 
-// Get the proper sad calculation function for an 8x8 block
-static unsigned int get_highbd_sad_8X8(const uint16_t *src_ptr,
-                                       int source_stride,
-                                       const uint16_t *ref_ptr, int ref_stride,
-                                       int bd) {
+    MAKE_BFP_SAD_WRAPPER_COMMON16x8(aom_highbd_sad16x8)
+
+#define MAKE_BFP_SAD_WRAPPER_COMMON8x16(fnname)                               \
+  static unsigned int fnname##_8(const uint16_t *src_ptr, int source_stride,  \
+                                 const uint16_t *ref_ptr, int ref_stride) {   \
+    return fnname(src_ptr, source_stride, ref_ptr, ref_stride);               \
+  }                                                                           \
+  static unsigned int fnname##_10(const uint16_t *src_ptr, int source_stride, \
+                                  const uint16_t *ref_ptr, int ref_stride) {  \
+    return fnname(src_ptr, source_stride, ref_ptr, ref_stride) >> 2;          \
+  }                                                                           \
+  static unsigned int fnname##_12(const uint16_t *src_ptr, int source_stride, \
+                                  const uint16_t *ref_ptr, int ref_stride) {  \
+    return fnname(src_ptr, source_stride, ref_ptr, ref_stride) >> 4;          \
+  }
+
+        MAKE_BFP_SAD_WRAPPER_COMMON8x16(aom_highbd_sad8x16)
+
+#define MAKE_BFP_SAD_WRAPPER_COMMON16x16(fnname)                              \
+  static unsigned int fnname##_8(const uint16_t *src_ptr, int source_stride,  \
+                                 const uint16_t *ref_ptr, int ref_stride) {   \
+    return fnname(src_ptr, source_stride, ref_ptr, ref_stride);               \
+  }                                                                           \
+  static unsigned int fnname##_10(const uint16_t *src_ptr, int source_stride, \
+                                  const uint16_t *ref_ptr, int ref_stride) {  \
+    return fnname(src_ptr, source_stride, ref_ptr, ref_stride) >> 2;          \
+  }                                                                           \
+  static unsigned int fnname##_12(const uint16_t *src_ptr, int source_stride, \
+                                  const uint16_t *ref_ptr, int ref_stride) {  \
+    return fnname(src_ptr, source_stride, ref_ptr, ref_stride) >> 4;          \
+  }
+
+            MAKE_BFP_SAD_WRAPPER_COMMON16x16(aom_highbd_sad16x16)
+
+                unsigned int get_highbd_sad(const uint16_t *src_ptr,
+                                            int source_stride,
+                                            const uint16_t *ref_ptr,
+                                            int ref_stride, int bd, int bw,
+                                            int bh) {
   if (bd == 8) {
-    return aom_highbd_sad8x8_8(src_ptr, source_stride, ref_ptr, ref_stride);
+    if (bw == 8 && bh == 8)
+      return aom_highbd_sad8x8_8(src_ptr, source_stride, ref_ptr, ref_stride);
+    else if (bw == 16 && bh == 8)
+      return aom_highbd_sad16x8_8(src_ptr, source_stride, ref_ptr, ref_stride);
+    else if (bw == 8 && bh == 16)
+      return aom_highbd_sad8x16_8(src_ptr, source_stride, ref_ptr, ref_stride);
+    else if (bw == 16 && bh == 16)
+      return aom_highbd_sad16x16_8(src_ptr, source_stride, ref_ptr, ref_stride);
+    else {
+      assert(0);
+      return 0;
+    }
   } else if (bd == 10) {
-    return aom_highbd_sad8x8_10(src_ptr, source_stride, ref_ptr, ref_stride);
+    if (bw == 8 && bh == 8)
+      return aom_highbd_sad8x8_10(src_ptr, source_stride, ref_ptr, ref_stride);
+    else if (bw == 16 && bh == 8)
+      return aom_highbd_sad16x8_10(src_ptr, source_stride, ref_ptr, ref_stride);
+    else if (bw == 8 && bh == 16)
+      return aom_highbd_sad8x16_10(src_ptr, source_stride, ref_ptr, ref_stride);
+    else if (bw == 16 && bh == 16)
+      return aom_highbd_sad16x16_10(src_ptr, source_stride, ref_ptr,
+                                    ref_stride);
+    else {
+      assert(0);
+      return 0;
+    }
   } else if (bd == 12) {
-    return aom_highbd_sad8x8_12(src_ptr, source_stride, ref_ptr, ref_stride);
+    if (bw == 8 && bh == 8)
+      return aom_highbd_sad8x8_12(src_ptr, source_stride, ref_ptr, ref_stride);
+    else if (bw == 16 && bh == 8)
+      return aom_highbd_sad16x8_12(src_ptr, source_stride, ref_ptr, ref_stride);
+    else if (bw == 8 && bh == 16)
+      return aom_highbd_sad8x16_12(src_ptr, source_stride, ref_ptr, ref_stride);
+    else if (bw == 16 && bh == 16)
+      return aom_highbd_sad16x16_12(src_ptr, source_stride, ref_ptr,
+                                    ref_stride);
+    else {
+      assert(0);
+      return 0;
+    }
   } else {
     assert(0);
     return 0;
   }
 }
-
 // Build an 8x8 block in the TIP frame
 static AOM_INLINE void tip_build_inter_predictors_8x8(
     const AV1_COMMON *cm, MACROBLOCKD *xd, int plane, TIP_PLANE *tip_plane,
     const MV mv[2], int mi_x, int mi_y, uint16_t **mc_buf,
     CONV_BUF_TYPE *tmp_conv_dst, CalcSubpelParamsFunc calc_subpel_params_func,
-    uint16_t *dst, int dst_stride) {
+    uint16_t *dst, int dst_stride
+#if CONFIG_REFINEMV
+    ,
+    uint16_t *dst0_16_refinemv, uint16_t *dst1_16_refinemv,
+    ReferenceArea ref_area[2]
+#endif  // CONFIG_REFINEMV
+
+) {
   // TODO(any): currently this only works for y plane
   assert(plane == 0);
 
@@ -664,6 +830,29 @@
   mbmi->motion_mode = SIMPLE_TRANSLATION;
   mbmi->sb_type[PLANE_TYPE_Y] = BLOCK_8X8;
   mbmi->interinter_comp.type = COMPOUND_AVERAGE;
+#if CONFIG_FLEX_MVRES
+  mbmi->max_mv_precision = MV_PRECISION_ONE_EIGHTH_PEL;
+  mbmi->pb_mv_precision = MV_PRECISION_ONE_EIGHTH_PEL;
+#endif  // CONFIG_FLEX_MVRES
+
+#if CONFIG_REFINEMV
+  MV best_mv_ref[2] = { { mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col },
+                        { mbmi->mv[1].as_mv.row, mbmi->mv[1].as_mv.col } };
+
+  int apply_refinemv = (is_refinemv_allowed_tip_blocks(cm, mbmi) && plane == 0);
+
+  if (apply_refinemv) {
+    uint16_t *dst_ref0 = NULL, *dst_ref1 = NULL;
+    dst_ref0 = &dst0_16_refinemv[0];
+    dst_ref1 = &dst1_16_refinemv[0];
+    mbmi->refinemv_flag = 1;
+
+    apply_mv_refinement(cm, xd, plane, mbmi, bw, bh, mi_x, mi_y, mc_buf,
+                        calc_subpel_params_func, comp_pixel_x, comp_pixel_y,
+                        dst_ref0, dst_ref1, best_mv_ref, bw, bh);
+  }
+
+#endif  // CONFIG_REFINEMV
 
   // Arrays to hold optical flow offsets.
   int vx0[4] = { 0 };
@@ -688,12 +877,22 @@
     InterPredParams params0, params1;
     av1_opfl_build_inter_predictor(cm, xd, plane, mbmi, bw, bh, mi_x, mi_y,
                                    mc_buf, &params0, calc_subpel_params_func, 0,
-                                   dst0);
+                                   dst0
+#if CONFIG_REFINEMV
+                                   ,
+                                   &best_mv_ref[0], bw, bh
+#endif  // CONFIG_REFINEMV
+    );
     av1_opfl_build_inter_predictor(cm, xd, plane, mbmi, bw, bh, mi_x, mi_y,
                                    mc_buf, &params1, calc_subpel_params_func, 1,
-                                   dst1);
-    const unsigned int sad = get_highbd_sad_8X8(dst0, bw, dst1, bw, bd);
+                                   dst1
+#if CONFIG_REFINEMV
+                                   ,
+                                   &best_mv_ref[1], bw, bh
+#endif  // CONFIG_REFINEMV
 
+    );
+    const unsigned int sad = get_highbd_sad(dst0, bw, dst1, bw, bd, 8, 8);
     if (sad < sad_thres) {
       do_opfl = 0;
     }
@@ -701,8 +900,13 @@
 
   if (do_opfl) {
     // Initialize refined mv
+#if CONFIG_REFINEMV
+    const MV mv0 = best_mv_ref[0];
+    const MV mv1 = best_mv_ref[1];
+#else
     const MV mv0 = mv[0];
     const MV mv1 = mv[1];
+#endif  // CONFIG_REFINEMV
     for (int mvi = 0; mvi < 4; mvi++) {
       mv_refined[mvi * 2].as_mv = mv0;
       mv_refined[mvi * 2 + 1].as_mv = mv1;
@@ -712,9 +916,20 @@
     av1_get_optflow_based_mv_highbd(cm, xd, plane, mbmi, mv_refined, bw, bh,
                                     mi_x, mi_y, mc_buf, calc_subpel_params_func,
                                     gx0, gy0, gx1, gy1, vx0, vy0, vx1, vy1,
-                                    dst0, dst1, 0, use_4x4);
+                                    dst0, dst1, 0, use_4x4
+
+#if CONFIG_REFINEMV
+                                    ,
+                                    best_mv_ref, bw, bh
+#endif  // CONFIG_REFINEMV
+    );
   }
 
+#if CONFIG_D071_IMP_MSK_BLD
+  BacpBlockData bacp_block_data[2 * N_OF_OFFSETS];
+  uint8_t use_bacp = cm->features.enable_imp_msk_bld;
+#endif  // CONFIG_D071_IMP_MSK_BLD
+
   for (int ref = 0; ref < 2; ++ref) {
     const struct scale_factors *const sf = cm->tip_ref.ref_scale_factor[ref];
     struct buf_2d *const pred_buf = &tip->pred[ref];
@@ -724,8 +939,23 @@
                           comp_pixel_x, ss_x, ss_y, bd, 0, sf, pred_buf,
                           MULTITAP_SHARP);
 
+#if CONFIG_REFINEMV
+    if (apply_refinemv) {
+      inter_pred_params.use_ref_padding = 1;
+      inter_pred_params.ref_area = &ref_area[ref];
+    }
+#endif  // CONFIG_REFINEMV
+
     inter_pred_params.comp_mode = UNIFORM_COMP;
 
+#if CONFIG_D071_IMP_MSK_BLD
+    inter_pred_params.border_data.enable_bacp = use_bacp;
+    inter_pred_params.border_data.bacp_block_data =
+        &bacp_block_data[0];  // Always point to the first ref
+    inter_pred_params.sb_type = mbmi->sb_type[PLANE_TYPE_Y];
+    inter_pred_params.mask_comp = mbmi->interinter_comp;
+#endif  // CONFIG_D071_IMP_MSK_BLD
+
     const int width = (cm->mi_params.mi_cols << MI_SIZE_LOG2);
     const int height = (cm->mi_params.mi_rows << MI_SIZE_LOG2);
     inter_pred_params.dist_to_top_edge = -GET_MV_SUBPEL(mi_y);
@@ -741,7 +971,12 @@
           dst, dst_stride, plane, mv_refined, &inter_pred_params, xd, mi_x,
           mi_y, ref, mc_buf, calc_subpel_params_func, use_4x4);
     } else {
-      tip_build_one_inter_predictor(dst, dst_stride, &mv[ref],
+      tip_build_one_inter_predictor(dst, dst_stride,
+#if CONFIG_REFINEMV
+                                    &best_mv_ref[ref],
+#else
+                                    &mv[ref],
+#endif  // CONFIG_REFINEMV
                                     &inter_pred_params, xd, mi_x, mi_y, ref,
                                     mc_buf, calc_subpel_params_func);
     }
@@ -749,7 +984,7 @@
 
   xd->tmp_conv_dst = org_buf;
 }
-#endif  // CONFIG_OPTFLOW_ON_TIP
+#endif  // CONFIG_OPTFLOW_ON_TIP || CONFIG_REFINEMV
 
 static AOM_INLINE void tip_build_inter_predictors_8x8_and_bigger(
     const AV1_COMMON *cm, MACROBLOCKD *xd, int plane, TIP_PLANE *tip_plane,
@@ -759,9 +994,44 @@
   struct buf_2d *const dst_buf = &tip->dst;
   uint16_t *const dst = dst_buf->buf;
 
-#if CONFIG_OPTFLOW_ON_TIP
+#if CONFIG_REFINEMV || CONFIG_OPTFLOW_ON_TIP
+#if CONFIG_REFINEMV
+  uint16_t dst0_16_refinemv[REFINEMV_SUBBLOCK_WIDTH * REFINEMV_SUBBLOCK_HEIGHT];
+  uint16_t dst1_16_refinemv[REFINEMV_SUBBLOCK_WIDTH * REFINEMV_SUBBLOCK_HEIGHT];
+  int apply_refinemv = (plane == 0);
+  ReferenceArea ref_area[2];
+  if (apply_refinemv) {
+    MB_MODE_INFO tmp_mbmi = { 0 };  // stack scratch, avoids unchecked calloc
+    MB_MODE_INFO *const mbmi = &tmp_mbmi;
+    mbmi->mv[0].as_mv = mv[0];
+    mbmi->mv[1].as_mv = mv[1];
+    mbmi->ref_frame[0] = TIP_FRAME;
+    mbmi->ref_frame[1] = NONE_FRAME;
+    mbmi->interp_fltr = EIGHTTAP_REGULAR;
+    mbmi->use_intrabc[xd->tree_type == CHROMA_PART] = 0;
+    mbmi->use_intrabc[0] = 0;
+    mbmi->motion_mode = SIMPLE_TRANSLATION;
+    mbmi->sb_type[PLANE_TYPE_Y] = BLOCK_8X8;
+    mbmi->interinter_comp.type = COMPOUND_AVERAGE;
+#if CONFIG_FLEX_MVRES
+    mbmi->max_mv_precision = MV_PRECISION_ONE_EIGHTH_PEL;
+    mbmi->pb_mv_precision = MV_PRECISION_ONE_EIGHTH_PEL;
+#endif  // CONFIG_FLEX_MVRES
+    const int ss_x = plane ? cm->seq_params.subsampling_x : 0;
+    const int ss_y = plane ? cm->seq_params.subsampling_y : 0;
+    const int comp_pixel_x = (mi_x >> ss_x);
+    const int comp_pixel_y = (mi_y >> ss_y);
+    av1_get_reference_area_with_padding(cm, xd, plane, mbmi, bw, bh, mi_x, mi_y,
+                                        ref_area, comp_pixel_x, comp_pixel_y);
+  }
+#endif  // CONFIG_REFINEMV
+
   int dst_stride = dst_buf->stride;
-  if (plane == 0 && cm->features.use_optflow_tip) {
+  if (plane == 0 && (cm->features.use_optflow_tip
+#if CONFIG_REFINEMV
+                     || apply_refinemv
+#endif  // CONFIG_REFINEMV
+                     )) {
     if (bw != 8 || bh != 8) {
       for (int h = 0; h < bh; h += 8) {
         for (int w = 0; w < bw; w += 8) {
@@ -776,10 +1046,15 @@
     }
     tip_build_inter_predictors_8x8(cm, xd, plane, tip_plane, mv, mi_x, mi_y,
                                    mc_buf, tmp_conv_dst,
-                                   calc_subpel_params_func, dst, dst_stride);
+                                   calc_subpel_params_func, dst, dst_stride
+#if CONFIG_REFINEMV
+                                   ,
+                                   dst0_16_refinemv, dst1_16_refinemv, ref_area
+#endif  // CONFIG_REFINEMV
+    );
     return;
   }
-#endif  // CONFIG_OPTFLOW_ON_TIP
+#endif  // CONFIG_OPTFLOW_ON_TIP || CONFIG_REFINEMV
 
   const int bd = cm->seq_params.bit_depth;
 
@@ -789,6 +1064,12 @@
   const int comp_pixel_y = (mi_y >> ss_y);
   const int comp_bw = bw >> ss_x;
   const int comp_bh = bh >> ss_y;
+
+#if CONFIG_D071_IMP_MSK_BLD
+  BacpBlockData bacp_block_data[2 * N_OF_OFFSETS];
+  uint8_t use_bacp = cm->features.enable_imp_msk_bld;
+#endif  // CONFIG_D071_IMP_MSK_BLD
+
   for (int ref = 0; ref < 2; ++ref) {
     const struct scale_factors *const sf = cm->tip_ref.ref_scale_factor[ref];
     struct buf_2d *const pred_buf = &tip->pred[ref];
@@ -800,6 +1081,16 @@
 
     inter_pred_params.comp_mode = UNIFORM_COMP;
 
+#if CONFIG_D071_IMP_MSK_BLD
+    inter_pred_params.border_data.enable_bacp = use_bacp;
+    inter_pred_params.border_data.bacp_block_data =
+        &bacp_block_data[0];  // Always point to the first ref
+    inter_pred_params.sb_type = BLOCK_8X8;
+    assert(bw == 8 &&
+           bh == 8);  // Currently BACP is supported only for 8x8 block
+    inter_pred_params.mask_comp.type = COMPOUND_AVERAGE;
+#endif  // CONFIG_D071_IMP_MSK_BLD
+
     const int width = (cm->mi_params.mi_cols << MI_SIZE_LOG2);
     const int height = (cm->mi_params.mi_rows << MI_SIZE_LOG2);
     inter_pred_params.dist_to_top_edge = -GET_MV_SUBPEL(mi_y);
diff --git a/av1/common/tip.h b/av1/common/tip.h
index c199d06..d5c50d6 100644
--- a/av1/common/tip.h
+++ b/av1/common/tip.h
@@ -118,6 +118,7 @@
   *mv = get_mv_from_fullmv(&fullmv);
 }
 
+#if !CONFIG_REFINEMV
 // Clamp MV to UMV border based on its distance to left/right/top/bottom edge
 static AOM_INLINE MV tip_clamp_mv_to_umv_border_sb(
     InterPredParams *const inter_pred_params, const MV *src_mv, int bw, int bh,
@@ -172,7 +173,7 @@
 
   return clamped_mv;
 }
-
+#endif  // !CONFIG_REFINEMV
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/av1/common/token_cdfs.h b/av1/common/token_cdfs.h
index 546dd7a..b70a2dd 100644
--- a/av1/common/token_cdfs.h
+++ b/av1/common/token_cdfs.h
@@ -398,6 +398,24 @@
                                                  } };
 #endif  // CONFIG_CONTEXT_DERIVATION
 
+#if CONFIG_ATC_DCTX_ALIGNED
+static const aom_cdf_prob av1_default_coeff_base_bob_multi_cdfs
+    [TOKEN_CDF_Q_CTXS][SIG_COEF_CONTEXTS_BOB][CDF_SIZE(NUM_BASE_LEVELS + 1)] = {
+      { { AOM_CDF3(10923, 21845) },
+        { AOM_CDF3(10923, 21845) },
+        { AOM_CDF3(10923, 21845) } },
+      { { AOM_CDF3(18786, 24298) },
+        { AOM_CDF3(24159, 27856) },
+        { AOM_CDF3(25533, 28778) } },
+      { { AOM_CDF3(19757, 25371) },
+        { AOM_CDF3(20834, 26403) },
+        { AOM_CDF3(20013, 26339) } },
+      { { AOM_CDF3(21974, 28446) },
+        { AOM_CDF3(23020, 29244) },
+        { AOM_CDF3(23858, 29893) } }
+    };
+#endif  // CONFIG_ATC_DCTX_ALIGNED
+
 static const aom_cdf_prob
     av1_default_eob_extra_cdfs[TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES]
                               [EOB_COEF_CONTEXTS][CDF_SIZE(2)] = {
@@ -843,6 +861,110 @@
                                     } } }
                               };
 
+#if CONFIG_ATC_DCTX_ALIGNED
+static const aom_cdf_prob
+    av1_default_eob_multi16_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][CDF_SIZE(
+        EOB_MAX_SYMS - 6)] = { { { AOM_CDF5(6554, 13107, 19661, 26214) },
+                                 { AOM_CDF5(6554, 13107, 19661, 26214) } },
+                               { { AOM_CDF5(222, 339, 759, 2493) },
+                                 { AOM_CDF5(4613, 6877, 12106, 18380) } },
+                               { { AOM_CDF5(1418, 2074, 4194, 10380) },
+                                 { AOM_CDF5(14420, 18173, 23582, 28265) } },
+                               { { AOM_CDF5(2817, 4572, 9636, 19946) },
+                                 { AOM_CDF5(18618, 21512, 26133, 29765) } } };
+
+static const aom_cdf_prob
+    av1_default_eob_multi32_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][CDF_SIZE(
+        EOB_MAX_SYMS - 5)] = {
+      { { AOM_CDF6(5461, 10923, 16384, 21845, 27307) },
+        { AOM_CDF6(5461, 10923, 16384, 21845, 27307) } },
+      { { AOM_CDF6(169, 487, 1046, 1986, 4403) },
+        { AOM_CDF6(4144, 7636, 13889, 20369, 26338) } },
+      { { AOM_CDF6(1365, 1804, 3348, 6966, 13052) },
+        { AOM_CDF6(12169, 14967, 19396, 23910, 28156) } },
+      { { AOM_CDF6(2394, 3348, 6277, 12419, 20179) },
+        { AOM_CDF6(16608, 19308, 23841, 28108, 31336) } }
+    };
+
+static const aom_cdf_prob
+    av1_default_eob_multi64_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][CDF_SIZE(
+        EOB_MAX_SYMS - 4)] = {
+      { { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+        { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) } },
+      { { AOM_CDF7(100, 388, 969, 2002, 4280, 9308) },
+        { AOM_CDF7(4333, 8167, 14492, 20418, 26250, 29955) } },
+      { { AOM_CDF7(1148, 1569, 2970, 5924, 10850, 20433) },
+        { AOM_CDF7(11727, 14694, 19634, 24043, 27541, 30259) } },
+      { { AOM_CDF7(2471, 3361, 6306, 11907, 19426, 28048) },
+        { AOM_CDF7(16268, 19103, 23823, 27751, 30728, 32149) } }
+    };
+
+static const aom_cdf_prob
+    av1_default_eob_multi128_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][CDF_SIZE(
+        EOB_MAX_SYMS - 3)] = {
+      { { AOM_CDF8(4096, 8192, 12288, 16384, 20480, 24576, 28672) },
+        { AOM_CDF8(4096, 8192, 12288, 16384, 20480, 24576, 28672) } },
+      { { AOM_CDF8(34, 889, 2098, 3797, 6595, 10857, 15412) },
+        { AOM_CDF8(2654, 4900, 10162, 15197, 21643, 27251, 31178) } },
+      { { AOM_CDF8(1804, 2359, 3999, 7338, 12798, 19374, 26178) },
+        { AOM_CDF8(12268, 15216, 20355, 24508, 27905, 29986, 31541) } },
+      { { AOM_CDF8(3169, 4356, 7874, 13681, 20866, 27270, 30960) },
+        { AOM_CDF8(17157, 19749, 24432, 27880, 30345, 31718, 32514) } }
+    };
+
+static const aom_cdf_prob
+    av1_default_eob_multi256_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][CDF_SIZE(
+        EOB_MAX_SYMS - 2)] = {
+      { { AOM_CDF9(3641, 7282, 10923, 14564, 18204, 21845, 25486, 29127) },
+        { AOM_CDF9(3641, 7282, 10923, 14564, 18204, 21845, 25486, 29127) } },
+      { { AOM_CDF9(33, 670, 2335, 4307, 7421, 13364, 18212, 24272) },
+        { AOM_CDF9(4218, 6101, 11211, 14560, 18294, 23147, 29103, 31965) } },
+      { { AOM_CDF9(2847, 3904, 6472, 10515, 16418, 23542, 27413, 30171) },
+        { AOM_CDF9(10704, 13812, 18880, 22949, 26500, 29132, 31028, 31981) } },
+      { { AOM_CDF9(3871, 5381, 9315, 15338, 22376, 28297, 31018, 32233) },
+        { AOM_CDF9(16748, 19447, 24213, 27658, 30076, 31428, 32280, 32665) } }
+    };
+
+static const aom_cdf_prob
+    av1_default_eob_multi512_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][CDF_SIZE(
+        EOB_MAX_SYMS - 1)] = {
+      { { AOM_CDF10(3277, 6554, 9830, 13107, 16384, 19661, 22938, 26214,
+                    29491) },
+        { AOM_CDF10(3277, 6554, 9830, 13107, 16384, 19661, 22938, 26214,
+                    29491) } },
+      { { AOM_CDF10(210, 734, 3354, 8943, 13834, 20297, 25921, 28366, 28820) },
+        { AOM_CDF10(7846, 10783, 17565, 23721, 26809, 28393, 30689, 32349,
+                    32667) } },
+      { { AOM_CDF10(4390, 7204, 11548, 15823, 20831, 26069, 28315, 30071,
+                    31063) },
+        { AOM_CDF10(9109, 12852, 19233, 23707, 27291, 29681, 31403, 32259,
+                    32589) } },
+      { { AOM_CDF10(7299, 9302, 13207, 18796, 25134, 30084, 31270, 32095,
+                    32513) },
+        { AOM_CDF10(18536, 20753, 25277, 28762, 31002, 31931, 32489, 32703,
+                    32707) } }
+    };
+
+static const aom_cdf_prob
+    av1_default_eob_multi1024_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][CDF_SIZE(
+        EOB_MAX_SYMS)] = { { { AOM_CDF11(2979, 5958, 8937, 11916, 14895, 17873,
+                                         20852, 23831, 26810, 29789) },
+                             { AOM_CDF11(2979, 5958, 8937, 11916, 14895, 17873,
+                                         20852, 23831, 26810, 29789) } },
+                           { { AOM_CDF11(401, 535, 3344, 6821, 8426, 13375,
+                                         14043, 17788, 19527, 21667) },
+                             { AOM_CDF11(4161, 8108, 14066, 17761, 20706, 21700,
+                                         22771, 26741, 31827, 32653) } },
+                           { { AOM_CDF11(4252, 6240, 9718, 13784, 19105, 24225,
+                                         26454, 28321, 29757, 30648) },
+                             { AOM_CDF11(11539, 14807, 19777, 22884, 26491,
+                                         28818, 30433, 31677, 32422, 32654) } },
+                           { { AOM_CDF11(8416, 10839, 15261, 20667, 26077,
+                                         30169, 31223, 31886, 32276, 32527) },
+                             { AOM_CDF11(21740, 24160, 28114, 30490, 31733,
+                                         32248, 32571, 32700, 32704,
+                                         32708) } } };
+#else
 static const aom_cdf_prob
     av1_default_eob_multi16_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][CDF_SIZE(
         5)] = { { { { AOM_CDF5(840, 1039, 1980, 4895) },
@@ -1013,6 +1135,7 @@
                                  29486, 29724, 29807, 32570) },
                      { AOM_CDF11(2979, 5958, 8937, 11916, 14895, 17873, 20852,
                                  23831, 26810, 29789) } } } };
+#endif  // CONFIG_ATC_DCTX_ALIGNED
 
 static const aom_cdf_prob
     av1_default_coeff_lps_multi_cdfs_idtx[TOKEN_CDF_Q_CTXS][IDTX_LEVEL_CONTEXTS]
@@ -1075,7 +1198,7 @@
                                              { AOM_CDF4(13384, 20248, 26029) } }
                                          };
 
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
 static const aom_cdf_prob
     av1_default_coeff_lps_multi_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES]
                                     [LEVEL_CONTEXTS][CDF_SIZE(BR_CDF_SIZE)] = {
@@ -1901,7 +2024,7 @@
             { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) },
             { AOM_CDF4(8192, 16384, 24576) } } } }
     };
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 
 static const aom_cdf_prob av1_default_coeff_base_multi_cdfs_idtx
     [TOKEN_CDF_Q_CTXS][IDTX_SIG_COEF_CONTEXTS]
@@ -1962,7 +2085,7 @@
                                           { AOM_CDF4(2674, 5476, 12215) },
                                           { AOM_CDF4(1568, 3054, 5228) } } };
 
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
 static const aom_cdf_prob av1_default_coeff_base_lf_multi_cdfs
     [TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES][LF_SIG_COEF_CONTEXTS]
     [CDF_SIZE(LF_BASE_SYMBOLS)] = {
@@ -6461,7 +6584,7 @@
                                         { AOM_CDF3(10923, 21845) },
                                         { AOM_CDF3(10923, 21845) },
                                         { AOM_CDF3(10923, 21845) } } } } };
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 #if CONFIG_PAR_HIDING
 static const aom_cdf_prob
     av1_default_coeff_base_ph_cdfs[TOKEN_CDF_Q_CTXS][COEFF_BASE_PH_CONTEXTS]
diff --git a/av1/common/txb_common.h b/av1/common/txb_common.h
index 79f6929..0b82352 100644
--- a/av1/common/txb_common.h
+++ b/av1/common/txb_common.h
@@ -109,16 +109,16 @@
             AOMMIN(levels[pos + stride], MAX_BASE_BR_RANGE) +
             AOMMIN(levels[pos + 1 + stride], MAX_BASE_BR_RANGE);
   mag = AOMMIN((mag + 1) >> 1, 6);
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
   return mag;
 #else
   //((row | col) < 2) is equivalent to ((row < 2) && (col < 2))
   if ((row | col) < 2) return mag + 7;
   return mag + 14;
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 }
 
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
 // This function returns the low range context index for
 // the low-frequency region for the EOB coefficient.
 static AOM_FORCE_INLINE int get_br_ctx_lf_eob(const int c,  // raster order
@@ -139,9 +139,9 @@
     return 7;
   return 14;
 }
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
 // This function returns the low range context index/increment for the
 // coefficients residing in the low-frequency region for 2D transforms.
 // Not used for the DC term.
@@ -251,9 +251,9 @@
 
   return mag + 14;
 }
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
 static const uint8_t clip_max5[256] = {
   0, 1, 2, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
   5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
@@ -266,7 +266,7 @@
   5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
   5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
 };
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 
 static const uint8_t clip_max3[256] = {
   0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
@@ -321,7 +321,7 @@
   return sign_ctx;
 }
 
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
 // This function returns the template sum of absolute values
 // for coefficient coding for the low-frequency region.
 static AOM_FORCE_INLINE int get_nz_mag_lf(const uint8_t *const levels,
@@ -346,7 +346,7 @@
   }
   return mag;
 }
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 
 // This function returns the template sum of absolute values
 // for coefficient coding for the higher-frequency default region.
@@ -399,7 +399,7 @@
   return ctx + 7;
 }
 
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
 // This function returns the base range context index/increment for the
 // coefficients residing in the low-frequency region for 1D/2D transforms.
 static AOM_FORCE_INLINE int get_nz_map_ctx_from_stats_lf(
@@ -449,7 +449,7 @@
   }
   return 0;
 }
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 
 // This function returns the base range context index/increment for the
 // coefficients residing in the higher-frequency region for 1D/2D transforms.
@@ -457,9 +457,9 @@
     const int stats,
     const int coeff_idx,  // raster order
     const int bwl,
-#if !CONFIG_ATC_COEFCODING
+#if !CONFIG_ATC
     const TX_SIZE tx_size,
-#endif  // !CONFIG_ATC_COEFCODING
+#endif  // !CONFIG_ATC
     const TX_CLASS tx_class) {
   // tx_class == 0(TX_CLASS_2D)
   if ((tx_class | coeff_idx) == 0) return 0;
@@ -467,7 +467,7 @@
   ctx = AOMMIN(ctx, 4);
   switch (tx_class) {
     case TX_CLASS_2D: {
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
       const int row = coeff_idx >> bwl;
       const int col = coeff_idx - (row << bwl);
       if (row + col < 6) return ctx;
@@ -486,33 +486,33 @@
       //   if (row + col < 4) return 5 + ctx + 1;
       //   return 21 + ctx;
       return ctx + av1_nz_map_ctx_offset[tx_size][coeff_idx];
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
     }
     case TX_CLASS_HORIZ: {
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
       return ctx + 15;
 #else
       const int row = coeff_idx >> bwl;
       const int col = coeff_idx - (row << bwl);
       return ctx + nz_map_ctx_offset_1d[col];
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
     }
     case TX_CLASS_VERT: {
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
       return ctx + 15;
 #else
       const int row = coeff_idx >> bwl;
       return ctx + nz_map_ctx_offset_1d[row];
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
     }
     default: break;
   }
   return 0;
 }
 
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
 typedef aom_cdf_prob (*base_lf_cdf_arr)[CDF_SIZE(LF_BASE_SYMBOLS)];
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 typedef aom_cdf_prob (*base_cdf_arr)[CDF_SIZE(4)];
 typedef aom_cdf_prob (*br_cdf_arr)[CDF_SIZE(BR_CDF_SIZE)];
 #if CONFIG_PAR_HIDING
@@ -563,6 +563,15 @@
   return 3;
 }
 
+#if CONFIG_ATC_DCTX_ALIGNED
+// Return the context index (0, 1 or 2) for the first position.
+static INLINE int get_lower_levels_ctx_bob(int bwl, int height, int scan_idx) {
+  const int num_coeffs = height << bwl;
+  if (scan_idx <= num_coeffs / 8) return 0;
+  return (scan_idx <= num_coeffs / 4) ? 1 : 2;
+}
+#endif  // CONFIG_ATC_DCTX_ALIGNED
+
 static INLINE int get_upper_levels_ctx_2d(const uint8_t *levels, int coeff_idx,
                                           int bwl) {
   int mag;
@@ -576,7 +585,7 @@
   return ctx + 7;
 }
 
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
 // This function returns the base range context index/increment for the
 // coefficients residing in the low-frequency region for 2D transforms.
 static INLINE int get_lower_levels_ctx_lf_2d(const uint8_t *levels,
@@ -612,14 +621,14 @@
       get_nz_mag_lf(levels + get_padded_idx(coeff_idx, bwl), bwl, tx_class);
   return get_nz_map_ctx_from_stats_lf(stats, coeff_idx, bwl, tx_class);
 }
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 
 static INLINE int get_lower_levels_ctx_2d(const uint8_t *levels, int coeff_idx,
                                           int bwl
-#if !CONFIG_ATC_COEFCODING
+#if !CONFIG_ATC
                                           ,
                                           TX_SIZE tx_size
-#endif  // !CONFIG_ATC_COEFCODING
+#endif  // !CONFIG_ATC
 ) {
   assert(coeff_idx > 0);
   int mag;
@@ -632,7 +641,7 @@
   mag += AOMMIN(levels[(2 << bwl) + (2 << TX_PAD_HOR_LOG2)], 3);  // { 2, 0 }
 
   const int ctx = AOMMIN((mag + 1) >> 1, 4);
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
   const int row = coeff_idx >> bwl;
   const int col = coeff_idx - (row << bwl);
   if (row + col < 6) return ctx;
@@ -640,10 +649,10 @@
   return ctx + 10;
 #else
   return ctx + av1_nz_map_ctx_offset[tx_size][coeff_idx];
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 }
 
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
 // This function determines the limits to separate the low-frequency
 // coefficient coding region from the higher-frequency default
 // region. It is based on the diagonal sum (row+col) or row, columns
@@ -661,35 +670,35 @@
   }
   return limits;
 }
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 
 static AOM_FORCE_INLINE int get_lower_levels_ctx(const uint8_t *levels,
                                                  int coeff_idx, int bwl,
-#if !CONFIG_ATC_COEFCODING
+#if !CONFIG_ATC
                                                  TX_SIZE tx_size,
-#endif  // !CONFIG_ATC_COEFCODING
+#endif  // !CONFIG_ATC
                                                  TX_CLASS tx_class) {
   const int stats =
       get_nz_mag(levels + get_padded_idx(coeff_idx, bwl), bwl, tx_class);
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
   return get_nz_map_ctx_from_stats(stats, coeff_idx, bwl, tx_class);
 #else
   return get_nz_map_ctx_from_stats(stats, coeff_idx, bwl, tx_size, tx_class);
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 }
 
 static INLINE int get_lower_levels_ctx_general(int is_last, int scan_idx,
                                                int bwl, int height,
                                                const uint8_t *levels,
                                                int coeff_idx,
-#if !CONFIG_ATC_COEFCODING
+#if !CONFIG_ATC
                                                TX_SIZE tx_size,
-#endif  // !CONFIG_ATC_COEFCODING
+#endif  // !CONFIG_ATC
                                                TX_CLASS tx_class
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
                                                ,
                                                int plane
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 ) {
   if (is_last) {
     if (scan_idx == 0) return 0;
@@ -697,7 +706,7 @@
     if (scan_idx <= (height << bwl) >> 2) return 2;
     return 3;
   }
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
   const int row = coeff_idx >> bwl;
   const int col = coeff_idx - (row << bwl);
   int limits = get_lf_limits(row, col, tx_class, plane);
@@ -708,7 +717,7 @@
   }
 #else
   return get_lower_levels_ctx(levels, coeff_idx, bwl, tx_size, tx_class);
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 }
 
 static INLINE void set_dc_sign(int *cul_level, int dc_val) {
diff --git a/av1/common/warped_motion.c b/av1/common/warped_motion.c
index 1293edf..6fa6b52 100644
--- a/av1/common/warped_motion.c
+++ b/av1/common/warped_motion.c
@@ -983,3 +983,188 @@
   return 0;
 }
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
+
+#if CONFIG_CWG_D067_IMPROVED_WARP
+// Derive the MV that the warp `model` produces at frame position (x, y),
+// where x is the horizontal and y the vertical coordinate and (0, 0) is the
+// top-left corner of the frame. IDENTITY models always yield a zero MV.
+int_mv get_warp_motion_vector_xy_pos(const WarpedMotionParams *model,
+                                     const int x, const int y,
+                                     MvSubpelPrecision precision) {
+  int_mv res;
+
+  if (model->wmtype == IDENTITY) {
+    res.as_int = 0;
+    return res;
+  }
+
+  if (model->wmtype == TRANSLATION) {
+    // All global motion vectors are stored with WARPEDMODEL_PREC_BITS (16)
+    // bits of fractional precision. The offset for a translation is stored in
+    // entries 0 and 1. For translations, all but the top three (two if
+    // precision < MV_SUBPEL_EIGHTH) fractional bits are always
+    // zero.
+    //
+#if CONFIG_FLEX_MVRES
+    // After the right shifts, there are 3 fractional bits of precision. If
+    // precision is below 1/8 pel, the bottom bit is always zero (so we don't
+    // need a call to convert_to_trans_prec here)
+    res.as_mv.col = model->wmmat[0] >> GM_TRANS_ONLY_PREC_DIFF;
+    res.as_mv.row = model->wmmat[1] >> GM_TRANS_ONLY_PREC_DIFF;
+
+    // When extended warp prediction is enabled, the warp model can be derived
+    // from the neighbor. Neighbor may have different MV precision than current
+    // block. Therefore, this assertion is not valid when
+    // CONFIG_EXTENDED_WARP_PREDICTION is enabled
+#if !CONFIG_EXTENDED_WARP_PREDICTION
+    assert(IMPLIES(1 & (res.as_mv.row | res.as_mv.col),
+                   precision == MV_PRECISION_ONE_EIGHTH_PEL));
+#endif
+#if CONFIG_C071_SUBBLK_WARPMV
+    if (precision < MV_PRECISION_HALF_PEL)
+#endif  // CONFIG_C071_SUBBLK_WARPMV
+      lower_mv_precision(&res.as_mv, precision);
+#else
+    // NOTE(review): `allow_hp` and `is_integer` are not declared in this
+    // function, so this branch looks unbuildable when CONFIG_FLEX_MVRES is
+    // off -- confirm whether the non-FLEX_MVRES paths can be dropped.
+    res.as_mv.col = model->wmmat[0] >> GM_TRANS_ONLY_PREC_DIFF;
+    res.as_mv.row = model->wmmat[1] >> GM_TRANS_ONLY_PREC_DIFF;
+    assert(IMPLIES(1 & (res.as_mv.row | res.as_mv.col), allow_hp));
+    if (is_integer) {
+      integer_mv_precision(&res.as_mv);
+    }
+#endif
+    return res;
+  }
+
+  const int32_t *mat = model->wmmat;
+  int tx, ty;
+
+  if (model->wmtype == ROTZOOM) {
+    assert(model->wmmat[5] == model->wmmat[2]);
+    assert(model->wmmat[4] == -model->wmmat[3]);
+  }
+
+  int xc =
+      (mat[2] * x + mat[3] * y + mat[0]) - (1 << WARPEDMODEL_PREC_BITS) * x;
+  int yc =
+      (mat[4] * x + mat[5] * y + mat[1]) - (1 << WARPEDMODEL_PREC_BITS) * y;
+
+#if CONFIG_FLEX_MVRES
+  tx = convert_to_trans_prec(precision, xc);
+  ty = convert_to_trans_prec(precision, yc);
+#else
+  tx = convert_to_trans_prec(allow_hp, xc);
+  ty = convert_to_trans_prec(allow_hp, yc);
+#endif
+
+  res.as_mv.row = ty;
+  res.as_mv.col = tx;
+
+#if CONFIG_FLEX_MVRES
+#if CONFIG_C071_SUBBLK_WARPMV
+  if (precision < MV_PRECISION_HALF_PEL)
+#endif  // CONFIG_C071_SUBBLK_WARPMV
+    lower_mv_precision(&res.as_mv, precision);
+#else
+  if (is_integer) {
+    integer_mv_precision(&res.as_mv);
+  }
+#endif
+  return res;
+}
+
+// Derive an AFFINE warp model from the MVs at three corner points of a block.
+// pts holds the source points as (col, row) pairs in integer pixels and mvs
+// the matching MVs, also as (col, row) pairs: for the nth point, pts[2*n] /
+// mvs[2*n] are the col values and pts[2*n + 1] / mvs[2*n + 1] the row values.
+// Points are expected in the order top-left, top-right, bottom-left.
+// Returns 1 on success. Returns 0 when no usable model can be derived
+// (np != 3, a negative source/projected coordinate, all MVs identical, or
+// shear parameters that av1_get_shear_params rejects).
+int get_model_from_corner_mvs(WarpedMotionParams *derive_model, int *pts,
+                              int np, int *mvs, const BLOCK_SIZE bsize) {
+  assert(derive_model != NULL);
+
+  // In order to derive the warp model we need 3 projected points
+  // If the number of projected points (np) is not equal to 3, model is not
+  // valid.
+  if (np != 3) {
+    derive_model->invalid = 1;
+    return 0;
+  }
+
+  int x0, y0;
+  int ref_x0, ref_x1, ref_x2, ref_y0, ref_y1, ref_y2;
+  int pts_inref[2 * 3];  // (col, row) of each point after adding its MV
+  const int width_log2 = mi_size_wide_log2[bsize] + MI_SIZE_LOG2;
+  const int height_log2 = mi_size_high_log2[bsize] + MI_SIZE_LOG2;
+
+  for (int n = 0; n < np; n++) {
+    pts_inref[2 * n] = pts[2 * n] * (1 << WARPEDMODEL_PREC_BITS) +
+                       mvs[2 * n] * (1 << GM_TRANS_ONLY_PREC_DIFF);
+    pts_inref[2 * n + 1] = pts[2 * n + 1] * (1 << WARPEDMODEL_PREC_BITS) +
+                           mvs[2 * n + 1] * (1 << GM_TRANS_ONLY_PREC_DIFF);
+    int valid_point = (pts[2 * n] >= 0 && pts[2 * n + 1] >= 0 &&
+                       pts_inref[2 * n] >= 0 && pts_inref[2 * n + 1] >= 0);
+    if (!valid_point) return 0;  // NOTE(review): leaves ->invalid untouched
+  }
+
+  int all_mvs_same = 1;
+  for (int k = 1; k < np; k++) {
+    all_mvs_same &= (mvs[0] == mvs[2 * k]) & (mvs[1] == mvs[2 * k + 1]);
+  }
+  if (all_mvs_same) {
+    derive_model->invalid = 1;
+    return 0;
+  }
+
+  // Top-left point
+  x0 = pts[2 * 0];
+  y0 = pts[2 * 0 + 1];
+  ref_x0 = pts_inref[2 * 0];
+  ref_y0 = pts_inref[2 * 0 + 1];
+
+  // Top-right point
+  ref_x1 = pts_inref[2 * 1];
+  ref_y1 = pts_inref[2 * 1 + 1];
+
+  // Bottom-left point
+  ref_x2 = pts_inref[2 * 2];
+  ref_y2 = pts_inref[2 * 2 + 1];
+
+  derive_model->wmmat[2] = (ref_x1 - ref_x0) >> width_log2;
+  derive_model->wmmat[4] = (ref_y1 - ref_y0) >> width_log2;
+
+  derive_model->wmmat[3] = (ref_x2 - ref_x0) >> height_log2;
+  derive_model->wmmat[5] = (ref_y2 - ref_y0) >> height_log2;
+
+  int64_t wmmat0 = (int64_t)ref_x0 -
+                   (int64_t)derive_model->wmmat[2] * (int64_t)x0 -
+                   (int64_t)derive_model->wmmat[3] * (int64_t)y0;
+  int64_t wmmat1 = (int64_t)ref_y0 -
+                   (int64_t)derive_model->wmmat[4] * (int64_t)x0 -
+                   (int64_t)derive_model->wmmat[5] * (int64_t)y0;
+
+  derive_model->wmtype = AFFINE;
+  derive_model->invalid = 0;
+
+  av1_reduce_warp_model(derive_model);
+
+  // check compatibility with the fast warp filter
+  if (!av1_get_shear_params(derive_model)) {
+    derive_model->invalid = 1;
+    return 0;
+  }
+
+  derive_model->wmmat[0] = (int32_t)clamp64(wmmat0, -WARPEDMODEL_TRANS_CLAMP,
+                                            WARPEDMODEL_TRANS_CLAMP - 1);
+  derive_model->wmmat[1] = (int32_t)clamp64(wmmat1, -WARPEDMODEL_TRANS_CLAMP,
+                                            WARPEDMODEL_TRANS_CLAMP - 1);
+
+  derive_model->wmmat[6] = derive_model->wmmat[7] = 0;
+
+  return 1;
+}
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
diff --git a/av1/common/warped_motion.h b/av1/common/warped_motion.h
index 444ce88..9722b22 100644
--- a/av1/common/warped_motion.h
+++ b/av1/common/warped_motion.h
@@ -302,4 +302,86 @@
                           WarpedMotionParams *wm_params);
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
 
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+// Given a warp model which was initially used at a temporal distance of
+// `in_distance`, rescale it to a new temporal distance of `out_distance`.
+// Both distances are allowed to be negative, but they must be nonzero.
+//
+// The mathematically ideal way to rescale a warp model from one temporal
+// distance to another would be to use a matrix exponential: If we write the
+// input model as a 3x3 matrix M, then the output model should be
+//
+//  ideal output = M ^ (out_distance / in_distance)
+//
+// However, computing a matrix exponential is complicated, especially in
+// fixed point, and so would not be very hardware friendly. In addition,
+// this function is mainly used to predict global motion parameters, with
+// the true values being coded as a delta from this prediction. As the
+// global motion will not be perfectly consistent, there's a limit to how
+// accurate our prediction can be.
+//
+// For these reasons, we approximate the matrix exponential using its
+// first-order Taylor series:
+//
+//  output = I + (M - I) * (out_distance / in_distance)
+//
+// This is far easier to compute, and provides a "good enough" approximation
+// for the models we use in practice, which are all reasonably near to the
+// identity model (all parameters except for the translational part are
+// within +/- 1/2 of the identity).
+static INLINE void av1_scale_warp_model(const WarpedMotionParams *in_params,
+                                        int in_distance,
+                                        WarpedMotionParams *out_params,
+                                        int out_distance) {
+  static const int param_shift[MAX_PARAMDIM - 1] = {
+    GM_TRANS_PREC_DIFF,    GM_TRANS_PREC_DIFF,   GM_ALPHA_PREC_DIFF,
+    GM_ALPHA_PREC_DIFF,    GM_ALPHA_PREC_DIFF,   GM_ALPHA_PREC_DIFF,
+    GM_ROW3HOMO_PREC_DIFF, GM_ROW3HOMO_PREC_DIFF
+  };
+  // Clamp bounds for each parameter, applied before the shift is restored.
+  static const int param_min[MAX_PARAMDIM - 1] = {
+    GM_TRANS_MIN, GM_TRANS_MIN, GM_ALPHA_MIN,    GM_ALPHA_MIN,
+    GM_ALPHA_MIN, GM_ALPHA_MIN, GM_ROW3HOMO_MIN, GM_ROW3HOMO_MIN
+  };
+
+  static const int param_max[MAX_PARAMDIM - 1] = {
+    GM_TRANS_MAX, GM_TRANS_MAX, GM_ALPHA_MAX,    GM_ALPHA_MAX,
+    GM_ALPHA_MAX, GM_ALPHA_MAX, GM_ROW3HOMO_MAX, GM_ROW3HOMO_MAX
+  };
+
+  assert(in_distance != 0);
+  assert(out_distance != 0);
+
+  // Flip signs so that in_distance is positive.
+  // We do this because
+  //   scaled_value = (... + divisor/2) / divisor
+  // is the simplest way to implement division with round-to-nearest in C,
+  // but it only works correctly if the divisor is positive
+  if (in_distance < 0) {
+    in_distance = -in_distance;
+    out_distance = -out_distance;
+  }
+
+  out_params->wmtype = in_params->wmtype;
+  for (int param = 0; param < MAX_PARAMDIM - 1; param++) {
+    int center = default_warp_params.wmmat[param];
+    // Only the offset from the default model's value is scaled.
+    int input = in_params->wmmat[param] - center;
+    int divisor = in_distance * (1 << param_shift[param]);
+    int output = (int)(((int64_t)input * out_distance + divisor / 2) / divisor);
+    output = clamp(output, param_min[param], param_max[param]) *
+             (1 << param_shift[param]);
+
+    out_params->wmmat[param] = center + output;
+  }
+}
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
+
+#if CONFIG_CWG_D067_IMPROVED_WARP
+int_mv get_warp_motion_vector_xy_pos(const WarpedMotionParams *model,
+                                     const int x, const int y,
+                                     MvSubpelPrecision precision);
+int get_model_from_corner_mvs(WarpedMotionParams *derive_model, int *pts,
+                              int np, int *mvs, const BLOCK_SIZE bsize);
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
 #endif  // AOM_AV1_COMMON_WARPED_MOTION_H_
diff --git a/av1/common/x86/av1_inv_txfm_ssse3.h b/av1/common/x86/av1_inv_txfm_ssse3.h
index 25b45bd..7704498 100644
--- a/av1/common/x86/av1_inv_txfm_ssse3.h
+++ b/av1/common/x86/av1_inv_txfm_ssse3.h
@@ -68,11 +68,11 @@
 };
 
 DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_8x16_default[16]) = {
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
   0x0707, 0x0707, 0x0707, 0x0707, 0x0f07, 0x0f07, 0x0f07, 0x0f07,
 #else
   0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0f07, 0x0f07, 0x0f07,
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
   0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07,
 };
 
@@ -83,11 +83,11 @@
 DECLARE_ALIGNED(16, static const int16_t,
                 av1_eob_to_eobxy_16x32_default[32]) = {
   0x0707, 0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f,
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
   0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f,
 #else
   0x0f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f,
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
   0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f,
   0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f,
 };
@@ -99,13 +99,13 @@
 };
 
 DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_8x32_default[32]) = {
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
   0x0707, 0x0707, 0x0707, 0x0707, 0x0f07, 0x0f07, 0x0f07, 0x0f07,
   0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07,
 #else
   0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0f07, 0x0f07, 0x0f07,
   0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x1f07, 0x1f07, 0x1f07,
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
   0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07,
   0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07,
 };
diff --git a/av1/decoder/accounting.c b/av1/decoder/accounting.c
index b8c1e5a..0062189 100644
--- a/av1/decoder/accounting.c
+++ b/av1/decoder/accounting.c
@@ -18,26 +18,51 @@
 #include "aom/aom_integer.h"
 #include "av1/decoder/accounting.h"
 
-static int accounting_hash(const char *str) {
+static int accounting_hash(AccountingSymbolInfo *acct_info) {
   uint32_t val;
   const unsigned char *ustr;
   val = 0;
-  ustr = (const unsigned char *)str;
+  ustr = (const unsigned char *)acct_info->c_file;
   /* This is about the worst hash one can design, but it should be good enough
      here. */
   while (*ustr) val += *ustr++;
+  /* Also mix in every tag (up to the first NULL entry) and c_line. */
+  for (int i = 0; i < AOM_ACCOUNTING_MAX_TAGS; i++) {
+    if (acct_info->tags[i] == NULL) break;
+    ustr = (const unsigned char *)acct_info->tags[i];
+    while (*ustr) val += *ustr++;
+  }
+  val += acct_info->c_line;
   return val % AOM_ACCOUNTING_HASH_SIZE;
 }
 
+int tags_equal(AccountingSymbolInfo *a, AccountingSymbolInfo *b) {
+  for (int i = 0; i < AOM_ACCOUNTING_MAX_TAGS; i++) {
+    const char *const tag_a = a->tags[i];
+    const char *const tag_b = b->tags[i];
+    // Identical pointers (including both NULL) trivially match.
+    if (tag_a == tag_b) continue;
+    // Exactly one side missing: the tag lists differ.
+    if (tag_a == NULL || tag_b == NULL) return 0;
+    if (strcmp(tag_a, tag_b) != 0) return 0;
+  }
+  return 1;
+}
+
 /* Dictionary lookup based on an open-addressing hash table. */
-int aom_accounting_dictionary_lookup(Accounting *accounting, const char *str) {
+int aom_accounting_dictionary_lookup(Accounting *accounting,
+                                     AccountingSymbolInfo *acct_info) {
   int hash;
-  size_t len;
   AccountingDictionary *dictionary;
   dictionary = &accounting->syms.dictionary;
-  hash = accounting_hash(str);
+  hash = accounting_hash(acct_info);
   while (accounting->hash_dictionary[hash] != -1) {
-    if (strcmp(dictionary->strs[accounting->hash_dictionary[hash]], str) == 0) {
+    if (strcmp(dictionary->acct_infos[accounting->hash_dictionary[hash]].c_file,
+               acct_info->c_file) == 0 &&
+        dictionary->acct_infos[accounting->hash_dictionary[hash]].c_line ==
+            acct_info->c_line &&
+        tags_equal(&dictionary->acct_infos[accounting->hash_dictionary[hash]],
+                   acct_info)) {
       return accounting->hash_dictionary[hash];
     }
     hash++;
@@ -46,9 +71,8 @@
   /* No match found. */
   assert(dictionary->num_strs + 1 < MAX_SYMBOL_TYPES);
   accounting->hash_dictionary[hash] = dictionary->num_strs;
-  len = strlen(str);
-  dictionary->strs[dictionary->num_strs] = malloc(len + 1);
-  snprintf(dictionary->strs[dictionary->num_strs], len + 1, "%s", str);
+  dictionary->acct_infos[dictionary->num_strs] = *acct_info;
+
   dictionary->num_strs++;
   return dictionary->num_strs - 1;
 }
@@ -77,42 +101,25 @@
 }
 
 void aom_accounting_clear(Accounting *accounting) {
-  int i;
-  AccountingDictionary *dictionary;
   free(accounting->syms.syms);
-  dictionary = &accounting->syms.dictionary;
-  for (i = 0; i < dictionary->num_strs; i++) {
-    free(dictionary->strs[i]);
-  }
 }
 
-void aom_accounting_set_context(Accounting *accounting, int16_t x, int16_t y) {
+void aom_accounting_set_context(Accounting *accounting, int16_t x, int16_t y,
+                                TREE_TYPE tree_type) {
   accounting->context.x = x;
   accounting->context.y = y;
+  accounting->context.tree_type = tree_type;
 }
 
-void aom_accounting_record(Accounting *accounting, const char *str,
-                           uint32_t bits) {
+void aom_accounting_record(Accounting *accounting, int value,
+                           SYMBOL_CODING_MODE coding_mode,
+                           AccountingSymbolInfo acct_info, uint64_t bits) {
-  AccountingSymbol sym;
+  AccountingSymbol sym = { 0 };  // Zero-fill: coding_type is not set below.
-  // Reuse previous symbol if it has the same context and symbol id.
-  if (accounting->syms.num_syms) {
-    AccountingSymbol *last_sym;
-    last_sym = &accounting->syms.syms[accounting->syms.num_syms - 1];
-    if (memcmp(&last_sym->context, &accounting->context,
-               sizeof(AccountingSymbolContext)) == 0) {
-      uint32_t id;
-      id = aom_accounting_dictionary_lookup(accounting, str);
-      if (id == last_sym->id) {
-        last_sym->bits += bits;
-        last_sym->samples++;
-        return;
-      }
-    }
-  }
   sym.context = accounting->context;
-  sym.samples = 1;
+  sym.value = value;
+  sym.coding_mode = coding_mode;
   sym.bits = bits;
-  sym.id = aom_accounting_dictionary_lookup(accounting, str);
+  sym.id = aom_accounting_dictionary_lookup(accounting, &acct_info);
   assert(sym.id <= 255);
   if (accounting->syms.num_syms == accounting->num_syms_allocated) {
     accounting->num_syms_allocated *= 2;
@@ -134,8 +141,21 @@
          accounting->syms.num_binary_syms);
   for (i = 0; i < accounting->syms.num_syms; i++) {
     sym = &accounting->syms.syms[i];
-    printf("%s x: %d, y: %d bits: %f samples: %d\n",
-           accounting->syms.dictionary.strs[sym->id], sym->context.x,
-           sym->context.y, (float)sym->bits / 8.0, sym->samples);
+    printf("%s x: %d, y: %d, tree: %d, bits: %f value: %d\n",
+           accounting->syms.dictionary.acct_infos[sym->id].c_func,
+           sym->context.x, sym->context.y, sym->context.tree_type,
+           (double)sym->bits / (double)(1 << AOM_ACCT_BITRES), sym->value);
   }
 }
+
+/* Packs a call site (function, file, line) together with up to four
+   optional tag strings into an AccountingSymbolInfo, returned by value. */
+AccountingSymbolInfo aom_accounting_make_info(
+    const char *c_func, const char *c_file, int c_line, const char *tag0,
+    const char *tag1, const char *tag2, const char *tag3) {
+  const AccountingSymbolInfo result = { .c_func = c_func,
+                                        .c_file = c_file,
+                                        .c_line = c_line,
+                                        .tags = { tag0, tag1, tag2, tag3 } };
+  return result;
+}
diff --git a/av1/decoder/accounting.h b/av1/decoder/accounting.h
index e4d3037..26cc0d9 100644
--- a/av1/decoder/accounting.h
+++ b/av1/decoder/accounting.h
@@ -13,6 +13,7 @@
 #define AOM_AV1_DECODER_ACCOUNTING_H_
 #include <stdlib.h>
 #include "aom/aomdx.h"
+#include "av1/common/enums.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -25,25 +26,66 @@
 #define MAX_SYMBOL_TYPES (256)
 
 /*The resolution of fractional-precision bit usage measurements, i.e.,
-   3 => 1/8th bits.*/
-#define AOM_ACCT_BITRES (3)
+   16 => 1/65536th bits.*/
+#define AOM_ACCT_BITRES (16)
+
+#define AOM_ACCOUNTING_MAX_TAGS (4)
+
+enum {
+  SYMBOL_BIT = 0,
+  SYMBOL_BIT_BYPASS = 1,
+  SYMBOL_LITERAL_BYPASS = 2,
+  SYMBOL_UNARY = 3,
+  SYMBOL_CDF = 4,
+} UENUM1BYTE(SYMBOL_CODING_MODE);
 
 typedef struct {
   int16_t x;
   int16_t y;
+  TREE_TYPE tree_type;
 } AccountingSymbolContext;
 
 typedef struct {
   AccountingSymbolContext context;
   uint32_t id;
-  /** Number of bits in units of 1/8 bit. */
-  uint32_t bits;
-  uint32_t samples;
+  /** Number of bits in units of 1/65536 bit. */
+  uint64_t bits;
+  int value;
+  SYMBOL_CODING_MODE coding_mode;
+  int coding_type;
 } AccountingSymbol;
 
+typedef struct {
+  const char *c_func;
+  const char *c_file;
+  int c_line;
+  const char *tags[AOM_ACCOUNTING_MAX_TAGS];
+} AccountingSymbolInfo;
+
+AccountingSymbolInfo aom_accounting_make_info(
+    const char *c_func, const char *c_file, int c_line, const char *tag0,
+    const char *tag1, const char *tag2, const char *tag3);
+
+#define ACCT_INFO0() \
+  aom_accounting_make_info(__func__, __FILE__, __LINE__, NULL, NULL, NULL, NULL)
+#define ACCT_INFO1(tag0) \
+  aom_accounting_make_info(__func__, __FILE__, __LINE__, tag0, NULL, NULL, NULL)
+#define ACCT_INFO2(tag0, tag1) \
+  aom_accounting_make_info(__func__, __FILE__, __LINE__, tag0, tag1, NULL, NULL)
+#define ACCT_INFO3(tag0, tag1, tag2) \
+  aom_accounting_make_info(__func__, __FILE__, __LINE__, tag0, tag1, tag2, NULL)
+#define ACCT_INFO4(tag0, tag1, tag2, tag3) \
+  aom_accounting_make_info(__func__, __FILE__, __LINE__, tag0, tag1, tag2, tag3)
+
+#define GET_ACCT_INFO_MACRO(_0, _1, _2, _3, _4, NAME, ...) NAME
+#define ACCT_INFO(...)                                                       \
+  GET_ACCT_INFO_MACRO(_0 __VA_OPT__(, ) __VA_ARGS__, ACCT_INFO4, ACCT_INFO3, \
+                      ACCT_INFO2, ACCT_INFO1, ACCT_INFO0)                    \
+  (__VA_ARGS__)
+
 /** Dictionary for translating strings into id. */
 typedef struct {
-  char *strs[MAX_SYMBOL_TYPES];
+  AccountingSymbolInfo acct_infos[MAX_SYMBOL_TYPES];
   int num_strs;
 } AccountingDictionary;
 
@@ -70,16 +112,20 @@
   int num_syms_allocated;
   int16_t hash_dictionary[AOM_ACCOUNTING_HASH_SIZE];
   AccountingSymbolContext context;
-  uint32_t last_tell_frac;
+  uint64_t last_tell_frac;
 };
 
 void aom_accounting_init(Accounting *accounting);
 void aom_accounting_reset(Accounting *accounting);
 void aom_accounting_clear(Accounting *accounting);
-void aom_accounting_set_context(Accounting *accounting, int16_t x, int16_t y);
-int aom_accounting_dictionary_lookup(Accounting *accounting, const char *str);
-void aom_accounting_record(Accounting *accounting, const char *str,
-                           uint32_t bits);
+
+void aom_accounting_set_context(Accounting *accounting, int16_t x, int16_t y,
+                                TREE_TYPE tree_type);
+int aom_accounting_dictionary_lookup(Accounting *accounting,
+                                     AccountingSymbolInfo *acct_info);
+void aom_accounting_record(Accounting *accounting, int value,
+                           SYMBOL_CODING_MODE coding_mode,
+                           AccountingSymbolInfo acct_info, uint64_t bits);
 void aom_accounting_dump(Accounting *accounting);
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index e59c8f5..dd52456 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -72,8 +72,6 @@
 #include "av1/decoder/decodetxb.h"
 #include "av1/decoder/detokenize.h"
 
-#define ACCT_STR __func__
-
 #define AOM_MIN_THREADS_PER_TILE 1
 #define AOM_MAX_THREADS_PER_TILE 2
 
@@ -128,7 +126,11 @@
 }
 
 static AOM_INLINE void loop_restoration_read_sb_coeffs(
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+    AV1_COMMON *const cm, MACROBLOCKD *xd, aom_reader *const r, int plane,
+#else
     const AV1_COMMON *const cm, MACROBLOCKD *xd, aom_reader *const r, int plane,
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
     int runit_idx);
 
 static int read_is_valid(const uint8_t *start, size_t len, const uint8_t *end) {
@@ -247,6 +249,37 @@
   PLANE_TYPE plane_type = get_plane_type(plane);
 
   av1_predict_intra_block_facade(cm, xd, plane, col, row, tx_size);
+#if CONFIG_INSPECTION
+  {
+    const int txwpx = tx_size_wide[tx_size];
+    const int txhpx = tx_size_high[tx_size];
+
+    struct macroblockd_plane *const pd = &xd->plane[plane];
+    const int dst_stride = pd->dst.stride;
+    uint16_t *dst = &pd->dst.buf[(row * dst_stride + col) << MI_SIZE_LOG2];
+    for (int i = 0; i < txhpx; i++) {
+      for (int j = 0; j < txwpx; j++) {
+        uint16_t pixel = dst[i * dst_stride + j];
+        int stride = cm->predicted_pixels.strides[plane > 0];
+        int pixel_c, pixel_r;
+
+        if (plane) {
+          mi_to_pixel_loc(&pixel_c, &pixel_r,
+                          mbmi->chroma_ref_info.mi_col_chroma_base,
+                          mbmi->chroma_ref_info.mi_row_chroma_base, col, row,
+                          pd->subsampling_x, pd->subsampling_y);
+        } else {
+          mi_to_pixel_loc(&pixel_c, &pixel_r, xd->mi_col, xd->mi_row, col, row,
+                          pd->subsampling_x, pd->subsampling_y);
+        }
+
+        pixel_c += j;
+        pixel_r += i;
+        cm->predicted_pixels.buffers[plane][pixel_r * stride + pixel_c] = pixel;
+      }
+    }
+  }
+#endif  // CONFIG_INSPECTION
 
 #if CONFIG_MISMATCH_DEBUG
   const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2);
@@ -267,8 +300,12 @@
                       pd->subsampling_x, pd->subsampling_y);
     }
     mismatch_check_block_pre(pd->dst.buf, pd->dst.stride,
-                             cm->current_frame.order_hint, plane, pixel_c,
-                             pixel_r, blk_w, blk_h);
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+                             cm->current_frame.display_order_hint,
+#else
+                             cm->current_frame.order_hint,
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+                             plane, pixel_c, pixel_r, blk_w, blk_h);
   }
 #endif  // CONFIG_MISMATCH_DEBUG
 
@@ -317,7 +354,12 @@
       mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, col, row,
                       pd->subsampling_x, pd->subsampling_y);
     }
-    mismatch_check_block_tx(dst, pd->dst.stride, cm->current_frame.order_hint,
+    mismatch_check_block_tx(dst, pd->dst.stride,
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+                            cm->current_frame.display_order_hint,
+#else
+                            cm->current_frame.order_hint,
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
                             plane, pixel_c, pixel_r, blk_w, blk_h);
   }
 #endif  // CONFIG_MISMATCH_DEBUG
@@ -325,12 +367,7 @@
   if (plane == AOM_PLANE_Y && store_cfl_required(cm, xd) &&
       xd->tree_type == SHARED_PART) {
 #if CONFIG_ADAPTIVE_DS_FILTER
-    cfl_store_tx(xd, row, col, tx_size,
-#if DS_FRAME_LEVEL
-                 cm->features.ds_filter_type);
-#else
-                 cm->seq_params.enable_cfl_ds_filter);
-#endif  // DS_FRAME_LEVEL
+    cfl_store_tx(xd, row, col, tx_size, cm->seq_params.enable_cfl_ds_filter);
 #else
     cfl_store_tx(xd, row, col, tx_size);
 #endif  // CONFIG_ADAPTIVE_DS_FILTER
@@ -394,7 +431,12 @@
     mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, blk_col, blk_row,
                     pd->subsampling_x, pd->subsampling_y);
   }
-  mismatch_check_block_tx(dst, pd->dst.stride, cm->current_frame.order_hint,
+  mismatch_check_block_tx(dst, pd->dst.stride,
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+                          cm->current_frame.display_order_hint,
+#else
+                          cm->current_frame.order_hint,
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
                           plane, pixel_c, pixel_r, blk_w, blk_h);
 #endif  // CONFIG_MISMATCH_DEBUG
 }
@@ -561,8 +603,8 @@
   }
 
   CHROMA_REF_INFO *chroma_ref_info = &xd->mi[0]->chroma_ref_info;
-  set_chroma_ref_info(mi_row, mi_col, index, bsize, chroma_ref_info,
-                      parent ? &parent->chroma_ref_info : NULL,
+  set_chroma_ref_info(xd->tree_type, mi_row, mi_col, index, bsize,
+                      chroma_ref_info, parent ? &parent->chroma_ref_info : NULL,
                       parent ? parent->bsize : BLOCK_INVALID,
                       parent ? parent->partition : PARTITION_NONE,
                       xd->plane[1].subsampling_x, xd->plane[1].subsampling_y);
@@ -578,12 +620,14 @@
                        num_planes, chroma_ref_info);
 }
 
+#if !CONFIG_REFINEMV
 typedef struct PadBlock {
   int x0;
   int x1;
   int y0;
   int y1;
 } PadBlock;
+#endif  //! CONFIG_REFINEMV
 
 static AOM_INLINE void highbd_build_mc_border(const uint16_t *src,
                                               int src_stride, uint16_t *dst,
@@ -622,10 +666,12 @@
   } while (--b_h);
 }
 
-static INLINE int update_extend_mc_border_params(
-    const struct scale_factors *const sf, struct buf_2d *const pre_buf,
-    MV32 scaled_mv, PadBlock *block, int subpel_x_mv, int subpel_y_mv,
-    int do_warp, int is_intrabc, int *x_pad, int *y_pad) {
+#if !CONFIG_REFINEMV
+int update_extend_mc_border_params(const struct scale_factors *const sf,
+                                   struct buf_2d *const pre_buf, MV32 scaled_mv,
+                                   PadBlock *block, int subpel_x_mv,
+                                   int subpel_y_mv, int do_warp, int is_intrabc,
+                                   int *x_pad, int *y_pad) {
   // Get reference width and height.
   int frame_width = pre_buf->width;
   int frame_height = pre_buf->height;
@@ -662,6 +708,7 @@
   }
   return 0;
 }
+#endif  //! CONFIG_REFINEMV
 
 static INLINE void extend_mc_border(const struct scale_factors *const sf,
                                     struct buf_2d *const pre_buf,
@@ -673,7 +720,13 @@
   int x_pad = 0, y_pad = 0;
   if (update_extend_mc_border_params(sf, pre_buf, scaled_mv, &block,
                                      subpel_x_mv, subpel_y_mv, do_warp,
-                                     is_intrabc, &x_pad, &y_pad)) {
+                                     is_intrabc, &x_pad, &y_pad
+#if CONFIG_REFINEMV
+                                     ,
+                                     NULL
+#endif  // CONFIG_REFINEMV
+
+                                     )) {
     // Get reference block pointer.
     const uint16_t *const buf_ptr =
         pre_buf->buf0 + block.y0 * pre_buf->stride + block.x0;
@@ -690,7 +743,7 @@
            x_pad * (AOM_INTERP_EXTEND - 1);
   }
 }
-
+#if !CONFIG_REFINEMV
 static void dec_calc_subpel_params(
     const MV *const src_mv, InterPredParams *const inter_pred_params,
     const MACROBLOCKD *const xd, int mi_x, int mi_y, uint16_t **pre,
@@ -810,8 +863,17 @@
   }
   *pre = pre_buf->buf0 + block->y0 * pre_buf->stride + block->x0;
   *src_stride = pre_buf->stride;
-}
 
+#if CONFIG_D071_IMP_MSK_BLD
+  if (inter_pred_params->border_data.enable_bacp) {
+    subpel_params->x0 = block->x0;
+    subpel_params->x1 = block->x1;
+    subpel_params->y0 = block->y0;
+    subpel_params->y1 = block->y1;
+  }
+#endif  // CONFIG_D071_IMP_MSK_BLD
+}
+#endif  //! CONFIG_REFINEMV
 static void dec_calc_subpel_params_and_extend(
     const MV *const src_mv, InterPredParams *const inter_pred_params,
     MACROBLOCKD *const xd, int mi_x, int mi_y, int ref,
@@ -820,6 +882,19 @@
 #endif  // CONFIG_OPTFLOW_REFINEMENT
     uint16_t **mc_buf, uint16_t **pre, SubpelParams *subpel_params,
     int *src_stride) {
+
+#if CONFIG_REFINEMV
+  if (inter_pred_params->use_ref_padding) {
+    common_calc_subpel_params_and_extend(
+        src_mv, inter_pred_params, xd, mi_x, mi_y, ref,
+#if CONFIG_OPTFLOW_REFINEMENT
+        use_optflow_refinement,
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+        mc_buf, pre, subpel_params, src_stride);
+    return;
+  }
+#endif  // CONFIG_REFINEMV
+
   PadBlock block;
   MV32 scaled_mv;
   int subpel_x_mv, subpel_y_mv;
@@ -837,6 +912,7 @@
 }
 
 #if CONFIG_TIP
+#if !CONFIG_REFINEMV
 static AOM_INLINE void tip_dec_calc_subpel_params(
     const MV *const src_mv, InterPredParams *const inter_pred_params, int mi_x,
     int mi_y, uint16_t **pre, SubpelParams *subpel_params, int *src_stride,
@@ -847,6 +923,11 @@
     MV32 *scaled_mv, int *subpel_x_mv, int *subpel_y_mv) {
   const struct scale_factors *sf = inter_pred_params->scale_factors;
   struct buf_2d *pre_buf = &inter_pred_params->ref_frame_buf;
+
+#if CONFIG_REFINEMV
+  const int bw = inter_pred_params->original_pu_width;
+  const int bh = inter_pred_params->original_pu_height;
+#else
 #if CONFIG_OPTFLOW_REFINEMENT
   // Use original block size to clamp MV and to extend block boundary
   const int bw = use_optflow_refinement ? inter_pred_params->orig_block_width
@@ -857,6 +938,8 @@
   const int bw = inter_pred_params->block_width;
   const int bh = inter_pred_params->block_height;
 #endif  // CONFIG_OPTFLOW_REFINEMENT
+#endif  // CONFIG_REFINEMV
+
   const int is_scaled = av1_is_scaled(sf);
   if (is_scaled) {
     const int ssx = inter_pred_params->subsampling_x;
@@ -900,10 +983,21 @@
     block->y0 = pos_y >> SCALE_SUBPEL_BITS;
 
     // Get reference block bottom right coordinate.
+#if CONFIG_D071_IMP_MSK_BLD
+    block->x1 =
+        ((pos_x + (inter_pred_params->block_width - 1) * subpel_params->xs) >>
+         SCALE_SUBPEL_BITS) +
+        1;
+    block->y1 =
+        ((pos_y + (inter_pred_params->block_height - 1) * subpel_params->ys) >>
+         SCALE_SUBPEL_BITS) +
+        1;
+#else
     block->x1 =
         ((pos_x + (bw - 1) * subpel_params->xs) >> SCALE_SUBPEL_BITS) + 1;
     block->y1 =
         ((pos_y + (bh - 1) * subpel_params->ys) >> SCALE_SUBPEL_BITS) + 1;
+#endif  // CONFIG_D071_IMP_MSK_BLD
 
     MV temp_mv;
     temp_mv = tip_clamp_mv_to_umv_border_sb(inter_pred_params, src_mv, bw, bh,
@@ -942,8 +1036,13 @@
     block->y0 = pos_y;
 
     // Get reference block bottom right coordinate.
+#if CONFIG_D071_IMP_MSK_BLD
+    block->x1 = pos_x + inter_pred_params->block_width;
+    block->y1 = pos_y + inter_pred_params->block_height;
+#else
     block->x1 = pos_x + bw;
     block->y1 = pos_y + bh;
+#endif  // CONFIG_D071_IMP_MSK_BLD
 
     scaled_mv->row = mv_q4.row;
     scaled_mv->col = mv_q4.col;
@@ -952,8 +1051,17 @@
   }
   *pre = pre_buf->buf0 + block->y0 * pre_buf->stride + block->x0;
   *src_stride = pre_buf->stride;
-}
 
+#if CONFIG_D071_IMP_MSK_BLD
+  if (inter_pred_params->border_data.enable_bacp) {
+    subpel_params->x0 = block->x0;
+    subpel_params->x1 = block->x1;
+    subpel_params->y0 = block->y0;
+    subpel_params->y1 = block->y1;
+  }
+#endif  // CONFIG_D071_IMP_MSK_BLD
+}
+#endif  //! CONFIG_REFINEMV
 static void tip_dec_calc_subpel_params_and_extend(
     const MV *const src_mv, InterPredParams *const inter_pred_params,
     MACROBLOCKD *const xd, int mi_x, int mi_y, int ref,
@@ -962,7 +1070,22 @@
 #endif  // CONFIG_OPTFLOW_REFINEMENT
     uint16_t **mc_buf, uint16_t **pre, SubpelParams *subpel_params,
     int *src_stride) {
+
+#if CONFIG_REFINEMV
+  if (inter_pred_params->use_ref_padding) {
+    // Reference padding requested: use the common TIP subpel/extend path.
+    tip_common_calc_subpel_params_and_extend(
+        src_mv, inter_pred_params, xd, mi_x, mi_y, ref,
+#if CONFIG_OPTFLOW_REFINEMENT
+        use_optflow_refinement,
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+        mc_buf, pre, subpel_params, src_stride);
+    return;
+  }
+#else
+
   (void)xd;
+#endif  // CONFIG_REFINEMV
   PadBlock block;
   MV32 scaled_mv;
   int subpel_x_mv, subpel_y_mv;
@@ -1083,7 +1206,7 @@
   MACROBLOCKD *const xd = &dcb->xd;
 
 #if CONFIG_ACCOUNTING
-  aom_accounting_set_context(&pbi->accounting, mi_col, mi_row);
+  aom_accounting_set_context(&pbi->accounting, mi_col, mi_row, xd->tree_type);
 #endif
   set_offsets(cm, xd, bsize, mi_row, mi_col, bw, bh, x_mis, y_mis, parent,
               index);
@@ -1158,11 +1281,19 @@
 static void dec_build_inter_predictors(const AV1_COMMON *cm,
                                        DecoderCodingBlock *dcb, int plane,
                                        MB_MODE_INFO *mi, int build_for_obmc,
-                                       int bw, int bh, int mi_x, int mi_y) {
+                                       int bw, int bh, int mi_x, int mi_y
+#if CONFIG_REFINEMV
+                                       ,
+                                       int build_for_refine_mv_only
+#endif  // CONFIG_REFINEMV
+) {
   av1_build_inter_predictors(cm, &dcb->xd, plane, mi,
 #if CONFIG_BAWP
                              NULL,
 #endif
+#if CONFIG_REFINEMV
+                             build_for_refine_mv_only,
+#endif  // CONFIG_REFINEMV
                              build_for_obmc, bw, bh, mi_x, mi_y, dcb->mc_buf,
                              dec_calc_subpel_params_and_extend);
 }
@@ -1173,13 +1304,35 @@
                                                  BLOCK_SIZE bsize) {
   MACROBLOCKD *const xd = &dcb->xd;
   const int num_planes = av1_num_planes(cm);
+
+#if CONFIG_REFINEMV
+  MB_MODE_INFO *mbmi = xd->mi[0];
+  int need_subblock_mvs = xd->is_chroma_ref && mbmi->refinemv_flag &&
+                          !is_intrabc_block(mbmi, xd->tree_type);
+  assert(IMPLIES(need_subblock_mvs, !is_interintra_pred(mbmi)));
+  if (need_subblock_mvs && default_refinemv_modes(mbmi))
+    need_subblock_mvs &= (mbmi->comp_group_idx == 0 &&
+                          mbmi->interinter_comp.type == COMPOUND_AVERAGE);
+  if (need_subblock_mvs) {
+    fill_subblock_refine_mv(xd->refinemv_subinfo, xd->plane[0].width,
+                            xd->plane[0].height, mbmi->mv[0].as_mv,
+                            mbmi->mv[1].as_mv);
+  }
+#endif  // CONFIG_REFINEMV
+
   for (int plane = 0; plane < num_planes; ++plane) {
     if (plane && !xd->is_chroma_ref) break;
     const int mi_x = mi_col * MI_SIZE;
     const int mi_y = mi_row * MI_SIZE;
     dec_build_inter_predictors(cm, dcb, plane, xd->mi[0], 0,
                                xd->plane[plane].width, xd->plane[plane].height,
-                               mi_x, mi_y);
+                               mi_x, mi_y
+#if CONFIG_REFINEMV
+                               ,
+                               0
+#endif  // CONFIG_REFINEMV
+    );
+
     if (is_interintra_pred(xd->mi[0])) {
       BUFFER_SET ctx = { { xd->plane[0].dst.buf, xd->plane[1].dst.buf,
                            xd->plane[2].dst.buf },
@@ -1217,7 +1370,12 @@
 
     if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;
     dec_build_inter_predictors(ctxt->cm, (DecoderCodingBlock *)ctxt->dcb, j,
-                               &backup_mbmi, 1, bw, bh, mi_x, mi_y);
+                               &backup_mbmi, 1, bw, bh, mi_x, mi_y
+#if CONFIG_REFINEMV
+                               ,
+                               0
+#endif  // CONFIG_REFINEMV
+    );
   }
 }
 
@@ -1238,9 +1396,9 @@
     cm, tmp_buf, tmp_width, tmp_height, tmp_stride, xd->mb_to_right_edge, dcb
   };
   const BLOCK_SIZE bsize = xd->mi[0]->sb_type[PLANE_TYPE_Y];
-  foreach_overlappable_nb_above(cm, xd,
-                                max_neighbor_obmc[mi_size_wide_log2[bsize]],
-                                dec_build_prediction_by_above_pred, &ctxt);
+  foreach_overlappable_nb_above(
+      cm, xd, max_neighbor_obmc[mi_size_wide_log2[bsize]],
+      dec_build_prediction_by_above_pred, &ctxt, false);
 
   xd->mb_to_left_edge = -GET_MV_SUBPEL(xd->mi_col * MI_SIZE);
   xd->mb_to_right_edge = ctxt.mb_to_far_edge;
@@ -1272,7 +1430,12 @@
 
     if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;
     dec_build_inter_predictors(ctxt->cm, (DecoderCodingBlock *)ctxt->dcb, j,
-                               &backup_mbmi, 1, bw, bh, mi_x, mi_y);
+                               &backup_mbmi, 1, bw, bh, mi_x, mi_y
+#if CONFIG_REFINEMV
+                               ,
+                               0
+#endif  // CONFIG_REFINEMV
+    );
   }
 }
 
@@ -1335,12 +1498,7 @@
   if (store_cfl_required(cm, xd) && xd->tree_type == SHARED_PART) {
 #if CONFIG_ADAPTIVE_DS_FILTER
     cfl_store_block(xd, mbmi->sb_type[PLANE_TYPE_Y], mbmi->tx_size,
-#if DS_FRAME_LEVEL
-                    cm->features.ds_filter_type
-#else
-                    cm->seq_params.enable_cfl_ds_filter
-#endif
-    );
+                    cm->seq_params.enable_cfl_ds_filter);
 #else
     cfl_store_block(xd, mbmi->sb_type[PLANE_TYPE_Y], mbmi->tx_size);
 #endif  // CONFIG_ADAPTIVE_DS_FILTER
@@ -1394,10 +1552,44 @@
                       pd->subsampling_x, pd->subsampling_y);
     }
     mismatch_check_block_pre(pd->dst.buf, pd->dst.stride,
-                             cm->current_frame.order_hint, plane, pixel_c,
-                             pixel_r, pd->width, pd->height);
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+                             cm->current_frame.display_order_hint,
+#else
+                             cm->current_frame.order_hint,
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+                             plane, pixel_c, pixel_r, pd->width, pd->height);
   }
 #endif  // CONFIG_MISMATCH_DEBUG
+
+#if CONFIG_INSPECTION
+  for (int plane = 0; plane < num_planes; plane++) {
+    struct macroblockd_plane *const pd = &xd->plane[plane];
+    const int dst_stride = pd->dst.stride;
+    const int plane_block_size =
+        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
+    const int plane_width = mi_size_wide[plane_block_size];
+    const int plane_height = mi_size_high[plane_block_size];
+    for (int i = 0; i < plane_height * MI_SIZE; i++) {
+      for (int j = 0; j < plane_width * MI_SIZE; j++) {
+        uint16_t pixel = pd->dst.buf[i * dst_stride + j];
+        int stride = cm->predicted_pixels.strides[plane > 0];
+        int pixel_c, pixel_r;
+        if (plane) {
+          mi_to_pixel_loc(&pixel_c, &pixel_r,
+                          mbmi->chroma_ref_info.mi_col_chroma_base,
+                          mbmi->chroma_ref_info.mi_row_chroma_base, 0, 0,
+                          pd->subsampling_x, pd->subsampling_y);
+        } else {
+          mi_to_pixel_loc(&pixel_c, &pixel_r, xd->mi_col, xd->mi_row, 0, 0,
+                          pd->subsampling_x, pd->subsampling_y);
+        }
+        pixel_c += j;
+        pixel_r += i;
+        cm->predicted_pixels.buffers[plane][pixel_r * stride + pixel_c] = pixel;
+      }
+    }
+  }
+#endif  // CONFIG_INSPECTION
 }
 
 static AOM_INLINE void set_color_index_map_offset(MACROBLOCKD *const xd,
@@ -1672,7 +1864,7 @@
         is_inter ? ec_ctx->inter_4way_txfm_partition_cdf[is_rect][split4_ctx]
                  : ec_ctx->intra_4way_txfm_partition_cdf[is_rect][split4_ctx];
     const TX_PARTITION_TYPE split4_partition =
-        aom_read_symbol(r, split4_cdf, 4, ACCT_STR);
+        aom_read_symbol(r, split4_cdf, 4, ACCT_INFO("split4_partition"));
     partition = split4_partition;
     /*
     If only one split type (horizontal or vertical) is allowed for this block,
@@ -1684,7 +1876,8 @@
     // Read bit to indicate if there is any split at all
     aom_cdf_prob *split2_cdf = is_inter ? ec_ctx->inter_2way_txfm_partition_cdf
                                         : ec_ctx->intra_2way_txfm_partition_cdf;
-    const int has_first_split = aom_read_symbol(r, split2_cdf, 2, ACCT_STR);
+    const int has_first_split =
+        aom_read_symbol(r, split2_cdf, 2, ACCT_INFO("has_first_split"));
     partition = has_first_split
                     ? (allow_horz ? TX_PARTITION_HORZ : TX_PARTITION_VERT)
                     : TX_PARTITION_NONE;
@@ -1751,7 +1944,8 @@
   const int ctx = txfm_partition_context(xd->above_txfm_context + blk_col,
                                          xd->left_txfm_context + blk_row,
                                          mbmi->sb_type[plane_type], tx_size);
-  is_split = aom_read_symbol(r, ec_ctx->txfm_partition_cdf[ctx], 2, ACCT_STR);
+  is_split = aom_read_symbol(r, ec_ctx->txfm_partition_cdf[ctx], 2,
+                             ACCT_INFO("is_split"));
 
   if (is_split) {
     const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
@@ -1817,7 +2011,7 @@
   const int ctx = get_tx_size_context(xd);
   FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
   const int depth = aom_read_symbol(r, ec_ctx->tx_size_cdf[tx_size_cat][ctx],
-                                    max_depths + 1, ACCT_STR);
+                                    max_depths + 1, ACCT_INFO("depth"));
   assert(depth >= 0 && depth <= max_depths);
   const TX_SIZE tx_size = depth_to_tx_size(depth, bsize);
   return tx_size;
@@ -2023,8 +2217,8 @@
 #endif  // CONFIG_CROSS_CHROMA_TX
 
   CHROMA_REF_INFO *chroma_ref_info = &xd->mi[0]->chroma_ref_info;
-  set_chroma_ref_info(mi_row, mi_col, index, bsize, chroma_ref_info,
-                      parent ? &parent->chroma_ref_info : NULL,
+  set_chroma_ref_info(xd->tree_type, mi_row, mi_col, index, bsize,
+                      chroma_ref_info, parent ? &parent->chroma_ref_info : NULL,
                       parent ? parent->bsize : BLOCK_INVALID,
                       parent ? parent->partition : PARTITION_NONE,
                       xd->plane[1].subsampling_x, xd->plane[1].subsampling_y);
@@ -2049,6 +2243,46 @@
 }
 
 #if CONFIG_EXT_RECUR_PARTITIONS
+#if CONFIG_UNEVEN_4WAY
+/*!\brief Maps (ext_part, 4way, 4way_type, rect_type) to partition_type. */
+static PARTITION_TYPE
+    rect_part_table[2][2][NUM_UNEVEN_4WAY_PARTS][NUM_RECT_PARTS] = {
+      {
+          // !do_ext_partition
+          {
+              // !do_4way
+              { // UNEVEN_4A
+                PARTITION_HORZ, PARTITION_VERT },
+              { // UNEVEN_4B
+                PARTITION_HORZ, PARTITION_VERT },
+          },
+          {
+              // do_4way
+              { // UNEVEN_4A
+                PARTITION_HORZ, PARTITION_VERT },
+              { // UNEVEN_4B
+                PARTITION_HORZ, PARTITION_VERT },
+          },
+      },
+      {
+          // do_ext_partition
+          {
+              // !do_4way
+              { // UNEVEN_4A
+                PARTITION_HORZ_3, PARTITION_VERT_3 },
+              { // UNEVEN_4B
+                PARTITION_HORZ_3, PARTITION_VERT_3 },
+          },
+          {
+              // do_4way
+              { // UNEVEN_4A
+                PARTITION_HORZ_4A, PARTITION_VERT_4A },
+              { // UNEVEN_4B
+                PARTITION_HORZ_4B, PARTITION_VERT_4B },
+          },
+      },
+    };
+#else
 /*!\brief Maps (ext_part, rect_type) to partition_type. */
 static PARTITION_TYPE rect_part_table[2][NUM_RECT_PARTS] = {
   // !do_ext_partition
@@ -2056,6 +2290,7 @@
   // do_ext_partition
   { PARTITION_HORZ_3, PARTITION_VERT_3 },
 };
+#endif  // CONFIG_UNEVEN_4WAY
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
 
 static PARTITION_TYPE read_partition(const AV1_COMMON *const cm,
@@ -2066,11 +2301,8 @@
                                      const PARTITION_TREE *ptree_luma,
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
                                      BLOCK_SIZE bsize) {
-#if CONFIG_EXT_RECUR_PARTITIONS
-  if (!is_partition_point(bsize)) return PARTITION_NONE;
-#endif  // CONFIG_EXT_RECUR_PARTITIONS
-
   const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
+  assert(ctx >= 0);
   FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
 
 #if CONFIG_EXT_RECUR_PARTITIONS
@@ -2079,28 +2311,24 @@
   const int plane = xd->tree_type == CHROMA_PART;
   const int ssx = cm->seq_params.subsampling_x;
   const int ssy = cm->seq_params.subsampling_y;
-  if (plane == 1 && bsize == BLOCK_8X8) {
-    return PARTITION_NONE;
-  }
-  if (is_luma_chroma_share_same_partition(xd->tree_type, ptree_luma, bsize)) {
-    return sdp_chroma_part_from_luma(bsize, ptree_luma->partition, ssx, ssy);
+  const PARTITION_TYPE derived_partition =
+      av1_get_normative_forced_partition_type(
+          &cm->mi_params, xd->tree_type, ssx, ssy, mi_row, mi_col, bsize,
+          ptree_luma, &ptree->chroma_ref_info);
+  if (derived_partition != PARTITION_INVALID) {
+    return derived_partition;
   }
 
-  PARTITION_TYPE implied_partition;
-  const bool is_part_implied = is_partition_implied_at_boundary(
-      &cm->mi_params, xd->tree_type, ssx, ssy, mi_row, mi_col, bsize,
-      &ptree->chroma_ref_info, &implied_partition);
-  if (is_part_implied) return implied_partition;
-
-  const bool do_split =
-      aom_read_symbol(r, ec_ctx->do_split_cdf[plane][ctx], 2, ACCT_STR);
+  const bool do_split = aom_read_symbol(r, ec_ctx->do_split_cdf[plane][ctx], 2,
+                                        ACCT_INFO("do_split"));
   if (!do_split) {
     return PARTITION_NONE;
   }
   const int square_split_ctx = square_split_context(xd, mi_row, mi_col, bsize);
   if (is_square_split_eligible(bsize, cm->sb_size)) {
-    const bool do_square_split = aom_read_symbol(
-        r, ec_ctx->do_square_split_cdf[plane][square_split_ctx], 2, ACCT_STR);
+    const bool do_square_split =
+        aom_read_symbol(r, ec_ctx->do_square_split_cdf[plane][square_split_ctx],
+                        2, ACCT_INFO("do_square_split"));
     if (do_square_split) {
       return PARTITION_SPLIT;
     }
@@ -2108,20 +2336,46 @@
 
   RECT_PART_TYPE rect_type = rect_type_implied_by_bsize(bsize, xd->tree_type);
   if (rect_type == RECT_INVALID) {
-    rect_type =
-        aom_read_symbol(r, ec_ctx->rect_type_cdf[plane][ctx], 2, ACCT_STR);
+    rect_type = aom_read_symbol(r, ec_ctx->rect_type_cdf[plane][ctx],
+                                NUM_RECT_PARTS, ACCT_INFO("rect_type"));
   }
 
-  const bool disable_ext_part = !cm->seq_params.enable_ext_partitions;
-  const bool ext_partition_allowed =
-      !disable_ext_part &&
-      is_ext_partition_allowed(bsize, rect_type, xd->tree_type);
   bool do_ext_partition = false;
+#if CONFIG_UNEVEN_4WAY
+  bool do_uneven_4way_partition = false;
+  UNEVEN_4WAY_PART_TYPE uneven_4way_partition_type = UNEVEN_4A;
+#endif  // CONFIG_UNEVEN_4WAY
+
+  const bool ext_partition_allowed =
+      cm->seq_params.enable_ext_partitions &&
+      is_ext_partition_allowed(bsize, rect_type, xd->tree_type);
   if (ext_partition_allowed) {
-    do_ext_partition = aom_read_symbol(
-        r, ec_ctx->do_ext_partition_cdf[plane][rect_type][ctx], 2, ACCT_STR);
+    do_ext_partition =
+        aom_read_symbol(r, ec_ctx->do_ext_partition_cdf[plane][rect_type][ctx],
+                        2, ACCT_INFO("do_ext_partition"));
+#if CONFIG_UNEVEN_4WAY
+    if (do_ext_partition) {
+      const bool uneven_4way_partition_allowed =
+          is_uneven_4way_partition_allowed(bsize, rect_type, xd->tree_type);
+      if (uneven_4way_partition_allowed) {
+        do_uneven_4way_partition = aom_read_symbol(
+            r, ec_ctx->do_uneven_4way_partition_cdf[plane][rect_type][ctx], 2,
+            ACCT_INFO("do_uneven_4way_partition"));
+        if (do_uneven_4way_partition) {
+          uneven_4way_partition_type = aom_read_symbol(
+              r, ec_ctx->uneven_4way_partition_type_cdf[plane][rect_type][ctx],
+              NUM_UNEVEN_4WAY_PARTS, ACCT_INFO("uneven_4way_partition_type"));
+        }
+      }
+    }
+#endif  // CONFIG_UNEVEN_4WAY
   }
+#if CONFIG_UNEVEN_4WAY
+  return rect_part_table[do_ext_partition][do_uneven_4way_partition]
+                        [uneven_4way_partition_type][rect_type];
+#else
   return rect_part_table[do_ext_partition][rect_type];
+#endif  // CONFIG_UNEVEN_4WAY
 #else   // !CONFIG_EXT_RECUR_PARTITIONS
   if (!has_rows && !has_cols) return PARTITION_SPLIT;
 
@@ -2141,20 +2395,22 @@
   aom_cdf_prob *partition_cdf = ec_ctx->partition_cdf[plane][ctx];
   if (has_rows && has_cols) {
     return (PARTITION_TYPE)aom_read_symbol(
-        r, partition_cdf, partition_cdf_length(bsize), ACCT_STR);
+        r, partition_cdf, partition_cdf_length(bsize), ACCT_INFO());
   } else if (!has_rows && has_cols) {
     assert(bsize > BLOCK_8X8);
     aom_cdf_prob cdf[2];
     partition_gather_vert_alike(cdf, partition_cdf, bsize);
     assert(cdf[1] == AOM_ICDF(CDF_PROB_TOP));
-    return aom_read_cdf(r, cdf, 2, ACCT_STR) ? PARTITION_SPLIT : PARTITION_HORZ;
+    return aom_read_cdf(r, cdf, 2, ACCT_INFO()) ? PARTITION_SPLIT
+                                                : PARTITION_HORZ;
   } else {
     assert(has_rows && !has_cols);
     assert(bsize > BLOCK_8X8);
     aom_cdf_prob cdf[2];
     partition_gather_horz_alike(cdf, partition_cdf, bsize);
     assert(cdf[1] == AOM_ICDF(CDF_PROB_TOP));
-    return aom_read_cdf(r, cdf, 2, ACCT_STR) ? PARTITION_SPLIT : PARTITION_VERT;
+    return aom_read_cdf(r, cdf, 2, ACCT_INFO()) ? PARTITION_SPLIT
+                                                : PARTITION_VERT;
   }
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
 }
@@ -2185,12 +2441,19 @@
   MACROBLOCKD *const xd = &dcb->xd;
   const int ss_x = xd->plane[1].subsampling_x;
   const int ss_y = xd->plane[1].subsampling_y;
+  // Half block width/height.
   const int hbs_w = mi_size_wide[bsize] / 2;
   const int hbs_h = mi_size_high[bsize] / 2;
-#if !CONFIG_H_PARTITION
+#if CONFIG_UNEVEN_4WAY
+  // One-eighth block width/height.
+  const int ebs_w = mi_size_wide[bsize] / 8;
+  const int ebs_h = mi_size_high[bsize] / 8;
+#endif  // CONFIG_UNEVEN_4WAY
+#if !CONFIG_EXT_RECUR_PARTITIONS
+  // Quarter block width/height.
   const int qbs_w = mi_size_wide[bsize] / 4;
   const int qbs_h = mi_size_high[bsize] / 4;
-#endif  // !CONFIG_H_PARTITION
+#endif  // !CONFIG_EXT_RECUR_PARTITIONS
   PARTITION_TYPE partition;
   const int has_rows = (mi_row + hbs_h) < cm->mi_params.mi_rows;
   const int has_cols = (mi_col + hbs_w) < cm->mi_params.mi_cols;
@@ -2220,7 +2483,11 @@
         get_partition_plane_end(xd->tree_type, av1_num_planes(cm));
     for (int plane = plane_start; plane < plane_end; ++plane) {
       int rcol0, rcol1, rrow0, rrow1;
-      if (cm->rst_info[plane].frame_restoration_type != RESTORE_NONE &&
+      if ((cm->rst_info[plane].frame_restoration_type != RESTORE_NONE
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+           || cm->rst_info[plane].frame_cross_restoration_type != RESTORE_NONE
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+           ) &&
           av1_loop_restoration_corners_in_sb(cm, plane, mi_row, mi_col, bsize,
                                              &rcol0, &rcol1, &rrow0, &rrow1)) {
         const int rstride = cm->rst_info[plane].horz_units_per_tile;
@@ -2239,8 +2506,8 @@
     ptree->is_settled = 1;
     PARTITION_TREE *parent = ptree->parent;
     set_chroma_ref_info(
-        mi_row, mi_col, ptree->index, bsize, &ptree->chroma_ref_info,
-        parent ? &parent->chroma_ref_info : NULL,
+        xd->tree_type, mi_row, mi_col, ptree->index, bsize,
+        &ptree->chroma_ref_info, parent ? &parent->chroma_ref_info : NULL,
         parent ? parent->bsize : BLOCK_INVALID,
         parent ? parent->partition : PARTITION_NONE, ss_x, ss_y);
 
@@ -2263,6 +2530,12 @@
     ptree->partition = partition;
 
     switch (partition) {
+#if CONFIG_UNEVEN_4WAY
+      case PARTITION_HORZ_4A:
+      case PARTITION_HORZ_4B:
+      case PARTITION_VERT_4A:
+      case PARTITION_VERT_4B:
+#endif  // CONFIG_UNEVEN_4WAY
       case PARTITION_SPLIT:
         ptree->sub_tree[0] = av1_alloc_ptree_node(ptree, 0);
         ptree->sub_tree[1] = av1_alloc_ptree_node(ptree, 1);
@@ -2280,9 +2553,7 @@
         ptree->sub_tree[0] = av1_alloc_ptree_node(ptree, 0);
         ptree->sub_tree[1] = av1_alloc_ptree_node(ptree, 1);
         ptree->sub_tree[2] = av1_alloc_ptree_node(ptree, 2);
-#if CONFIG_H_PARTITION
         ptree->sub_tree[3] = av1_alloc_ptree_node(ptree, 3);
-#endif  // CONFIG_H_PARTITION
         break;
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
       default: break;
@@ -2310,7 +2581,8 @@
     const int index =
         (partition == PARTITION_HORZ || partition == PARTITION_VERT) +
         (partition == PARTITION_HORZ_3 || partition == PARTITION_VERT_3);
-    set_chroma_ref_info(mi_row, mi_col, index, bsize, &chroma_ref_info,
+    set_chroma_ref_info(xd->tree_type, mi_row, mi_col, index, bsize,
+                        &chroma_ref_info,
                         parent ? &parent->chroma_ref_info : NULL,
                         parent ? parent->bsize : BLOCK_INVALID,
                         parent ? parent->partition : PARTITION_NONE,
@@ -2391,7 +2663,81 @@
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
       break;
 #if CONFIG_EXT_RECUR_PARTITIONS
-#if CONFIG_H_PARTITION
+
+#if CONFIG_UNEVEN_4WAY
+    case PARTITION_HORZ_4A: {
+      const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_HORZ);
+      const BLOCK_SIZE bsize_med =
+          get_partition_subsize(bsize_big, PARTITION_HORZ);
+      assert(subsize == get_partition_subsize(bsize_med, PARTITION_HORZ));
+      int this_mi_row = mi_row;
+      DEC_PARTITION(this_mi_row, mi_col, subsize, 0);
+      this_mi_row += ebs_h;
+      if (this_mi_row >= cm->mi_params.mi_rows) break;
+      DEC_PARTITION(this_mi_row, mi_col, bsize_med, 1);
+      this_mi_row += 2 * ebs_h;
+      if (this_mi_row >= cm->mi_params.mi_rows) break;
+      DEC_PARTITION(this_mi_row, mi_col, bsize_big, 2);
+      this_mi_row += 4 * ebs_h;
+      if (this_mi_row >= cm->mi_params.mi_rows) break;
+      DEC_PARTITION(this_mi_row, mi_col, subsize, 3);
+      break;
+    }
+    case PARTITION_HORZ_4B: {
+      const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_HORZ);
+      const BLOCK_SIZE bsize_med =
+          get_partition_subsize(bsize_big, PARTITION_HORZ);
+      assert(subsize == get_partition_subsize(bsize_med, PARTITION_HORZ));
+      int this_mi_row = mi_row;
+      DEC_PARTITION(this_mi_row, mi_col, subsize, 0);
+      this_mi_row += ebs_h;
+      if (this_mi_row >= cm->mi_params.mi_rows) break;
+      DEC_PARTITION(this_mi_row, mi_col, bsize_big, 1);
+      this_mi_row += 4 * ebs_h;
+      if (this_mi_row >= cm->mi_params.mi_rows) break;
+      DEC_PARTITION(this_mi_row, mi_col, bsize_med, 2);
+      this_mi_row += 2 * ebs_h;
+      if (this_mi_row >= cm->mi_params.mi_rows) break;
+      DEC_PARTITION(this_mi_row, mi_col, subsize, 3);
+      break;
+    }
+    case PARTITION_VERT_4A: {
+      const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_VERT);
+      const BLOCK_SIZE bsize_med =
+          get_partition_subsize(bsize_big, PARTITION_VERT);
+      assert(subsize == get_partition_subsize(bsize_med, PARTITION_VERT));
+      int this_mi_col = mi_col;
+      DEC_PARTITION(mi_row, this_mi_col, subsize, 0);
+      this_mi_col += ebs_w;
+      if (this_mi_col >= cm->mi_params.mi_cols) break;
+      DEC_PARTITION(mi_row, this_mi_col, bsize_med, 1);
+      this_mi_col += 2 * ebs_w;
+      if (this_mi_col >= cm->mi_params.mi_cols) break;
+      DEC_PARTITION(mi_row, this_mi_col, bsize_big, 2);
+      this_mi_col += 4 * ebs_w;
+      if (this_mi_col >= cm->mi_params.mi_cols) break;
+      DEC_PARTITION(mi_row, this_mi_col, subsize, 3);
+      break;
+    }
+    case PARTITION_VERT_4B: {
+      const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_VERT);
+      const BLOCK_SIZE bsize_med =
+          get_partition_subsize(bsize_big, PARTITION_VERT);
+      assert(subsize == get_partition_subsize(bsize_med, PARTITION_VERT));
+      int this_mi_col = mi_col;
+      DEC_PARTITION(mi_row, this_mi_col, subsize, 0);
+      this_mi_col += ebs_w;
+      if (this_mi_col >= cm->mi_params.mi_cols) break;
+      DEC_PARTITION(mi_row, this_mi_col, bsize_big, 1);
+      this_mi_col += 4 * ebs_w;
+      if (this_mi_col >= cm->mi_params.mi_cols) break;
+      DEC_PARTITION(mi_row, this_mi_col, bsize_med, 2);
+      this_mi_col += 2 * ebs_w;
+      if (this_mi_col >= cm->mi_params.mi_cols) break;
+      DEC_PARTITION(mi_row, this_mi_col, subsize, 3);
+      break;
+    }
+#endif  // CONFIG_UNEVEN_4WAY
     case PARTITION_HORZ_3:
     case PARTITION_VERT_3: {
       for (int i = 0; i < 4; ++i) {
@@ -2414,39 +2760,13 @@
       }
       break;
     }
-#else
-    case PARTITION_HORZ_3: {
-      const BLOCK_SIZE bsize3 = get_partition_subsize(bsize, PARTITION_HORZ);
-      int this_mi_row = mi_row;
-      DEC_PARTITION(this_mi_row, mi_col, subsize, 0);
-      this_mi_row += qbs_h;
-      if (this_mi_row >= cm->mi_params.mi_rows) break;
-      DEC_PARTITION(this_mi_row, mi_col, bsize3, 1);
-      this_mi_row += 2 * qbs_h;
-      if (this_mi_row >= cm->mi_params.mi_rows) break;
-      DEC_PARTITION(this_mi_row, mi_col, subsize, 2);
-      break;
-    }
-    case PARTITION_VERT_3: {
-      const BLOCK_SIZE bsize3 = get_partition_subsize(bsize, PARTITION_VERT);
-      int this_mi_col = mi_col;
-      DEC_PARTITION(mi_row, this_mi_col, subsize, 0);
-      this_mi_col += qbs_w;
-      if (this_mi_col >= cm->mi_params.mi_cols) break;
-      DEC_PARTITION(mi_row, this_mi_col, bsize3, 1);
-      this_mi_col += 2 * qbs_w;
-      if (this_mi_col >= cm->mi_params.mi_cols) break;
-      DEC_PARTITION(mi_row, this_mi_col, subsize, 2);
-      break;
-    }
-#endif  // CONFIG_H_PARTITION
     case PARTITION_SPLIT:
       DEC_PARTITION(mi_row, mi_col, subsize, 0);
       DEC_PARTITION(mi_row, mi_col + hbs_w, subsize, 1);
       DEC_PARTITION(mi_row + hbs_h, mi_col, subsize, 2);
       DEC_PARTITION(mi_row + hbs_h, mi_col + hbs_w, subsize, 3);
       break;
-#else
+#else   // !CONFIG_EXT_RECUR_PARTITIONS
     case PARTITION_SPLIT:
       DEC_PARTITION(mi_row, mi_col, subsize, 0);
       DEC_PARTITION(mi_row, mi_col + hbs_w, subsize, 1);
@@ -2561,6 +2881,11 @@
                      parse_decode_flag);
     xd->tree_type = SHARED_PART;
   }
+#if CONFIG_INSPECTION
+  if (pbi->inspect_sb_cb != NULL) {
+    (*pbi->inspect_sb_cb)(pbi, pbi->inspect_ctx);
+  }
+#endif  // CONFIG_INSPECTION
 }
 
 static AOM_INLINE void setup_segmentation(AV1_COMMON *const cm,
@@ -2652,7 +2977,7 @@
 
 #if CONFIG_LR_FLEX_SYNTAX
 // Converts decoded index to frame restoration type depending on lr tools
-// thta are enabled for the frame for a given plane.
+// that are enabled for the frame for a given plane.
 static RestorationType index_to_frame_restoration_type(
     const AV1_COMMON *const cm, int plane, int ndx) {
   RestorationType r = RESTORE_NONE;
@@ -2671,8 +2996,19 @@
                                                struct aom_read_bit_buffer *rb) {
   assert(!cm->features.all_lossless);
   const int num_planes = av1_num_planes(cm);
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+  for (int p = 0; p < num_planes; ++p) {
+    RestorationInfo *rsi = &cm->rst_info[p];
+    rsi->frame_restoration_type = RESTORE_NONE;
+    rsi->frame_cross_restoration_type = RESTORE_NONE;
+  }
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
   if (is_global_intrabc_allowed(cm)) return;
+#if CONFIG_FLEXIBLE_RU_SIZE
+  int luma_none = 1, chroma_none = 1;
+#else
   int all_none = 1, chroma_none = 1;
+#endif  // CONFIG_FLEXIBLE_RU_SIZE
   for (int p = 0; p < num_planes; ++p) {
     RestorationInfo *rsi = &cm->rst_info[p];
 #if CONFIG_LR_FLEX_SYNTAX
@@ -2690,9 +3026,16 @@
     if (rsi->frame_restoration_type == RESTORE_SWITCHABLE &&
         cm->features.lr_tools_count[p] > 2) {
       if (aom_rb_read_bit(rb)) {
+        int tools_count = cm->features.lr_tools_count[p];
         for (int i = 1; i < RESTORE_SWITCHABLE_TYPES; ++i) {
-          if (!(plane_lr_tools_disable_mask & (1 << i)))
-            plane_lr_tools_disable_mask |= (aom_rb_read_bit(rb) << i);
+          if (!(plane_lr_tools_disable_mask & (1 << i))) {
+            const int disable_tool = aom_rb_read_bit(rb);
+            plane_lr_tools_disable_mask |= (disable_tool << i);
+            tools_count -= disable_tool;
+            // If tools_count drops to 2, break out of the loop, since no
+            // other tool is allowed to be disabled.
+            if (tools_count == 2) break;
+          }
         }
         av1_set_lr_tools(plane_lr_tools_disable_mask, p, &cm->features);
       }
@@ -2728,8 +3071,23 @@
       }
     }
 #endif  // CONFIG_LR_FLEX_SYNTAX
+
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+    if (p > 0) {
+      if (aom_rb_read_bit(rb)) {
+        rsi->frame_cross_restoration_type = RESTORE_WIENER_NONSEP;
+      }
+    }
+    if (rsi->frame_restoration_type != RESTORE_NONE ||
+        rsi->frame_cross_restoration_type != RESTORE_NONE) {
+#else
     if (rsi->frame_restoration_type != RESTORE_NONE) {
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+#if CONFIG_FLEXIBLE_RU_SIZE
+      luma_none &= p > 0;
+#else
       all_none = 0;
+#endif  // CONFIG_FLEXIBLE_RU_SIZE
       chroma_none &= p == 0;
     }
 #if CONFIG_WIENER_NONSEP
@@ -2742,6 +3100,44 @@
                                     : NUM_WIENERNS_CLASS_INIT_CHROMA;
 #endif  // CONFIG_WIENER_NONSEP
   }
+#if CONFIG_FLEXIBLE_RU_SIZE
+  const int frame_width = cm->superres_upscaled_width;
+  const int frame_height = cm->superres_upscaled_height;
+  set_restoration_unit_size(frame_width, frame_height,
+                            cm->seq_params.subsampling_x,
+                            cm->seq_params.subsampling_y, cm->rst_info);
+  int size = cm->rst_info[0].max_restoration_unit_size;
+
+  cm->rst_info[0].restoration_unit_size =
+      cm->rst_info[0].max_restoration_unit_size;
+  if (!luma_none) {
+    if (aom_rb_read_bit(rb))
+      cm->rst_info[0].restoration_unit_size = size >> 1;
+    else {
+      if (aom_rb_read_bit(rb))
+        cm->rst_info[0].restoration_unit_size = size;
+      else
+        cm->rst_info[0].restoration_unit_size = size >> 2;
+    }
+  }
+  if (num_planes > 1) {
+    cm->rst_info[1].restoration_unit_size =
+        cm->rst_info[1].max_restoration_unit_size;
+    if (!chroma_none) {
+      size = cm->rst_info[1].max_restoration_unit_size;
+      if (aom_rb_read_bit(rb))
+        cm->rst_info[1].restoration_unit_size = size >> 1;
+      else {
+        if (aom_rb_read_bit(rb))
+          cm->rst_info[1].restoration_unit_size = size;
+        else
+          cm->rst_info[1].restoration_unit_size = size >> 2;
+      }
+    }
+    cm->rst_info[2].restoration_unit_size =
+        cm->rst_info[1].restoration_unit_size;
+  }
+#else
   if (!all_none) {
 #if CONFIG_BLOCK_256
     assert(cm->sb_size == BLOCK_64X64 || cm->sb_size == BLOCK_128X128 ||
@@ -2796,6 +3192,7 @@
     cm->rst_info[2].restoration_unit_size =
         cm->rst_info[1].restoration_unit_size;
   }
+#endif  // CONFIG_FLEXIBLE_RU_SIZE
 }
 
 static AOM_INLINE void read_wiener_filter(MACROBLOCKD *xd, int wiener_win,
@@ -2803,11 +3200,11 @@
                                           WienerInfoBank *bank,
                                           aom_reader *rb) {
 #if CONFIG_LR_MERGE_COEFFS
-  const int exact_match =
-      aom_read_symbol(rb, xd->tile_ctx->merged_param_cdf, 2, ACCT_STR);
+  const int exact_match = aom_read_symbol(rb, xd->tile_ctx->merged_param_cdf, 2,
+                                          ACCT_INFO("exact_match"));
   int k;
   for (k = 0; k < bank->bank_size - 1; ++k) {
-    if (aom_read_literal(rb, 1, ACCT_STR)) break;
+    if (aom_read_literal(rb, 1, ACCT_INFO("bank_size"))) break;
   }
   const int ref = k;
   if (exact_match) {
@@ -2830,7 +3227,8 @@
         aom_read_primitive_refsubexpfin(
             rb, WIENER_FILT_TAP0_MAXV - WIENER_FILT_TAP0_MINV + 1,
             WIENER_FILT_TAP0_SUBEXP_K,
-            ref_wiener_info->vfilter[0] - WIENER_FILT_TAP0_MINV, ACCT_STR) +
+            ref_wiener_info->vfilter[0] - WIENER_FILT_TAP0_MINV,
+            ACCT_INFO("vfilter[0]")) +
         WIENER_FILT_TAP0_MINV;
   else
     wiener_info->vfilter[0] = wiener_info->vfilter[WIENER_WIN - 1] = 0;
@@ -2838,13 +3236,15 @@
       aom_read_primitive_refsubexpfin(
           rb, WIENER_FILT_TAP1_MAXV - WIENER_FILT_TAP1_MINV + 1,
           WIENER_FILT_TAP1_SUBEXP_K,
-          ref_wiener_info->vfilter[1] - WIENER_FILT_TAP1_MINV, ACCT_STR) +
+          ref_wiener_info->vfilter[1] - WIENER_FILT_TAP1_MINV,
+          ACCT_INFO("vfilter[1]")) +
       WIENER_FILT_TAP1_MINV;
   wiener_info->vfilter[2] = wiener_info->vfilter[WIENER_WIN - 3] =
       aom_read_primitive_refsubexpfin(
           rb, WIENER_FILT_TAP2_MAXV - WIENER_FILT_TAP2_MINV + 1,
           WIENER_FILT_TAP2_SUBEXP_K,
-          ref_wiener_info->vfilter[2] - WIENER_FILT_TAP2_MINV, ACCT_STR) +
+          ref_wiener_info->vfilter[2] - WIENER_FILT_TAP2_MINV,
+          ACCT_INFO("vfilter[2]")) +
       WIENER_FILT_TAP2_MINV;
   // The central element has an implicit +WIENER_FILT_STEP
   wiener_info->vfilter[WIENER_HALFWIN] =
@@ -2856,7 +3256,8 @@
         aom_read_primitive_refsubexpfin(
             rb, WIENER_FILT_TAP0_MAXV - WIENER_FILT_TAP0_MINV + 1,
             WIENER_FILT_TAP0_SUBEXP_K,
-            ref_wiener_info->hfilter[0] - WIENER_FILT_TAP0_MINV, ACCT_STR) +
+            ref_wiener_info->hfilter[0] - WIENER_FILT_TAP0_MINV,
+            ACCT_INFO("hfilter[0]")) +
         WIENER_FILT_TAP0_MINV;
   else
     wiener_info->hfilter[0] = wiener_info->hfilter[WIENER_WIN - 1] = 0;
@@ -2864,13 +3265,15 @@
       aom_read_primitive_refsubexpfin(
           rb, WIENER_FILT_TAP1_MAXV - WIENER_FILT_TAP1_MINV + 1,
           WIENER_FILT_TAP1_SUBEXP_K,
-          ref_wiener_info->hfilter[1] - WIENER_FILT_TAP1_MINV, ACCT_STR) +
+          ref_wiener_info->hfilter[1] - WIENER_FILT_TAP1_MINV,
+          ACCT_INFO("hfilter[1]")) +
       WIENER_FILT_TAP1_MINV;
   wiener_info->hfilter[2] = wiener_info->hfilter[WIENER_WIN - 3] =
       aom_read_primitive_refsubexpfin(
           rb, WIENER_FILT_TAP2_MAXV - WIENER_FILT_TAP2_MINV + 1,
           WIENER_FILT_TAP2_SUBEXP_K,
-          ref_wiener_info->hfilter[2] - WIENER_FILT_TAP2_MINV, ACCT_STR) +
+          ref_wiener_info->hfilter[2] - WIENER_FILT_TAP2_MINV,
+          ACCT_INFO("hfilter[2]")) +
       WIENER_FILT_TAP2_MINV;
   // The central element has an implicit +WIENER_FILT_STEP
   wiener_info->hfilter[WIENER_HALFWIN] =
@@ -2884,11 +3287,11 @@
                                            SgrprojInfoBank *bank,
                                            aom_reader *rb) {
 #if CONFIG_LR_MERGE_COEFFS
-  const int exact_match =
-      aom_read_symbol(rb, xd->tile_ctx->merged_param_cdf, 2, ACCT_STR);
+  const int exact_match = aom_read_symbol(rb, xd->tile_ctx->merged_param_cdf, 2,
+                                          ACCT_INFO("exact_match"));
   int k;
   for (k = 0; k < bank->bank_size - 1; ++k) {
-    if (aom_read_literal(rb, 1, ACCT_STR)) break;
+    if (aom_read_literal(rb, 1, ACCT_INFO("bank"))) break;
   }
   const int ref = k;
   if (exact_match) {
@@ -2904,7 +3307,7 @@
 #endif  // CONFIG_LR_MERGE_COEFFS
   SgrprojInfo *ref_sgrproj_info = av1_ref_from_sgrproj_bank(bank, ref);
 
-  sgrproj_info->ep = aom_read_literal(rb, SGRPROJ_PARAMS_BITS, ACCT_STR);
+  sgrproj_info->ep = aom_read_literal(rb, SGRPROJ_PARAMS_BITS, ACCT_INFO("ep"));
   const sgr_params_type *params = &av1_sgr_params[sgrproj_info->ep];
 
   if (params->r[0] == 0) {
@@ -2912,13 +3315,13 @@
     sgrproj_info->xqd[1] =
         aom_read_primitive_refsubexpfin(
             rb, SGRPROJ_PRJ_MAX1 - SGRPROJ_PRJ_MIN1 + 1, SGRPROJ_PRJ_SUBEXP_K,
-            ref_sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1, ACCT_STR) +
+            ref_sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1, ACCT_INFO()) +
         SGRPROJ_PRJ_MIN1;
   } else if (params->r[1] == 0) {
     sgrproj_info->xqd[0] =
         aom_read_primitive_refsubexpfin(
             rb, SGRPROJ_PRJ_MAX0 - SGRPROJ_PRJ_MIN0 + 1, SGRPROJ_PRJ_SUBEXP_K,
-            ref_sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0, ACCT_STR) +
+            ref_sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0, ACCT_INFO()) +
         SGRPROJ_PRJ_MIN0;
     sgrproj_info->xqd[1] = clamp((1 << SGRPROJ_PRJ_BITS) - sgrproj_info->xqd[0],
                                  SGRPROJ_PRJ_MIN1, SGRPROJ_PRJ_MAX1);
@@ -2926,12 +3329,12 @@
     sgrproj_info->xqd[0] =
         aom_read_primitive_refsubexpfin(
             rb, SGRPROJ_PRJ_MAX0 - SGRPROJ_PRJ_MIN0 + 1, SGRPROJ_PRJ_SUBEXP_K,
-            ref_sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0, ACCT_STR) +
+            ref_sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0, ACCT_INFO()) +
         SGRPROJ_PRJ_MIN0;
     sgrproj_info->xqd[1] =
         aom_read_primitive_refsubexpfin(
             rb, SGRPROJ_PRJ_MAX1 - SGRPROJ_PRJ_MIN1 + 1, SGRPROJ_PRJ_SUBEXP_K,
-            ref_sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1, ACCT_STR) +
+            ref_sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1, ACCT_INFO()) +
         SGRPROJ_PRJ_MIN1;
   }
 
@@ -2948,11 +3351,11 @@
   assert(num_classes <= WIENERNS_MAX_CLASSES);
 #if CONFIG_LR_MERGE_COEFFS
   for (int c_id = 0; c_id < num_classes; ++c_id) {
-    const int exact_match =
-        aom_read_symbol(rb, xd->tile_ctx->merged_param_cdf, 2, ACCT_STR);
+    const int exact_match = aom_read_symbol(rb, xd->tile_ctx->merged_param_cdf,
+                                            2, ACCT_INFO("exact_match"));
     int ref;
     for (ref = 0; ref < bank->bank_size_for_class[c_id] - 1; ++ref) {
-      if (aom_read_literal(rb, 1, ACCT_STR)) break;
+      if (aom_read_literal(rb, 1, ACCT_INFO("bank"))) break;
     }
     if (exact_match) {
       copy_nsfilter_taps_for_class(
@@ -2967,7 +3370,12 @@
   (void)xd;
 #endif  // CONFIG_LR_MERGE_COEFFS
   const WienernsFilterParameters *nsfilter_params =
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+      get_wienerns_parameters(xd->current_base_qindex, is_uv,
+                              wienerns_info->is_cross_filter);
+#else
       get_wienerns_parameters(xd->current_base_qindex, is_uv);
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
   const int beg_feat = 0;
   const int end_feat = nsfilter_params->ncoeffs;
   const int(*wienerns_coeffs)[WIENERNS_COEFCFG_LEN] = nsfilter_params->coeffs;
@@ -2997,28 +3405,33 @@
     const int rodd = is_uv ? 0 : (end_feat & 1);
     for (int i = beg_feat; i < end_feat; ++i) {
       if (rodd && i == end_feat - 5 && i != beg_feat) {
-        reduce_step[0] = aom_read_symbol(
-            rb, xd->tile_ctx->wienerns_reduce_cdf[0], 2, ACCT_STR);
+        reduce_step[0] =
+            aom_read_symbol(rb, xd->tile_ctx->wienerns_reduce_cdf[0], 2,
+                            ACCT_INFO("wienerns_reduce_cdf0"));
         if (reduce_step[0]) break;
       }
       if (!rodd && i == end_feat - 4 && i != beg_feat) {
-        reduce_step[1] = aom_read_symbol(
-            rb, xd->tile_ctx->wienerns_reduce_cdf[1], 2, ACCT_STR);
+        reduce_step[1] =
+            aom_read_symbol(rb, xd->tile_ctx->wienerns_reduce_cdf[1], 2,
+                            ACCT_INFO("wienerns_reduce_cdf1"));
         if (reduce_step[1]) break;
       }
       if (rodd && i == end_feat - 3 && i != beg_feat) {
-        reduce_step[2] = aom_read_symbol(
-            rb, xd->tile_ctx->wienerns_reduce_cdf[2], 2, ACCT_STR);
+        reduce_step[2] =
+            aom_read_symbol(rb, xd->tile_ctx->wienerns_reduce_cdf[2], 2,
+                            ACCT_INFO("wienerns_reduce_cdf2"));
         if (reduce_step[2]) break;
       }
       if (!rodd && i == end_feat - 2 && i != beg_feat) {
-        reduce_step[3] = aom_read_symbol(
-            rb, xd->tile_ctx->wienerns_reduce_cdf[3], 2, ACCT_STR);
+        reduce_step[3] =
+            aom_read_symbol(rb, xd->tile_ctx->wienerns_reduce_cdf[3], 2,
+                            ACCT_INFO("wienerns_reduce_cdf3"));
         if (reduce_step[3]) break;
       }
       if (rodd && i == end_feat - 1 && i != beg_feat) {
-        reduce_step[4] = aom_read_symbol(
-            rb, xd->tile_ctx->wienerns_reduce_cdf[4], 2, ACCT_STR);
+        reduce_step[4] =
+            aom_read_symbol(rb, xd->tile_ctx->wienerns_reduce_cdf[4], 2,
+                            ACCT_INFO("wienerns_reduce_cdf4"));
         if (reduce_step[4]) break;
       }
 #if ENABLE_LR_4PART_CODE
@@ -3029,7 +3442,8 @@
                   wienerns_coeffs[i - beg_feat][WIENERNS_MIN_ID],
               xd->tile_ctx->wienerns_4part_cdf
                   [wienerns_coeffs[i - beg_feat][WIENERNS_PAR_ID]],
-              wienerns_coeffs[i - beg_feat][WIENERNS_BIT_ID], ACCT_STR) +
+              wienerns_coeffs[i - beg_feat][WIENERNS_BIT_ID],
+              ACCT_INFO("wienerns_info_nsfilter")) +
           wienerns_coeffs[i - beg_feat][WIENERNS_MIN_ID];
 #else
       wienerns_info_nsfilter[i] =
@@ -3038,7 +3452,7 @@
               wienerns_coeffs[i - beg_feat][WIENERNS_PAR_ID],
               ref_wienerns_info_nsfilter[i] -
                   wienerns_coeffs[i - beg_feat][WIENERNS_MIN_ID],
-              ACCT_STR) +
+              ACCT_INFO("wienerns_info_nsfilter")) +
           wienerns_coeffs[i - beg_feat][WIENERNS_MIN_ID];
 #endif  // ENABLE_LR_4PART_CODE
     }
@@ -3048,17 +3462,39 @@
 #endif  // CONFIG_WIENER_NONSEP
 
 static AOM_INLINE void loop_restoration_read_sb_coeffs(
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+    AV1_COMMON *const cm, MACROBLOCKD *xd, aom_reader *const r, int plane,
+#else
     const AV1_COMMON *const cm, MACROBLOCKD *xd, aom_reader *const r, int plane,
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
     int runit_idx) {
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+  RestorationInfo *rsi = &cm->rst_info[plane];
+#else
   const RestorationInfo *rsi = &cm->rst_info[plane];
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
   RestorationUnitInfo *rui = &rsi->unit_info[runit_idx];
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+  assert(rsi->frame_restoration_type != RESTORE_NONE ||
+         rsi->frame_cross_restoration_type != RESTORE_NONE);
+  rui->restoration_type = RESTORE_NONE;
+  rui->cross_restoration_type = RESTORE_NONE;
+#else
   assert(rsi->frame_restoration_type != RESTORE_NONE);
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
 
   assert(!cm->features.all_lossless);
 
   const int wiener_win = (plane > 0) ? WIENER_WIN_CHROMA : WIENER_WIN;
 #if CONFIG_WIENER_NONSEP
   rui->wienerns_info.num_classes = rsi->num_filter_classes;
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+  rui->wienerns_cross_info.num_classes =
+      xd->wienerns_cross_info[plane].filter[0].num_classes;
+
+  rui->wienerns_info.is_cross_filter = 0;
+  rui->wienerns_cross_info.is_cross_filter = 1;
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
 #endif  // CONFIG_WIENER_NONSEP
 
   if (rsi->frame_restoration_type == RESTORE_SWITCHABLE) {
@@ -3067,16 +3503,17 @@
     for (int re = 0; re <= cm->features.lr_last_switchable_ndx[plane]; re++) {
       if (cm->features.lr_tools_disable_mask[plane] & (1 << re)) continue;
       const int found = aom_read_symbol(
-          r, xd->tile_ctx->switchable_flex_restore_cdf[re][plane], 2, ACCT_STR);
+          r, xd->tile_ctx->switchable_flex_restore_cdf[re][plane], 2,
+          ACCT_INFO("found"));
       if (found) {
         rui->restoration_type = re;
         break;
       }
     }
 #else
-    rui->restoration_type =
-        aom_read_symbol(r, xd->tile_ctx->switchable_restore_cdf,
-                        RESTORE_SWITCHABLE_TYPES, ACCT_STR);
+    rui->restoration_type = aom_read_symbol(
+        r, xd->tile_ctx->switchable_restore_cdf, RESTORE_SWITCHABLE_TYPES,
+        ACCT_INFO("restoration_type"));
 #endif  // CONFIG_LR_FLEX_SYNTAX
     switch (rui->restoration_type) {
       case RESTORE_WIENER:
@@ -3101,7 +3538,8 @@
       default: assert(rui->restoration_type == RESTORE_NONE); break;
     }
   } else if (rsi->frame_restoration_type == RESTORE_WIENER) {
-    if (aom_read_symbol(r, xd->tile_ctx->wiener_restore_cdf, 2, ACCT_STR)) {
+    if (aom_read_symbol(r, xd->tile_ctx->wiener_restore_cdf, 2,
+                        ACCT_INFO("wiener_restore_cdf"))) {
       rui->restoration_type = RESTORE_WIENER;
       read_wiener_filter(xd, wiener_win, &rui->wiener_info,
                          &xd->wiener_info[plane], r);
@@ -3109,7 +3547,8 @@
       rui->restoration_type = RESTORE_NONE;
     }
   } else if (rsi->frame_restoration_type == RESTORE_SGRPROJ) {
-    if (aom_read_symbol(r, xd->tile_ctx->sgrproj_restore_cdf, 2, ACCT_STR)) {
+    if (aom_read_symbol(r, xd->tile_ctx->sgrproj_restore_cdf, 2,
+                        ACCT_INFO("sgrproj_restore_cdf"))) {
       rui->restoration_type = RESTORE_SGRPROJ;
       read_sgrproj_filter(xd, &rui->sgrproj_info, &xd->sgrproj_info[plane], r);
     } else {
@@ -3117,7 +3556,8 @@
     }
 #if CONFIG_WIENER_NONSEP
   } else if (rsi->frame_restoration_type == RESTORE_WIENER_NONSEP) {
-    if (aom_read_symbol(r, xd->tile_ctx->wienerns_restore_cdf, 2, ACCT_STR)) {
+    if (aom_read_symbol(r, xd->tile_ctx->wienerns_restore_cdf, 2,
+                        ACCT_INFO("wienerns_restore_cdf"))) {
       rui->restoration_type = RESTORE_WIENER_NONSEP;
       read_wienerns_filter(xd, plane != AOM_PLANE_Y, &rui->wienerns_info,
                            &xd->wienerns_info[plane], r);
@@ -3127,7 +3567,8 @@
 #endif  // CONFIG_WIENER_NONSEP
 #if CONFIG_PC_WIENER
   } else if (rsi->frame_restoration_type == RESTORE_PC_WIENER) {
-    if (aom_read_symbol(r, xd->tile_ctx->pc_wiener_restore_cdf, 2, ACCT_STR)) {
+    if (aom_read_symbol(r, xd->tile_ctx->pc_wiener_restore_cdf, 2,
+                        ACCT_INFO("pc_wiener_restore_cdf"))) {
       rui->restoration_type = RESTORE_PC_WIENER;
       // No side-information for now.
     } else {
@@ -3135,6 +3576,18 @@
     }
 #endif  // CONFIG_PC_WIENER
   }
+
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+  if (rsi->frame_cross_restoration_type == RESTORE_WIENER_NONSEP) {
+    if (aom_read_symbol(r, xd->tile_ctx->wienerns_restore_cdf, 2,
+                        ACCT_INFO())) {
+      rui->cross_restoration_type = RESTORE_WIENER_NONSEP;
+      read_wienerns_filter(xd, plane != AOM_PLANE_Y, &rui->wienerns_cross_info,
+                           &xd->wienerns_cross_info[plane], r);
+    }
+  }
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+
 #if CONFIG_LR_FLEX_SYNTAX
   assert(((cm->features.lr_tools_disable_mask[plane] >> rui->restoration_type) &
           1) == 0);
@@ -3790,6 +4243,11 @@
                         cdef_info->cdef_strengths[0] == 0 &&
                         cdef_info->cdef_uv_strengths[0] == 0;
     const int no_restoration =
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+        rst_info[0].frame_cross_restoration_type == RESTORE_NONE &&
+        rst_info[1].frame_cross_restoration_type == RESTORE_NONE &&
+        rst_info[2].frame_cross_restoration_type == RESTORE_NONE &&
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
         rst_info[0].frame_restoration_type == RESTORE_NONE &&
         rst_info[1].frame_restoration_type == RESTORE_NONE &&
         rst_info[2].frame_restoration_type == RESTORE_NONE;
@@ -4065,7 +4523,15 @@
 
   for (int plane = 0; plane < num_planes; ++plane) {
     dcb->dqcoeff_block[plane] = cb_buffer->dqcoeff[plane];
+#if CONFIG_INSPECTION
+    dcb->dqcoeff_block_copy[plane] = cb_buffer->dqcoeff_copy[plane];
+    dcb->qcoeff_block[plane] = cb_buffer->qcoeff[plane];
+    dcb->dequant_values[plane] = cb_buffer->dequant_values[plane];
+#endif  // CONFIG_INSPECTION
     dcb->eob_data[plane] = cb_buffer->eob_data[plane];
+#if CONFIG_ATC_DCTX_ALIGNED
+    dcb->bob_data[plane] = cb_buffer->bob_data[plane];
+#endif  // CONFIG_ATC_DCTX_ALIGNED
     dcb->cb_offset[plane] = 0;
     dcb->txb_offset[plane] = 0;
   }
@@ -4344,7 +4810,7 @@
     av1_zero_left_context(xd);
 #if CONFIG_REF_MV_BANK
     av1_zero(xd->ref_mv_bank);
-#if !CONFIG_C043_MVP_IMPROVEMENTS
+#if !CONFIG_MVP_IMPROVEMENT
     xd->ref_mv_bank_pt = &td->ref_mv_bank;
 #endif
 #endif  // CONFIG_REF_MV_BANK
@@ -4366,9 +4832,9 @@
       // for MV referencing during decoding the tile.
       // xd->ref_mv_bank is updated as decoding goes.
       xd->ref_mv_bank.rmb_sb_hits = 0;
-#if !CONFIG_C043_MVP_IMPROVEMENTS
+#if !CONFIG_MVP_IMPROVEMENT
       td->ref_mv_bank = xd->ref_mv_bank;
-#endif  // !CONFIG_C043_MVP_IMPROVEMENTS
+#endif  // !CONFIG_MVP_IMPROVEMENT
 #endif  // CONFIG_REF_MV_BANK
 
 #if CONFIG_WARP_REF_LIST
@@ -4865,7 +5331,7 @@
     av1_zero_left_context(xd);
 #if CONFIG_REF_MV_BANK
     av1_zero(xd->ref_mv_bank);
-#if !CONFIG_C043_MVP_IMPROVEMENTS
+#if !CONFIG_MVP_IMPROVEMENT
     xd->ref_mv_bank_pt = &td->ref_mv_bank;
 #endif
 #endif  // CONFIG_REF_MV_BANK
@@ -4885,9 +5351,9 @@
 
 #if CONFIG_REF_MV_BANK
       xd->ref_mv_bank.rmb_sb_hits = 0;
-#if !CONFIG_C043_MVP_IMPROVEMENTS
+#if !CONFIG_MVP_IMPROVEMENT
       td->ref_mv_bank = xd->ref_mv_bank;
-#endif  // !CONFIG_C043_MVP_IMPROVEMENTS
+#endif  // !CONFIG_MVP_IMPROVEMENT
 #endif  // CONFIG_REF_MV_BANK
 
 #if CONFIG_WARP_REF_LIST
@@ -6003,7 +6469,7 @@
     if (aom_rb_read_bit(rb)) {
       seq_params->lr_tools_disable_mask[1] = DEF_UV_LR_TOOLS_DISABLE_MASK;
       for (int i = 1; i < RESTORE_SWITCHABLE_TYPES; ++i) {
-        if (DEF_UV_LR_TOOLS_DISABLE_MASK | (1 << i)) continue;
+        if (DEF_UV_LR_TOOLS_DISABLE_MASK & (1 << i)) continue;
         seq_params->lr_tools_disable_mask[1] |= (aom_rb_read_bit(rb) << i);
       }
     } else {
@@ -6016,10 +6482,15 @@
 
 void av1_read_sequence_header_beyond_av1(struct aom_read_bit_buffer *rb,
                                          SequenceHeader *seq_params) {
+  // printf("print sps\n");
 #if CONFIG_REF_MV_BANK
   seq_params->enable_refmvbank = aom_rb_read_bit(rb);
 #endif  // CONFIG_REF_MV_BANK
   seq_params->explicit_ref_frame_map = aom_rb_read_bit(rb);
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+  // 0 : use show_existing_frame, 1: use implicit derivation
+  seq_params->enable_frame_output_order = aom_rb_read_bit(rb);
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
   // A bit is sent here to indicate if the max number of references is 7. If
   // this bit is 0, then two more bits are sent to indicate the exact number
   // of references allowed (range: 3 to 6).
@@ -6048,6 +6519,12 @@
 #if CONFIG_BAWP
   seq_params->enable_bawp = aom_rb_read_bit(rb);
 #endif  // CONFIG_BAWP
+#if CONFIG_CWP
+  seq_params->enable_cwp = aom_rb_read_bit(rb);
+#endif  // CONFIG_CWP
+#if CONFIG_D071_IMP_MSK_BLD
+  seq_params->enable_imp_msk_bld = aom_rb_read_bit(rb);
+#endif  // CONFIG_D071_IMP_MSK_BLD
   seq_params->enable_fsc = aom_rb_read_bit(rb);
 #if CONFIG_CCSO
   seq_params->enable_ccso = aom_rb_read_bit(rb);
@@ -6058,6 +6535,9 @@
 #if CONFIG_ORIP
   seq_params->enable_orip = aom_rb_read_bit(rb);
 #endif
+#if CONFIG_IDIF
+  seq_params->enable_idif = aom_rb_read_bit(rb);
+#endif  // CONFIG_IDIF
 #if CONFIG_OPTFLOW_REFINEMENT
   seq_params->enable_opfl_refine = seq_params->order_hint_info.enable_order_hint
                                        ? aom_rb_read_literal(rb, 2)
@@ -6067,6 +6547,10 @@
 #if CONFIG_ADAPTIVE_MVD
   seq_params->enable_adaptive_mvd = aom_rb_read_bit(rb);
 #endif  // CONFIG_ADAPTIVE_MVD
+
+#if CONFIG_REFINEMV
+  seq_params->enable_refinemv = aom_rb_read_bit(rb);
+#endif  // CONFIG_REFINEMV
 #if CONFIG_FLEX_MVRES
   seq_params->enable_flex_mvres = aom_rb_read_bit(rb);
 #endif  // CONFIG_FLEX_MVRES
@@ -6081,6 +6565,13 @@
 #if CONFIG_EXT_RECUR_PARTITIONS
   seq_params->enable_ext_partitions = aom_rb_read_bit(rb);
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+  if (seq_params->reduced_still_picture_hdr) {
+    seq_params->enable_global_motion = 0;
+  } else {
+    seq_params->enable_global_motion = aom_rb_read_bit(rb);
+  }
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
 }
 
 static int read_global_motion_params(WarpedMotionParams *params,
@@ -6088,16 +6579,27 @@
                                      struct aom_read_bit_buffer *rb,
 #if !CONFIG_FLEX_MVRES
                                      int allow_hp) {
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+  (void)allow_hp;
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
 #else
                                      MvSubpelPrecision precision) {
   const int precision_loss = get_gm_precision_loss(precision);
-#endif
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+  (void)precision_loss;
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
+#endif  // !CONFIG_FLEX_MVRES
   TransformationType type = aom_rb_read_bit(rb);
   if (type != IDENTITY) {
-    if (aom_rb_read_bit(rb))
+    if (aom_rb_read_bit(rb)) {
       type = ROTZOOM;
-    else
+    } else {
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+      type = AFFINE;
+#else
       type = aom_rb_read_bit(rb) ? TRANSLATION : AFFINE;
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
+    }
   }
 
   *params = default_warp_params;
@@ -6133,6 +6635,11 @@
   }
 
   if (type >= TRANSLATION) {
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+    const int trans_dec_factor = GM_TRANS_DECODE_FACTOR;
+    const int trans_prec_diff = GM_TRANS_PREC_DIFF;
+    const int trans_max = GM_TRANS_MAX;
+#else
     const int trans_bits = (type == TRANSLATION)
 #if CONFIG_FLEX_MVRES
                                ? GM_ABS_TRANS_ONLY_BITS - precision_loss
@@ -6155,13 +6662,15 @@
                                     ? GM_TRANS_ONLY_PREC_DIFF + !allow_hp
 #endif
                                     : GM_TRANS_PREC_DIFF;
+    const int trans_max = (1 << trans_bits);
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
 
     params->wmmat[0] = aom_rb_read_signed_primitive_refsubexpfin(
-                           rb, (1 << trans_bits) + 1, SUBEXPFIN_K,
+                           rb, trans_max + 1, SUBEXPFIN_K,
                            (ref_params->wmmat[0] >> trans_prec_diff)) *
                        trans_dec_factor;
     params->wmmat[1] = aom_rb_read_signed_primitive_refsubexpfin(
-                           rb, (1 << trans_bits) + 1, SUBEXPFIN_K,
+                           rb, trans_max + 1, SUBEXPFIN_K,
                            (ref_params->wmmat[1] >> trans_prec_diff)) *
                        trans_dec_factor;
   }
@@ -6179,10 +6688,86 @@
 
 static AOM_INLINE void read_global_motion(AV1_COMMON *cm,
                                           struct aom_read_bit_buffer *rb) {
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+  const SequenceHeader *const seq_params = &cm->seq_params;
+  int num_total_refs = cm->ref_frames_info.num_total_refs;
+  bool use_global_motion = false;
+  if (seq_params->enable_global_motion) {
+    use_global_motion = aom_rb_read_bit(rb);
+  }
+  if (!use_global_motion) {
+    for (int frame = 0; frame < INTER_REFS_PER_FRAME; ++frame) {
+      cm->global_motion[frame] = default_warp_params;
+      cm->cur_frame->global_motion[frame] = default_warp_params;
+    }
+    return;
+  }
+
+  int our_ref = aom_rb_read_primitive_quniform(rb, num_total_refs + 1);
+  if (our_ref == num_total_refs) {
+    // Special case: Use IDENTITY model
+    cm->base_global_motion_model = default_warp_params;
+    cm->base_global_motion_distance = 1;
+  } else {
+    RefCntBuffer *buf = get_ref_frame_buf(cm, our_ref);
+    assert(buf);
+    int their_num_refs = buf->num_ref_frames;
+    if (their_num_refs == 0) {
+      // Special case: if an intra/key frame is used as a ref, use an
+      // IDENTITY model
+      cm->base_global_motion_model = default_warp_params;
+      cm->base_global_motion_distance = 1;
+    } else {
+      int their_ref = aom_rb_read_primitive_quniform(rb, their_num_refs);
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+      const int our_ref_order_hint = buf->display_order_hint;
+      const int their_ref_order_hint = buf->ref_display_order_hint[their_ref];
+#else
+        const int our_ref_order_hint = buf->order_hint;
+        const int their_ref_order_hint = buf->ref_order_hints[their_ref];
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+      cm->base_global_motion_model = buf->global_motion[their_ref];
+      cm->base_global_motion_distance =
+          get_relative_dist(&seq_params->order_hint_info, our_ref_order_hint,
+                            their_ref_order_hint);
+    }
+  }
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
+
   for (int frame = 0; frame < cm->ref_frames_info.num_total_refs; ++frame) {
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+    int temporal_distance;
+    if (seq_params->order_hint_info.enable_order_hint) {
+      const RefCntBuffer *const ref_buf = get_ref_frame_buf(cm, frame);
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+      const int ref_order_hint = ref_buf->display_order_hint;
+      const int cur_order_hint = cm->cur_frame->display_order_hint;
+#else
+        const int ref_order_hint = ref_buf->order_hint;
+        const int cur_order_hint = cm->cur_frame->order_hint;
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+      temporal_distance = get_relative_dist(&seq_params->order_hint_info,
+                                            cur_order_hint, ref_order_hint);
+    } else {
+      temporal_distance = 1;
+    }
+
+    if (temporal_distance == 0) {
+      // Don't code global motion for frames at the same temporal instant
+      cm->global_motion[frame] = default_warp_params;
+      continue;
+    }
+
+    WarpedMotionParams ref_params_;
+    av1_scale_warp_model(&cm->base_global_motion_model,
+                         cm->base_global_motion_distance, &ref_params_,
+                         temporal_distance);
+    WarpedMotionParams *ref_params = &ref_params_;
+#else
     const WarpedMotionParams *ref_params =
         cm->prev_frame ? &cm->prev_frame->global_motion[frame]
                        : &default_warp_params;
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
     int good_params =
 #if !CONFIG_FLEX_MVRES
         read_global_motion_params(&cm->global_motion[frame], ref_params, rb,
@@ -6291,6 +6876,10 @@
       continue;
     }
     frame_bufs[i].order_hint = 0;
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+    frame_bufs[i].display_order_hint = 0;
+    av1_zero(frame_bufs[i].ref_display_order_hint);
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
     av1_zero(frame_bufs[i].ref_order_hints);
   }
   av1_zero_unused_internal_frame_buffers(&cm->buffer_pool->int_frame_buffers);
@@ -6336,6 +6925,33 @@
   return cur_disp_order_hint;
 }
 
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+static INLINE int get_ref_frame_disp_order_hint(AV1_COMMON *const cm,
+                                                const RefCntBuffer *const buf) {
+  // Find the largest display order hint among this buffer's references.
+  int max_disp_order_hint = 0;
+  for (int map_idx = 0; map_idx < INTER_REFS_PER_FRAME; map_idx++) {
+    if ((int)buf->ref_display_order_hint[map_idx] > max_disp_order_hint)
+      max_disp_order_hint = buf->ref_display_order_hint[map_idx];
+  }
+
+  // If buf->order_hint is more than 35 frames (largest possible
+  // lag_in_frames) away from that maximum, assume it was wrapped as:
+  //     order_hint = display_order_hint % display_order_hint_factor
+  // and recover display_order_hint by adding display_order_hint_factor until
+  // it is within 35 of the maximum or has moved past it.
+  const int display_order_hint_factor =
+      1 << (cm->seq_params.order_hint_info.order_hint_bits_minus_1 + 1);
+  int disp_order_hint = buf->order_hint;
+  while (abs(max_disp_order_hint - disp_order_hint) > 35) {
+    if (disp_order_hint > max_disp_order_hint) return disp_order_hint;
+    // Still more than 35 below the maximum: undo one wrap-around.
+    disp_order_hint += display_order_hint_factor;
+  }
+  return disp_order_hint;
+}
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+
 // On success, returns 0. On failure, calls aom_internal_error and does not
 // return.
 static int read_uncompressed_header(AV1Decoder *pbi,
@@ -6507,11 +7123,7 @@
     }
   }
   features->disable_cdf_update = aom_rb_read_bit(rb);
-#if DS_FRAME_LEVEL
-  if (current_frame->frame_type == KEY_FRAME) {
-    features->ds_filter_type = aom_rb_read_literal(rb, 2);
-  }
-#endif  // DS_FRAME_LEVEL
+
   if (seq_params->force_screen_content_tools == 2) {
     features->allow_screen_content_tools = aom_rb_read_bit(rb);
   } else {
@@ -6707,7 +7319,11 @@
           buf->order_hint = order_hint;
           // TODO(kslu) This is a workaround for error resilient mode. Make
           // it more consistent with get_disp_order_hint().
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+          buf->display_order_hint = get_ref_frame_disp_order_hint(cm, buf);
+#else
           buf->display_order_hint = order_hint;
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
         }
       }
     }
@@ -6736,17 +7352,21 @@
       features->allow_global_intrabc = aom_rb_read_bit(rb);
       features->allow_local_intrabc =
           features->allow_global_intrabc ? aom_rb_read_bit(rb) : 1;
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
       features->max_drl_bits =
           aom_rb_read_primitive_quniform(
               rb, MAX_MAX_DRL_BITS - MIN_MAX_DRL_BITS + 1) +
           MIN_MAX_DRL_BITS;
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
     }
 #endif  // CONFIG_IBC_SR_EXT
 
     features->allow_ref_frame_mvs = 0;
     cm->prev_frame = NULL;
+
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+    cm->cur_frame->num_ref_frames = 0;
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
   } else {
     features->allow_ref_frame_mvs = 0;
 #if CONFIG_TIP
@@ -6763,15 +7383,19 @@
         features->allow_global_intrabc = aom_rb_read_bit(rb);
         features->allow_local_intrabc =
             features->allow_global_intrabc ? aom_rb_read_bit(rb) : 1;
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
         features->max_drl_bits =
             aom_rb_read_primitive_quniform(
                 rb, MAX_MAX_DRL_BITS - MIN_MAX_DRL_BITS + 1) +
             MIN_MAX_DRL_BITS;
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
       }
 #endif  // CONFIG_IBC_SR_EXT
 
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+      cm->cur_frame->num_ref_frames = 0;
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
+
     } else if (pbi->need_resync != 1) { /* Skip if need resync */
       // Implicitly derive the reference mapping
       RefFrameMapPair ref_frame_map_pairs[REF_FRAMES];
@@ -6852,6 +7476,9 @@
         }
         av1_get_past_future_cur_ref_lists(cm, scores);
       }
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+      cm->cur_frame->num_ref_frames = cm->ref_frames_info.num_total_refs;
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
 
       if (!features->error_resilient_mode && frame_size_override_flag) {
         setup_frame_size_with_refs(cm, rb);
@@ -6975,7 +7602,7 @@
 
     if (!(current_frame->frame_type == INTRA_ONLY_FRAME) &&
         pbi->need_resync != 1) {
-      for (int i = 0; i < INTER_REFS_PER_FRAME; ++i) {
+      for (int i = 0; i < cm->ref_frames_info.num_total_refs; ++i) {
         const RefCntBuffer *const ref_buf = get_ref_frame_buf(cm, i);
         if (!ref_buf) continue;
         struct scale_factors *const ref_scale_factors =
@@ -7066,6 +7693,11 @@
     cm->rst_info[0].frame_restoration_type = RESTORE_NONE;
     cm->rst_info[1].frame_restoration_type = RESTORE_NONE;
     cm->rst_info[2].frame_restoration_type = RESTORE_NONE;
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+    cm->rst_info[0].frame_cross_restoration_type = RESTORE_NONE;
+    cm->rst_info[1].frame_cross_restoration_type = RESTORE_NONE;
+    cm->rst_info[2].frame_cross_restoration_type = RESTORE_NONE;
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
   }
 
 #if CONFIG_TIP
@@ -7182,6 +7814,11 @@
     cm->rst_info[0].frame_restoration_type = RESTORE_NONE;
     cm->rst_info[1].frame_restoration_type = RESTORE_NONE;
     cm->rst_info[2].frame_restoration_type = RESTORE_NONE;
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+    cm->rst_info[0].frame_cross_restoration_type = RESTORE_NONE;
+    cm->rst_info[1].frame_cross_restoration_type = RESTORE_NONE;
+    cm->rst_info[2].frame_cross_restoration_type = RESTORE_NONE;
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
   }
   setup_loopfilter(cm, rb);
 
@@ -7225,6 +7862,21 @@
     features->enable_bawp = 0;
 #endif  // CONFIG_BAWP
 
+#if CONFIG_CWP
+  features->enable_cwp = seq_params->enable_cwp;
+#endif  // CONFIG_CWP
+#if CONFIG_CWG_D067_IMPROVED_WARP
+  features->allow_warpmv_mode = 0;
+  if (!frame_is_intra_only(cm) &&
+      (features->enabled_motion_modes & (1 << WARP_DELTA)) != 0) {
+    features->allow_warpmv_mode = aom_rb_read_bit(rb);
+  }
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+
+#if CONFIG_D071_IMP_MSK_BLD
+  features->enable_imp_msk_bld = seq_params->enable_imp_msk_bld;
+#endif  // CONFIG_D071_IMP_MSK_BLD
+
   features->reduced_tx_set_used = aom_rb_read_bit(rb);
 
   if (features->allow_ref_frame_mvs && !frame_might_allow_ref_frame_mvs(cm)) {
@@ -7385,10 +8037,10 @@
   cm->mi_params.setup_mi(&cm->mi_params);
 
   if (cm->features.allow_ref_frame_mvs) av1_setup_motion_field(cm);
-#if CONFIG_SMVP_IMPROVEMENT
+#if CONFIG_MVP_IMPROVEMENT
   else
     av1_setup_ref_frame_sides(cm);
-#endif  // CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
 
 #if CONFIG_PEF
   if (cm->seq_params.enable_pef && cm->features.allow_pef) {
@@ -7426,7 +8078,13 @@
 
   if (cm->rst_info[0].frame_restoration_type != RESTORE_NONE ||
       cm->rst_info[1].frame_restoration_type != RESTORE_NONE ||
-      cm->rst_info[2].frame_restoration_type != RESTORE_NONE) {
+      cm->rst_info[2].frame_restoration_type != RESTORE_NONE
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+      || cm->rst_info[0].frame_cross_restoration_type != RESTORE_NONE ||
+      cm->rst_info[1].frame_cross_restoration_type != RESTORE_NONE ||
+      cm->rst_info[2].frame_cross_restoration_type != RESTORE_NONE
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+  ) {
     av1_alloc_restoration_buffers(cm);
   }
   const int buf_size = MC_TEMP_BUF_PELS << 1;
@@ -7450,7 +8108,16 @@
 #if CONFIG_LPF_MASK
   av1_loop_filter_frame_init(cm, 0, num_planes);
 #endif
-
+#if CONFIG_INSPECTION
+  aom_realloc_frame_buffer(
+      &cm->predicted_pixels, cm->width, cm->height,
+      cm->seq_params.subsampling_x, cm->seq_params.subsampling_y,
+      AOM_DEC_BORDER_IN_PIXELS, cm->features.byte_alignment, NULL, NULL, NULL);
+  aom_realloc_frame_buffer(
+      &cm->prefiltered_pixels, cm->width, cm->height,
+      cm->seq_params.subsampling_x, cm->seq_params.subsampling_y,
+      AOM_DEC_BORDER_IN_PIXELS, cm->features.byte_alignment, NULL, NULL, NULL);
+#endif  // CONFIG_INSPECTION
   if (pbi->max_threads > 1 && !(tiles->large_scale && !pbi->ext_tile_debug) &&
       pbi->row_mt)
     *p_data_end =
@@ -7466,6 +8133,11 @@
     set_planes_to_neutral_grey(&cm->seq_params, xd->cur_buf, 1);
   }
 
+#if CONFIG_INSPECTION
+  memcpy(cm->prefiltered_pixels.buffer_alloc, cm->cur_frame->buf.buffer_alloc,
+         cm->prefiltered_pixels.frame_size);
+#endif  // CONFIG_INSPECTION
+
   if (end_tile != tiles->rows * tiles->cols - 1) {
     return;
   }
@@ -7539,6 +8211,11 @@
 #endif
 
     const int do_loop_restoration =
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+        cm->rst_info[0].frame_cross_restoration_type != RESTORE_NONE ||
+        cm->rst_info[1].frame_cross_restoration_type != RESTORE_NONE ||
+        cm->rst_info[2].frame_cross_restoration_type != RESTORE_NONE ||
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
         cm->rst_info[0].frame_restoration_type != RESTORE_NONE ||
         cm->rst_info[1].frame_restoration_type != RESTORE_NONE ||
         cm->rst_info[2].frame_restoration_type != RESTORE_NONE;
@@ -7633,7 +8310,7 @@
   if (pbi->inspect_cb != NULL) {
     (*pbi->inspect_cb)(pbi, pbi->inspect_ctx);
   }
-#endif
+#endif  // CONFIG_INSPECTION
 
   // Non frame parallel update frame context here.
   if (!tiles->large_scale) {
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c
index 7467a62..6c8ce50 100644
--- a/av1/decoder/decodemv.c
+++ b/av1/decoder/decodemv.c
@@ -17,6 +17,9 @@
 #include "av1/common/cdef_block.h"
 #include "av1/common/cfl.h"
 #include "av1/common/common.h"
+#if CONFIG_ATC_DCTX_ALIGNED
+#include "av1/common/txb_common.h"
+#endif  // CONFIG_ATC_DCTX_ALIGNED
 #include "av1/common/entropy.h"
 #include "av1/common/entropymode.h"
 #include "av1/common/entropymv.h"
@@ -32,13 +35,11 @@
 
 #include "aom_dsp/aom_dsp_common.h"
 
-#define ACCT_STR __func__
-
 #define DEC_MISMATCH_DEBUG 0
 
 #if !CONFIG_AIMC
 static PREDICTION_MODE read_intra_mode(aom_reader *r, aom_cdf_prob *cdf) {
-  return (PREDICTION_MODE)aom_read_symbol(r, cdf, INTRA_MODES, ACCT_STR);
+  return (PREDICTION_MODE)aom_read_symbol(r, cdf, INTRA_MODES, ACCT_INFO());
 }
 #endif  // !CONFIG_AIMC
 
@@ -83,8 +84,8 @@
         get_mi_grid_idx(mi_params, xd->mi_row & first_block_mask,
                         xd->mi_col & first_block_mask);
     MB_MODE_INFO *const mbmi = mi_params->mi_grid_base[grid_idx];
-    mbmi->cdef_strength =
-        aom_read_literal(r, cm->cdef_info.cdef_bits, ACCT_STR);
+    mbmi->cdef_strength = aom_read_literal(r, cm->cdef_info.cdef_bits,
+                                           ACCT_INFO("cdef_strength"));
     xd->cdef_transmitted[index] = true;
   }
 }
@@ -105,7 +106,7 @@
   if (!(mi_row & blk_size_y) && !(mi_col & blk_size_x) &&
       cm->ccso_info.ccso_enable[0]) {
     const int blk_idc =
-        aom_read_symbol(r, xd->tile_ctx->ccso_cdf[0], 2, ACCT_STR);
+        aom_read_symbol(r, xd->tile_ctx->ccso_cdf[0], 2, ACCT_INFO("blk_idc"));
     xd->ccso_blk_y = blk_idc;
     mi_params
         ->mi_grid_base[(mi_row & ~blk_size_y) * mi_params->mi_stride +
@@ -118,10 +119,10 @@
 #if CONFIG_CCSO_EXT
       cm->ccso_info.ccso_enable[1]) {
     const int blk_idc =
-        aom_read_symbol(r, xd->tile_ctx->ccso_cdf[1], 2, ACCT_STR);
+        aom_read_symbol(r, xd->tile_ctx->ccso_cdf[1], 2, ACCT_INFO("blk_idc"));
 #else
       cm->ccso_info.ccso_enable[0]) {
-    const int blk_idc = aom_read_bit(r, ACCT_STR);
+    const int blk_idc = aom_read_bit(r, ACCT_INFO("blk_idc"));
 #endif
     xd->ccso_blk_u = blk_idc;
     mi_params
@@ -134,10 +135,10 @@
 #if CONFIG_CCSO_EXT
       cm->ccso_info.ccso_enable[2]) {
     const int blk_idc =
-        aom_read_symbol(r, xd->tile_ctx->ccso_cdf[2], 2, ACCT_STR);
+        aom_read_symbol(r, xd->tile_ctx->ccso_cdf[2], 2, ACCT_INFO("blk_idc"));
 #else
       cm->ccso_info.ccso_enable[1]) {
-    const int blk_idc = aom_read_bit(r, ACCT_STR);
+    const int blk_idc = aom_read_bit(r, ACCT_INFO("blk_idc"));
 #endif
     xd->ccso_blk_v = blk_idc;
     mi_params
@@ -159,17 +160,18 @@
   if ((bsize != cm->sb_size ||
        mbmi->skip_txfm[xd->tree_type == CHROMA_PART] == 0) &&
       read_delta_q_flag) {
-    abs = aom_read_symbol(r, ec_ctx->delta_q_cdf, DELTA_Q_PROBS + 1, ACCT_STR);
+    abs = aom_read_symbol(r, ec_ctx->delta_q_cdf, DELTA_Q_PROBS + 1,
+                          ACCT_INFO("abs"));
     const int smallval = (abs < DELTA_Q_SMALL);
 
     if (!smallval) {
-      const int rem_bits = aom_read_literal(r, 3, ACCT_STR) + 1;
+      const int rem_bits = aom_read_literal(r, 3, ACCT_INFO("rem_bits")) + 1;
       const int thr = (1 << rem_bits) + 1;
-      abs = aom_read_literal(r, rem_bits, ACCT_STR) + thr;
+      abs = aom_read_literal(r, rem_bits, ACCT_INFO("abs")) + thr;
     }
 
     if (abs) {
-      sign = aom_read_bit(r, ACCT_STR);
+      sign = aom_read_bit(r, ACCT_INFO("sign"));
     } else {
       sign = 1;
     }
@@ -190,34 +192,47 @@
   const int read_delta_lf_flag = (b_col == 0 && b_row == 0);
   if ((bsize != cm->sb_size || mbmi->skip_txfm[plane_type] == 0) &&
       read_delta_lf_flag) {
-    int abs = aom_read_symbol(r, cdf, DELTA_LF_PROBS + 1, ACCT_STR);
+    int abs = aom_read_symbol(r, cdf, DELTA_LF_PROBS + 1, ACCT_INFO("abs"));
     const int smallval = (abs < DELTA_LF_SMALL);
     if (!smallval) {
-      const int rem_bits = aom_read_literal(r, 3, ACCT_STR) + 1;
+      const int rem_bits = aom_read_literal(r, 3, ACCT_INFO("rem_bits")) + 1;
       const int thr = (1 << rem_bits) + 1;
-      abs = aom_read_literal(r, rem_bits, ACCT_STR) + thr;
+      abs = aom_read_literal(r, rem_bits, ACCT_INFO("abs")) + thr;
     }
-    const int sign = abs ? aom_read_bit(r, ACCT_STR) : 1;
+    const int sign = abs ? aom_read_bit(r, ACCT_INFO("sign")) : 1;
     reduced_delta_lflevel = sign ? -abs : abs;
   }
   return reduced_delta_lflevel;
 }
 
-static uint8_t read_mrl_index(FRAME_CONTEXT *ec_ctx, aom_reader *r) {
+static uint8_t read_mrl_index(FRAME_CONTEXT *ec_ctx, aom_reader *r
+#if CONFIG_EXT_DIR
+                              ,
+                              const MB_MODE_INFO *neighbor0,
+                              const MB_MODE_INFO *neighbor1
+#endif  // CONFIG_EXT_DIR
+) {
+#if CONFIG_EXT_DIR
+  int ctx = get_mrl_index_ctx(neighbor0, neighbor1);
+  aom_cdf_prob *mrl_cdf = ec_ctx->mrl_index_cdf[ctx];
   const uint8_t mrl_index =
-      aom_read_symbol(r, ec_ctx->mrl_index_cdf, MRL_LINE_NUMBER, ACCT_STR);
+      aom_read_symbol(r, mrl_cdf, MRL_LINE_NUMBER, ACCT_INFO());
+#else
+  const uint8_t mrl_index =
+      aom_read_symbol(r, ec_ctx->mrl_index_cdf, MRL_LINE_NUMBER, ACCT_INFO());
+#endif  // CONFIG_EXT_DIR
   return mrl_index;
 }
 
 static uint8_t read_fsc_mode(aom_reader *r, aom_cdf_prob *fsc_cdf) {
-  const uint8_t fsc_mode = aom_read_symbol(r, fsc_cdf, FSC_MODES, ACCT_STR);
+  const uint8_t fsc_mode = aom_read_symbol(r, fsc_cdf, FSC_MODES, ACCT_INFO());
   return fsc_mode;
 }
 
 #if CONFIG_IMPROVED_CFL
 static uint8_t read_cfl_index(FRAME_CONTEXT *ec_ctx, aom_reader *r) {
   uint8_t cfl_index =
-      aom_read_symbol(r, ec_ctx->cfl_index_cdf, CFL_TYPE_COUNT, ACCT_STR);
+      aom_read_symbol(r, ec_ctx->cfl_index_cdf, CFL_TYPE_COUNT, ACCT_INFO());
   return cfl_index;
 }
 #endif
@@ -229,25 +244,27 @@
                                              PREDICTION_MODE y_mode) {
   const UV_PREDICTION_MODE uv_mode =
       aom_read_symbol(r, ec_ctx->uv_mode_cdf[cfl_allowed][y_mode],
-                      UV_INTRA_MODES - !cfl_allowed, ACCT_STR);
+                      UV_INTRA_MODES - !cfl_allowed, ACCT_INFO());
   return uv_mode;
 }
 #endif  // !CONFIG_AIMC
 
 static uint8_t read_cfl_alphas(FRAME_CONTEXT *const ec_ctx, aom_reader *r,
                                int8_t *signs_out) {
-  const int8_t joint_sign =
-      aom_read_symbol(r, ec_ctx->cfl_sign_cdf, CFL_JOINT_SIGNS, "cfl:signs");
+  const int8_t joint_sign = aom_read_symbol(
+      r, ec_ctx->cfl_sign_cdf, CFL_JOINT_SIGNS, ACCT_INFO("cfl:signs"));
   uint8_t idx = 0;
   // Magnitudes are only coded for nonzero values
   if (CFL_SIGN_U(joint_sign) != CFL_SIGN_ZERO) {
     aom_cdf_prob *cdf_u = ec_ctx->cfl_alpha_cdf[CFL_CONTEXT_U(joint_sign)];
-    idx = (uint8_t)aom_read_symbol(r, cdf_u, CFL_ALPHABET_SIZE, "cfl:alpha_u")
+    idx = (uint8_t)aom_read_symbol(r, cdf_u, CFL_ALPHABET_SIZE,
+                                   ACCT_INFO("cfl:alpha_u"))
           << CFL_ALPHABET_SIZE_LOG2;
   }
   if (CFL_SIGN_V(joint_sign) != CFL_SIGN_ZERO) {
     aom_cdf_prob *cdf_v = ec_ctx->cfl_alpha_cdf[CFL_CONTEXT_V(joint_sign)];
-    idx += (uint8_t)aom_read_symbol(r, cdf_v, CFL_ALPHABET_SIZE, "cfl:alpha_v");
+    idx += (uint8_t)aom_read_symbol(r, cdf_v, CFL_ALPHABET_SIZE,
+                                    ACCT_INFO("cfl:alpha_v"));
   }
   *signs_out = joint_sign;
   return idx;
@@ -257,7 +274,7 @@
                                             int size_group) {
   const INTERINTRA_MODE ii_mode = (INTERINTRA_MODE)aom_read_symbol(
       r, xd->tile_ctx->interintra_mode_cdf[size_group], INTERINTRA_MODES,
-      ACCT_STR);
+      ACCT_INFO());
   return ii_mode;
 }
 
@@ -277,8 +294,9 @@
   int is_warpmv = 0;
   if (is_warpmv_mode_allowed(cm, mbmi, bsize)) {
     const int16_t iswarpmvmode_ctx = inter_warpmv_mode_ctx(cm, xd, mbmi);
-    is_warpmv = aom_read_symbol(
-        r, ec_ctx->inter_warp_mode_cdf[iswarpmvmode_ctx], 2, ACCT_STR);
+    is_warpmv =
+        aom_read_symbol(r, ec_ctx->inter_warp_mode_cdf[iswarpmvmode_ctx], 2,
+                        ACCT_INFO("is_warpmv"));
     if (is_warpmv) {
       return WARPMV;
     }
@@ -287,7 +305,7 @@
 
   return SINGLE_INTER_MODE_START +
          aom_read_symbol(r, ec_ctx->inter_single_mode_cdf[ismode_ctx],
-                         INTER_SINGLE_MODES, ACCT_STR);
+                         INTER_SINGLE_MODES, ACCT_INFO("inter_single_mode"));
 }
 
 static void read_drl_idx(int max_drl_bits, const int16_t mode_ctx,
@@ -295,50 +313,87 @@
                          MB_MODE_INFO *mbmi, aom_reader *r) {
   MACROBLOCKD *const xd = &dcb->xd;
   uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
+#if CONFIG_SEP_COMP_DRL
+  mbmi->ref_mv_idx[0] = 0;
+  mbmi->ref_mv_idx[1] = 0;
+#if !CONFIG_SKIP_MODE_ENHANCEMENT
+  assert(!mbmi->skip_mode);
+#endif  // !CONFIG_SKIP_MODE_ENHANCEMENT
+  // If has_second_drl(): NEAR_NEWMV caps max_drl_bits at SEP_COMP_DRL_SIZE.
+  if (has_second_drl(mbmi)) {
+    if (mbmi->mode == NEAR_NEWMV)
+      max_drl_bits = AOMMIN(max_drl_bits, SEP_COMP_DRL_SIZE);
+    else
+      assert(mbmi->mode == NEAR_NEARMV);
+  }
+  for (int ref = 0; ref < 1 + has_second_drl(mbmi); ref++) {
+    for (int idx = 0; idx < max_drl_bits; ++idx) {
+      const uint16_t *weight = has_second_drl(mbmi)
+                                   ? xd->weight[mbmi->ref_frame[ref]]
+                                   : xd->weight[ref_frame_type];
+      aom_cdf_prob *drl_cdf =
+#if CONFIG_SKIP_MODE_ENHANCEMENT
+          mbmi->skip_mode ? ec_ctx->skip_drl_cdf[AOMMIN(idx, 2)]
+                          : av1_get_drl_cdf(ec_ctx, weight, mode_ctx, idx);
+#else
+          av1_get_drl_cdf(ec_ctx, xd->weight[ref_frame_type], mode_ctx, idx);
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
+      int drl_idx = aom_read_symbol(r, drl_cdf, 2, ACCT_INFO("drl_idx"));
+      mbmi->ref_mv_idx[ref] = idx + drl_idx;
+      if (!drl_idx) break;
+    }
+    assert(mbmi->ref_mv_idx[ref] < max_drl_bits + 1);
+  }
+#else
   mbmi->ref_mv_idx = 0;
 #if !CONFIG_SKIP_MODE_ENHANCEMENT
   assert(!mbmi->skip_mode);
 #endif  // CONFIG_SKIP_MODE_ENHANCEMENT
   for (int idx = 0; idx < max_drl_bits; ++idx) {
     aom_cdf_prob *drl_cdf =
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
         mbmi->skip_mode ? ec_ctx->skip_drl_cdf[AOMMIN(idx, 2)]
                         : av1_get_drl_cdf(ec_ctx, xd->weight[ref_frame_type],
                                           mode_ctx, idx);
 #else
         av1_get_drl_cdf(ec_ctx, xd->weight[ref_frame_type], mode_ctx, idx);
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
-    int drl_idx = aom_read_symbol(r, drl_cdf, 2, ACCT_STR);
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
+    int drl_idx = aom_read_symbol(r, drl_cdf, 2, ACCT_INFO("drl_idx"));
     mbmi->ref_mv_idx = idx + drl_idx;
     if (!drl_idx) break;
   }
   assert(mbmi->ref_mv_idx < max_drl_bits + 1);
+#endif  // CONFIG_SEP_COMP_DRL
 }
 
 #if CONFIG_WEDGE_MOD_EXT
 static int8_t read_wedge_mode(aom_reader *r, FRAME_CONTEXT *ec_ctx,
                               const BLOCK_SIZE bsize) {
-  int wedge_angle_dir =
-      aom_read_symbol(r, ec_ctx->wedge_angle_dir_cdf[bsize], 2, ACCT_STR);
+  int wedge_angle_dir = aom_read_symbol(r, ec_ctx->wedge_angle_dir_cdf[bsize],
+                                        2, ACCT_INFO("wedge_angle_dir"));
   int wedge_angle = WEDGE_ANGLES;
   if (wedge_angle_dir == 0) {
-    wedge_angle = aom_read_symbol(r, ec_ctx->wedge_angle_0_cdf[bsize],
-                                  H_WEDGE_ANGLES, ACCT_STR);
+    wedge_angle =
+        aom_read_symbol(r, ec_ctx->wedge_angle_0_cdf[bsize], H_WEDGE_ANGLES,
+                        ACCT_INFO("wedge_angle", "wedge_angle_0_cdf"));
   } else {
     wedge_angle =
-        H_WEDGE_ANGLES + aom_read_symbol(r, ec_ctx->wedge_angle_1_cdf[bsize],
-                                         H_WEDGE_ANGLES, ACCT_STR);
+        H_WEDGE_ANGLES +
+        aom_read_symbol(r, ec_ctx->wedge_angle_1_cdf[bsize], H_WEDGE_ANGLES,
+                        ACCT_INFO("wedge_angle", "wedge_angle_1_cdf"));
   }
   int wedge_dist = 0;
   if ((wedge_angle >= H_WEDGE_ANGLES) ||
       (wedge_angle == WEDGE_90 || wedge_angle == WEDGE_180)) {
-    wedge_dist = aom_read_symbol(r, ec_ctx->wedge_dist_cdf2[bsize],
-                                 NUM_WEDGE_DIST - 1, ACCT_STR) +
-                 1;
+    wedge_dist =
+        aom_read_symbol(r, ec_ctx->wedge_dist_cdf2[bsize], NUM_WEDGE_DIST - 1,
+                        ACCT_INFO("wedge_dist", "wedge_dist_cdf2")) +
+        1;
   } else {
     assert(wedge_angle < H_WEDGE_ANGLES);
-    wedge_dist = aom_read_symbol(r, ec_ctx->wedge_dist_cdf[bsize],
-                                 NUM_WEDGE_DIST, ACCT_STR);
+    wedge_dist =
+        aom_read_symbol(r, ec_ctx->wedge_dist_cdf[bsize], NUM_WEDGE_DIST,
+                        ACCT_INFO("wedge_dist", "wedge_dist_cdf"));
   }
   return wedge_angle_dist_2_index[wedge_angle][wedge_dist];
 }
@@ -356,11 +411,22 @@
   int max_idx_bits = mbmi->max_num_warp_candidates - 1;
   for (int bit_idx = 0; bit_idx < max_idx_bits; ++bit_idx) {
     aom_cdf_prob *warp_ref_idx_cdf = av1_get_warp_ref_idx_cdf(ec_ctx, bit_idx);
-    int warp_idx = aom_read_symbol(r, warp_ref_idx_cdf, 2, ACCT_STR);
+    int warp_idx =
+        aom_read_symbol(r, warp_ref_idx_cdf, 2, ACCT_INFO("warp_idx"));
     mbmi->warp_ref_idx = bit_idx + warp_idx;
     if (!warp_idx) break;
   }
 }
+
+#if CONFIG_CWG_D067_IMPROVED_WARP
+static void read_warpmv_with_mvd_flag(FRAME_CONTEXT *ec_ctx, MB_MODE_INFO *mbmi,
+                                      aom_reader *r) {
+  mbmi->warpmv_with_mvd_flag = aom_read_symbol(
+      r, ec_ctx->warpmv_with_mvd_flag_cdf[mbmi->sb_type[PLANE_TYPE_Y]], 2,
+      ACCT_INFO("warpmv_with_mvd_flag"));
+}
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+
 #endif  // CONFIG_WARP_REF_LIST
 // Read the delta for a single warp parameter
 // Each delta is coded as a symbol in the range
@@ -372,7 +438,7 @@
 
   int coded_value =
       aom_read_symbol(r, xd->tile_ctx->warp_delta_param_cdf[index_type],
-                      WARP_DELTA_NUM_SYMBOLS, ACCT_STR);
+                      WARP_DELTA_NUM_SYMBOLS, ACCT_INFO());
 
   return (coded_value - WARP_DELTA_CODED_MAX) * WARP_DELTA_STEP;
 }
@@ -414,7 +480,11 @@
 
   // TODO(rachelbarker): Allow signaling warp type?
 #if CONFIG_WARP_REF_LIST
-  if (allow_warp_parameter_signaling(mbmi)) {
+  if (allow_warp_parameter_signaling(
+#if CONFIG_CWG_D067_IMPROVED_WARP
+          cm,
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+          mbmi)) {
 #endif  // CONFIG_WARP_REF_LIST
     params->wmtype = ROTZOOM;
     params->wmmat[2] = base_params.wmmat[2] + read_warp_delta_param(xd, 2, r);
@@ -455,8 +525,9 @@
 #if CONFIG_WARPMV
   if (mbmi->mode == WARPMV) {
     if (allowed_motion_modes & (1 << WARPED_CAUSAL)) {
-      int use_warped_causal = aom_read_symbol(
-          r, xd->tile_ctx->warped_causal_warpmv_cdf[bsize], 2, ACCT_STR);
+      int use_warped_causal =
+          aom_read_symbol(r, xd->tile_ctx->warped_causal_warpmv_cdf[bsize], 2,
+                          ACCT_INFO("use_warped_causal"));
       return use_warped_causal ? WARPED_CAUSAL : WARP_DELTA;
     }
     return WARP_DELTA;
@@ -466,8 +537,9 @@
   mbmi->use_wedge_interintra = 0;
   if (allowed_motion_modes & (1 << INTERINTRA)) {
     const int bsize_group = size_group_lookup[bsize];
-    const int use_interintra = aom_read_symbol(
-        r, xd->tile_ctx->interintra_cdf[bsize_group], 2, ACCT_STR);
+    const int use_interintra =
+        aom_read_symbol(r, xd->tile_ctx->interintra_cdf[bsize_group], 2,
+                        ACCT_INFO("use_interintra"));
     assert(mbmi->ref_frame[1] == NONE_FRAME);
     if (use_interintra) {
       const INTERINTRA_MODE interintra_mode =
@@ -478,8 +550,9 @@
       mbmi->angle_delta[PLANE_TYPE_UV] = 0;
       mbmi->filter_intra_mode_info.use_filter_intra = 0;
       if (av1_is_wedge_used(bsize)) {
-        mbmi->use_wedge_interintra = aom_read_symbol(
-            r, xd->tile_ctx->wedge_interintra_cdf[bsize], 2, ACCT_STR);
+        mbmi->use_wedge_interintra =
+            aom_read_symbol(r, xd->tile_ctx->wedge_interintra_cdf[bsize], 2,
+                            ACCT_INFO("use_wedge_interintra"));
         if (mbmi->use_wedge_interintra) {
 #if CONFIG_WEDGE_MOD_EXT
           mbmi->interintra_wedge_index =
@@ -487,7 +560,8 @@
           assert(mbmi->interintra_wedge_index != -1);
 #else
           mbmi->interintra_wedge_index = (int8_t)aom_read_symbol(
-              r, xd->tile_ctx->wedge_idx_cdf[bsize], MAX_WEDGE_TYPES, ACCT_STR);
+              r, xd->tile_ctx->wedge_idx_cdf[bsize], MAX_WEDGE_TYPES,
+              ACCT_INFO("interintra_wedge_index"));
 #endif
         }
       }
@@ -496,8 +570,8 @@
   }
 
   if (allowed_motion_modes & (1 << OBMC_CAUSAL)) {
-    int use_obmc =
-        aom_read_symbol(r, xd->tile_ctx->obmc_cdf[bsize], 2, ACCT_STR);
+    int use_obmc = aom_read_symbol(r, xd->tile_ctx->obmc_cdf[bsize], 2,
+                                   ACCT_INFO("use_obmc"));
     if (use_obmc) {
       return OBMC_CAUSAL;
     }
@@ -506,8 +580,9 @@
   if (allowed_motion_modes & (1 << WARP_EXTEND)) {
     const int ctx1 = av1_get_warp_extend_ctx1(xd, mbmi);
     const int ctx2 = av1_get_warp_extend_ctx2(xd, mbmi);
-    int use_warp_extend = aom_read_symbol(
-        r, xd->tile_ctx->warp_extend_cdf[ctx1][ctx2], 2, ACCT_STR);
+    int use_warp_extend =
+        aom_read_symbol(r, xd->tile_ctx->warp_extend_cdf[ctx1][ctx2], 2,
+                        ACCT_INFO("use_warp_extend"));
     if (use_warp_extend) {
       return WARP_EXTEND;
     }
@@ -515,15 +590,16 @@
 
   if (allowed_motion_modes & (1 << WARPED_CAUSAL)) {
     int use_warped_causal =
-        aom_read_symbol(r, xd->tile_ctx->warped_causal_cdf[bsize], 2, ACCT_STR);
+        aom_read_symbol(r, xd->tile_ctx->warped_causal_cdf[bsize], 2,
+                        ACCT_INFO("use_warped_causal"));
     if (use_warped_causal) {
       return WARPED_CAUSAL;
     }
   }
 
   if (allowed_motion_modes & (1 << WARP_DELTA)) {
-    int use_warp_delta =
-        aom_read_symbol(r, xd->tile_ctx->warp_delta_cdf[bsize], 2, ACCT_STR);
+    int use_warp_delta = aom_read_symbol(r, xd->tile_ctx->warp_delta_cdf[bsize],
+                                         2, ACCT_INFO("use_warp_delta"));
     if (use_warp_delta) {
       mbmi->motion_mode = WARP_DELTA;
 #if !CONFIG_WARPMV
@@ -569,13 +645,14 @@
   if (last_motion_mode_allowed == SIMPLE_TRANSLATION) return SIMPLE_TRANSLATION;
 
   if (last_motion_mode_allowed == OBMC_CAUSAL) {
-    motion_mode = aom_read_symbol(
-        r, xd->tile_ctx->obmc_cdf[mbmi->sb_type[PLANE_TYPE_Y]], 2, ACCT_STR);
+    motion_mode =
+        aom_read_symbol(r, xd->tile_ctx->obmc_cdf[mbmi->sb_type[PLANE_TYPE_Y]],
+                        2, ACCT_INFO("motion_mode", "obmc_cdf"));
     return (MOTION_MODE)(SIMPLE_TRANSLATION + motion_mode);
   } else {
     motion_mode = aom_read_symbol(
         r, xd->tile_ctx->motion_mode_cdf[mbmi->sb_type[PLANE_TYPE_Y]],
-        MOTION_MODES, ACCT_STR);
+        MOTION_MODES, ACCT_INFO("motion_mode", "motion_mode_cdf"));
     return (MOTION_MODE)(SIMPLE_TRANSLATION + motion_mode);
   }
 }
@@ -593,17 +670,38 @@
                          : xd->tile_ctx->jmvd_scale_mode_cdf;
   const int jmvd_scale_cnt = is_joint_amvd_mode ? JOINT_AMVD_SCALE_FACTOR_CNT
                                                 : JOINT_NEWMV_SCALE_FACTOR_CNT;
-  const int jmvd_scale_mode =
-      aom_read_symbol(r, jmvd_scale_mode_cdf, jmvd_scale_cnt, ACCT_STR);
+  const int jmvd_scale_mode = aom_read_symbol(
+      r, jmvd_scale_mode_cdf, jmvd_scale_cnt, ACCT_INFO("jmvd_scale_mode"));
 #else
-  const int jmvd_scale_mode =
-      aom_read_symbol(r, xd->tile_ctx->jmvd_scale_mode_cdf,
-                      JOINT_NEWMV_SCALE_FACTOR_CNT, ACCT_STR);
+  const int jmvd_scale_mode = aom_read_symbol(
+      r, xd->tile_ctx->jmvd_scale_mode_cdf, JOINT_NEWMV_SCALE_FACTOR_CNT,
+      ACCT_INFO("jmvd_scale_mode"));
 #endif  // CONFIG_ADAPTIVE_MVD
   return jmvd_scale_mode;
 }
 #endif  // CONFIG_IMPROVED_JMVD && CONFIG_JOINT_MVD
 
+#if CONFIG_CWP
+// Read index for the weighting factor of compound weighted prediction
+static int read_cwp_idx(MACROBLOCKD *xd, aom_reader *r, const AV1_COMMON *cm,
+                        MB_MODE_INFO *const mbmi) {
+  int8_t cwp_idx = 0;
+  int bit_cnt = 0;
+  const int ctx = 0;
+  for (int idx = 0; idx < MAX_CWP_NUM - 1; ++idx) {
+    const int tmp_idx = aom_read_symbol(
+        r, xd->tile_ctx->cwp_idx_cdf[ctx][bit_cnt], 2, ACCT_INFO());
+    cwp_idx = idx + tmp_idx;
+    if (!tmp_idx) break;
+    ++bit_cnt;
+  }
+  assert(cwp_idx <= CWP_MAX);
+
+  // convert index to weight
+  return get_cwp_coding_idx(cwp_idx, 0, cm, mbmi);
+}
+#endif  // CONFIG_CWP
+
 static PREDICTION_MODE read_inter_compound_mode(MACROBLOCKD *xd, aom_reader *r,
 #if CONFIG_OPTFLOW_REFINEMENT
                                                 const AV1_COMMON *cm,
@@ -614,17 +712,19 @@
   int use_optical_flow = 0;
   if (cm->features.opfl_refine_type == REFINE_SWITCHABLE &&
       is_opfl_refine_allowed(cm, mbmi)) {
-    use_optical_flow =
-        aom_read_symbol(r, xd->tile_ctx->use_optflow_cdf[ctx], 2, ACCT_STR);
+    use_optical_flow = aom_read_symbol(r, xd->tile_ctx->use_optflow_cdf[ctx], 2,
+                                       ACCT_INFO("use_optical_flow"));
   }
 #endif  // CONFIG_OPTFLOW_REFINEMENT
   const int mode =
 #if CONFIG_OPTFLOW_REFINEMENT
       aom_read_symbol(r, xd->tile_ctx->inter_compound_mode_cdf[ctx],
-                      INTER_COMPOUND_REF_TYPES, ACCT_STR);
+                      INTER_COMPOUND_REF_TYPES,
+                      ACCT_INFO("inter_compound_mode_cdf"));
 #else
       aom_read_symbol(r, xd->tile_ctx->inter_compound_mode_cdf[ctx],
-                      INTER_COMPOUND_MODES, ACCT_STR);
+                      INTER_COMPOUND_MODES,
+                      ACCT_INFO("inter_compound_mode_cdf"));
 #endif  // CONFIG_OPTFLOW_REFINEMENT
 #if CONFIG_OPTFLOW_REFINEMENT
   if (use_optical_flow) {
@@ -668,7 +768,8 @@
   struct segmentation *const seg = &cm->seg;
   struct segmentation_probs *const segp = &ec_ctx->seg;
   aom_cdf_prob *pred_cdf = segp->spatial_pred_seg_cdf[cdf_num];
-  const int coded_id = aom_read_symbol(r, pred_cdf, MAX_SEGMENTS, ACCT_STR);
+  const int coded_id =
+      aom_read_symbol(r, pred_cdf, MAX_SEGMENTS, ACCT_INFO("coded_id"));
   const int segment_id =
       av1_neg_deinterleave(coded_id, pred, seg->last_active_segid + 1);
 
@@ -789,7 +890,8 @@
     FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
     struct segmentation_probs *const segp = &ec_ctx->seg;
     aom_cdf_prob *pred_cdf = segp->pred_cdf[ctx];
-    mbmi->seg_id_predicted = aom_read_symbol(r, pred_cdf, 2, ACCT_STR);
+    mbmi->seg_id_predicted =
+        aom_read_symbol(r, pred_cdf, 2, ACCT_INFO("seg_id_predicted"));
     if (mbmi->seg_id_predicted) {
       segment_id = get_predicted_segment_id(cm, mi_offset, x_inside_boundary,
                                             y_inside_boundary);
@@ -823,8 +925,8 @@
 
   const int ctx = av1_get_skip_mode_context(xd);
   FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
-  const int skip_mode =
-      aom_read_symbol(r, ec_ctx->skip_mode_cdfs[ctx], 2, ACCT_STR);
+  const int skip_mode = aom_read_symbol(r, ec_ctx->skip_mode_cdfs[ctx], 2,
+                                        ACCT_INFO("skip_mode"));
   return skip_mode;
 }
 
@@ -835,13 +937,13 @@
   } else {
     const int ctx = av1_get_skip_txfm_context(xd);
     FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
-    const int skip_txfm =
-        aom_read_symbol(r, ec_ctx->skip_txfm_cdfs[ctx], 2, ACCT_STR);
+    const int skip_txfm = aom_read_symbol(r, ec_ctx->skip_txfm_cdfs[ctx], 2,
+                                          ACCT_INFO("skip_txfm"));
     return skip_txfm;
   }
 }
 
-#if !CONFIG_INDEP_PALETTE_PARSING
+#if !CONFIG_PALETTE_IMPROVEMENTS
 // Merge the sorted list of cached colors(cached_colors[0...n_cached_colors-1])
 // and the sorted list of transmitted colors(colors[n_cached_colors...n-1]) into
 // one single sorted list(colors[...]).
@@ -860,27 +962,29 @@
     }
   }
 }
-#endif  //! CONFIG_INDEP_PALETTE_PARSING
+#endif  //! CONFIG_PALETTE_IMPROVEMENTS
 
 static void read_palette_colors_y(MACROBLOCKD *const xd, int bit_depth,
                                   PALETTE_MODE_INFO *const pmi, aom_reader *r) {
-#if CONFIG_INDEP_PALETTE_PARSING
+#if CONFIG_PALETTE_IMPROVEMENTS
   uint16_t color_cache[2 * PALETTE_MAX_SIZE];
   const int n_cache = av1_get_palette_cache(xd, 0, color_cache);
   const int n = pmi->palette_size[0];
   int idx = 0;
   for (int i = 0; i < n_cache && idx < n; ++i) {
-    if (aom_read_bit(r, ACCT_STR)) pmi->palette_colors[idx++] = color_cache[i];
+    if (aom_read_bit(r, ACCT_INFO("color_cache")))
+      pmi->palette_colors[idx++] = color_cache[i];
   }
   if (idx < n) {
-    pmi->palette_colors[idx++] = aom_read_literal(r, bit_depth, ACCT_STR);
+    pmi->palette_colors[idx++] =
+        aom_read_literal(r, bit_depth, ACCT_INFO("palette_colors"));
     if (idx < n) {
       const int min_bits = bit_depth - 3;
-      int bits = min_bits + aom_read_literal(r, 2, ACCT_STR);
+      int bits = min_bits + aom_read_literal(r, 2, ACCT_INFO("bits"));
       int range = (1 << bit_depth) - pmi->palette_colors[idx - 1] - 1;
       for (; idx < n; ++idx) {
         assert(range >= 0);
-        const int delta = aom_read_literal(r, bits, ACCT_STR) + 1;
+        const int delta = aom_read_literal(r, bits, ACCT_INFO("delta")) + 1;
         pmi->palette_colors[idx] = clamp(pmi->palette_colors[idx - 1] + delta,
                                          0, (1 << bit_depth) - 1);
         range -= (pmi->palette_colors[idx] - pmi->palette_colors[idx - 1]);
@@ -905,17 +1009,19 @@
   const int n = pmi->palette_size[0];
   int idx = 0;
   for (int i = 0; i < n_cache && idx < n; ++i)
-    if (aom_read_bit(r, ACCT_STR)) cached_colors[idx++] = color_cache[i];
+    if (aom_read_bit(r, ACCT_INFO("color_cache")))
+      cached_colors[idx++] = color_cache[i];
   if (idx < n) {
     const int n_cached_colors = idx;
-    pmi->palette_colors[idx++] = aom_read_literal(r, bit_depth, ACCT_STR);
+    pmi->palette_colors[idx++] =
+        aom_read_literal(r, bit_depth, ACCT_INFO("palette_colors"));
     if (idx < n) {
       const int min_bits = bit_depth - 3;
-      int bits = min_bits + aom_read_literal(r, 2, ACCT_STR);
+      int bits = min_bits + aom_read_literal(r, 2, ACCT_INFO("bits"));
       int range = (1 << bit_depth) - pmi->palette_colors[idx - 1] - 1;
       for (; idx < n; ++idx) {
         assert(range >= 0);
-        const int delta = aom_read_literal(r, bits, ACCT_STR) + 1;
+        const int delta = aom_read_literal(r, bits, ACCT_INFO("delta")) + 1;
         pmi->palette_colors[idx] = clamp(pmi->palette_colors[idx - 1] + delta,
                                          0, (1 << bit_depth) - 1);
         range -= (pmi->palette_colors[idx] - pmi->palette_colors[idx - 1]);
@@ -926,29 +1032,31 @@
   } else {
     memcpy(pmi->palette_colors, cached_colors, n * sizeof(cached_colors[0]));
   }
-#endif  // CONFIG_INDEP_PALETTE_PARSING
+#endif  // CONFIG_PALETTE_IMPROVEMENTS
 }
 
 static void read_palette_colors_uv(MACROBLOCKD *const xd, int bit_depth,
                                    PALETTE_MODE_INFO *const pmi,
                                    aom_reader *r) {
-#if CONFIG_INDEP_PALETTE_PARSING
+#if CONFIG_PALETTE_IMPROVEMENTS
   const int n = pmi->palette_size[1];
   // U channel colors.
   uint16_t color_cache[2 * PALETTE_MAX_SIZE];
   const int n_cache = av1_get_palette_cache(xd, 1, color_cache);
   int idx = PALETTE_MAX_SIZE;
   for (int i = 0; i < n_cache && idx < PALETTE_MAX_SIZE + n; ++i)
-    if (aom_read_bit(r, ACCT_STR)) pmi->palette_colors[idx++] = color_cache[i];
+    if (aom_read_bit(r, ACCT_INFO("color_cache")))
+      pmi->palette_colors[idx++] = color_cache[i];
   if (idx < PALETTE_MAX_SIZE + n) {
-    pmi->palette_colors[idx++] = aom_read_literal(r, bit_depth, ACCT_STR);
+    pmi->palette_colors[idx++] =
+        aom_read_literal(r, bit_depth, ACCT_INFO("palette_colors"));
     if (idx < PALETTE_MAX_SIZE + n) {
       const int min_bits = bit_depth - 3;
-      int bits = min_bits + aom_read_literal(r, 2, ACCT_STR);
+      int bits = min_bits + aom_read_literal(r, 2, ACCT_INFO("bits"));
       int range = (1 << bit_depth) - pmi->palette_colors[idx - 1];
       for (; idx < PALETTE_MAX_SIZE + n; ++idx) {
         assert(range >= 0);
-        const int delta = aom_read_literal(r, bits, ACCT_STR);
+        const int delta = aom_read_literal(r, bits, ACCT_INFO("delta"));
         pmi->palette_colors[idx] = clamp(pmi->palette_colors[idx - 1] + delta,
                                          0, (1 << bit_depth) - 1);
         range -= (pmi->palette_colors[idx] - pmi->palette_colors[idx - 1]);
@@ -976,18 +1084,20 @@
   const int n_cache = av1_get_palette_cache(xd, 1, color_cache);
   int idx = 0;
   for (int i = 0; i < n_cache && idx < n; ++i)
-    if (aom_read_bit(r, ACCT_STR)) cached_colors[idx++] = color_cache[i];
+    if (aom_read_bit(r, ACCT_INFO("color_cache")))
+      cached_colors[idx++] = color_cache[i];
   if (idx < n) {
     const int n_cached_colors = idx;
     idx += PALETTE_MAX_SIZE;
-    pmi->palette_colors[idx++] = aom_read_literal(r, bit_depth, ACCT_STR);
+    pmi->palette_colors[idx++] =
+        aom_read_literal(r, bit_depth, ACCT_INFO("palette_colors"));
     if (idx < PALETTE_MAX_SIZE + n) {
       const int min_bits = bit_depth - 3;
-      int bits = min_bits + aom_read_literal(r, 2, ACCT_STR);
+      int bits = min_bits + aom_read_literal(r, 2, ACCT_INFO("bits"));
       int range = (1 << bit_depth) - pmi->palette_colors[idx - 1];
       for (; idx < PALETTE_MAX_SIZE + n; ++idx) {
         assert(range >= 0);
-        const int delta = aom_read_literal(r, bits, ACCT_STR);
+        const int delta = aom_read_literal(r, bits, ACCT_INFO("delta"));
         pmi->palette_colors[idx] = clamp(pmi->palette_colors[idx - 1] + delta,
                                          0, (1 << bit_depth) - 1);
         range -= (pmi->palette_colors[idx] - pmi->palette_colors[idx - 1]);
@@ -1000,17 +1110,17 @@
     memcpy(pmi->palette_colors + PALETTE_MAX_SIZE, cached_colors,
            n * sizeof(cached_colors[0]));
   }
-#endif  // CONFIG_INDEP_PALETTE_PARSING
+#endif  // CONFIG_PALETTE_IMPROVEMENTS
   // V channel colors.
-  if (aom_read_bit(r, ACCT_STR)) {  // Delta encoding.
+  if (aom_read_bit(r, ACCT_INFO("use_delta"))) {  // Delta encoding.
     const int min_bits_v = bit_depth - 4;
     const int max_val = 1 << bit_depth;
-    int bits = min_bits_v + aom_read_literal(r, 2, ACCT_STR);
+    int bits = min_bits_v + aom_read_literal(r, 2, ACCT_INFO("bits"));
     pmi->palette_colors[2 * PALETTE_MAX_SIZE] =
-        aom_read_literal(r, bit_depth, ACCT_STR);
+        aom_read_literal(r, bit_depth, ACCT_INFO("palette_colors"));
     for (int i = 1; i < n; ++i) {
-      int delta = aom_read_literal(r, bits, ACCT_STR);
-      if (delta && aom_read_bit(r, ACCT_STR)) delta = -delta;
+      int delta = aom_read_literal(r, bits, ACCT_INFO("delta"));
+      if (delta && aom_read_bit(r, ACCT_INFO("negate"))) delta = -delta;
       int val = (int)pmi->palette_colors[2 * PALETTE_MAX_SIZE + i - 1] + delta;
       if (val < 0) val += max_val;
       if (val >= max_val) val -= max_val;
@@ -1019,7 +1129,7 @@
   } else {
     for (int i = 0; i < n; ++i) {
       pmi->palette_colors[2 * PALETTE_MAX_SIZE + i] =
-          aom_read_literal(r, bit_depth, ACCT_STR);
+          aom_read_literal(r, bit_depth, ACCT_INFO("palette_colors"));
     }
   }
 }
@@ -1036,11 +1146,11 @@
     const int palette_mode_ctx = av1_get_palette_mode_ctx(xd);
     const int modev = aom_read_symbol(
         r, xd->tile_ctx->palette_y_mode_cdf[bsize_ctx][palette_mode_ctx], 2,
-        ACCT_STR);
+        ACCT_INFO("modev", "luma"));
     if (modev) {
       pmi->palette_size[0] =
           aom_read_symbol(r, xd->tile_ctx->palette_y_size_cdf[bsize_ctx],
-                          PALETTE_SIZES, ACCT_STR) +
+                          PALETTE_SIZES, ACCT_INFO("palette_size", "luma")) +
           2;
       read_palette_colors_y(xd, cm->seq_params.bit_depth, pmi, r);
     }
@@ -1049,11 +1159,12 @@
       mbmi->uv_mode == UV_DC_PRED && xd->is_chroma_ref) {
     const int palette_uv_mode_ctx = (pmi->palette_size[0] > 0);
     const int modev = aom_read_symbol(
-        r, xd->tile_ctx->palette_uv_mode_cdf[palette_uv_mode_ctx], 2, ACCT_STR);
+        r, xd->tile_ctx->palette_uv_mode_cdf[palette_uv_mode_ctx], 2,
+        ACCT_INFO("modev", "chroma"));
     if (modev) {
       pmi->palette_size[1] =
           aom_read_symbol(r, xd->tile_ctx->palette_uv_size_cdf[bsize_ctx],
-                          PALETTE_SIZES, ACCT_STR) +
+                          PALETTE_SIZES, ACCT_INFO("palette_size", "chroma")) +
           2;
       read_palette_colors_uv(xd, cm->seq_params.bit_depth, pmi, r);
     }
@@ -1062,7 +1173,7 @@
 
 #if !CONFIG_AIMC
 static int read_angle_delta(aom_reader *r, aom_cdf_prob *cdf) {
-  const int sym = aom_read_symbol(r, cdf, 2 * MAX_ANGLE_DELTA + 1, ACCT_STR);
+  const int sym = aom_read_symbol(r, cdf, 2 * MAX_ANGLE_DELTA + 1, ACCT_INFO());
   return sym - MAX_ANGLE_DELTA;
 }
 #endif  // !CONFIG_AIMC
@@ -1075,10 +1186,11 @@
   if (av1_filter_intra_allowed(cm, mbmi) && xd->tree_type != CHROMA_PART) {
     filter_intra_mode_info->use_filter_intra = aom_read_symbol(
         r, xd->tile_ctx->filter_intra_cdfs[mbmi->sb_type[PLANE_TYPE_Y]], 2,
-        ACCT_STR);
+        ACCT_INFO("use_filter_intra"));
     if (filter_intra_mode_info->use_filter_intra) {
-      filter_intra_mode_info->filter_intra_mode = aom_read_symbol(
-          r, xd->tile_ctx->filter_intra_mode_cdf, FILTER_INTRA_MODES, ACCT_STR);
+      filter_intra_mode_info->filter_intra_mode =
+          aom_read_symbol(r, xd->tile_ctx->filter_intra_mode_cdf,
+                          FILTER_INTRA_MODES, ACCT_INFO("filter_intra_mode"));
     }
   } else {
     filter_intra_mode_info->use_filter_intra = 0;
@@ -1086,12 +1198,22 @@
 }
 
 void av1_read_tx_type(const AV1_COMMON *const cm, MACROBLOCKD *xd, int blk_row,
-                      int blk_col, TX_SIZE tx_size, aom_reader *r) {
+                      int blk_col, TX_SIZE tx_size, aom_reader *r
+#if CONFIG_ATC_DCTX_ALIGNED
+                      ,
+                      const int plane, const int eob, const int dc_skip) {
+  if (plane != PLANE_TYPE_Y) return;
+#else
+) {
+#endif  // CONFIG_ATC_DCTX_ALIGNED
   MB_MODE_INFO *mbmi = xd->mi[0];
   TX_TYPE *tx_type =
       &xd->tx_type_map[blk_row * xd->tx_type_map_stride + blk_col];
   *tx_type = DCT_DCT;
 
+#if CONFIG_ATC_DCTX_ALIGNED
+  if (dc_skip == 1) return;
+#endif  // CONFIG_ATC_DCTX_ALIGNED
   // No need to read transform type if block is skipped.
   if (mbmi->skip_txfm[xd->tree_type == CHROMA_PART] ||
       segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
@@ -1114,9 +1236,16 @@
     const TX_SIZE square_tx_size = txsize_sqr_map[tx_size];
     FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
     if (inter_block) {
+#if CONFIG_ATC_DCTX_ALIGNED
+      const int eob_tx_ctx = get_lp2tx_ctx(tx_size, get_txb_bwl(tx_size), eob);
+      *tx_type = av1_ext_tx_inv[tx_set_type][aom_read_symbol(
+          r, ec_ctx->inter_ext_tx_cdf[eset][eob_tx_ctx][square_tx_size],
+          av1_num_ext_tx_set[tx_set_type], ACCT_INFO("tx_type"))];
+#else
       *tx_type = av1_ext_tx_inv[tx_set_type][aom_read_symbol(
           r, ec_ctx->inter_ext_tx_cdf[eset][square_tx_size],
-          av1_num_ext_tx_set[tx_set_type], ACCT_STR)];
+          av1_num_ext_tx_set[tx_set_type], ACCT_INFO("tx_type"))];
+#endif  // CONFIG_ATC_DCTX_ALIGNED
     } else {
       if (mbmi->fsc_mode[xd->tree_type == CHROMA_PART]) {
         *tx_type = IDTX;
@@ -1127,7 +1256,7 @@
               ? fimode_to_intradir[mbmi->filter_intra_mode_info
                                        .filter_intra_mode]
               : mbmi->mode;
-#if CONFIG_ATC_NEWTXSETS
+#if CONFIG_ATC
 #if CONFIG_ATC_REDUCED_TXSET
       const int size_info = av1_size_class[tx_size];
       *tx_type = av1_tx_idx_to_type(
@@ -1138,21 +1267,21 @@
               cm->features.reduced_tx_set_used
                   ? av1_num_reduced_tx_set
                   : av1_num_ext_tx_set_intra[tx_set_type],
-              ACCT_STR),
+              ACCT_INFO("tx_type")),
           tx_set_type, intra_mode, size_info);
 #else
       const int size_info = av1_size_class[tx_size];
       *tx_type = av1_tx_idx_to_type(
           aom_read_symbol(
               r, ec_ctx->intra_ext_tx_cdf[eset][square_tx_size][intra_mode],
-              av1_num_ext_tx_set_intra[tx_set_type], ACCT_STR),
+              av1_num_ext_tx_set_intra[tx_set_type], ACCT_INFO("tx_type")),
           tx_set_type, intra_mode, size_info);
 #endif  // CONFIG_ATC_REDUCED_TXSET
 #else
       *tx_type = av1_ext_tx_inv_intra[tx_set_type][aom_read_symbol(
           r, ec_ctx->intra_ext_tx_cdf[eset][square_tx_size][intra_mode],
-          av1_num_ext_tx_set_intra[tx_set_type], ACCT_STR)];
-#endif  // CONFIG_ATC_NEWTXSETS
+          av1_num_ext_tx_set_intra[tx_set_type], ACCT_INFO("tx_type"))];
+#endif  // CONFIG_ATC
     }
   }
 }
@@ -1193,8 +1322,9 @@
   get_above_and_left_cctx_type(cm, xd, tx_size, &above_cctx, &left_cctx);
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
   const int cctx_ctx = get_cctx_context(xd, &above_cctx, &left_cctx);
-  cctx_type = aom_read_symbol(
-      r, ec_ctx->cctx_type_cdf[square_tx_size][cctx_ctx], CCTX_TYPES, ACCT_STR);
+  cctx_type =
+      aom_read_symbol(r, ec_ctx->cctx_type_cdf[square_tx_size][cctx_ctx],
+                      CCTX_TYPES, ACCT_INFO("cctx_type"));
   update_cctx_array(xd, blk_row, blk_col, row_offset, col_offset, tx_size,
                     cctx_type);
 }
@@ -1222,8 +1352,9 @@
     const TX_SIZE square_tx_size = txsize_sqr_map[tx_size];
     if (!inter_block) {
       if (block_signals_sec_tx_type(xd, tx_size, *tx_type, *eob)) {
-        const uint8_t stx_flag = aom_read_symbol(
-            r, ec_ctx->stx_cdf[square_tx_size], STX_TYPES, ACCT_STR);
+        const uint8_t stx_flag =
+            aom_read_symbol(r, ec_ctx->stx_cdf[square_tx_size], STX_TYPES,
+                            ACCT_INFO("stx_flag"));
         *tx_type |= (stx_flag << 4);
       }
     }
@@ -1232,7 +1363,7 @@
     const TX_SIZE square_tx_size = txsize_sqr_map[tx_size];
     if (block_signals_sec_tx_type(xd, tx_size, *tx_type, *eob)) {
       const uint8_t stx_flag = aom_read_symbol(
-          r, ec_ctx->stx_cdf[square_tx_size], STX_TYPES, ACCT_STR);
+          r, ec_ctx->stx_cdf[square_tx_size], STX_TYPES, ACCT_INFO("stx_flag"));
       *tx_type |= (stx_flag << 4);
     }
   }
@@ -1258,12 +1389,12 @@
                             const int_mv *ref_mv, int mi_row, int mi_col,
                             BLOCK_SIZE bsize, aom_reader *r) {
   FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
   const MB_MODE_INFO *const mbmi = xd->mi[0];
   if (mbmi->intrabc_mode == 1) {
     mv->as_int = ref_mv->as_int;
   } else {
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
 #if CONFIG_FLEX_MVRES
     read_mv(r, &mv->as_mv, ref_mv->as_mv,
 #if CONFIG_ADAPTIVE_MVD
@@ -1278,9 +1409,9 @@
           &ec_ctx->ndvc, MV_SUBPEL_NONE);
 #endif
 
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
   }
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
   // DV should not have sub-pel.
   assert((mv->as_mv.col & 7) == 0);
   assert((mv->as_mv.row & 7) == 0);
@@ -1292,21 +1423,21 @@
   return valid;
 }
 
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
 static void read_intrabc_drl_idx(int max_ref_bv_cnt, FRAME_CONTEXT *ec_ctx,
                                  MB_MODE_INFO *mbmi, aom_reader *r) {
   mbmi->intrabc_drl_idx = 0;
   int bit_cnt = 0;
   for (int idx = 0; idx < max_ref_bv_cnt - 1; ++idx) {
-    const int intrabc_drl_idx =
-        aom_read_symbol(r, ec_ctx->intrabc_drl_idx_cdf[bit_cnt], 2, ACCT_STR);
+    const int intrabc_drl_idx = aom_read_symbol(
+        r, ec_ctx->intrabc_drl_idx_cdf[bit_cnt], 2, ACCT_INFO());
     mbmi->intrabc_drl_idx = idx + intrabc_drl_idx;
     if (!intrabc_drl_idx) break;
     ++bit_cnt;
   }
   assert(mbmi->intrabc_drl_idx < max_ref_bv_cnt);
 }
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
 
 static void read_intrabc_info(AV1_COMMON *const cm, DecoderCodingBlock *dcb,
                               aom_reader *r) {
@@ -1314,16 +1445,18 @@
   MB_MODE_INFO *const mbmi = xd->mi[0];
   FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
   assert(xd->tree_type != CHROMA_PART);
+#if !CONFIG_SKIP_TXFM_OPT
 #if CONFIG_NEW_CONTEXT_MODELING
   mbmi->use_intrabc[0] = 0;
   mbmi->use_intrabc[1] = 0;
   const int intrabc_ctx = get_intrabc_ctx(xd);
   mbmi->use_intrabc[xd->tree_type == CHROMA_PART] =
-      aom_read_symbol(r, ec_ctx->intrabc_cdf[intrabc_ctx], 2, ACCT_STR);
+      aom_read_symbol(r, ec_ctx->intrabc_cdf[intrabc_ctx], 2, ACCT_INFO());
 #else
   mbmi->use_intrabc[xd->tree_type == CHROMA_PART] =
-      aom_read_symbol(r, ec_ctx->intrabc_cdf, 2, ACCT_STR);
+      aom_read_symbol(r, ec_ctx->intrabc_cdf, 2, ACCT_INFO());
 #endif  // CONFIG_NEW_CONTEXT_MODELING
+#endif  // !CONFIG_SKIP_TXFM_OPT
   if (xd->tree_type == CHROMA_PART)
     assert(mbmi->use_intrabc[PLANE_TYPE_UV] == 0);
   if (mbmi->use_intrabc[xd->tree_type == CHROMA_PART]) {
@@ -1344,6 +1477,10 @@
     set_most_probable_mv_precision(cm, mbmi, bsize);
 #endif
 
+#if CONFIG_REFINEMV
+    mbmi->refinemv_flag = 0;
+#endif  // CONFIG_REFINEMV
+
 #if CONFIG_BAWP
     mbmi->bawp_flag = 0;
 #endif
@@ -1354,7 +1491,7 @@
     // TODO(kslu): Rework av1_find_mv_refs to avoid having this big array
     // ref_mvs
     int_mv ref_mvs[INTRA_FRAME + 1][MAX_MV_REF_CANDIDATES];
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
     for (int i = 0; i < MAX_REF_BV_STACK_SIZE; ++i) {
       xd->ref_mv_stack[INTRA_FRAME][i].this_mv.as_int = 0;
       xd->ref_mv_stack[INTRA_FRAME][i].comp_mv.as_int = 0;
@@ -1362,8 +1499,11 @@
       xd->ref_mv_stack[INTRA_FRAME][i].row_offset = OFFSET_NONSPATIAL;
       xd->ref_mv_stack[INTRA_FRAME][i].col_offset = OFFSET_NONSPATIAL;
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
+#if CONFIG_CWP
+      xd->ref_mv_stack[INTRA_FRAME][i].cwp_idx = CWP_EQUAL;
+#endif  // CONFIG_CWP
     }
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
 
     av1_find_mv_refs(cm, xd, mbmi, INTRA_FRAME, dcb->ref_mv_count,
                      xd->ref_mv_stack, xd->weight, ref_mvs, /*global_mvs=*/NULL
@@ -1378,9 +1518,9 @@
 
     );
 
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
     mbmi->intrabc_mode =
-        aom_read_symbol(r, ec_ctx->intrabc_mode_cdf, 2, ACCT_STR);
+        aom_read_symbol(r, ec_ctx->intrabc_mode_cdf, 2, ACCT_INFO());
     read_intrabc_drl_idx(MAX_REF_BV_STACK_SIZE, ec_ctx, mbmi, r);
     int_mv dv_ref =
         xd->ref_mv_stack[INTRA_FRAME][mbmi->intrabc_drl_idx].this_mv;
@@ -1396,7 +1536,7 @@
     av1_find_best_ref_mvs(0, ref_mvs[INTRA_FRAME], &nearestmv, &nearmv, 0);
 #endif
     int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
     if (dv_ref.as_int == 0)
       av1_find_ref_dv(&dv_ref, &xd->tile, cm->mib_size, xd->mi_row);
     // Ref DV should not have sub-pel.
@@ -1467,14 +1607,17 @@
   uint8_t mode_idx = 0;
   const int context = get_y_mode_idx_ctx(xd);
   int mode_set_index =
-      aom_read_symbol(r, ec_ctx->y_mode_set_cdf, INTRA_MODE_SETS, ACCT_STR);
+      aom_read_symbol(r, ec_ctx->y_mode_set_cdf, INTRA_MODE_SETS,
+                      ACCT_INFO("mode_set_index", "y_mode_set_cdf"));
   if (mode_set_index == 0) {
-    mode_idx = aom_read_symbol(r, ec_ctx->y_mode_idx_cdf_0[context],
-                               FIRST_MODE_COUNT, ACCT_STR);
+    mode_idx =
+        aom_read_symbol(r, ec_ctx->y_mode_idx_cdf_0[context], FIRST_MODE_COUNT,
+                        ACCT_INFO("mode_idx", "y_mode_idx_cdf_0"));
   } else {
-    mode_idx = FIRST_MODE_COUNT + (mode_set_index - 1) * SECOND_MODE_COUNT +
-               aom_read_symbol(r, ec_ctx->y_mode_idx_cdf_1[context],
-                               SECOND_MODE_COUNT, ACCT_STR);
+    mode_idx =
+        FIRST_MODE_COUNT + (mode_set_index - 1) * SECOND_MODE_COUNT +
+        aom_read_symbol(r, ec_ctx->y_mode_idx_cdf_1[context], SECOND_MODE_COUNT,
+                        ACCT_INFO("mode_idx", "y_mode_idx_cdf_1"));
   }
   assert(mode_idx < LUMA_MODE_COUNT);
   get_y_intra_mode_set(mbmi, xd);
@@ -1493,7 +1636,7 @@
   const int context = av1_is_directional_mode(mbmi->mode) ? 1 : 0;
   const int uv_mode_idx =
       aom_read_symbol(r, ec_ctx->uv_mode_cdf[cfl_allowed][context],
-                      UV_INTRA_MODES - !cfl_allowed, ACCT_STR);
+                      UV_INTRA_MODES - !cfl_allowed, ACCT_INFO("uv_mode_idx"));
   assert(uv_mode_idx >= 0 && uv_mode_idx < UV_INTRA_MODES);
   get_uv_intra_mode_set(mbmi);
   mbmi->uv_mode = mbmi->uv_intra_mode_list[uv_mode_idx];
@@ -1520,8 +1663,30 @@
   mbmi->skip_mode = 0;
 #endif  // CONFIG_SKIP_MODE_ENHANCEMENT
 
+#if CONFIG_SKIP_TXFM_OPT
+  if (av1_allow_intrabc(cm) && xd->tree_type != CHROMA_PART) {
+#if CONFIG_NEW_CONTEXT_MODELING
+    mbmi->use_intrabc[0] = 0;
+    mbmi->use_intrabc[1] = 0;
+    const int intrabc_ctx = get_intrabc_ctx(xd);
+    mbmi->use_intrabc[xd->tree_type == CHROMA_PART] =
+        aom_read_symbol(r, ec_ctx->intrabc_cdf[intrabc_ctx], 2,
+                        ACCT_INFO("use_intrabc", "chroma"));
+#else
+    mbmi->use_intrabc[xd->tree_type == CHROMA_PART] = aom_read_symbol(
+        r, ec_ctx->intrabc_cdf, 2, ACCT_INFO("use_intrabc", "chroma"));
+#endif  // CONFIG_NEW_CONTEXT_MODELING
+  }
+  if (is_intrabc_block(mbmi, xd->tree_type)) {
+    mbmi->skip_txfm[xd->tree_type == CHROMA_PART] =
+        read_skip_txfm(cm, xd, mbmi->segment_id, r);
+  } else {
+    mbmi->skip_txfm[xd->tree_type == CHROMA_PART] = 0;
+  }
+#else
   mbmi->skip_txfm[xd->tree_type == CHROMA_PART] =
       read_skip_txfm(cm, xd, mbmi->segment_id, r);
+#endif  // CONFIG_SKIP_TXFM_OPT
 
   if (!seg->segid_preskip)
     mbmi->segment_id = read_intra_segment_id(
@@ -1590,7 +1755,11 @@
 
     mbmi->mrl_index =
         (cm->seq_params.enable_mrls && av1_is_directional_mode(mbmi->mode))
+#if CONFIG_EXT_DIR
+            ? read_mrl_index(ec_ctx, r, xd->neighbors[0], xd->neighbors[1])
+#else
             ? read_mrl_index(ec_ctx, r)
+#endif  // CONFIG_EXT_DIR
             : 0;
   }
 
@@ -1649,13 +1818,13 @@
 static int read_mv_component_low_precision(aom_reader *r, nmv_component *mvcomp,
                                            MvSubpelPrecision precision) {
   int offset, mag;
-  const int sign = aom_read_symbol(r, mvcomp->sign_cdf, 2, ACCT_STR);
+  const int sign = aom_read_symbol(r, mvcomp->sign_cdf, 2, ACCT_INFO("sign"));
   const int num_mv_classes = MV_CLASSES - (precision <= MV_PRECISION_FOUR_PEL) -
                              (precision <= MV_PRECISION_8_PEL);
 
   int mv_class = aom_read_symbol(
       r, mvcomp->classes_cdf[av1_get_mv_class_context(precision)],
-      num_mv_classes, ACCT_STR);
+      num_mv_classes, ACCT_INFO("mv_class"));
 
   if (precision <= MV_PRECISION_FOUR_PEL && mv_class >= MV_CLASS_1)
     mv_class += (precision == MV_PRECISION_FOUR_PEL ? 1 : 2);
@@ -1673,7 +1842,8 @@
     const int n = (mv_class == MV_CLASS_0) ? 1 : mv_class;
     offset = 0;
     for (int i = start_lsb; i < n; ++i)
-      offset |= aom_read_symbol(r, mvcomp->bits_cdf[i], 2, ACCT_STR) << i;
+      offset |= aom_read_symbol(r, mvcomp->bits_cdf[i], 2, ACCT_INFO("offset"))
+                << i;
     const int base = mv_class ? (1 << mv_class) : 0;
     mag = (offset + base);  // int mv data
   }
@@ -1705,19 +1875,21 @@
 #endif
 
   int mag, d, fr, hp;
-  const int sign = aom_read_symbol(r, mvcomp->sign_cdf, 2, ACCT_STR);
+  const int sign = aom_read_symbol(r, mvcomp->sign_cdf, 2, ACCT_INFO("sign"));
   const int mv_class =
 #if CONFIG_ADAPTIVE_MVD
       is_adaptive_mvd
-          ? aom_read_symbol(r, mvcomp->amvd_classes_cdf, MV_CLASSES, ACCT_STR)
+          ? aom_read_symbol(r, mvcomp->amvd_classes_cdf, MV_CLASSES,
+                            ACCT_INFO("mv_class", "amvd_classes_cdf"))
           :
 #endif  // CONFIG_ADAPTIVE_MVD
 #if CONFIG_FLEX_MVRES
           aom_read_symbol(
               r, mvcomp->classes_cdf[av1_get_mv_class_context(precision)],
-              MV_CLASSES, ACCT_STR);
+              MV_CLASSES, ACCT_INFO("mv_class", "classes_cdf"));
 #else
-      aom_read_symbol(r, mvcomp->classes_cdf, MV_CLASSES, ACCT_STR);
+      aom_read_symbol(r, mvcomp->classes_cdf, MV_CLASSES,
+                      ACCT_INFO("mv_class", "classes_cdf"));
 #endif
 
   const int class0 = mv_class == MV_CLASS_0;
@@ -1729,13 +1901,15 @@
 #endif  // CONFIG_ADAPTIVE_MVD
     // Integer part
     if (class0) {
-      d = aom_read_symbol(r, mvcomp->class0_cdf, CLASS0_SIZE, ACCT_STR);
+      d = aom_read_symbol(r, mvcomp->class0_cdf, CLASS0_SIZE,
+                          ACCT_INFO("class0_cdf"));
       mag = 0;
     } else {
       const int n = mv_class + CLASS0_BITS - 1;  // number of bits
       d = 0;
       for (int i = 0; i < n; ++i)
-        d |= aom_read_symbol(r, mvcomp->bits_cdf[i], 2, ACCT_STR) << i;
+        d |= aom_read_symbol(r, mvcomp->bits_cdf[i], 2, ACCT_INFO("bits_cdf"))
+             << i;
       mag = CLASS0_SIZE << (mv_class + 2);
     }
 #if CONFIG_ADAPTIVE_MVD
@@ -1771,17 +1945,19 @@
 #if CONFIG_FLEX_MVRES
     fr = aom_read_symbol(
              r, class0 ? mvcomp->class0_fp_cdf[d][0] : mvcomp->fp_cdf[0], 2,
-             ACCT_STR)
+             ACCT_INFO("class0_fp_cdf"))
          << 1;
-    fr += precision > MV_PRECISION_HALF_PEL
-              ? aom_read_symbol(r,
-                                class0 ? mvcomp->class0_fp_cdf[d][1 + (fr >> 1)]
-                                       : mvcomp->fp_cdf[1 + (fr >> 1)],
-                                2, ACCT_STR)
-              : 1;
+    fr +=
+        precision > MV_PRECISION_HALF_PEL
+            ? aom_read_symbol(r,
+                              class0 ? mvcomp->class0_fp_cdf[d][1 + (fr >> 1)]
+                                     : mvcomp->fp_cdf[1 + (fr >> 1)],
+                              2, ACCT_INFO(class0 ? "class0_fp_cdf" : "fp_cdf"))
+            : 1;
 #else
     fr = aom_read_symbol(r, class0 ? mvcomp->class0_fp_cdf[d] : mvcomp->fp_cdf,
-                         MV_FP_SIZE, ACCT_STR);
+                         MV_FP_SIZE,
+                         ACCT_INFO(class0 ? "class0_fp_cdf" : "fp_cdf"));
 #endif  // CONFIG_FLEX_MVRES
 
 #if CONFIG_FLEX_MVRES
@@ -1790,9 +1966,9 @@
 #else
     hp = usehp
 #endif
-             ? aom_read_symbol(r,
-                               class0 ? mvcomp->class0_hp_cdf : mvcomp->hp_cdf,
-                               2, ACCT_STR)
+             ? aom_read_symbol(
+                   r, class0 ? mvcomp->class0_hp_cdf : mvcomp->hp_cdf, 2,
+                   ACCT_INFO(class0 ? "class0_hp_cdf" : "hp_cdf"))
              : 1;
   } else {
     fr = 3;
@@ -1825,12 +2001,14 @@
 #endif  // IMPROVED_AMVD && CONFIG_JOINT_MVD
   const MV_JOINT_TYPE joint_type =
 #if CONFIG_ADAPTIVE_MVD
-      is_adaptive_mvd ? (MV_JOINT_TYPE)aom_read_symbol(r, ctx->amvd_joints_cdf,
-                                                       MV_JOINTS, ACCT_STR)
-                      :
+      is_adaptive_mvd
+          ? (MV_JOINT_TYPE)aom_read_symbol(
+                r, ctx->amvd_joints_cdf, MV_JOINTS,
+                ACCT_INFO("joint_type", "amvd_joints_cdf"))
+          :
 #endif  // CONFIG_ADAPTIVE_MVD
-                      (MV_JOINT_TYPE)aom_read_symbol(r, ctx->joints_cdf,
-                                                     MV_JOINTS, ACCT_STR);
+          (MV_JOINT_TYPE)aom_read_symbol(r, ctx->joints_cdf, MV_JOINTS,
+                                         ACCT_INFO("joint_type", "joints_cdf"));
   if (mv_joint_vertical(joint_type))
     diff.row = read_mv_component(r, &ctx->comps[0],
 #if CONFIG_ADAPTIVE_MVD
@@ -1879,7 +2057,7 @@
   if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT) {
     const int ctx = av1_get_reference_mode_context(cm, xd);
     const REFERENCE_MODE mode = (REFERENCE_MODE)aom_read_symbol(
-        r, xd->tile_ctx->comp_inter_cdf[ctx], 2, ACCT_STR);
+        r, xd->tile_ctx->comp_inter_cdf[ctx], 2, ACCT_INFO());
     return mode;  // SINGLE_REFERENCE or COMPOUND_REFERENCE
   } else {
     assert(cm->current_frame.reference_mode == SINGLE_REFERENCE);
@@ -1893,7 +2071,7 @@
   const int n_refs = ref_frames_info->num_total_refs;
   for (int i = 0; i < n_refs - 1; i++) {
     const int bit = aom_read_symbol(
-        r, av1_get_pred_cdf_single_ref(xd, i, n_refs), 2, ACCT_STR);
+        r, av1_get_pred_cdf_single_ref(xd, i, n_refs), 2, ACCT_INFO());
     if (bit) {
       ref_frame[0] = i;
       return;
@@ -1924,7 +2102,7 @@
                         : aom_read_symbol(r,
                                           av1_get_pred_cdf_compound_ref(
                                               xd, i, n_bits, bit_type, n_refs),
-                                          2, ACCT_STR);
+                                          2, ACCT_INFO());
     if (bit) {
       ref_frame[n_bits++] = i;
 #if CONFIG_ALLOW_SAME_REF_COMPOUND
@@ -1968,7 +2146,8 @@
       is_tip_allowed_bsize(bsize)) {
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
     const int tip_ctx = get_tip_ctx(xd);
-    if (aom_read_symbol(r, xd->tile_ctx->tip_cdf[tip_ctx], 2, ACCT_STR)) {
+    if (aom_read_symbol(r, xd->tile_ctx->tip_cdf[tip_ctx], 2,
+                        ACCT_INFO("tip_cdf"))) {
       ref_frame[0] = TIP_FRAME;
     }
   }
@@ -2015,7 +2194,8 @@
   } else {
     const int ctx = av1_get_pred_context_switchable_interp(xd, 0);
     const InterpFilter filter = (InterpFilter)aom_read_symbol(
-        r, ec_ctx->switchable_interp_cdf[ctx], SWITCHABLE_FILTERS, ACCT_STR);
+        r, ec_ctx->switchable_interp_cdf[ctx], SWITCHABLE_FILTERS,
+        ACCT_INFO("switchable_interp_cdf"));
     mbmi->interp_fltr = filter;
   }
 }
@@ -2040,6 +2220,10 @@
   mbmi->bawp_flag = 0;
 #endif
 
+#if CONFIG_REFINEMV
+  mbmi->refinemv_flag = 0;
+#endif  // CONFIG_REFINEMV
+
   FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
 
 #if CONFIG_AIMC
@@ -2074,7 +2258,11 @@
     // Parsing reference line index
     mbmi->mrl_index =
         (cm->seq_params.enable_mrls && av1_is_directional_mode(mbmi->mode))
+#if CONFIG_EXT_DIR
+            ? read_mrl_index(ec_ctx, r, xd->neighbors[0], xd->neighbors[1])
+#else
             ? read_mrl_index(ec_ctx, r)
+#endif  // CONFIG_EXT_DIR
             : 0;
 
   if (!cm->seq_params.monochrome && xd->is_chroma_ref) {
@@ -2153,6 +2341,9 @@
     allow_hp = MV_SUBPEL_NONE;
   }
 #endif
+#if CONFIG_CWG_D067_IMPROVED_WARP
+  (void)ref_warp_model;
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
 #if CONFIG_JOINT_MVD
   int first_ref_dist = 0;
   int sec_ref_dist = 0;
@@ -2202,6 +2393,28 @@
     }
 #if CONFIG_WARPMV
     case WARPMV: {
+#if CONFIG_CWG_D067_IMPROVED_WARP
+      mbmi->mv[0] = ref_mv[0];
+      if (mbmi->warpmv_with_mvd_flag) {
+        nmv_context *const nmvc = &ec_ctx->nmvc;
+        read_mv(r, &mv[0].as_mv,
+#if CONFIG_FLEX_MVRES
+                ref_mv[0].as_mv,
+#else
+                &ref_mv[0].as_mv,
+#endif
+#if CONFIG_ADAPTIVE_MVD
+                is_adaptive_mvd,
+#endif  // CONFIG_ADAPTIVE_MVD
+                nmvc,
+#if CONFIG_FLEX_MVRES
+                precision);
+#else
+                allow_hp);
+#endif
+      }
+
+#else
       assert(ref_warp_model);
       mbmi->mv[0] = get_mv_from_wrl(xd, ref_warp_model,
 
@@ -2211,6 +2424,8 @@
                                     1, 0,
 #endif
                                     bsize, xd->mi_col, xd->mi_row);
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+
       break;
     }
 #endif  // CONFIG_WARPMV
@@ -2437,10 +2652,10 @@
 
 static int read_is_inter_block(AV1_COMMON *const cm, MACROBLOCKD *const xd,
                                int segment_id, aom_reader *r
-#if CONFIG_CONTEXT_DERIVATION
+#if CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT
                                ,
                                const int skip_txfm
-#endif  // CONFIG_CONTEXT_DERIVATION
+#endif  // CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT
 ) {
   if (segfeature_active(&cm->seg, segment_id, SEG_LVL_GLOBALMV)) {
     return 1;
@@ -2448,11 +2663,12 @@
   const int ctx = av1_get_intra_inter_context(xd);
   FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
   const int is_inter =
-#if CONFIG_CONTEXT_DERIVATION
-      aom_read_symbol(r, ec_ctx->intra_inter_cdf[skip_txfm][ctx], 2, ACCT_STR);
+#if CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT
+      aom_read_symbol(r, ec_ctx->intra_inter_cdf[skip_txfm][ctx], 2,
+                      ACCT_INFO());
 #else
-      aom_read_symbol(r, ec_ctx->intra_inter_cdf[ctx], 2, ACCT_STR);
-#endif  // CONFIG_CONTEXT_DERIVATION
+      aom_read_symbol(r, ec_ctx->intra_inter_cdf[ctx], 2, ACCT_INFO());
+#endif  // CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT
   return is_inter;
 }
 
@@ -2489,6 +2705,22 @@
 }
 #endif  // DEC_MISMATCH_DEBUG
 
+#if CONFIG_REFINEMV
+// Reads refinemv_flag from the bitstream if signaled, else keeps the default.
+static void read_refinemv_flag(AV1_COMMON *const cm, MACROBLOCKD *xd,
+                               aom_reader *r, BLOCK_SIZE bsize) {
+  MB_MODE_INFO *const mbmi = xd->mi[0];
+  mbmi->refinemv_flag = get_default_refinemv_flag(cm, mbmi);
+  int signal_refinemv = switchable_refinemv_flag(cm, mbmi);
+  if (signal_refinemv) {
+    const int refinemv_ctx = av1_get_refinemv_context(cm, xd, bsize);
+    mbmi->refinemv_flag =
+        aom_read_symbol(r, xd->tile_ctx->refinemv_flag_cdf[refinemv_ctx],
+                        REFINEMV_NUM_MODES, ACCT_INFO("refinemv_flag"));
+  }
+}
+#endif  // CONFIG_REFINEMV
+
 #if CONFIG_FLEX_MVRES
 MvSubpelPrecision av1_read_pb_mv_precision(AV1_COMMON *const cm,
                                            MACROBLOCKD *const xd,
@@ -2505,8 +2737,9 @@
          cm->features.most_probable_fr_mv_precision);
 
   const int mpp_flag_context = av1_get_mpp_flag_context(cm, xd);
-  const int mpp_flag = aom_read_symbol(
-      r, xd->tile_ctx->pb_mv_mpp_flag_cdf[mpp_flag_context], 2, ACCT_STR);
+  const int mpp_flag =
+      aom_read_symbol(r, xd->tile_ctx->pb_mv_mpp_flag_cdf[mpp_flag_context], 2,
+                      ACCT_INFO("mpp_flag"));
   if (mpp_flag) return mbmi->most_probable_pb_mv_precision;
   const PRECISION_SET *precision_def =
       &av1_mv_precision_sets[mbmi->mb_precision_set];
@@ -2515,7 +2748,7 @@
       r,
       xd->tile_ctx->pb_mv_precision_cdf[down_ctx]
                                        [max_precision - MV_PRECISION_HALF_PEL],
-      nsymbs, ACCT_STR);
+      nsymbs, ACCT_INFO("down"));
   return av1_get_precision_from_index(mbmi, down);
 }
 #endif  //  CONFIG_FLEX_MVRES
@@ -2561,6 +2794,10 @@
   mbmi->bawp_flag = 0;
 #endif
 
+#if CONFIG_REFINEMV
+  mbmi->refinemv_flag = 0;
+#endif  // CONFIG_REFINEMV
+
   av1_collect_neighbors_ref_counts(xd);
 
   read_ref_frames(cm, xd, r, mbmi->segment_id, mbmi->ref_frame);
@@ -2573,6 +2810,7 @@
                                xd->valid_num_warp_candidates);
 #endif  // CONFIG_WARP_REF_LIST
 
+#if !CONFIG_SEP_COMP_DRL
   av1_find_mv_refs(
       cm, xd, mbmi, ref_frame, dcb->ref_mv_count, xd->ref_mv_stack, xd->weight,
       ref_mvs, /*global_mvs=*/NULL
@@ -2586,32 +2824,42 @@
       ref_frame < INTER_REFS_PER_FRAME ? MAX_WARP_REF_CANDIDATES : 0,
       xd->valid_num_warp_candidates
 #endif  // CONFIG_WARP_REF_LIST
-
   );
+#endif  // !CONFIG_SEP_COMP_DRL
 
 #if CONFIG_C076_INTER_MOD_CTX
   av1_find_mode_ctx(cm, xd, inter_mode_ctx, ref_frame);
 #endif  // CONFIG_C076_INTER_MOD_CTX
 
+#if CONFIG_SEP_COMP_DRL
+  mbmi->ref_mv_idx[0] = 0;
+  mbmi->ref_mv_idx[1] = 0;
+#else
   mbmi->ref_mv_idx = 0;
+#endif  // CONFIG_SEP_COMP_DRL
+
+#if CONFIG_CWP
+  mbmi->cwp_idx = CWP_EQUAL;
+#endif  // CONFIG_CWP
+#if CONFIG_IMPROVED_JMVD && CONFIG_JOINT_MVD
+  mbmi->jmvd_scale_mode = 0;
+#endif  // CONFIG_IMPROVED_JMVD && CONFIG_JOINT_MVD
+
 #if CONFIG_WARP_REF_LIST
   mbmi->warp_ref_idx = 0;
   mbmi->max_num_warp_candidates = 0;
 #endif  // CONFIG_WARP_REF_LIST
 
 #if CONFIG_WARPMV
+#if CONFIG_CWG_D067_IMPROVED_WARP
+  mbmi->warpmv_with_mvd_flag = 0;
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
   mbmi->motion_mode = SIMPLE_TRANSLATION;
   WARP_CANDIDATE warp_param_stack[MAX_WARP_REF_CANDIDATES];
-  WarpedMotionParams ref_warp_model;
+  WarpedMotionParams ref_warp_model = default_warp_params;
 #endif  // CONFIG_WARPMV
   if (mbmi->skip_mode) {
     assert(is_compound);
-#if CONFIG_SKIP_MODE_ENHANCEMENT && CONFIG_OPTFLOW_REFINEMENT
-    mbmi->mode =
-        (cm->features.opfl_refine_type ? NEAR_NEARMV_OPTFLOW : NEAR_NEARMV);
-#else
-    mbmi->mode = NEAR_NEARMV;
-#endif  // CONFIG_SKIP_MODE_ENHANCEMENT && CONFIG_OPTFLOW_REFINEMENT
 
 #if CONFIG_SKIP_MODE_ENHANCEMENT
     read_drl_idx(cm->features.max_drl_bits,
@@ -2619,12 +2867,52 @@
                  ec_ctx, dcb, mbmi, r);
 #endif  // CONFIG_SKIP_MODE_ENHANCEMENT
 
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SEP_COMP_DRL
+    av1_find_mv_refs(
+        cm, xd, mbmi, ref_frame, dcb->ref_mv_count, xd->ref_mv_stack,
+        xd->weight, ref_mvs, /*global_mvs=*/NULL
+#if !CONFIG_C076_INTER_MOD_CTX
+        ,
+        inter_mode_ctx
+#endif  // !CONFIG_C076_INTER_MOD_CTX
+#if CONFIG_WARP_REF_LIST
+        ,
+        xd->warp_param_stack,
+        ref_frame < SINGLE_REF_FRAMES ? MAX_WARP_REF_CANDIDATES : 0,
+        xd->valid_num_warp_candidates
+#endif  // CONFIG_WARP_REF_LIST
+    );
+#endif  // CONFIG_SEP_COMP_DRL
+
+#if CONFIG_SKIP_MODE_ENHANCEMENT
+#if CONFIG_SEP_COMP_DRL
+    mbmi->ref_frame[0] =
+        xd->skip_mvp_candidate_list.ref_frame0[get_ref_mv_idx(mbmi, 0)];
+    mbmi->ref_frame[1] =
+        xd->skip_mvp_candidate_list.ref_frame1[get_ref_mv_idx(mbmi, 1)];
+#else
     mbmi->ref_frame[0] =
         xd->skip_mvp_candidate_list.ref_frame0[mbmi->ref_mv_idx];
     mbmi->ref_frame[1] =
         xd->skip_mvp_candidate_list.ref_frame1[mbmi->ref_mv_idx];
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
+
+#if CONFIG_REFINEMV && !CONFIG_CWP
+    mbmi->refinemv_flag = get_default_refinemv_flag(cm, mbmi);
+#endif  // CONFIG_REFINEMV && !CONFIG_CWP
+
+#if CONFIG_SKIP_MODE_ENHANCEMENT && CONFIG_OPTFLOW_REFINEMENT
+    mbmi->mode = (cm->features.opfl_refine_type
+#if CONFIG_CWP
+                          && !cm->features.enable_cwp
+#endif  // CONFIG_CWP
+                      ? NEAR_NEARMV_OPTFLOW
+                      : NEAR_NEARMV);
+#else
+    mbmi->mode = NEAR_NEARMV;
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT && CONFIG_OPTFLOW_REFINEMENT
+
   } else {
     if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) ||
         segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_GLOBALMV)) {
@@ -2646,12 +2934,29 @@
 #endif  // CONFIG_WARPMV
         );
 
+#if CONFIG_SEP_COMP_DRL
+      av1_find_mv_refs(
+          cm, xd, mbmi, ref_frame, dcb->ref_mv_count, xd->ref_mv_stack,
+          xd->weight, ref_mvs, /*global_mvs=*/NULL
+#if !CONFIG_C076_INTER_MOD_CTX
+          ,
+          inter_mode_ctx
+#endif  // !CONFIG_C076_INTER_MOD_CTX
+#if CONFIG_WARP_REF_LIST
+          ,
+          xd->warp_param_stack,
+          ref_frame < SINGLE_REF_FRAMES ? MAX_WARP_REF_CANDIDATES : 0,
+          xd->valid_num_warp_candidates
+#endif  // CONFIG_WARP_REF_LIST
+      );
+#endif  // CONFIG_SEP_COMP_DRL
+
 #if CONFIG_WARPMV
 #if CONFIG_BAWP
       if (cm->features.enable_bawp &&
           av1_allow_bawp(mbmi, xd->mi_row, xd->mi_col)) {
-        mbmi->bawp_flag =
-            aom_read_symbol(r, xd->tile_ctx->bawp_cdf, 2, ACCT_STR);
+        mbmi->bawp_flag = aom_read_symbol(r, xd->tile_ctx->bawp_cdf, 2,
+                                          ACCT_INFO("bawp_flag"));
       }
 #endif
 
@@ -2671,12 +2976,15 @@
       av1_count_overlappable_neighbors(cm, xd);
       mbmi->motion_mode = read_motion_mode(cm, xd, mbmi, r);
       int is_warpmv_warp_causal =
-          (mbmi->motion_mode == WARPED_CAUSAL && mbmi->mode == WARPMV);
+          ((mbmi->motion_mode == WARPED_CAUSAL) && mbmi->mode == WARPMV);
       if (mbmi->motion_mode == WARP_DELTA || is_warpmv_warp_causal) {
-        mbmi->max_num_warp_candidates =
-            (mbmi->mode == GLOBALMV || mbmi->mode == NEARMV)
-                ? 1
-                : MAX_WARP_REF_CANDIDATES;
+        mbmi->max_num_warp_candidates = (mbmi->mode == GLOBALMV ||
+#if CONFIG_CWG_D067_IMPROVED_WARP
+                                         mbmi->mode == AMVDNEWMV ||
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+                                         mbmi->mode == NEARMV)
+                                            ? 1
+                                            : MAX_WARP_REF_CANDIDATES;
         if (is_warpmv_warp_causal) {
           mbmi->max_num_warp_candidates = MAX_WARP_REF_CANDIDATES;
         }
@@ -2691,6 +2999,14 @@
       }
 #endif  // CONFIG_WARPMV
 
+#if CONFIG_CWG_D067_IMPROVED_WARP
+      if (allow_warpmv_with_mvd_coding(cm, mbmi)) {
+        read_warpmv_with_mvd_flag(xd->tile_ctx, mbmi, r);
+      } else {
+        mbmi->warpmv_with_mvd_flag = 0;
+      }
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+
 #if CONFIG_IMPROVED_JMVD && CONFIG_JOINT_MVD
       mbmi->jmvd_scale_mode = read_jmvd_scale_mode(xd, r, mbmi);
 #endif  // CONFIG_IMPROVED_JMVD && CONFIG_JOINT_MVD
@@ -2729,23 +3045,68 @@
                        mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
   }
 
+#if CONFIG_CWG_D067_IMPROVED_WARP
+  if (mbmi->mode == WARPMV) {
+    ref_mv[0] = get_mv_from_wrl(xd, &ref_warp_model,
+                                !mbmi->warpmv_with_mvd_flag
+                                    ? MV_PRECISION_ONE_EIGHTH_PEL
+                                    : mbmi->pb_mv_precision,
+                                bsize, xd->mi_col, xd->mi_row);
+
+  } else {
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+#if CONFIG_SEP_COMP_DRL
+    if (has_second_drl(mbmi))
+      ref_mv[0] =
+          xd->ref_mv_stack[mbmi->ref_frame[0]][get_ref_mv_idx(mbmi, 0)].this_mv;
+    else
+      ref_mv[0] = xd->ref_mv_stack[ref_frame][get_ref_mv_idx(mbmi, 0)].this_mv;
+#else
   ref_mv[0] = xd->ref_mv_stack[ref_frame][mbmi->ref_mv_idx].this_mv;
+#endif  // CONFIG_SEP_COMP_DRL
+#if CONFIG_CWG_D067_IMPROVED_WARP
+  }
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
   if (is_compound && mbmi->mode != GLOBAL_GLOBALMV) {
+#if CONFIG_SEP_COMP_DRL
+    if (has_second_drl(mbmi))
+      ref_mv[1] =
+          xd->ref_mv_stack[mbmi->ref_frame[1]][get_ref_mv_idx(mbmi, 1)].this_mv;
+    else
+      ref_mv[1] = xd->ref_mv_stack[ref_frame][get_ref_mv_idx(mbmi, 1)].comp_mv;
+#else
     ref_mv[1] = xd->ref_mv_stack[ref_frame][mbmi->ref_mv_idx].comp_mv;
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif
+#if CONFIG_SKIP_MODE_ENHANCEMENT
     if (mbmi->skip_mode) {
+#if CONFIG_SEP_COMP_DRL
+      ref_mv[0] =
+          xd->skip_mvp_candidate_list.ref_mv_stack[get_ref_mv_idx(mbmi, 0)]
+              .this_mv;
+      ref_mv[1] =
+          xd->skip_mvp_candidate_list.ref_mv_stack[get_ref_mv_idx(mbmi, 1)]
+              .comp_mv;
+#else
       ref_mv[0] =
           xd->skip_mvp_candidate_list.ref_mv_stack[mbmi->ref_mv_idx].this_mv;
       ref_mv[1] =
           xd->skip_mvp_candidate_list.ref_mv_stack[mbmi->ref_mv_idx].comp_mv;
+#endif  // CONFIG_SEP_COMP_DRL
     }
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
   }
 
   if (mbmi->skip_mode) {
 #if CONFIG_SKIP_MODE_ENHANCEMENT && CONFIG_OPTFLOW_REFINEMENT
+#if CONFIG_CWP
+    assert(mbmi->mode ==
+           (cm->features.opfl_refine_type && !cm->features.enable_cwp
+                ? NEAR_NEARMV_OPTFLOW
+                : NEAR_NEARMV));
+#else   // CONFIG_CWP
     assert(mbmi->mode ==
            (cm->features.opfl_refine_type ? NEAR_NEARMV_OPTFLOW : NEAR_NEARMV));
+#endif  // CONFIG_CWP
 #else
     assert(mbmi->mode == NEAR_NEARMV);
 #endif  // CONFIG_SKIP_MODE_ENHANCEMENT && CONFIG_OPTFLOW_REFINEMENT
@@ -2773,7 +3134,8 @@
 
 #if CONFIG_BAWP && !CONFIG_WARPMV
   if (cm->features.enable_bawp && av1_allow_bawp(mbmi, xd->mi_row, xd->mi_col))
-    mbmi->bawp_flag = aom_read_symbol(r, xd->tile_ctx->bawp_cdf, 2, ACCT_STR);
+    mbmi->bawp_flag =
+        aom_read_symbol(r, xd->tile_ctx->bawp_cdf, 2, ACCT_INFO("bawp_flag"));
 #endif
 
 #if CONFIG_EXTENDED_WARP_PREDICTION
@@ -2812,8 +3174,8 @@
   if (cm->seq_params.enable_interintra_compound && !mbmi->skip_mode &&
       is_interintra_allowed(mbmi)) {
     const int bsize_group = size_group_lookup[bsize];
-    const int interintra =
-        aom_read_symbol(r, ec_ctx->interintra_cdf[bsize_group], 2, ACCT_STR);
+    const int interintra = aom_read_symbol(
+        r, ec_ctx->interintra_cdf[bsize_group], 2, ACCT_INFO("interintra"));
     assert(mbmi->ref_frame[1] == NONE_FRAME);
     if (interintra) {
       const INTERINTRA_MODE interintra_mode =
@@ -2824,15 +3186,17 @@
       mbmi->angle_delta[PLANE_TYPE_UV] = 0;
       mbmi->filter_intra_mode_info.use_filter_intra = 0;
       if (av1_is_wedge_used(bsize)) {
-        mbmi->use_wedge_interintra = aom_read_symbol(
-            r, ec_ctx->wedge_interintra_cdf[bsize], 2, ACCT_STR);
+        mbmi->use_wedge_interintra =
+            aom_read_symbol(r, ec_ctx->wedge_interintra_cdf[bsize], 2,
+                            ACCT_INFO("use_wedge_interintra"));
         if (mbmi->use_wedge_interintra) {
 #if CONFIG_WEDGE_MOD_EXT
           mbmi->interintra_wedge_index = read_wedge_mode(r, ec_ctx, bsize);
           assert(mbmi->interintra_wedge_index != -1);
 #else
           mbmi->interintra_wedge_index = (int8_t)aom_read_symbol(
-              r, ec_ctx->wedge_idx_cdf[bsize], MAX_WEDGE_TYPES, ACCT_STR);
+              r, ec_ctx->wedge_idx_cdf[bsize], MAX_WEDGE_TYPES,
+              ACCT_INFO("interintra_wedge_index"));
 #endif
         }
       }
@@ -2859,6 +3223,12 @@
     mbmi->motion_mode = read_motion_mode(cm, xd, mbmi, r);
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
 
+#if CONFIG_REFINEMV
+  if (!mbmi->skip_mode) {
+    read_refinemv_flag(cm, xd, r, bsize);
+  }
+#endif  // CONFIG_REFINEMV
+
   // init
   mbmi->comp_group_idx = 0;
   mbmi->interinter_comp.type = COMPOUND_AVERAGE;
@@ -2867,6 +3237,9 @@
 #if CONFIG_OPTFLOW_REFINEMENT
       mbmi->mode < NEAR_NEARMV_OPTFLOW &&
 #endif  // CONFIG_OPTFLOW_REFINEMENT
+#if CONFIG_REFINEMV
+      (!mbmi->refinemv_flag || !switchable_refinemv_flag(cm, mbmi)) &&
+#endif  // CONFIG_REFINEMV
 #if IMPROVED_AMVD && CONFIG_JOINT_MVD
       !is_joint_amvd_coding_mode(mbmi->mode) &&
 #endif  // IMPROVED_AMVD && CONFIG_JOINT_MVD
@@ -2878,7 +3251,8 @@
     if (masked_compound_used) {
       const int ctx_comp_group_idx = get_comp_group_idx_context(cm, xd);
       mbmi->comp_group_idx = (uint8_t)aom_read_symbol(
-          r, ec_ctx->comp_group_idx_cdf[ctx_comp_group_idx], 2, ACCT_STR);
+          r, ec_ctx->comp_group_idx_cdf[ctx_comp_group_idx], 2,
+          ACCT_INFO("comp_group_idx"));
     }
 
     if (mbmi->comp_group_idx == 0) {
@@ -2892,9 +3266,9 @@
       // compound_diffwtd, wedge
       if (is_interinter_compound_used(COMPOUND_WEDGE, bsize)) {
         mbmi->interinter_comp.type =
-            COMPOUND_WEDGE + aom_read_symbol(r,
-                                             ec_ctx->compound_type_cdf[bsize],
-                                             MASKED_COMPOUND_TYPES, ACCT_STR);
+            COMPOUND_WEDGE +
+            aom_read_symbol(r, ec_ctx->compound_type_cdf[bsize],
+                            MASKED_COMPOUND_TYPES, ACCT_INFO("comp_type"));
       } else {
         mbmi->interinter_comp.type = COMPOUND_DIFFWTD;
       }
@@ -2905,17 +3279,41 @@
         mbmi->interinter_comp.wedge_index = read_wedge_mode(r, ec_ctx, bsize);
         assert(mbmi->interinter_comp.wedge_index != -1);
 #else
-        mbmi->interinter_comp.wedge_index = (int8_t)aom_read_symbol(
-            r, ec_ctx->wedge_idx_cdf[bsize], MAX_WEDGE_TYPES, ACCT_STR);
+        mbmi->interinter_comp.wedge_index =
+            (int8_t)aom_read_symbol(r, ec_ctx->wedge_idx_cdf[bsize],
+                                    MAX_WEDGE_TYPES, ACCT_INFO("wedge_index"));
 #endif  // CONFIG_WEDGE_MOD_EXT
-        mbmi->interinter_comp.wedge_sign = (int8_t)aom_read_bit(r, ACCT_STR);
+        mbmi->interinter_comp.wedge_sign =
+            (int8_t)aom_read_bit(r, ACCT_INFO("wedge_sign"));
       } else {
         assert(mbmi->interinter_comp.type == COMPOUND_DIFFWTD);
         mbmi->interinter_comp.mask_type =
-            aom_read_literal(r, MAX_DIFFWTD_MASK_BITS, ACCT_STR);
+            aom_read_literal(r, MAX_DIFFWTD_MASK_BITS, ACCT_INFO("mask_type"));
       }
     }
   }
+#if CONFIG_CWP && CONFIG_SKIP_MODE_ENHANCEMENT
+  mbmi->cwp_idx = CWP_EQUAL;
+  if (cm->features.enable_cwp) {
+    if (is_cwp_allowed(mbmi) && !mbmi->skip_mode)
+      mbmi->cwp_idx = read_cwp_idx(xd, r, cm, mbmi);
+    if (is_cwp_allowed(mbmi) && mbmi->skip_mode)
+      mbmi->cwp_idx =
+#if CONFIG_SEP_COMP_DRL
+          xd->skip_mvp_candidate_list.ref_mv_stack[mbmi->ref_mv_idx[0]].cwp_idx;
+#else
+          xd->skip_mvp_candidate_list.ref_mv_stack[mbmi->ref_mv_idx].cwp_idx;
+#endif  // CONFIG_SEP_COMP_DRL
+  }
+#if CONFIG_REFINEMV
+  if (mbmi->skip_mode) {
+    mbmi->refinemv_flag =
+        (mbmi->cwp_idx == CWP_EQUAL && is_refinemv_allowed_skip_mode(cm, mbmi))
+            ? 1
+            : 0;
+  }
+#endif  // CONFIG_REFINEMV
+#endif  // CONFIG_CWP && CONFIG_SKIP_MODE_ENHANCEMENT
 
   read_mb_interp_filter(xd, features->interp_filter, cm, mbmi, r);
 
@@ -2946,7 +3344,12 @@
   }
 
   if (mbmi->motion_mode == WARP_EXTEND) {
+#if CONFIG_SEP_COMP_DRL
+    CANDIDATE_MV *neighbor =
+        &xd->ref_mv_stack[ref_frame][get_ref_mv_idx(mbmi, 0)];
+#else
     CANDIDATE_MV *neighbor = &xd->ref_mv_stack[ref_frame][mbmi->ref_mv_idx];
+#endif
     POSITION base_pos = { 0, 0 };
     if (!get_extend_base_pos(cm, xd, mbmi, neighbor->row_offset,
                              neighbor->col_offset, &base_pos)) {
@@ -3006,14 +3409,14 @@
 
   if (xd->tree_type != LUMA_PART) xd->cfl.store_y = store_cfl_required(cm, xd);
 
-#if CONFIG_REF_MV_BANK && !CONFIG_BVP_IMPROVEMENT
+#if CONFIG_REF_MV_BANK && !CONFIG_IBC_BV_IMPROVEMENT
 #if CONFIG_IBC_SR_EXT
   if (cm->seq_params.enable_refmvbank && !is_intrabc_block(mbmi, xd->tree_type))
 #else
   if (cm->seq_params.enable_refmvbank)
 #endif  // CONFIG_IBC_SR_EXT
     av1_update_ref_mv_bank(cm, xd, mbmi);
-#endif  // CONFIG_REF_MV_BANK && !CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_REF_MV_BANK && !CONFIG_IBC_BV_IMPROVEMENT
 
 #if DEC_MISMATCH_DEBUG
   dec_dump_logs(cm, mi, mi_row, mi_col, mode_ctx);
@@ -3045,10 +3448,70 @@
   mbmi->bawp_flag = 0;
 #endif
 
+#if CONFIG_REFINEMV
+  mbmi->refinemv_flag = 0;
+#endif  // CONFIG_REFINEMV
+
   mbmi->segment_id = read_inter_segment_id(cm, xd, 1, r);
 
   mbmi->skip_mode = read_skip_mode(cm, xd, mbmi->segment_id, r);
 
+  mbmi->fsc_mode[PLANE_TYPE_Y] = 0;
+  mbmi->fsc_mode[PLANE_TYPE_UV] = 0;
+
+#if CONFIG_CWP
+  mbmi->cwp_idx = CWP_EQUAL;
+#endif  // CONFIG_CWP
+
+#if CONFIG_WARP_REF_LIST
+  mbmi->warp_ref_idx = 0;
+  mbmi->max_num_warp_candidates = 0;
+#if CONFIG_CWG_D067_IMPROVED_WARP
+  mbmi->warpmv_with_mvd_flag = 0;
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+#endif  // CONFIG_WARP_REF_LIST
+#if CONFIG_NEW_CONTEXT_MODELING
+  mbmi->use_intrabc[0] = 0;
+  mbmi->use_intrabc[1] = 0;
+#endif  // CONFIG_NEW_CONTEXT_MODELING
+
+#if CONFIG_SKIP_TXFM_OPT
+  if (!mbmi->skip_mode) {
+    inter_block = read_is_inter_block(cm, xd, mbmi->segment_id, r);
+  }
+
+#if CONFIG_IBC_SR_EXT
+  if (!inter_block && av1_allow_intrabc(cm) && xd->tree_type != CHROMA_PART) {
+#if CONFIG_NEW_CONTEXT_MODELING
+    mbmi->use_intrabc[0] = 0;
+    mbmi->use_intrabc[1] = 0;
+    const int intrabc_ctx = get_intrabc_ctx(xd);
+    mbmi->use_intrabc[xd->tree_type == CHROMA_PART] =
+        aom_read_symbol(r, xd->tile_ctx->intrabc_cdf[intrabc_ctx], 2,
+                        ACCT_INFO("use_intrabc", "chroma"));
+#else
+    mbmi->use_intrabc[xd->tree_type == CHROMA_PART] = aom_read_symbol(
+        r, ec_ctx->intrabc_cdf, 2, ACCT_INFO("use_intrabc", "chroma"));
+#endif  // CONFIG_NEW_CONTEXT_MODELING
+  }
+#endif  // CONFIG_IBC_SR_EXT
+
+  if (inter_block
+#if CONFIG_IBC_SR_EXT
+      || (!inter_block && is_intrabc_block(mbmi, xd->tree_type))
+#endif  // CONFIG_IBC_SR_EXT
+  ) {
+#if !CONFIG_SKIP_MODE_ENHANCEMENT
+    if (mbmi->skip_mode)
+      mbmi->skip_txfm[xd->tree_type == CHROMA_PART] = 1;
+    else
+#endif  // !CONFIG_SKIP_MODE_ENHANCEMENT
+      mbmi->skip_txfm[xd->tree_type == CHROMA_PART] =
+          read_skip_txfm(cm, xd, mbmi->segment_id, r);
+  } else {
+    mbmi->skip_txfm[xd->tree_type == CHROMA_PART] = 0;
+  }
+#else
 #if !CONFIG_SKIP_MODE_ENHANCEMENT
   if (mbmi->skip_mode)
     mbmi->skip_txfm[xd->tree_type == CHROMA_PART] = 1;
@@ -3056,17 +3519,8 @@
 #endif  // !CONFIG_SKIP_MODE_ENHANCEMENT
     mbmi->skip_txfm[xd->tree_type == CHROMA_PART] =
         read_skip_txfm(cm, xd, mbmi->segment_id, r);
+#endif  // CONFIG_SKIP_TXFM_OPT
 
-  mbmi->fsc_mode[PLANE_TYPE_Y] = 0;
-  mbmi->fsc_mode[PLANE_TYPE_UV] = 0;
-#if CONFIG_WARP_REF_LIST
-  mbmi->warp_ref_idx = 0;
-  mbmi->max_num_warp_candidates = 0;
-#endif  // CONFIG_WARP_REF_LIST
-#if CONFIG_NEW_CONTEXT_MODELING
-  mbmi->use_intrabc[0] = 0;
-  mbmi->use_intrabc[1] = 0;
-#endif  // CONFIG_NEW_CONTEXT_MODELING
   if (!cm->seg.segid_preskip)
     mbmi->segment_id = read_inter_segment_id(cm, xd, 0, r);
 
@@ -3078,6 +3532,7 @@
 
   read_delta_q_params(cm, xd, r);
 
+#if !CONFIG_SKIP_TXFM_OPT
   if (!mbmi->skip_mode)
     inter_block =
         read_is_inter_block(cm, xd, mbmi->segment_id, r
@@ -3086,6 +3541,7 @@
                             mbmi->skip_txfm[xd->tree_type == CHROMA_PART]
 #endif  // CONFIG_CONTEXT_DERIVATION
         );
+#endif  // !CONFIG_SKIP_TXFM_OPT
 
   mbmi->current_qindex = xd->current_base_qindex;
 
@@ -3100,6 +3556,17 @@
     mbmi->ref_frame[1] = NONE_FRAME;
     mbmi->palette_mode_info.palette_size[0] = 0;
     mbmi->palette_mode_info.palette_size[1] = 0;
+#if CONFIG_NEW_CONTEXT_MODELING
+    mbmi->use_intrabc[0] = 0;
+    mbmi->use_intrabc[1] = 0;
+    const int intrabc_ctx = get_intrabc_ctx(xd);
+    mbmi->use_intrabc[xd->tree_type == CHROMA_PART] =
+        aom_read_symbol(r, xd->tile_ctx->intrabc_cdf[intrabc_ctx], 2,
+                        ACCT_INFO("use_intrabc", "chroma"));
+#else
+    mbmi->use_intrabc[xd->tree_type == CHROMA_PART] = aom_read_symbol(
+        r, xd->tile_ctx->intrabc_cdf, 2, ACCT_INFO("use_intrabc", "chroma"));
+#endif  // CONFIG_NEW_CONTEXT_MODELING
     read_intrabc_info(cm, dcb, r);
     if (is_intrabc_block(mbmi, xd->tree_type)) return;
   }
@@ -3160,31 +3627,33 @@
   MACROBLOCKD *const xd = &dcb->xd;
   MB_MODE_INFO *const mi = xd->mi[0];
   mi->use_intrabc[xd->tree_type == CHROMA_PART] = 0;
-
+#if CONFIG_CWG_D067_IMPROVED_WARP
+  mi->warpmv_with_mvd_flag = 0;
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
   if (xd->tree_type == SHARED_PART)
     mi->sb_type[PLANE_TYPE_UV] = mi->sb_type[PLANE_TYPE_Y];
 
   if (frame_is_intra_only(cm)) {
     read_intra_frame_mode_info(cm, dcb, r);
-#if CONFIG_BVP_IMPROVEMENT && CONFIG_REF_MV_BANK
+#if CONFIG_IBC_BV_IMPROVEMENT && CONFIG_REF_MV_BANK
     if (cm->seq_params.enable_refmvbank) {
       MB_MODE_INFO *const mbmi = xd->mi[0];
       if (is_intrabc_block(mbmi, xd->tree_type))
         av1_update_ref_mv_bank(cm, xd, mbmi);
     }
-#endif  // CONFIG_BVP_IMPROVEMENT && CONFIG_REF_MV_BANK
+#endif  // CONFIG_IBC_BV_IMPROVEMENT && CONFIG_REF_MV_BANK
     if (cm->seq_params.order_hint_info.enable_ref_frame_mvs)
       intra_copy_frame_mvs(cm, xd->mi_row, xd->mi_col, x_inside_boundary,
                            y_inside_boundary);
   } else {
     read_inter_frame_mode_info(pbi, dcb, r);
-#if CONFIG_BVP_IMPROVEMENT && CONFIG_REF_MV_BANK
+#if CONFIG_IBC_BV_IMPROVEMENT && CONFIG_REF_MV_BANK
     if (cm->seq_params.enable_refmvbank) {
       MB_MODE_INFO *const mbmi = xd->mi[0];
       if (is_inter_block(mbmi, xd->tree_type))
         av1_update_ref_mv_bank(cm, xd, mbmi);
     }
-#endif  // CONFIG_BVP_IMPROVEMENT && CONFIG_REF_MV_BANK
+#endif  // CONFIG_IBC_BV_IMPROVEMENT && CONFIG_REF_MV_BANK
 
 #if CONFIG_WARP_REF_LIST
     MB_MODE_INFO *const mbmi_tmp = xd->mi[0];
diff --git a/av1/decoder/decodemv.h b/av1/decoder/decodemv.h
index ac2e813..208f5c5 100644
--- a/av1/decoder/decodemv.h
+++ b/av1/decoder/decodemv.h
@@ -34,7 +34,12 @@
                           uint16_t *eob, aom_reader *r);
 
 void av1_read_tx_type(const AV1_COMMON *const cm, MACROBLOCKD *xd, int blk_row,
-                      int blk_col, TX_SIZE tx_size, aom_reader *r);
+                      int blk_col, TX_SIZE tx_size, aom_reader *r
+#if CONFIG_ATC_DCTX_ALIGNED
+                      ,
+                      const int plane, const int eob, const int dc_skip
+#endif  // CONFIG_ATC_DCTX_ALIGNED
+);
 
 #if CONFIG_CROSS_CHROMA_TX
 void av1_read_cctx_type(const AV1_COMMON *const cm, MACROBLOCKD *xd,
diff --git a/av1/decoder/decoder.c b/av1/decoder/decoder.c
index 56b05f5..4bb23aa 100644
--- a/av1/decoder/decoder.c
+++ b/av1/decoder/decoder.c
@@ -486,8 +486,37 @@
       update_subgop_stats(cm, &pbi->subgop_stats, cm->cur_frame->order_hint,
                           pbi->enable_subgop_stats);
     }
-
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+    if (cm->seq_params.order_hint_info.enable_order_hint &&
+        cm->seq_params.enable_frame_output_order && cm->show_frame &&
+        !cm->show_existing_frame) {
+      // Refresh the reference slots of output frames in the output queue.
+      if (pbi->num_output_frames > 0) {
+        decrease_ref_count(pbi->output_frames[0], pool);
+      }
+      // Add the currently decoded frame into the output queue.
+      pbi->output_frames[0] = cm->cur_frame;
+      pbi->num_output_frames = 1;
+      // Queue the showable frames that directly follow in display order; stop
+      // before overflowing output_frames[] and queue each buffer only once.
+      for (int k = 1, found = 1; k < REF_FRAMES && found; k++) {
+        unsigned int next_disp_order = cm->cur_frame->display_order_hint + k;
+        found = 0;
+        for (int i = 0; i < REF_FRAMES && !found; i++) {
+          RefCntBuffer *const buf = cm->ref_frame_map[i];  // guard: may be NULL
+          if (buf != NULL && buf->showable_frame == 1 &&
+              buf->display_order_hint == next_disp_order) {
+            pbi->output_frames[pbi->num_output_frames++] = buf;
+            found = 1;
+          }
+        }
+      }
+    } else if ((!cm->seq_params.order_hint_info.enable_order_hint ||
+                !cm->seq_params.enable_frame_output_order) &&
+               (cm->show_existing_frame || cm->show_frame)) {
+#else   // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
     if (cm->show_existing_frame || cm->show_frame) {
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
       if (pbi->output_all_layers) {
         // Append this frame to the output queue
         if (pbi->num_output_frames >= MAX_NUM_SPATIAL_LAYERS) {
@@ -645,7 +674,15 @@
 // TODO(rachelbarker): What should this do?
 int av1_get_frame_to_show(AV1Decoder *pbi, YV12_BUFFER_CONFIG *frame) {
   if (pbi->num_output_frames == 0) return -1;
-
-  *frame = pbi->output_frames[pbi->num_output_frames - 1]->buf;
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+  const size_t out_frame_idx =
+      (pbi->common.seq_params.order_hint_info.enable_order_hint &&
+       pbi->common.seq_params.enable_frame_output_order)
+          ? 0                            // ordered output: first queue entry
+          : pbi->num_output_frames - 1;  // otherwise: last queued frame
+#else   // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+  const size_t out_frame_idx = pbi->num_output_frames - 1;
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+  *frame = pbi->output_frames[out_frame_idx]->buf;
   return 0;
 }
diff --git a/av1/decoder/decoder.h b/av1/decoder/decoder.h
index b4f734b..11dc3f7 100644
--- a/av1/decoder/decoder.h
+++ b/av1/decoder/decoder.h
@@ -62,6 +62,13 @@
    * with appropriate offset for the current superblock, for each plane.
    */
   tran_low_t *dqcoeff_block[MAX_MB_PLANE];
+#if CONFIG_INSPECTION
+  // dqcoeff_block gets clobbered before the inspect callback happens, so keep a
+  // copy here.
+  tran_low_t *dqcoeff_block_copy[MAX_MB_PLANE];
+  tran_low_t *qcoeff_block[MAX_MB_PLANE];
+  tran_low_t *dequant_values[MAX_MB_PLANE];
+#endif
   /*!
    * cb_offset[p] is the offset into the dqcoeff_block[p] for the current coding
    * block, for each plane 'p'.
@@ -76,6 +83,13 @@
    * with appropriate offset for the current superblock, for each plane.
    */
   eob_info *eob_data[MAX_MB_PLANE];
+#if CONFIG_ATC_DCTX_ALIGNED
+  /*!
+   * Pointer to 'bob_data' inside 'td->cb_buffer_base' or 'pbi->cb_buffer_base'
+   * with appropriate offset for the current superblock, for each plane.
+   */
+  eob_info *bob_data[MAX_MB_PLANE];
+#endif  // CONFIG_ATC_DCTX_ALIGNED
   /*!
    * txb_offset[p] is the offset into the eob_data[p] for the current coding
    * block, for each plane 'p'.
@@ -273,7 +287,11 @@
   // Note: The saved buffers are released at the start of the next time the
   // application calls aom_codec_decode().
   int output_all_layers;
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+  RefCntBuffer *output_frames[REF_FRAMES];  // Use only for single layer
+#else                        // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
   RefCntBuffer *output_frames[MAX_NUM_SPATIAL_LAYERS];
+#endif                       // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
   size_t num_output_frames;  // How many frames are queued up so far?
 
   // In order to properly support random-access decoding, we need
@@ -296,7 +314,10 @@
   int sequence_header_ready;
   int sequence_header_changed;
 #if CONFIG_INSPECTION
+  // Inspection callback at the end of each frame.
   aom_inspect_cb inspect_cb;
+  // Inspection callback at the end of each superblock.
+  aom_inspect_cb inspect_sb_cb;
   void *inspect_ctx;
 #endif
   int operating_point;
@@ -408,16 +429,15 @@
   }
 }
 
-#define ACCT_STR __func__
 static INLINE int av1_read_uniform(aom_reader *r, int n) {
   const int l = get_unsigned_bits(n);
   const int m = (1 << l) - n;
-  const int v = aom_read_literal(r, l - 1, ACCT_STR);
+  const int v = aom_read_literal(r, l - 1, ACCT_INFO("v"));  // l - 1 bits
   assert(l != 0);
   if (v < m)
     return v;
   else
-    return (v << 1) - m + aom_read_literal(r, 1, ACCT_STR);
+    return (v << 1) - m + aom_read_literal(r, 1, ACCT_INFO());  // + 1 more bit
 }
 
 typedef void (*palette_visitor_fn_t)(MACROBLOCKD *const xd, int plane,
diff --git a/av1/decoder/decodetxb.c b/av1/decoder/decodetxb.c
index 91046a3..cb43098 100644
--- a/av1/decoder/decodetxb.c
+++ b/av1/decoder/decodetxb.c
@@ -20,24 +20,22 @@
 #include "av1/common/reconintra.h"
 #include "av1/decoder/decodemv.h"
 
-#define ACCT_STR __func__
-
 static int read_golomb(MACROBLOCKD *xd, aom_reader *r) {
   int x = 1;
   int length = 0;
 
 #if CONFIG_BYPASS_IMPROVEMENT
-  length = aom_read_unary(r, 21, ACCT_STR);
+  length = aom_read_unary(r, 21, ACCT_INFO("length"));
   if (length > 20) {
     aom_internal_error(xd->error_info, AOM_CODEC_CORRUPT_FRAME,
                        "Invalid length in read_golomb");
   }
   x = 1 << length;
-  x += aom_read_literal(r, length, ACCT_STR);
+  x += aom_read_literal(r, length, ACCT_INFO());
 #else
   int i = 0;
   while (!i) {
-    i = aom_read_bit(r, ACCT_STR);
+    i = aom_read_bit(r, ACCT_INFO());
     ++length;
     if (length > 20) {
       aom_internal_error(xd->error_info, AOM_CODEC_CORRUPT_FRAME,
@@ -47,7 +45,7 @@
   }
   for (i = 0; i < length - 1; ++i) {
     x <<= 1;
-    x += aom_read_bit(r, ACCT_STR);
+    x += aom_read_bit(r, ACCT_INFO());
   }
 #endif  // CONFIG_BYPASS_IMPROVEMENT
 
@@ -73,42 +71,45 @@
 
 static INLINE void read_coeffs_reverse_2d(
     aom_reader *r,
-#if !CONFIG_ATC_COEFCODING
+#if !CONFIG_ATC
     TX_SIZE tx_size,
-#endif  // !CONFIG_ATC_COEFCODING
+#endif  // !CONFIG_ATC
     int start_si, int end_si, const int16_t *scan, int bwl, uint8_t *levels,
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
     base_lf_cdf_arr base_lf_cdf, br_cdf_arr br_lf_cdf, int plane,
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
     base_cdf_arr base_cdf, br_cdf_arr br_cdf) {
   for (int c = end_si; c >= start_si; --c) {
     const int pos = scan[c];
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
     int level = 0;
     const int row = pos >> bwl;
     const int col = pos - (row << bwl);
     int limits = get_lf_limits(row, col, 0, plane);
     if (limits) {
       const int coeff_ctx = get_lower_levels_ctx_lf_2d(levels, pos, bwl);
-      level +=
-          aom_read_symbol(r, base_lf_cdf[coeff_ctx], LF_BASE_SYMBOLS, ACCT_STR);
+      level += aom_read_symbol(r, base_lf_cdf[coeff_ctx], LF_BASE_SYMBOLS,
+                               ACCT_INFO("level", "base_lf_cdf"));
       if (level > LF_NUM_BASE_LEVELS) {
         const int br_ctx = get_br_lf_ctx_2d(levels, pos, bwl);
         aom_cdf_prob *cdf = br_lf_cdf[br_ctx];
         for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) {
-          const int k = aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_STR);
+          const int k =
+              aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_INFO("k", "br_lf_cdf"));
           level += k;
           if (k < BR_CDF_SIZE - 1) break;
         }
       }
     } else {
       const int coeff_ctx = get_lower_levels_ctx_2d(levels, pos, bwl);
-      level += aom_read_symbol(r, base_cdf[coeff_ctx], 4, ACCT_STR);
+      level += aom_read_symbol(r, base_cdf[coeff_ctx], 4,
+                               ACCT_INFO("level", "base_cdf"));
       if (level > NUM_BASE_LEVELS) {
         const int br_ctx = get_br_ctx_2d(levels, pos, bwl);
         aom_cdf_prob *cdf = br_cdf[br_ctx];
         for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) {
-          const int k = aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_STR);
+          const int k =
+              aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_INFO("k", "br_cdf"));
           level += k;
           if (k < BR_CDF_SIZE - 1) break;
         }
@@ -117,62 +118,67 @@
 #else
     const int coeff_ctx = get_lower_levels_ctx_2d(levels, pos, bwl, tx_size);
     const int nsymbs = 4;
-    int level = aom_read_symbol(r, base_cdf[coeff_ctx], nsymbs, ACCT_STR);
+    int level =
+        aom_read_symbol(r, base_cdf[coeff_ctx], nsymbs, ACCT_INFO("level"));
     if (level > NUM_BASE_LEVELS) {
       const int br_ctx = get_br_ctx_2d(levels, pos, bwl);
       aom_cdf_prob *cdf = br_cdf[br_ctx];
       for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) {
-        const int k = aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_STR);
+        const int k =
+            aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_INFO("k", "br_cdf"));
         level += k;
         if (k < BR_CDF_SIZE - 1) break;
       }
     }
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
     levels[get_padded_idx(pos, bwl)] = level;
   }
 }
 
 static INLINE void read_coeffs_reverse(aom_reader *r,
-#if !CONFIG_ATC_COEFCODING
+#if !CONFIG_ATC
                                        TX_SIZE tx_size,
-#endif  // !CONFIG_ATC_COEFCODING
+#endif  // !CONFIG_ATC
                                        TX_CLASS tx_class, int start_si,
                                        int end_si, const int16_t *scan, int bwl,
                                        uint8_t *levels,
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
                                        base_lf_cdf_arr base_lf_cdf,
                                        br_cdf_arr br_lf_cdf, int plane,
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
                                        base_cdf_arr base_cdf,
                                        br_cdf_arr br_cdf) {
   for (int c = end_si; c >= start_si; --c) {
     const int pos = scan[c];
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
     int level = 0;
     const int row = pos >> bwl;
     const int col = pos - (row << bwl);
     int limits = get_lf_limits(row, col, tx_class, plane);
     if (limits) {
       const int coeff_ctx = get_lower_levels_lf_ctx(levels, pos, bwl, tx_class);
-      level +=
-          aom_read_symbol(r, base_lf_cdf[coeff_ctx], LF_BASE_SYMBOLS, ACCT_STR);
+      level += aom_read_symbol(r, base_lf_cdf[coeff_ctx], LF_BASE_SYMBOLS,
+                               ACCT_INFO("level", "base_lf_cdf"));
       if (level > LF_NUM_BASE_LEVELS) {
         const int br_ctx = get_br_lf_ctx(levels, pos, bwl, tx_class);
         aom_cdf_prob *cdf = br_lf_cdf[br_ctx];
         for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) {
-          const int k = aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_STR);
+          const int k =
+              aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_INFO("k", "br_lf_cdf"));
           level += k;
           if (k < BR_CDF_SIZE - 1) break;
         }
       }
     } else {
       const int coeff_ctx = get_lower_levels_ctx(levels, pos, bwl, tx_class);
-      level += aom_read_symbol(r, base_cdf[coeff_ctx], 4, ACCT_STR);
+      level += aom_read_symbol(r, base_cdf[coeff_ctx], 4,
+                               ACCT_INFO("level", "base_cdf"));
       if (level > NUM_BASE_LEVELS) {
         const int br_ctx = get_br_ctx(levels, pos, bwl, tx_class);
         aom_cdf_prob *cdf = br_cdf[br_ctx];
         for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) {
-          const int k = aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_STR);
+          const int k =
+              aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_INFO("k", "br_cdf"));
           level += k;
           if (k < BR_CDF_SIZE - 1) break;
         }
@@ -182,17 +188,19 @@
     const int coeff_ctx =
         get_lower_levels_ctx(levels, pos, bwl, tx_size, tx_class);
     const int nsymbs = 4;
-    int level = aom_read_symbol(r, base_cdf[coeff_ctx], nsymbs, ACCT_STR);
+    int level = aom_read_symbol(r, base_cdf[coeff_ctx], nsymbs,
+                                ACCT_INFO("level", "base_cdf"));
     if (level > NUM_BASE_LEVELS) {
       const int br_ctx = get_br_ctx(levels, pos, bwl, tx_class);
       aom_cdf_prob *cdf = br_cdf[br_ctx];
       for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) {
-        const int k = aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_STR);
+        const int k =
+            aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_INFO("k", "br_cdf"));
         level += k;
         if (k < BR_CDF_SIZE - 1) break;
       }
     }
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
     levels[get_padded_idx(pos, bwl)] = level;
   }
 }
@@ -206,12 +214,14 @@
     const int pos = scan[c];
     const int coeff_ctx = get_upper_levels_ctx_2d(levels, pos, bwl);
     const int nsymbs = 4;
-    int level = aom_read_symbol(r, base_cdf[coeff_ctx], nsymbs, ACCT_STR);
+    int level = aom_read_symbol(r, base_cdf[coeff_ctx], nsymbs,
+                                ACCT_INFO("level", "base_cdf"));
     if (level > NUM_BASE_LEVELS) {
       const int br_ctx = get_br_ctx_skip(levels, pos, bwl);
       aom_cdf_prob *cdf = br_cdf[br_ctx];
       for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) {
-        const int k = aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_STR);
+        const int k =
+            aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_INFO("k", "br_cdf"));
         level += k;
         if (k < BR_CDF_SIZE - 1) break;
       }
@@ -220,6 +230,98 @@
   }
 }
 
+#if CONFIG_ATC_DCTX_ALIGNED
+// Decode the end-of-block syntax and store the EOB in eob_data / bob_data.
+static INLINE void decode_eob(DecoderCodingBlock *dcb, aom_reader *const r,
+                              const int plane, const TX_SIZE tx_size) {
+  MACROBLOCKD *const xd = &dcb->xd;
+  const PLANE_TYPE plane_type = get_plane_type(plane);
+  FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
+  const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
+  eob_info *eob_data = dcb->eob_data[plane] + dcb->txb_offset[plane];
+  uint16_t *const eob = &(eob_data->eob);
+  eob_info *bob_data = dcb->bob_data[plane] + dcb->txb_offset[plane];
+  uint16_t *const bob = &(bob_data->eob);
+
+  int eob_extra = 0;  // accumulated from the extra bits read below
+  int eob_pt = 1;
+  const int eob_multi_size = txsize_log2_minus4[tx_size];  // selects the CDF
+  switch (eob_multi_size) {
+    case 0:
+      eob_pt = aom_read_symbol(r, ec_ctx->eob_flag_cdf16[plane_type],
+                               EOB_MAX_SYMS - 6,
+                               ACCT_INFO("eob_pt", "eob_multi_size:0")) +
+               1;
+      break;
+    case 1:
+      eob_pt = aom_read_symbol(r, ec_ctx->eob_flag_cdf32[plane_type],
+                               EOB_MAX_SYMS - 5,
+                               ACCT_INFO("eob_pt", "eob_multi_size:1")) +
+               1;
+      break;
+    case 2:
+      eob_pt = aom_read_symbol(r, ec_ctx->eob_flag_cdf64[plane_type],
+                               EOB_MAX_SYMS - 4,
+                               ACCT_INFO("eob_pt", "eob_multi_size:2")) +
+               1;
+      break;
+    case 3:
+      eob_pt = aom_read_symbol(r, ec_ctx->eob_flag_cdf128[plane_type],
+                               EOB_MAX_SYMS - 3,
+                               ACCT_INFO("eob_pt", "eob_multi_size:3")) +
+               1;
+      break;
+    case 4:
+      eob_pt = aom_read_symbol(r, ec_ctx->eob_flag_cdf256[plane_type],
+                               EOB_MAX_SYMS - 2,
+                               ACCT_INFO("eob_pt", "eob_multi_size:4")) +
+               1;
+      break;
+    case 5:
+      eob_pt = aom_read_symbol(r, ec_ctx->eob_flag_cdf512[plane_type],
+                               EOB_MAX_SYMS - 1,
+                               ACCT_INFO("eob_pt", "eob_multi_size:5")) +
+               1;
+      break;
+    case 6:
+    default:
+      eob_pt =
+          aom_read_symbol(r, ec_ctx->eob_flag_cdf1024[plane_type], EOB_MAX_SYMS,
+                          ACCT_INFO("eob_pt", "eob_multi_size:6")) +
+          1;
+      break;
+  }
+  const int eob_offset_bits = av1_eob_offset_bits[eob_pt];
+  if (eob_offset_bits > 0) {  // this eob_pt carries extra offset bits
+    const int eob_ctx = eob_pt - 3;
+    int bit =
+        aom_read_symbol(r, ec_ctx->eob_extra_cdf[txs_ctx][plane_type][eob_ctx],
+                        2, ACCT_INFO("eob_extra_cdf"));
+    if (bit) {  // the context-coded bit is the MSB of eob_extra
+      eob_extra += (1 << (eob_offset_bits - 1));
+    }
+#if CONFIG_BYPASS_IMPROVEMENT
+    eob_extra +=
+        aom_read_literal(r, eob_offset_bits - 1, ACCT_INFO("eob_extra"));
+#else
+    for (int i = 1; i < eob_offset_bits; i++) {  // remaining bits, MSB first
+      bit = aom_read_bit(r, ACCT_INFO("eob_offset_bits"));
+      if (bit) {
+        eob_extra += (1 << (eob_offset_bits - 1 - i));
+      }
+    }
+#endif  // CONFIG_BYPASS_IMPROVEMENT
+  }
+  *eob = rec_eob_pos(eob_pt, eob_extra);
+  *bob = *eob;  // escape character
+#if CONFIG_CONTEXT_DERIVATION
+  if (plane == AOM_PLANE_U) {
+    xd->eob_u = *eob;
+  }
+#endif  // CONFIG_CONTEXT_DERIVATION
+}
+#endif  // CONFIG_ATC_DCTX_ALIGNED
+
 uint8_t av1_read_sig_txtype(const AV1_COMMON *const cm, DecoderCodingBlock *dcb,
                             aom_reader *const r, const int blk_row,
                             const int blk_col, const int plane,
@@ -229,6 +331,10 @@
   FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
   const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
 
+#if CONFIG_ATC_DCTX_ALIGNED
+  const int is_inter = is_inter_block(xd->mi[0], xd->tree_type);
+#endif  // CONFIG_ATC_DCTX_ALIGNED
+
   eob_info *eob_data = dcb->eob_data[plane] + dcb->txb_offset[plane];
   uint16_t *const eob = &(eob_data->eob);
   uint16_t *const max_scan_line = &(eob_data->max_scan_line);
@@ -250,15 +356,16 @@
   int all_zero;
   if (plane == AOM_PLANE_Y || plane == AOM_PLANE_U) {
     all_zero = aom_read_symbol(r, ec_ctx->txb_skip_cdf[txs_ctx][txb_skip_ctx],
-                               2, ACCT_STR);
+                               2, ACCT_INFO("all_zero", "plane_y_or_u"));
   } else {
     txb_skip_ctx += (xd->eob_u_flag ? V_TXB_SKIP_CONTEXT_OFFSET : 0);
-    all_zero =
-        aom_read_symbol(r, ec_ctx->v_txb_skip_cdf[txb_skip_ctx], 2, ACCT_STR);
+    all_zero = aom_read_symbol(r, ec_ctx->v_txb_skip_cdf[txb_skip_ctx], 2,
+                               ACCT_INFO("all_zero", "plane_v"));
   }
 #else
-  const int all_zero = aom_read_symbol(
-      r, ec_ctx->txb_skip_cdf[txs_ctx][txb_ctx->txb_skip_ctx], 2, ACCT_STR);
+  const int all_zero =
+      aom_read_symbol(r, ec_ctx->txb_skip_cdf[txs_ctx][txb_ctx->txb_skip_ctx],
+                      2, ACCT_INFO("all_zero"));
 #endif  // CONFIG_CONTEXT_DERIVATION
 
 #if CONFIG_INSPECTION
@@ -276,7 +383,7 @@
   }
 #endif  // CONFIG_CONTEXT_DERIVATION
 
-#if CONFIG_CROSS_CHROMA_TX
+#if CONFIG_CROSS_CHROMA_TX && !CONFIG_ATC_DCTX_ALIGNED
   if (plane == AOM_PLANE_U && is_cctx_allowed(cm, xd)) {
     if (!all_zero) {
       av1_read_cctx_type(cm, xd, blk_row, blk_col, tx_size, r);
@@ -291,7 +398,7 @@
                         CCTX_NONE);
     }
   }
-#endif  // CONFIG_CROSS_CHROMA_TX
+#endif  // CONFIG_CROSS_CHROMA_TX && !CONFIG_ATC_DCTX_ALIGNED
 
   if (all_zero) {
     *max_scan_line = 0;
@@ -300,9 +407,33 @@
     }
     return 0;
   }
+#if CONFIG_ATC_DCTX_ALIGNED
+  decode_eob(dcb, r, plane, tx_size);
+  av1_read_tx_type(cm, xd, blk_row, blk_col, tx_size, r, plane, *eob,
+                   is_inter ? 0 : *eob);
+
+#if CONFIG_CROSS_CHROMA_TX
+  if (plane == AOM_PLANE_U && is_cctx_allowed(cm, xd)) {
+    const int skip_cctx = is_inter ? 0 : (*eob == 1);
+    if (!all_zero && !skip_cctx) {
+      av1_read_cctx_type(cm, xd, blk_row, blk_col, tx_size, r);
+    } else {
+      int row_offset, col_offset;
+#if CONFIG_EXT_RECUR_PARTITIONS
+      get_chroma_mi_offsets(xd, &row_offset, &col_offset);
+#else
+      get_chroma_mi_offsets(xd, tx_size, &row_offset, &col_offset);
+#endif  // CONFIG_EXT_RECUR_PARTITIONS
+      update_cctx_array(xd, blk_row, blk_col, row_offset, col_offset, tx_size,
+                        CCTX_NONE);
+    }
+  }
+#endif  // CONFIG_CROSS_CHROMA_TX
+#else
   if (plane == AOM_PLANE_Y) {  // only y plane's tx_type is transmitted
     av1_read_tx_type(cm, xd, blk_row, blk_col, tx_size, r);
   }
+#endif  // CONFIG_ATC_DCTX_ALIGNED
   return 1;
 }
 
@@ -324,6 +455,18 @@
   const int shift = av1_get_tx_scale(tx_size);
   const int bwl = get_txb_bwl(tx_size);
   const int width = get_txb_wide(tx_size);
+  const int height = get_txb_high(tx_size);
+#if CONFIG_INSPECTION
+  tran_low_t *const tcoeffs_copy =
+      dcb->dqcoeff_block_copy[plane] + dcb->cb_offset[plane];
+  tran_low_t *const quant_coeffs =
+      dcb->qcoeff_block[plane] + dcb->cb_offset[plane];
+  tran_low_t *const dequant_values =
+      dcb->dequant_values[plane] + dcb->cb_offset[plane];
+  memset(tcoeffs_copy, 0, sizeof(tran_low_t) * width * height);
+  memset(quant_coeffs, 0, sizeof(tran_low_t) * width * height);
+  memset(dequant_values, 0, sizeof(tran_low_t) * width * height);
+#endif  // CONFIG_INSPECTION
   int cul_level = 0;
   int dc_val = 0;
   uint8_t levels_buf[TX_PAD_2D];
@@ -333,12 +476,21 @@
   eob_info *eob_data = dcb->eob_data[plane] + dcb->txb_offset[plane];
   eob_data->max_scan_line = 0;
   eob_data->eob = av1_get_max_eob(tx_size);
+#if CONFIG_ATC_DCTX_ALIGNED
+  eob_info *bob_data = dcb->bob_data[plane] + dcb->txb_offset[plane];
+  bob_data->max_scan_line = 0;
+#endif  // CONFIG_ATC_DCTX_ALIGNED
 
   const TX_TYPE tx_type =
       av1_get_tx_type(xd, plane_type, blk_row, blk_col, tx_size,
                       cm->features.reduced_tx_set_used);
   const qm_val_t *iqmatrix =
       av1_get_iqmatrix(&cm->quant_params, xd, plane, tx_size, tx_type);
+#if CONFIG_INSPECTION
+  for (int c = 0; c < width * height; c++) {
+    dequant_values[c] = get_dqv(dequant, c, iqmatrix);
+  }
+#endif  // CONFIG_INSPECTION
   const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type);
   const int16_t *const scan = scan_order->scan;
 
@@ -347,8 +499,34 @@
     memset(signs_buf, 0, sizeof(*signs_buf) * TX_PAD_2D);
     base_cdf_arr base_cdf = ec_ctx->coeff_base_cdf_idtx;
     br_cdf_arr br_cdf = ec_ctx->coeff_br_cdf_idtx;
+#if CONFIG_ATC_DCTX_ALIGNED
+    const int bob = av1_get_max_eob(tx_size) - bob_data->eob;
+    {
+      const int pos = scan[bob];
+      const int coeff_ctx_bob = get_lower_levels_ctx_bob(bwl, height, bob);
+      const int nsymbs_bob = 3;
+      aom_cdf_prob *cdf_bob = ec_ctx->coeff_base_bob_cdf[coeff_ctx_bob];
+      int level = aom_read_symbol(r, cdf_bob, nsymbs_bob,
+                                  ACCT_INFO("level", "cdf_bob")) +
+                  1;
+      if (level > NUM_BASE_LEVELS) {
+        const int br_ctx = get_br_ctx_skip(levels, pos, bwl);
+        aom_cdf_prob *cdf = br_cdf[br_ctx];
+        for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) {
+          const int k =
+              aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_INFO("k", "br_cdf"));
+          level += k;
+          if (k < BR_CDF_SIZE - 1) break;
+        }
+      }
+      levels[get_padded_idx_left(pos, bwl)] = level;
+    }
+    read_coeffs_forward_2d(r, bob + 1, eob_data->eob - 1, scan, bwl, levels,
+                           base_cdf, br_cdf);
+#else
     read_coeffs_forward_2d(r, 0, eob_data->eob - 1, scan, bwl, levels, base_cdf,
                            br_cdf);
+#endif  // CONFIG_ATC_DCTX_ALIGNED
   }
 
   for (int c = eob_data->eob - 1; c >= 0; --c) {
@@ -358,8 +536,8 @@
     if (level) {
       eob_data->max_scan_line = AOMMAX(eob_data->max_scan_line, pos);
       int idtx_sign_ctx = get_sign_ctx_skip(signs, levels, pos, bwl);
-      sign =
-          aom_read_symbol(r, ec_ctx->idtx_sign_cdf[idtx_sign_ctx], 2, ACCT_STR);
+      sign = aom_read_symbol(r, ec_ctx->idtx_sign_cdf[idtx_sign_ctx], 2,
+                             ACCT_INFO("sign"));
       signs[get_padded_idx(pos, bwl)] = sign > 0 ? -1 : 1;
       if (level >= MAX_BASE_BR_RANGE) {
         level += read_golomb(xd, r);
@@ -381,6 +559,10 @@
         dq_coeff = -dq_coeff;
       }
       tcoeffs[pos] = clamp(dq_coeff, min_value, max_value);
+#if CONFIG_INSPECTION
+      tcoeffs_copy[pos] = tcoeffs[pos];
+      quant_coeffs[pos] = sign ? -level : level;
+#endif  // CONFIG_INSPECTION
     }
   }
   cul_level = AOMMIN(COEFF_CONTEXT_MASK, cul_level);
@@ -399,13 +581,14 @@
   int q_index;
   const int pos = scan[0];
   int ctx_idx = get_base_ctx_ph(levels, pos, bwl, tx_class);
-  q_index = aom_read_symbol(r, base_cdf_ph[ctx_idx], 4, ACCT_STR);
+  q_index = aom_read_symbol(r, base_cdf_ph[ctx_idx], 4, ACCT_INFO("q_index"));
 
   if (q_index > NUM_BASE_LEVELS) {
     ctx_idx = get_par_br_ctx(levels, pos, bwl, tx_class);
     aom_cdf_prob *cdf_br = br_cdf_ph[ctx_idx];
     for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) {
-      const int k = aom_read_symbol(r, cdf_br, BR_CDF_SIZE, ACCT_STR);
+      const int k =
+          aom_read_symbol(r, cdf_br, BR_CDF_SIZE, ACCT_INFO("k", "cdf_br"));
       q_index += k;
       if (k < BR_CDF_SIZE - 1) break;
     }
@@ -436,15 +619,32 @@
   const int bwl = get_txb_bwl(tx_size);
   const int width = get_txb_wide(tx_size);
   const int height = get_txb_high(tx_size);
+#if CONFIG_INSPECTION
+  tran_low_t *const tcoeffs_copy =
+      dcb->dqcoeff_block_copy[plane] + dcb->cb_offset[plane];
+  tran_low_t *const quant_coeffs =
+      dcb->qcoeff_block[plane] + dcb->cb_offset[plane];
+  tran_low_t *const dequant_values =
+      dcb->dequant_values[plane] + dcb->cb_offset[plane];
+  memset(tcoeffs_copy, 0, sizeof(tran_low_t) * width * height);
+  memset(quant_coeffs, 0, sizeof(tran_low_t) * width * height);
+  memset(dequant_values, 0, sizeof(tran_low_t) * width * height);
+#endif  // CONFIG_INSPECTION
   int cul_level = 0;
   int dc_val = 0;
   uint8_t levels_buf[TX_PAD_2D];
   uint8_t *const levels = set_levels(levels_buf, width);
+#if !CONFIG_ATC_DCTX_ALIGNED
   eob_info *eob_data = dcb->eob_data[plane] + dcb->txb_offset[plane];
   uint16_t *const eob = &(eob_data->eob);
   uint16_t *const max_scan_line = &(eob_data->max_scan_line);
   *max_scan_line = 0;
   *eob = 0;
+#else
+  eob_info *eob_data = dcb->eob_data[plane] + dcb->txb_offset[plane];
+  uint16_t *const eob = &(eob_data->eob);
+  uint16_t *const max_scan_line = &(eob_data->max_scan_line);
+#endif  // !CONFIG_ATC_DCTX_ALIGNED
 
 #if DEBUG_EXTQUANT
   fprintf(cm->fDecCoeffLog,
@@ -459,8 +659,14 @@
   const TX_CLASS tx_class = tx_type_to_class[get_primary_tx_type(tx_type)];
   const qm_val_t *iqmatrix =
       av1_get_iqmatrix(&cm->quant_params, xd, plane, tx_size, tx_type);
+#if CONFIG_INSPECTION
+  for (int c = 0; c < width * height; c++) {
+    dequant_values[c] = get_dqv(dequant, c, iqmatrix);
+  }
+#endif  // CONFIG_INSPECTION
   const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type);
   const int16_t *const scan = scan_order->scan;
+#if !CONFIG_ATC_DCTX_ALIGNED
   int eob_extra = 0;
   int eob_pt = 1;
 
@@ -470,44 +676,44 @@
     case 0:
       eob_pt =
           aom_read_symbol(r, ec_ctx->eob_flag_cdf16[plane_type][eob_multi_ctx],
-                          5, ACCT_STR) +
+                          5, ACCT_INFO("eob_pt", "eob_multi_size:0")) +
           1;
       break;
     case 1:
       eob_pt =
           aom_read_symbol(r, ec_ctx->eob_flag_cdf32[plane_type][eob_multi_ctx],
-                          6, ACCT_STR) +
+                          6, ACCT_INFO("eob_pt", "eob_multi_size:1")) +
           1;
       break;
     case 2:
       eob_pt =
           aom_read_symbol(r, ec_ctx->eob_flag_cdf64[plane_type][eob_multi_ctx],
-                          7, ACCT_STR) +
+                          7, ACCT_INFO("eob_pt", "eob_multi_size:2")) +
           1;
       break;
     case 3:
       eob_pt =
           aom_read_symbol(r, ec_ctx->eob_flag_cdf128[plane_type][eob_multi_ctx],
-                          8, ACCT_STR) +
+                          8, ACCT_INFO("eob_pt", "eob_multi_size:3")) +
           1;
       break;
     case 4:
       eob_pt =
           aom_read_symbol(r, ec_ctx->eob_flag_cdf256[plane_type][eob_multi_ctx],
-                          9, ACCT_STR) +
+                          9, ACCT_INFO("eob_pt", "eob_multi_size:4")) +
           1;
       break;
     case 5:
       eob_pt =
           aom_read_symbol(r, ec_ctx->eob_flag_cdf512[plane_type][eob_multi_ctx],
-                          10, ACCT_STR) +
+                          10, ACCT_INFO("eob_pt", "eob_multi_size:5")) +
           1;
       break;
     case 6:
     default:
       eob_pt = aom_read_symbol(
                    r, ec_ctx->eob_flag_cdf1024[plane_type][eob_multi_ctx], 11,
-                   ACCT_STR) +
+                   ACCT_INFO("eob_pt", "eob_multi_size:6")) +
                1;
       break;
   }
@@ -515,17 +721,19 @@
   const int eob_offset_bits = av1_eob_offset_bits[eob_pt];
   if (eob_offset_bits > 0) {
     const int eob_ctx = eob_pt - 3;
-    int bit = aom_read_symbol(
-        r, ec_ctx->eob_extra_cdf[txs_ctx][plane_type][eob_ctx], 2, ACCT_STR);
+    int bit =
+        aom_read_symbol(r, ec_ctx->eob_extra_cdf[txs_ctx][plane_type][eob_ctx],
+                        2, ACCT_INFO("eob_offset_bits"));
     if (bit) {
       eob_extra += (1 << (eob_offset_bits - 1));
     }
 
 #if CONFIG_BYPASS_IMPROVEMENT
-    eob_extra += aom_read_literal(r, eob_offset_bits - 1, ACCT_STR);
+    eob_extra +=
+        aom_read_literal(r, eob_offset_bits - 1, ACCT_INFO("eob_extra"));
 #else
     for (int i = 1; i < eob_offset_bits; i++) {
-      bit = aom_read_bit(r, ACCT_STR);
+      bit = aom_read_bit(r, ACCT_INFO("eob_offset_bits"));
       if (bit) {
         eob_extra += (1 << (eob_offset_bits - 1 - i));
       }
@@ -539,10 +747,15 @@
     xd->eob_u = *eob;
   }
 #endif  // CONFIG_CONTEXT_DERIVATION
+#endif  // !CONFIG_ATC_DCTX_ALIGNED
 
   // read  sec_tx_type here
   // Only y plane's sec_tx_type is transmitted
-  if ((plane == AOM_PLANE_Y) && (cm->seq_params.enable_ist)) {
+  if ((plane == AOM_PLANE_Y) && (cm->seq_params.enable_ist)
+#if CONFIG_ATC_DCTX_ALIGNED
+      && (*eob != 1)
+#endif  // CONFIG_ATC_DCTX_ALIGNED
+  ) {
     av1_read_sec_tx_type(cm, xd, blk_row, blk_col, tx_size, eob, r);
   }
   //
@@ -560,7 +773,7 @@
     const int c = *eob - 1;
     const int pos = scan[c];
     const int coeff_ctx = get_lower_levels_ctx_eob(bwl, height, c);
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
     int level = 0;
     const int row = pos >> bwl;
     const int col = pos - (row << bwl);
@@ -568,12 +781,15 @@
     if (limits) {
       aom_cdf_prob *cdf =
           ec_ctx->coeff_base_lf_eob_cdf[txs_ctx][plane_type][coeff_ctx];
-      level += aom_read_symbol(r, cdf, LF_BASE_SYMBOLS - 1, ACCT_STR) + 1;
+      level += aom_read_symbol(r, cdf, LF_BASE_SYMBOLS - 1,
+                               ACCT_INFO("level", "coeff_base_lf_eob_cdf")) +
+               1;
       if (level > LF_NUM_BASE_LEVELS) {
         const int br_ctx = get_br_ctx_lf_eob(pos, tx_class);
         cdf = ec_ctx->coeff_br_lf_cdf[plane_type][br_ctx];
         for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) {
-          const int k = aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_STR);
+          const int k = aom_read_symbol(r, cdf, BR_CDF_SIZE,
+                                        ACCT_INFO("k", "coeff_br_lf_cdf"));
           level += k;
           if (k < BR_CDF_SIZE - 1) break;
         }
@@ -581,12 +797,15 @@
     } else {
       aom_cdf_prob *cdf =
           ec_ctx->coeff_base_eob_cdf[txs_ctx][plane_type][coeff_ctx];
-      level += aom_read_symbol(r, cdf, 3, ACCT_STR) + 1;
+      level +=
+          aom_read_symbol(r, cdf, 3, ACCT_INFO("level", "coeff_base_eob_cdf")) +
+          1;
       if (level > NUM_BASE_LEVELS) {
         const int br_ctx = 0; /* get_lf_ctx_eob */
         cdf = ec_ctx->coeff_br_cdf[plane_type][br_ctx];
         for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) {
-          const int k = aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_STR);
+          const int k = aom_read_symbol(r, cdf, BR_CDF_SIZE,
+                                        ACCT_INFO("k", "coeff_br_cdf"));
           level += k;
           if (k < BR_CDF_SIZE - 1) break;
         }
@@ -596,17 +815,20 @@
     const int nsymbs = 3;
     aom_cdf_prob *cdf =
         ec_ctx->coeff_base_eob_cdf[txs_ctx][plane_type][coeff_ctx];
-    int level = aom_read_symbol(r, cdf, nsymbs, ACCT_STR) + 1;
+    int level = aom_read_symbol(r, cdf, nsymbs,
+                                ACCT_INFO("level", "coeff_base_eob_cdf")) +
+                1;
     if (level > NUM_BASE_LEVELS) {
       const int br_ctx = get_br_ctx_eob(pos, bwl, tx_class);
       cdf = ec_ctx->coeff_br_cdf[AOMMIN(txs_ctx, TX_32X32)][plane_type][br_ctx];
       for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) {
-        const int k = aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_STR);
+        const int k = aom_read_symbol(r, cdf, BR_CDF_SIZE,
+                                      ACCT_INFO("k", "coeff_br_cdf"));
         level += k;
         if (k < BR_CDF_SIZE - 1) break;
       }
     }
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
     levels[get_padded_idx(pos, bwl)] = level;
   }
 #if CONFIG_PAR_HIDING
@@ -617,21 +839,21 @@
   bool is_hidden = false;
 #endif  // CONFIG_PAR_HIDING
   if (*eob > 1) {
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
     base_lf_cdf_arr base_lf_cdf =
         ec_ctx->coeff_base_lf_cdf[txs_ctx][plane_type];
     br_cdf_arr br_lf_cdf = ec_ctx->coeff_br_lf_cdf[plane_type];
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
     base_cdf_arr base_cdf = ec_ctx->coeff_base_cdf[txs_ctx][plane_type];
     br_cdf_arr br_cdf =
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
         ec_ctx->coeff_br_cdf[plane_type];
 #else
         ec_ctx->coeff_br_cdf[AOMMIN(txs_ctx, TX_32X32)][plane_type];
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 #if CONFIG_PAR_HIDING
     if (tx_class == TX_CLASS_2D) {
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
       read_coeffs_reverse_2d(r, 1, *eob - 2, scan, bwl, levels, base_lf_cdf,
                              br_lf_cdf, plane, base_cdf, br_cdf);
       if (enable_parity_hiding) {
@@ -675,9 +897,9 @@
         read_coeffs_reverse(r, tx_size, tx_class, 0, 0, scan, bwl, levels,
                             base_cdf, br_cdf);
       }
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
     } else {
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
       read_coeffs_reverse(r, tx_class, 1, *eob - 2, scan, bwl, levels,
                           base_lf_cdf, br_lf_cdf, plane, base_cdf, br_cdf);
       if (enable_parity_hiding) {
@@ -721,11 +943,11 @@
         read_coeffs_reverse(r, tx_size, tx_class, 0, 0, scan, bwl, levels,
                             base_cdf, br_cdf);
       }
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
     }
 #else
     if (tx_class == TX_CLASS_2D) {
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
       read_coeffs_reverse_2d(r, 1, *eob - 2, scan, bwl, levels, base_lf_cdf,
                              br_lf_cdf, plane, base_cdf, br_cdf);
       read_coeffs_reverse(r, tx_class, 0, 0, scan, bwl, levels, base_lf_cdf,
@@ -735,15 +957,15 @@
                              base_cdf, br_cdf);
       read_coeffs_reverse(r, tx_size, tx_class, 0, 0, scan, bwl, levels,
                           base_cdf, br_cdf);
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
     } else {
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
       read_coeffs_reverse(r, tx_class, 0, *eob - 2, scan, bwl, levels,
                           base_lf_cdf, br_lf_cdf, plane, base_cdf, br_cdf);
 #else
       read_coeffs_reverse(r, tx_size, tx_class, 0, *eob - 2, scan, bwl, levels,
                           base_cdf, br_cdf);
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
     }
 #endif  // CONFIG_PAR_HIDING
   }
@@ -764,35 +986,37 @@
 #if CONFIG_CONTEXT_DERIVATION
         if (plane == AOM_PLANE_Y || plane == AOM_PLANE_U) {
           sign = aom_read_symbol(
-              r, ec_ctx->dc_sign_cdf[plane_type][dc_sign_ctx], 2, ACCT_STR);
+              r, ec_ctx->dc_sign_cdf[plane_type][dc_sign_ctx], 2,
+              ACCT_INFO("sign", "dc_sign_cdf", "plane_y_or_u"));
         } else {
           int32_t tmp_sign = 0;
           if (c < xd->eob_u) tmp_sign = xd->tmp_sign[0];
-          sign = aom_read_symbol(
-              r, ec_ctx->v_dc_sign_cdf[tmp_sign][dc_sign_ctx], 2, ACCT_STR);
+          sign =
+              aom_read_symbol(r, ec_ctx->v_dc_sign_cdf[tmp_sign][dc_sign_ctx],
+                              2, ACCT_INFO("sign", "v_dc_sign_cdf", "plane_v"));
         }
         if (plane == AOM_PLANE_U) xd->tmp_sign[0] = (sign ? 2 : 1);
 #else
         sign = aom_read_symbol(r, ec_ctx->dc_sign_cdf[plane_type][dc_sign_ctx],
-                               2, ACCT_STR);
+                               2, ACCT_INFO("sign", "dc_sign_cdf"));
 #endif  // CONFIG_CONTEXT_DERIVATION
       } else {
 #if CONFIG_CONTEXT_DERIVATION
         if (plane == AOM_PLANE_Y || plane == AOM_PLANE_U)
-          sign = aom_read_bit(r, ACCT_STR);
+          sign = aom_read_bit(r, ACCT_INFO("sign", "plane_y_or_u"));
         else {
           int32_t tmp_sign = 0;
           if (c < xd->eob_u) tmp_sign = xd->tmp_sign[pos];
-          sign =
-              aom_read_symbol(r, ec_ctx->v_ac_sign_cdf[tmp_sign], 2, ACCT_STR);
+          sign = aom_read_symbol(r, ec_ctx->v_ac_sign_cdf[tmp_sign], 2,
+                                 ACCT_INFO("sign", "v_ac_sign_cdf", "plane_v"));
         }
         if (plane == AOM_PLANE_U) xd->tmp_sign[pos] = (sign ? 2 : 1);
 #else
-        sign = aom_read_bit(r, ACCT_STR);
+        sign = aom_read_bit(r, ACCT_INFO("sign"));
 #endif  // CONFIG_CONTEXT_DERIVATION
       }
 #if CONFIG_PAR_HIDING
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
       if (is_hidden && c == 0) {
         if (level >= (MAX_BASE_BR_RANGE << 1)) {
           level += (read_golomb(xd, r) << 1);
@@ -821,9 +1045,9 @@
           level += read_golomb(xd, r);
         }
       }
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 #else
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
       const int row = pos >> bwl;
       const int col = pos - (row << bwl);
       int limits = get_lf_limits(row, col, tx_class, plane);
@@ -840,7 +1064,7 @@
       if (level >= MAX_BASE_BR_RANGE) {
         level += read_golomb(xd, r);
       }
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 #endif  // CONFIG_PAR_HIDING
       if (c == 0) dc_val = sign ? -level : level;
 
@@ -860,6 +1084,10 @@
         dq_coeff = -dq_coeff;
       }
       tcoeffs[pos] = clamp(dq_coeff, min_value, max_value);
+#if CONFIG_INSPECTION
+      tcoeffs_copy[pos] = tcoeffs[pos];
+      quant_coeffs[pos] = sign ? -level : level;
+#endif  // CONFIG_INSPECTION
     }
   }
 #if DEBUG_EXTQUANT
diff --git a/av1/decoder/detokenize.c b/av1/decoder/detokenize.c
index 1fb2975..b71fd40 100644
--- a/av1/decoder/detokenize.c
+++ b/av1/decoder/detokenize.c
@@ -17,8 +17,6 @@
 #include "av1/common/blockd.h"
 #include "av1/decoder/detokenize.h"
 
-#define ACCT_STR __func__
-
 #include "av1/common/common.h"
 #include "av1/common/entropy.h"
 #include "av1/common/idct.h"
@@ -33,13 +31,13 @@
   int rows = param->rows;
   int cols = param->cols;
 
-#if CONFIG_NEW_COLOR_MAP_CODING
+#if CONFIG_PALETTE_IMPROVEMENTS
   IdentityRowCdf identity_row_cdf = param->identity_row_cdf;
   int prev_identity_row_flag = 0;
   for (int y = 0; y < rows; y++) {
     const int ctx = y == 0 ? 2 : prev_identity_row_flag;
-    int identity_row_flag =
-        aom_read_symbol(r, identity_row_cdf[ctx], 2, ACCT_STR);
+    int identity_row_flag = aom_read_symbol(r, identity_row_cdf[ctx], 2,
+                                            ACCT_INFO("identity_row_flag"));
     for (int x = 0; x < cols; x++) {
       if (identity_row_flag && x > 0) {
         color_map[y * plane_block_width + x] =
@@ -50,8 +48,9 @@
         const int color_ctx = av1_get_palette_color_index_context(
             color_map, plane_block_width, y, x, n, color_order, NULL,
             identity_row_flag, prev_identity_row_flag);
-        const int color_idx = aom_read_symbol(
-            r, color_map_cdf[n - PALETTE_MIN_SIZE][color_ctx], n, ACCT_STR);
+        const int color_idx =
+            aom_read_symbol(r, color_map_cdf[n - PALETTE_MIN_SIZE][color_ctx],
+                            n, ACCT_INFO("color_idx"));
         assert(color_idx >= 0 && color_idx < n);
         color_map[y * plane_block_width + x] = color_order[color_idx];
       }
@@ -68,8 +67,9 @@
     for (int j = AOMMIN(i, cols - 1); j >= AOMMAX(0, i - rows + 1); --j) {
       const int color_ctx = av1_get_palette_color_index_context(
           color_map, plane_block_width, (i - j), j, n, color_order, NULL);
-      const int color_idx = aom_read_symbol(
-          r, color_map_cdf[n - PALETTE_MIN_SIZE][color_ctx], n, ACCT_STR);
+      const int color_idx =
+          aom_read_symbol(r, color_map_cdf[n - PALETTE_MIN_SIZE][color_ctx], n,
+                          ACCT_INFO("color_idx"));
       assert(color_idx >= 0 && color_idx < n);
       color_map[(i - j) * plane_block_width + j] = color_order[color_idx];
     }
@@ -98,10 +98,10 @@
       xd->plane[plane].color_index_map + xd->color_index_map_offset[plane];
   params.map_cdf = plane ? xd->tile_ctx->palette_uv_color_index_cdf
                          : xd->tile_ctx->palette_y_color_index_cdf;
-#if CONFIG_NEW_COLOR_MAP_CODING
+#if CONFIG_PALETTE_IMPROVEMENTS
   params.identity_row_cdf = plane ? xd->tile_ctx->identity_row_cdf_uv
                                   : xd->tile_ctx->identity_row_cdf_y;
-#endif  // CONFIG_NEW_COLOR_MAP_CODING
+#endif  // CONFIG_PALETTE_IMPROVEMENTS
   const MB_MODE_INFO *const mbmi = xd->mi[0];
   params.n_colors = mbmi->palette_mode_info.palette_size[plane];
   av1_get_block_dimensions(mbmi->sb_type[plane > 0], plane, xd,
diff --git a/av1/decoder/inspection.c b/av1/decoder/inspection.c
index 75e06cc..2cba1fb 100644
--- a/av1/decoder/inspection.c
+++ b/av1/decoder/inspection.c
@@ -11,6 +11,7 @@
  */
 #include "av1/decoder/decoder.h"
 #include "av1/decoder/inspection.h"
+#include "av1/common/blockd.h"
 #include "av1/common/enums.h"
 #include "av1/common/cdef.h"
 
@@ -19,6 +20,12 @@
   fd->mi_rows = mi_rows;
   fd->mi_grid = (insp_mi_data *)aom_malloc(sizeof(insp_mi_data) * fd->mi_rows *
                                            fd->mi_cols);
+  fd->max_sb_rows =
+      (mi_rows + (1 << MIN_MIB_SIZE_LOG2) - 1) / (1 << MIN_MIB_SIZE_LOG2);
+  fd->max_sb_cols =
+      (mi_cols + (1 << MIN_MIB_SIZE_LOG2) - 1) / (1 << MIN_MIB_SIZE_LOG2);
+  fd->sb_grid = (insp_sb_data *)aom_calloc(sizeof(insp_sb_data),
+                                           fd->max_sb_rows * fd->max_sb_cols);
 }
 
 void ifd_init(insp_frame_data *fd, int frame_width, int frame_height) {
@@ -30,6 +37,70 @@
 void ifd_clear(insp_frame_data *fd) {
   aom_free(fd->mi_grid);
   fd->mi_grid = NULL;
+  // Release the per-superblock partition-tree copies, then the grid itself.
+  // A NULL grid (failed allocation, or a repeated clear) has nothing to walk.
+  const int num_sbs = fd->sb_grid ? fd->max_sb_rows * fd->max_sb_cols : 0;
+  for (int i = 0; i < num_sbs; i++) {
+    // Note: NULL checking happens within av1_free_ptree_recursive
+    av1_free_ptree_recursive(fd->sb_grid[i].partition_tree_luma);
+    av1_free_ptree_recursive(fd->sb_grid[i].partition_tree_chroma);
+  }
+  aom_free(fd->sb_grid);
+  fd->sb_grid = NULL;
+}
+
+// Deep copy of |orig| under |parent|; NULL if |orig| is NULL or alloc fails.
+PARTITION_TREE *copy_partition_tree(PARTITION_TREE *orig,
+                                    PARTITION_TREE *parent) {
+  PARTITION_TREE *copy = orig ? av1_alloc_ptree_node(NULL, 0) : NULL;
+  if (copy == NULL) return NULL;
+  memcpy(copy, orig, sizeof(PARTITION_TREE));
+  copy->parent = parent;
+  const size_t num_sub = sizeof(copy->sub_tree) / sizeof(copy->sub_tree[0]);
+  for (size_t i = 0; i < num_sub; i++) {
+    copy->sub_tree[i] = copy_partition_tree(orig->sub_tree[i], copy);
+  }
+  return copy;
+}
+
+// Snapshots the current superblock (deep-copied partition trees plus per-plane
+// coefficient buffers) into fd->sb_grid. Returns 0 if the grid is unallocated.
+int ifd_inspect_superblock(insp_frame_data *fd, void *decoder) {
+  struct AV1Decoder *pbi = (struct AV1Decoder *)decoder;
+  AV1_COMMON *const cm = &pbi->common;
+  const CommonModeInfoParams *const mi_params = &cm->mi_params;
+  if (fd->mi_rows != mi_params->mi_rows || fd->mi_cols != mi_params->mi_cols) {
+    ifd_clear(fd);
+    ifd_init_mi_rc(fd, mi_params->mi_rows, mi_params->mi_cols);
+  }
+  if (fd->sb_grid == NULL) return 0;
+  const int sb_size = cm->seq_params.sb_size;
+  const int sb_row = pbi->td.dcb.xd.sbi->mi_row / mi_size_high[sb_size];
+  const int sb_col = pbi->td.dcb.xd.sbi->mi_col / mi_size_wide[sb_size];
+  PARTITION_TREE *luma_tree = pbi->td.dcb.xd.sbi->ptree_root[0];
+  PARTITION_TREE *chroma_tree = pbi->td.dcb.xd.sbi->ptree_root[1];
+  insp_sb_data *sb = &fd->sb_grid[sb_row * fd->max_sb_cols + sb_col];
+  // The grid persists across frames; free trees kept from an earlier call so
+  // they are not leaked (av1_free_ptree_recursive accepts NULL).
+  av1_free_ptree_recursive(sb->partition_tree_luma);
+  av1_free_ptree_recursive(sb->partition_tree_chroma);
+  sb->partition_tree_luma = copy_partition_tree(luma_tree, NULL);
+  // Semi-decoupled partitioning is enabled only for intra-frames.
+  const int use_sdp = (frame_is_intra_only(cm) && !cm->seq_params.monochrome &&
+                       cm->seq_params.enable_sdp);
+  // Without SDP the chroma tree is a copy of the luma tree, for consistency.
+  sb->partition_tree_chroma = copy_partition_tree(
+      (chroma_tree != NULL && use_sdp) ? chroma_tree : luma_tree, NULL);
+  sb->has_separate_chroma_partition_tree = use_sdp;
+  // memcpy counts bytes: copy whole planes of tran_low_t. NOTE(review): assumes
+  // the decoder-side buffers hold MAX_SB_SQUARE entries per plane - confirm.
+  const size_t plane_bytes = sizeof(sb->dqcoeff[0]);
+  for (int i = 0; i < MAX_MB_PLANE; i++) {
+    memcpy(sb->dqcoeff[i], pbi->td.dcb.dqcoeff_block_copy[i], plane_bytes);
+    memcpy(sb->qcoeff[i], pbi->td.dcb.qcoeff_block[i], plane_bytes);
+    memcpy(sb->dequant_values[i], pbi->td.dcb.dequant_values[i], plane_bytes);
+  }
+  return 1;
 }
 
 /* TODO(negge) This function may be called by more than one thread when using
@@ -39,7 +110,9 @@
   AV1_COMMON *const cm = &pbi->common;
   const CommonModeInfoParams *const mi_params = &cm->mi_params;
   const CommonQuantParams *quant_params = &cm->quant_params;
-
+  fd->recon_frame_buffer = cm->cur_frame->buf;
+  fd->predicted_frame_buffer = cm->predicted_pixels;
+  fd->prefiltered_frame_buffer = cm->prefiltered_pixels;
   if (fd->mi_rows != mi_params->mi_rows || fd->mi_cols != mi_params->mi_cols) {
     ifd_clear(fd);
     ifd_init_mi_rc(fd, mi_params->mi_rows, mi_params->mi_cols);
@@ -49,6 +122,7 @@
   fd->show_frame = cm->show_frame;
   fd->frame_type = cm->current_frame.frame_type;
   fd->base_qindex = quant_params->base_qindex;
+  fd->superblock_size = cm->seq_params.sb_size;
   // Set width and height of the first tile until generic support can be added
   TileInfo tile_info;
   av1_tile_set_row(&tile_info, cm, 0);
@@ -57,6 +131,11 @@
   fd->tile_mi_rows = tile_info.mi_row_end - tile_info.mi_row_start;
   fd->delta_q_present_flag = cm->delta_q_info.delta_q_present_flag;
   fd->delta_q_res = cm->delta_q_info.delta_q_res;
+  fd->bit_depth = cm->seq_params.bit_depth;
+  fd->width = cm->width;
+  fd->height = cm->height;
+  fd->render_width = cm->render_width;
+  fd->render_height = cm->render_height;
 #if CONFIG_ACCOUNTING
   fd->accounting = &pbi->accounting;
 #endif
@@ -101,14 +180,15 @@
 
       // Block Size
       mi->sb_type = mbmi->sb_type[0];
+      mi->sb_type_chroma = mbmi->sb_type[1];
       // Skip Flag
+      // TODO(comc): Check handling of skip_txfm vs tx_skip.
       mi->skip = mbmi->skip_txfm[0];
       mi->filter[0] = mbmi->interp_fltr;
       mi->filter[1] = mbmi->interp_fltr;
       mi->dual_filter_type = mi->filter[0] * 3 + mi->filter[1];
 
       // Transform
-      // TODO(anyone): extract tx type info from mbmi->txk_type[].
       const BLOCK_SIZE bsize = mbmi->sb_type[0];
       const int c = i % mi_size_wide[bsize];
       const int r = j % mi_size_high[bsize];
@@ -120,19 +200,19 @@
 
       if (skip_not_transform && mi->skip) mi->tx_size = -1;
 
-      if (mi->skip) {
-        const int tx_type_row = j - j % tx_size_high_unit[mi->tx_size];
-        const int tx_type_col = i - i % tx_size_wide_unit[mi->tx_size];
-        const int tx_type_map_idx =
-            tx_type_row * mi_params->mi_stride + tx_type_col;
-        mi->tx_type = mi_params->tx_type_map[tx_type_map_idx];
-      } else {
-        mi->tx_type = 0;
-      }
+      const int tx_type_row = j - j % tx_size_high_unit[mi->tx_size];
+      const int tx_type_col = i - i % tx_size_wide_unit[mi->tx_size];
+      const int tx_type_map_idx =
+          tx_type_row * mi_params->mi_stride + tx_type_col;
+      mi->tx_type = mi_params->tx_type_map[tx_type_map_idx];
+
+      bool skip = mbmi->tx_skip[av1_get_txk_type_index(bsize, r, c)];
+      mi->skip |= skip;
 
       if (skip_not_transform &&
-          (mi->skip || mbmi->tx_skip[av1_get_txk_type_index(bsize, r, c)]))
+          (mi->skip || mbmi->tx_skip[av1_get_txk_type_index(bsize, r, c)])) {
         mi->tx_type = -1;
+      }
 
       mi->cdef_level = cm->cdef_info.cdef_strengths[mbmi->cdef_strength] /
                        CDEF_SEC_STRENGTHS;
diff --git a/av1/decoder/inspection.h b/av1/decoder/inspection.h
index 19d4f5f..cb8175f 100644
--- a/av1/decoder/inspection.h
+++ b/av1/decoder/inspection.h
@@ -16,6 +16,7 @@
 extern "C" {
 #endif  // __cplusplus
 
+#include "av1/common/blockd.h"
 #include "av1/common/seg_common.h"
 #if CONFIG_ACCOUNTING
 #include "av1/decoder/accounting.h"
@@ -32,6 +33,12 @@
   int16_t col;
 };
 
+typedef struct insp_pixel_data insp_pixel_data;
+
+struct insp_pixel_data {
+  int16_t samples[MAX_SB_SIZE][MAX_SB_SIZE];  // MAX_SB_SIZE^2 16-bit samples.
+};
+
 typedef struct insp_mi_data insp_mi_data;
 
 struct insp_mi_data {
@@ -40,6 +47,7 @@
   int16_t mode;
   int16_t uv_mode;
   int16_t sb_type;
+  int16_t sb_type_chroma;
   int16_t skip;
   int16_t segment_id;
   int16_t dual_filter_type;
@@ -58,6 +66,19 @@
   int16_t uv_palette;
 };
 
+typedef struct insp_sb_data insp_sb_data;
+
+struct insp_sb_data {  // Filled per superblock by ifd_inspect_superblock().
+  PARTITION_TREE *partition_tree_luma;    // Deep copy; freed by ifd_clear().
+  PARTITION_TREE *partition_tree_chroma;  // Chroma tree copy, or luma w/o SDP.
+  bool has_separate_chroma_partition_tree;  // Whether SDP applies to the frame.
+  int16_t prediction_samples[MAX_SB_SIZE][MAX_SB_SIZE];  // TODO confirm writer
+  int16_t recon_samples[MAX_SB_SIZE][MAX_SB_SIZE];       // TODO confirm writer
+  tran_low_t dqcoeff[MAX_MB_PLANE][MAX_SB_SQUARE];  // Dequantized coefficients.
+  tran_low_t qcoeff[MAX_MB_PLANE][MAX_SB_SQUARE];   // Signed quantized levels.
+  tran_low_t dequant_values[MAX_MB_PLANE][MAX_SB_SQUARE];  // Per-position dqv.
+};
+
 typedef struct insp_frame_data insp_frame_data;
 
 struct insp_frame_data {
@@ -65,6 +86,9 @@
   Accounting *accounting;
 #endif
   insp_mi_data *mi_grid;
+  insp_sb_data *sb_grid;
+  int max_sb_rows;
+  int max_sb_cols;
   int16_t frame_number;
   int show_frame;
   int frame_type;
@@ -82,10 +106,21 @@
   int delta_q_present_flag;
   int delta_q_res;
   int show_existing_frame;
+  int superblock_size;
+  // Points to the same underlying allocations as the decoder
+  YV12_BUFFER_CONFIG recon_frame_buffer;
+  YV12_BUFFER_CONFIG predicted_frame_buffer;
+  YV12_BUFFER_CONFIG prefiltered_frame_buffer;
+  int bit_depth;
+  int render_width;
+  int render_height;
+  int width;
+  int height;
 };
 
 void ifd_init(insp_frame_data *fd, int frame_width, int frame_height);
 void ifd_clear(insp_frame_data *fd);
+int ifd_inspect_superblock(insp_frame_data *fd, void *decoder);
 int ifd_inspect(insp_frame_data *fd, void *decoder, int skip_not_transform);
 
 #ifdef __cplusplus
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c
index 5a695bc..252b2ab 100644
--- a/av1/encoder/bitstream.c
+++ b/av1/encoder/bitstream.c
@@ -145,27 +145,61 @@
   // 0 -> 0   10 -> 1   110 -> 2    111 -> 3
   // Also use the number of reference MVs for a frame type to reduce the
   // number of bits written if there are less than 4 valid DRL indices.
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SEP_COMP_DRL
+  if (has_second_drl(mbmi)) {
+    if (mbmi->mode == NEAR_NEWMV)
+      max_drl_bits = AOMMIN(max_drl_bits, SEP_COMP_DRL_SIZE);
+    else
+      assert(mbmi->mode == NEAR_NEARMV);
+  }
+
+#if CONFIG_SKIP_MODE_ENHANCEMENT
+  if (mbmi->skip_mode)
+    assert(mbmi->ref_mv_idx[0] <
+           mbmi_ext_frame->skip_mvp_candidate_list.ref_mv_count);
+  else
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
+    assert(mbmi->ref_mv_idx[0] < mbmi_ext_frame->ref_mv_count[0]);
+  if (has_second_drl(mbmi))
+    assert(mbmi->ref_mv_idx[1] < mbmi_ext_frame->ref_mv_count[1]);
+  assert(mbmi->ref_mv_idx[0] < max_drl_bits + 1);
+  if (has_second_drl(mbmi)) assert(mbmi->ref_mv_idx[1] < max_drl_bits + 1);
+  for (int ref = 0; ref < 1 + has_second_drl(mbmi); ref++) {
+    for (int idx = 0; idx < max_drl_bits; ++idx) {
+      aom_cdf_prob *drl_cdf =
+#if CONFIG_SKIP_MODE_ENHANCEMENT
+          mbmi->skip_mode ? ec_ctx->skip_drl_cdf[AOMMIN(idx, 2)]
+                          : av1_get_drl_cdf(ec_ctx, mbmi_ext_frame->weight[ref],
+                                            mode_ctx, idx);
+#else
+          av1_get_drl_cdf(ec_ctx, mbmi_ext_frame->weight[ref], mode_ctx, idx);
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
+      aom_write_symbol(w, mbmi->ref_mv_idx[ref] != idx, drl_cdf, 2);
+      if (mbmi->ref_mv_idx[ref] == idx) break;
+    }
+  }
+#else
+#if CONFIG_SKIP_MODE_ENHANCEMENT
   if (mbmi->skip_mode)
     assert(mbmi->ref_mv_idx <
            mbmi_ext_frame->skip_mvp_candidate_list.ref_mv_count);
   else
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
     assert(mbmi->ref_mv_idx < mbmi_ext_frame->ref_mv_count);
-
   assert(mbmi->ref_mv_idx < max_drl_bits + 1);
   for (int idx = 0; idx < max_drl_bits; ++idx) {
     aom_cdf_prob *drl_cdf =
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
         mbmi->skip_mode
             ? ec_ctx->skip_drl_cdf[AOMMIN(idx, 2)]
             : av1_get_drl_cdf(ec_ctx, mbmi_ext_frame->weight, mode_ctx, idx);
 #else
         av1_get_drl_cdf(ec_ctx, mbmi_ext_frame->weight, mode_ctx, idx);
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
     aom_write_symbol(w, mbmi->ref_mv_idx != idx, drl_cdf, 2);
     if (mbmi->ref_mv_idx == idx) break;
   }
+#endif  // CONFIG_SEP_COMP_DRL
 }
 #if CONFIG_WARP_REF_LIST
 static void write_warp_ref_idx(FRAME_CONTEXT *ec_ctx, const MB_MODE_INFO *mbmi,
@@ -184,6 +218,16 @@
     if (mbmi->warp_ref_idx == bit_idx) break;
   }
 }
+#if CONFIG_CWG_D067_IMPROVED_WARP
+static void write_warpmv_with_mvd_flag(FRAME_CONTEXT *ec_ctx,
+                                       const MB_MODE_INFO *mbmi,
+                                       aom_writer *w) {
+  aom_write_symbol(
+      w, mbmi->warpmv_with_mvd_flag,
+      ec_ctx->warpmv_with_mvd_flag_cdf[mbmi->sb_type[PLANE_TYPE_Y]], 2);
+}
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+
 #endif  // CONFIG_WARP_REF_LIST
 
 #if CONFIG_IMPROVED_JMVD && CONFIG_JOINT_MVD
@@ -208,6 +252,24 @@
 }
 #endif  // CONFIG_IMPROVED_JMVD && CONFIG_JOINT_MVD
 
+#if CONFIG_CWP
+// Write the index for the weighting factor of compound weighted prediction
+static AOM_INLINE void write_cwp_idx(MACROBLOCKD *xd, aom_writer *w,
+                                     const AV1_COMMON *const cm,
+                                     const MB_MODE_INFO *const mbmi) {
+  const int8_t final_idx = get_cwp_coding_idx(mbmi->cwp_idx, 1, cm, mbmi);
+
+  int bit_cnt = 0;
+  const int ctx = 0;
+  for (int idx = 0; idx < MAX_CWP_NUM - 1; ++idx) {
+    aom_write_symbol(w, final_idx != idx,
+                     xd->tile_ctx->cwp_idx_cdf[ctx][bit_cnt], 2);
+    if (final_idx == idx) break;
+    ++bit_cnt;
+  }
+}
+#endif  // CONFIG_CWP
+
 static AOM_INLINE void write_inter_compound_mode(MACROBLOCKD *xd, aom_writer *w,
                                                  PREDICTION_MODE mode,
 #if CONFIG_OPTFLOW_REFINEMENT
@@ -397,10 +459,10 @@
 static AOM_INLINE void write_is_inter(const AV1_COMMON *cm,
                                       const MACROBLOCKD *xd, int segment_id,
                                       aom_writer *w, const int is_inter
-#if CONFIG_CONTEXT_DERIVATION
+#if CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT
                                       ,
                                       const int skip_txfm
-#endif  // CONFIG_CONTEXT_DERIVATION
+#endif  // CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT
 ) {
   if (segfeature_active(&cm->seg, segment_id, SEG_LVL_GLOBALMV)) {
     assert(is_inter);
@@ -408,11 +470,11 @@
   }
   const int ctx = av1_get_intra_inter_context(xd);
   FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
-#if CONFIG_CONTEXT_DERIVATION
+#if CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT
   aom_write_symbol(w, is_inter, ec_ctx->intra_inter_cdf[skip_txfm][ctx], 2);
 #else
   aom_write_symbol(w, is_inter, ec_ctx->intra_inter_cdf[ctx], 2);
-#endif  // CONFIG_CONTEXT_DERIVATION
+#endif  // CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT
 }
 
 #if CONFIG_WEDGE_MOD_EXT
@@ -464,7 +526,11 @@
 #if !CONFIG_WARPMV
   write_warp_ref_idx(xd->tile_ctx, mbmi, w);
 #endif  // !CONFIG_WARPMV
-  if (!allow_warp_parameter_signaling(mbmi)) {
+  if (!allow_warp_parameter_signaling(
+#if CONFIG_CWG_D067_IMPROVED_WARP
+          cm,
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+          mbmi)) {
     return;
   }
 #endif  // CONFIG_WARP_REF_LIST
@@ -501,7 +567,11 @@
     const MB_MODE_INFO_EXT_FRAME *mbmi_ext_frame, aom_writer *w) {
   const BLOCK_SIZE bsize = mbmi->sb_type[PLANE_TYPE_Y];
   const int allowed_motion_modes =
+#if CONFIG_SEP_COMP_DRL
+      motion_mode_allowed(cm, xd, mbmi_ext_frame->ref_mv_stack[0], mbmi);
+#else
       motion_mode_allowed(cm, xd, mbmi_ext_frame->ref_mv_stack, mbmi);
+#endif  // CONFIG_SEP_COMP_DRL
   assert((allowed_motion_modes & (1 << mbmi->motion_mode)) != 0);
   assert((cm->features.enabled_motion_modes & (1 << mbmi->motion_mode)) != 0);
 
@@ -662,7 +732,7 @@
   }
 }
 
-#if CONFIG_NEW_COLOR_MAP_CODING
+#if CONFIG_PALETTE_IMPROVEMENTS
 static AOM_INLINE void pack_map_tokens(aom_writer *w, const TokenExtra **tp,
                                        int n, int cols, int rows) {
   const TokenExtra *p = *tp;
@@ -693,7 +763,7 @@
   }
   *tp = p;
 }
-#endif  // CONFIG_NEW_COLOR_MAP_CODING
+#endif  // CONFIG_PALETTE_IMPROVEMENTS
 
 static AOM_INLINE void av1_write_coeffs_txb_facade(
     aom_writer *w, AV1_COMMON *cm, MACROBLOCK *const x, MACROBLOCKD *xd,
@@ -1016,7 +1086,12 @@
 #if CONFIG_OPTFLOW_REFINEMENT
     // Sharp filter is always used whenever optical flow refinement is applied.
     int mb_interp_filter =
-        (mbmi->mode >= NEAR_NEARMV_OPTFLOW || use_opfl_refine_all(cm, mbmi))
+        (mbmi->mode >= NEAR_NEARMV_OPTFLOW || use_opfl_refine_all(cm, mbmi)
+
+#if CONFIG_REFINEMV
+         || mbmi->refinemv_flag
+#endif  // CONFIG_REFINEMV
+         )
             ? MULTITAP_SHARP
             : cm->features.interp_filter;
 #else
@@ -1029,10 +1104,20 @@
   }
   if (cm->features.interp_filter == SWITCHABLE) {
 #if CONFIG_OPTFLOW_REFINEMENT
-    if (mbmi->mode >= NEAR_NEARMV_OPTFLOW || use_opfl_refine_all(cm, mbmi)) {
+    if (mbmi->mode >= NEAR_NEARMV_OPTFLOW || use_opfl_refine_all(cm, mbmi)
+#if CONFIG_REFINEMV
+        || mbmi->refinemv_flag
+#endif  // CONFIG_REFINEMV
+    ) {
+#if CONFIG_REFINEMV
+      assert(IMPLIES(mbmi->mode >= NEAR_NEARMV_OPTFLOW ||
+                         use_opfl_refine_all(cm, mbmi) || mbmi->refinemv_flag,
+                     mbmi->interp_fltr == MULTITAP_SHARP));
+#else
       assert(IMPLIES(
           mbmi->mode >= NEAR_NEARMV_OPTFLOW || use_opfl_refine_all(cm, mbmi),
           mbmi->interp_fltr == MULTITAP_SHARP));
+#endif  // CONFIG_REFINEMV
       return;
     }
 #endif  // CONFIG_OPTFLOW_REFINEMENT
@@ -1202,16 +1287,23 @@
 }
 
 void av1_write_tx_type(const AV1_COMMON *const cm, const MACROBLOCKD *xd,
-                       TX_TYPE tx_type, TX_SIZE tx_size, aom_writer *w) {
+                       TX_TYPE tx_type, TX_SIZE tx_size, aom_writer *w
+#if CONFIG_ATC_DCTX_ALIGNED
+                       ,
+                       const int plane, const int eob, const int dc_skip) {
+  if (plane != PLANE_TYPE_Y || dc_skip) return;
+#else
+) {
+#endif  // CONFIG_ATC_DCTX_ALIGNED
   MB_MODE_INFO *mbmi = xd->mi[0];
-#if CONFIG_ATC_NEWTXSETS
+#if CONFIG_ATC
   PREDICTION_MODE intra_dir;
   if (mbmi->filter_intra_mode_info.use_filter_intra)
     intra_dir =
         fimode_to_intradir[mbmi->filter_intra_mode_info.filter_intra_mode];
   else
     intra_dir = mbmi->mode;
-#endif  // CONFIG_ATC_NEWTXSETS
+#endif  // CONFIG_ATC
   const FeatureFlags *const features = &cm->features;
   const int is_inter = is_inter_block(mbmi, xd->tree_type);
   if (get_ext_tx_types(tx_size, is_inter, features->reduced_tx_set_used) > 1 &&
@@ -1228,7 +1320,7 @@
     // eset == 0 should correspond to a set with only DCT_DCT and there
     // is no need to send the tx_type
     assert(eset > 0);
-#if CONFIG_ATC_NEWTXSETS
+#if CONFIG_ATC
     const int size_info = av1_size_class[tx_size];
     if (!is_inter) {
       const int mode_info = av1_md_class[intra_dir];
@@ -1240,25 +1332,33 @@
     }
 #else
     assert(av1_ext_tx_used[tx_set_type][get_primary_tx_type(tx_type)]);
-#endif  // CONFIG_ATC_NEWTXSETS
+#endif  // CONFIG_ATC
     if (is_inter) {
+#if CONFIG_ATC_DCTX_ALIGNED
+      const int eob_tx_ctx = get_lp2tx_ctx(tx_size, get_txb_bwl(tx_size), eob);
+      aom_write_symbol(
+          w, av1_ext_tx_ind[tx_set_type][tx_type],
+          ec_ctx->inter_ext_tx_cdf[eset][eob_tx_ctx][square_tx_size],
+          av1_num_ext_tx_set[tx_set_type]);
+#else
       aom_write_symbol(w, av1_ext_tx_ind[tx_set_type][tx_type],
                        ec_ctx->inter_ext_tx_cdf[eset][square_tx_size],
                        av1_num_ext_tx_set[tx_set_type]);
+#endif  // CONFIG_ATC_DCTX_ALIGNED
     } else {
       if (mbmi->fsc_mode[xd->tree_type == CHROMA_PART]) {
         return;
       }
-#if !CONFIG_ATC_NEWTXSETS
+#if !CONFIG_ATC
       PREDICTION_MODE intra_dir;
       if (mbmi->filter_intra_mode_info.use_filter_intra)
         intra_dir =
             fimode_to_intradir[mbmi->filter_intra_mode_info.filter_intra_mode];
       else
         intra_dir = mbmi->mode;
-#endif  // !CONFIG_ATC_NEWTXSETS
+#endif  // !CONFIG_ATC
       aom_write_symbol(
-#if CONFIG_ATC_NEWTXSETS
+#if CONFIG_ATC
           w,
           av1_tx_type_to_idx(get_primary_tx_type(tx_type), tx_set_type,
                              intra_dir, size_info),
@@ -1276,7 +1376,7 @@
           w, av1_ext_tx_ind_intra[tx_set_type][get_primary_tx_type(tx_type)],
           ec_ctx->intra_ext_tx_cdf[eset][square_tx_size][intra_dir],
           av1_num_ext_tx_set_intra[tx_set_type]);
-#endif  // CONFIG_ATC_NEWTXSETS
+#endif  // CONFIG_ATC
     }
   }
 }
@@ -1347,9 +1447,19 @@
                    INTRA_MODES);
 }
 #endif  // !CONFIG_AIMC
-static AOM_INLINE void write_mrl_index(FRAME_CONTEXT *ec_ctx, uint8_t mrl_index,
-                                       aom_writer *w) {
+static AOM_INLINE void write_mrl_index(FRAME_CONTEXT *ec_ctx,
+#if CONFIG_EXT_DIR
+                                       const MB_MODE_INFO *neighbor0,
+                                       const MB_MODE_INFO *neighbor1,
+#endif  // CONFIG_EXT_DIR
+                                       uint8_t mrl_index, aom_writer *w) {
+#if CONFIG_EXT_DIR
+  int ctx = get_mrl_index_ctx(neighbor0, neighbor1);
+  aom_cdf_prob *mrl_cdf = ec_ctx->mrl_index_cdf[ctx];
+  aom_write_symbol(w, mrl_index, mrl_cdf, MRL_LINE_NUMBER);
+#else
   aom_write_symbol(w, mrl_index, ec_ctx->mrl_index_cdf, MRL_LINE_NUMBER);
+#endif  // CONFIG_EXT_DIR
 }
 
 static AOM_INLINE void write_fsc_mode(uint8_t fsc_mode, aom_writer *w,
@@ -1645,7 +1755,11 @@
 #endif  // CONFIG_AIMC
     // Encoding reference line index
     if (cm->seq_params.enable_mrls && av1_is_directional_mode(mode)) {
-      write_mrl_index(ec_ctx, mbmi->mrl_index, w);
+      write_mrl_index(ec_ctx,
+#if CONFIG_EXT_DIR
+                      xd->neighbors[0], xd->neighbors[1],
+#endif  // CONFIG_EXT_DIR
+                      mbmi->mrl_index, w);
     }
   }
 
@@ -1705,19 +1819,39 @@
 
 static INLINE int_mv get_ref_mv_from_stack(
     int ref_idx, const MV_REFERENCE_FRAME *ref_frame, int ref_mv_idx,
-    const MB_MODE_INFO_EXT_FRAME *mbmi_ext_frame) {
+    const MB_MODE_INFO_EXT_FRAME *mbmi_ext_frame
+#if CONFIG_SEP_COMP_DRL
+    ,
+    const MB_MODE_INFO *mbmi
+#endif  // CONFIG_SEP_COMP_DRL
+) {
   const int8_t ref_frame_type = av1_ref_frame_type(ref_frame);
+#if CONFIG_SEP_COMP_DRL
+  const CANDIDATE_MV *curr_ref_mv_stack =
+      has_second_drl(mbmi) ? mbmi_ext_frame->ref_mv_stack[ref_idx]
+                           : mbmi_ext_frame->ref_mv_stack[0];
+#else
   const CANDIDATE_MV *curr_ref_mv_stack = mbmi_ext_frame->ref_mv_stack;
+#endif  // CONFIG_SEP_COMP_DRL
 
   if (is_inter_ref_frame(ref_frame[1])) {
     assert(ref_idx == 0 || ref_idx == 1);
+#if CONFIG_SEP_COMP_DRL
+    return ref_idx && !has_second_drl(mbmi)
+               ? curr_ref_mv_stack[ref_mv_idx].comp_mv
+#else
     return ref_idx ? curr_ref_mv_stack[ref_mv_idx].comp_mv
-                   : curr_ref_mv_stack[ref_mv_idx].this_mv;
+#endif  // CONFIG_SEP_COMP_DRL
+               : curr_ref_mv_stack[ref_mv_idx].this_mv;
   }
 
   assert(ref_idx == 0);
 #if CONFIG_TIP
+#if CONFIG_SEP_COMP_DRL
+  if (ref_mv_idx < mbmi_ext_frame->ref_mv_count[0]) {
+#else
   if (ref_mv_idx < mbmi_ext_frame->ref_mv_count) {
+#endif  // CONFIG_SEP_COMP_DRL
     return curr_ref_mv_stack[ref_mv_idx].this_mv;
   } else if (is_tip_ref_frame(ref_frame_type)) {
     int_mv zero_mv;
@@ -1736,13 +1870,42 @@
 static INLINE int_mv get_ref_mv(const MACROBLOCK *x, int ref_idx) {
   const MACROBLOCKD *xd = &x->e_mbd;
   const MB_MODE_INFO *mbmi = xd->mi[0];
+#if CONFIG_SEP_COMP_DRL
+  const int ref_mv_idx = get_ref_mv_idx(mbmi, ref_idx);
+#else
   const int ref_mv_idx = mbmi->ref_mv_idx;
+#endif  // CONFIG_SEP_COMP_DRL
   assert(IMPLIES(have_nearmv_newmv_in_inter_mode(mbmi->mode),
                  has_second_ref(mbmi)));
   return get_ref_mv_from_stack(ref_idx, mbmi->ref_frame, ref_mv_idx,
+#if CONFIG_SEP_COMP_DRL
+                               x->mbmi_ext_frame, mbmi);
+#else
                                x->mbmi_ext_frame);
+#endif  // CONFIG_SEP_COMP_DRL
 }
 
+#if CONFIG_REFINEMV
+// Writes the refinemv_flag to the bitstream when it needs to be signaled.
+static void write_refinemv_flag(const AV1_COMMON *const cm,
+                                MACROBLOCKD *const xd, aom_writer *w,
+                                BLOCK_SIZE bsize) {
+  const MB_MODE_INFO *const mbmi = xd->mi[0];
+  int signal_refinemv = switchable_refinemv_flag(cm, mbmi);
+
+  if (signal_refinemv) {
+    const int refinemv_ctx = av1_get_refinemv_context(cm, xd, bsize);
+    assert(mbmi->refinemv_flag < REFINEMV_NUM_MODES);
+    aom_write_symbol(w, mbmi->refinemv_flag,
+                     xd->tile_ctx->refinemv_flag_cdf[refinemv_ctx],
+                     REFINEMV_NUM_MODES);
+
+  } else {
+    assert(mbmi->refinemv_flag == get_default_refinemv_flag(cm, mbmi));
+  }
+}
+#endif  // CONFIG_REFINEMV
+
 #if CONFIG_FLEX_MVRES
 static void write_pb_mv_precision(const AV1_COMMON *const cm,
                                   MACROBLOCKD *const xd, aom_writer *w) {
@@ -1823,6 +1986,39 @@
 
   write_skip_mode(cm, xd, segment_id, mbmi, w);
 
+#if CONFIG_SKIP_TXFM_OPT
+  if (!mbmi->skip_mode) {
+    write_is_inter(cm, xd, mbmi->segment_id, w, is_inter);
+
+#if CONFIG_IBC_SR_EXT
+    if (!is_inter && av1_allow_intrabc(cm) && xd->tree_type != CHROMA_PART) {
+      const int use_intrabc = is_intrabc_block(mbmi, xd->tree_type);
+      if (xd->tree_type == CHROMA_PART) assert(use_intrabc == 0);
+#if CONFIG_NEW_CONTEXT_MODELING
+      const int intrabc_ctx = get_intrabc_ctx(xd);
+      aom_write_symbol(w, use_intrabc, ec_ctx->intrabc_cdf[intrabc_ctx], 2);
+#else
+      aom_write_symbol(w, use_intrabc, ec_ctx->intrabc_cdf, 2);
+#endif  // CONFIG_NEW_CONTEXT_MODELING
+    }
+#endif  // CONFIG_IBC_SR_EXT
+  }
+
+  int skip = 0;
+  if (is_inter
+#if CONFIG_IBC_SR_EXT
+      || (!is_inter && is_intrabc_block(mbmi, xd->tree_type))
+#endif  // CONFIG_IBC_SR_EXT
+  ) {
+#if CONFIG_SKIP_MODE_ENHANCEMENT
+    skip = write_skip(cm, xd, segment_id, mbmi, w);
+#else
+    assert(IMPLIES(mbmi->skip_mode,
+                   mbmi->skip_txfm[xd->tree_type == CHROMA_PART]));
+    skip = mbmi->skip_mode ? 1 : write_skip(cm, xd, segment_id, mbmi, w);
+#endif  // !CONFIG_SKIP_MODE_ENHANCEMENT
+  }
+#else
 #if CONFIG_SKIP_MODE_ENHANCEMENT
   const int skip = write_skip(cm, xd, segment_id, mbmi, w);
 #else
@@ -1831,6 +2027,7 @@
   const int skip =
       mbmi->skip_mode ? 1 : write_skip(cm, xd, segment_id, mbmi, w);
 #endif  // !CONFIG_SKIP_MODE_ENHANCEMENT
+#endif  // CONFIG_SKIP_TXFM_OPT
   write_inter_segment_id(cpi, w, seg, segp, skip, 0);
 
   write_cdef(cm, xd, w, skip);
@@ -1841,13 +2038,31 @@
 
   write_delta_q_params(cpi, skip, w);
 
+#if CONFIG_REFINEMV
+  assert(IMPLIES(mbmi->refinemv_flag,
+                 mbmi->skip_mode ? is_refinemv_allowed_skip_mode(cm, mbmi)
+                                 : is_refinemv_allowed(cm, mbmi, bsize)));
+  if (mbmi->refinemv_flag && switchable_refinemv_flag(cm, mbmi)) {
+    assert(mbmi->interinter_comp.type == COMPOUND_AVERAGE);
+    assert(mbmi->comp_group_idx == 0);
+    assert(mbmi->bawp_flag == 0);
+  }
+#if CONFIG_CWP
+  assert(IMPLIES(mbmi->refinemv_flag, mbmi->cwp_idx == CWP_EQUAL));
+#endif  // CONFIG_CWP
+#endif  // CONFIG_REFINEMV
 #if CONFIG_WARPMV
   // Just for debugging purpose
   if (mbmi->mode == WARPMV) {
     assert(mbmi->skip_mode == 0);
     assert(mbmi->motion_mode == WARP_DELTA ||
            mbmi->motion_mode == WARPED_CAUSAL);
+#if CONFIG_SEP_COMP_DRL
+    assert(get_ref_mv_idx(mbmi, 0) == 0);
+    assert(get_ref_mv_idx(mbmi, 1) == 0);
+#else
     assert(mbmi->ref_mv_idx == 0);
+#endif  // CONFIG_SEP_COMP_DRL
     assert(!is_tip_ref_frame(mbmi->ref_frame[0]));
     assert(is_inter);
     assert(!have_drl_index(mode));
@@ -1860,6 +2075,7 @@
   }
 #endif  // CONFIG_WARPMV
 
+#if !CONFIG_SKIP_TXFM_OPT
   if (!mbmi->skip_mode)
     write_is_inter(cm, xd, mbmi->segment_id, w, is_inter
 #if CONFIG_CONTEXT_DERIVATION
@@ -1867,6 +2083,7 @@
                    skip
 #endif  // CONFIG_CONTEXT_DERIVATION
     );
+#endif  // !CONFIG_SKIP_TXFM_OPT
 
 #if CONFIG_SKIP_MODE_ENHANCEMENT
   if (mbmi->skip_mode) {
@@ -1881,6 +2098,13 @@
 
 #if CONFIG_IBC_SR_EXT
   if (!is_inter && av1_allow_intrabc(cm) && xd->tree_type != CHROMA_PART) {
+    const int use_intrabc = is_intrabc_block(mbmi, xd->tree_type);
+#if CONFIG_NEW_CONTEXT_MODELING
+    const int intrabc_ctx = get_intrabc_ctx(xd);
+    aom_write_symbol(w, use_intrabc, ec_ctx->intrabc_cdf[intrabc_ctx], 2);
+#else
+    aom_write_symbol(w, use_intrabc, ec_ctx->intrabc_cdf, 2);
+#endif  // CONFIG_NEW_CONTEXT_MODELING
     write_intrabc_info(xd, mbmi_ext_frame, w);
     if (is_intrabc_block(mbmi, xd->tree_type)) return;
   }
@@ -1941,11 +2165,19 @@
 #endif
       write_motion_mode(cm, xd, mbmi, mbmi_ext_frame, w);
       int is_warpmv_warp_causal =
-          (mbmi->motion_mode == WARPED_CAUSAL && mbmi->mode == WARPMV);
+          ((mbmi->motion_mode == WARPED_CAUSAL) && mbmi->mode == WARPMV);
       if (mbmi->motion_mode == WARP_DELTA || is_warpmv_warp_causal)
         write_warp_ref_idx(xd->tile_ctx, mbmi, w);
 #endif  // CONFIG_WARPMV
 
+#if CONFIG_CWG_D067_IMPROVED_WARP
+      if (allow_warpmv_with_mvd_coding(cm, mbmi)) {
+        write_warpmv_with_mvd_flag(xd->tile_ctx, mbmi, w);
+      } else {
+        assert(mbmi->warpmv_with_mvd_flag == 0);
+      }
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+
 #if CONFIG_IMPROVED_JMVD && CONFIG_JOINT_MVD
       write_jmvd_scale_mode(xd, w, mbmi);
 #endif  // CONFIG_IMPROVED_JMVD && CONFIG_JOINT_MVD
@@ -1963,7 +2195,14 @@
 #endif  // IMPROVED_AMVD
             mbmi_ext_frame->mode_context, ec_ctx, mbmi, mbmi_ext_frame, w);
       else
+#if CONFIG_SEP_COMP_DRL
+      {
+        assert(get_ref_mv_idx(mbmi, 0) == 0);
+        assert(get_ref_mv_idx(mbmi, 1) == 0);
+      }
+#else
         assert(mbmi->ref_mv_idx == 0);
+#endif  // CONFIG_SEP_COMP_DRL
 #if CONFIG_FLEX_MVRES
       if (is_pb_mv_precision_active(cm, mbmi, bsize)) {
         write_pb_mv_precision(cm, xd, w);
@@ -1971,65 +2210,19 @@
 #endif  // CONFIG_FLEX_MVRES
     }
 
-    if (have_newmv_in_each_reference(mode)) {
-      for (ref = 0; ref < 1 + is_compound; ++ref) {
-        nmv_context *nmvc = &ec_ctx->nmvc;
-        const int_mv ref_mv = get_ref_mv(x, ref);
-
-        av1_encode_mv(cpi, w,
-#if CONFIG_FLEX_MVRES
-                      mbmi->mv[ref].as_mv, ref_mv.as_mv,
-#else
-                      &mbmi->mv[ref].as_mv, &ref_mv.as_mv,
-#endif
-                      nmvc,
-#if CONFIG_FLEX_MVRES
-                      pb_mv_precision);
-#else
-                      allow_hp);
-#endif
-      }
-    } else if (mode == NEAR_NEWMV
-#if CONFIG_OPTFLOW_REFINEMENT
-               || mode == NEAR_NEWMV_OPTFLOW
-#endif  // CONFIG_OPTFLOW_REFINEMENT
-#if CONFIG_JOINT_MVD
-               || (is_joint_mvd_coding_mode(mode) && jmvd_base_ref_list == 1)
-#endif  // CONFIG_JOINT_MVD
-    ) {
+#if CONFIG_CWG_D067_IMPROVED_WARP
+    if (mbmi->mode == WARPMV && mbmi->warpmv_with_mvd_flag) {
       nmv_context *nmvc = &ec_ctx->nmvc;
-      const int_mv ref_mv = get_ref_mv(x, 1);
-
-      av1_encode_mv(cpi, w,
-#if CONFIG_FLEX_MVRES
-                    mbmi->mv[1].as_mv, ref_mv.as_mv,
-#else
-                    &mbmi->mv[1].as_mv, &ref_mv.as_mv,
-#endif
-                    nmvc,
-#if CONFIG_FLEX_MVRES
-                    pb_mv_precision);
-#else
-                    allow_hp);
-#endif
-
-    } else if (mode == NEW_NEARMV
-#if CONFIG_OPTFLOW_REFINEMENT
-               || mode == NEW_NEARMV_OPTFLOW
-#endif  // CONFIG_OPTFLOW_REFINEMENT
-#if CONFIG_JOINT_MVD
-               || (is_joint_mvd_coding_mode(mode) && jmvd_base_ref_list == 0)
-#endif  // CONFIG_JOINT_MVD
-    ) {
-      nmv_context *nmvc = &ec_ctx->nmvc;
-      const int_mv ref_mv = get_ref_mv(x, 0);
-
+      WarpedMotionParams ref_warp_model =
+          x->mbmi_ext_frame->warp_param_stack[mbmi->warp_ref_idx].wm_params;
+      const int_mv ref_mv =
+          get_mv_from_wrl(xd, &ref_warp_model, mbmi->pb_mv_precision, bsize,
+                          xd->mi_col, xd->mi_row);
       av1_encode_mv(cpi, w,
 #if CONFIG_FLEX_MVRES
                     mbmi->mv[0].as_mv, ref_mv.as_mv,
-
 #else
-                    &mbmi->mv[0].as_mv, &ref_mv.as_mv,
+                    &mbmi->mv[0].as_mv, &ref_mv.as_mv,
 #endif
                     nmvc,
 #if CONFIG_FLEX_MVRES
@@ -2038,6 +2231,81 @@
                     allow_hp);
 #endif
     }
+
+    else {
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+
+      if (have_newmv_in_each_reference(mode)) {
+        for (ref = 0; ref < 1 + is_compound; ++ref) {
+          nmv_context *nmvc = &ec_ctx->nmvc;
+          const int_mv ref_mv = get_ref_mv(x, ref);
+
+          av1_encode_mv(cpi, w,
+#if CONFIG_FLEX_MVRES
+                        mbmi->mv[ref].as_mv, ref_mv.as_mv,
+#else
+                      &mbmi->mv[ref].as_mv, &ref_mv.as_mv,
+#endif
+                        nmvc,
+#if CONFIG_FLEX_MVRES
+                        pb_mv_precision);
+#else
+                      allow_hp);
+#endif
+        }
+      } else if (mode == NEAR_NEWMV
+#if CONFIG_OPTFLOW_REFINEMENT
+                 || mode == NEAR_NEWMV_OPTFLOW
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+#if CONFIG_JOINT_MVD
+                 || (is_joint_mvd_coding_mode(mode) && jmvd_base_ref_list == 1)
+#endif  // CONFIG_JOINT_MVD
+      ) {
+        nmv_context *nmvc = &ec_ctx->nmvc;
+        const int_mv ref_mv = get_ref_mv(x, 1);
+
+        av1_encode_mv(cpi, w,
+#if CONFIG_FLEX_MVRES
+                      mbmi->mv[1].as_mv, ref_mv.as_mv,
+#else
+                    &mbmi->mv[1].as_mv, &ref_mv.as_mv,
+#endif
+                      nmvc,
+#if CONFIG_FLEX_MVRES
+                      pb_mv_precision);
+#else
+                    allow_hp);
+#endif
+
+      } else if (mode == NEW_NEARMV
+#if CONFIG_OPTFLOW_REFINEMENT
+                 || mode == NEW_NEARMV_OPTFLOW
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+#if CONFIG_JOINT_MVD
+                 || (is_joint_mvd_coding_mode(mode) && jmvd_base_ref_list == 0)
+#endif  // CONFIG_JOINT_MVD
+      ) {
+        nmv_context *nmvc = &ec_ctx->nmvc;
+        const int_mv ref_mv = get_ref_mv(x, 0);
+
+        av1_encode_mv(cpi, w,
+#if CONFIG_FLEX_MVRES
+                      mbmi->mv[0].as_mv, ref_mv.as_mv,
+
+#else
+                    &mbmi->mv[0].as_mv, &ref_mv.as_mv,
+#endif
+                      nmvc,
+#if CONFIG_FLEX_MVRES
+                      pb_mv_precision);
+#else
+                    allow_hp);
+#endif
+      }
+
+#if CONFIG_CWG_D067_IMPROVED_WARP
+    }
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
 #if CONFIG_BAWP && !CONFIG_WARPMV
     if (cm->features.enable_bawp &&
         av1_allow_bawp(mbmi, xd->mi_row, xd->mi_col)) {
@@ -2083,6 +2351,12 @@
     if (mbmi->ref_frame[1] != INTRA_FRAME) write_motion_mode(cm, xd, mbmi, w);
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
 
+#if CONFIG_REFINEMV
+    if (!mbmi->skip_mode) {
+      write_refinemv_flag(cm, xd, w, bsize);
+    }
+#endif  // CONFIG_REFINEMV
+
     // First write idx to indicate current compound inter prediction mode
     // group Group A (0): dist_wtd_comp, compound_average Group B (1):
     // interintra, compound_diffwtd, wedge
@@ -2091,6 +2365,9 @@
 #if CONFIG_OPTFLOW_REFINEMENT
         && mbmi->mode < NEAR_NEARMV_OPTFLOW
 #endif  // CONFIG_OPTFLOW_REFINEMENT
+#if CONFIG_REFINEMV
+        && (!mbmi->refinemv_flag || !switchable_refinemv_flag(cm, mbmi))
+#endif  // CONFIG_REFINEMV
 #if IMPROVED_AMVD && CONFIG_JOINT_MVD
         && !is_joint_amvd_coding_mode(mbmi->mode)
 #endif  // IMPROVED_AMVD && CONFIG_JOINT_MVD
@@ -2139,17 +2416,25 @@
         }
       }
     }
+#if CONFIG_CWP
+    if (cm->features.enable_cwp && is_cwp_allowed(mbmi) && !mbmi->skip_mode)
+      write_cwp_idx(xd, w, cm, mbmi);
+#endif  // CONFIG_CWP
     write_mb_interp_filter(cm, xd, w);
   }
 }
 
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
 static void write_intrabc_drl_idx(int max_ref_bv_num, FRAME_CONTEXT *ec_ctx,
                                   const MB_MODE_INFO *mbmi,
                                   const MB_MODE_INFO_EXT_FRAME *mbmi_ext_frame,
                                   aom_writer *w) {
   assert(!mbmi->skip_mode);
+#if CONFIG_SEP_COMP_DRL
+  assert(mbmi->intrabc_drl_idx < mbmi_ext_frame->ref_mv_count[0]);
+#else
   assert(mbmi->intrabc_drl_idx < mbmi_ext_frame->ref_mv_count);
+#endif  // CONFIG_SEP_COMP_DRL
   assert(mbmi->intrabc_drl_idx < max_ref_bv_num);
   (void)mbmi_ext_frame;
 
@@ -2161,7 +2446,7 @@
     ++bit_cnt;
   }
 }
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
 
 static AOM_INLINE void write_intrabc_info(
     MACROBLOCKD *xd, const MB_MODE_INFO_EXT_FRAME *mbmi_ext_frame,
@@ -2170,12 +2455,14 @@
   int use_intrabc = is_intrabc_block(mbmi, xd->tree_type);
   if (xd->tree_type == CHROMA_PART) assert(use_intrabc == 0);
   FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
+#if !CONFIG_SKIP_TXFM_OPT
 #if CONFIG_NEW_CONTEXT_MODELING
   const int intrabc_ctx = get_intrabc_ctx(xd);
   aom_write_symbol(w, use_intrabc, ec_ctx->intrabc_cdf[intrabc_ctx], 2);
 #else
   aom_write_symbol(w, use_intrabc, ec_ctx->intrabc_cdf, 2);
 #endif  // CONFIG_NEW_CONTEXT_MODELING
+#endif  // !CONFIG_SKIP_TXFM_OPT
 
   if (use_intrabc) {
     assert(mbmi->mode == DC_PRED);
@@ -2185,9 +2472,13 @@
     assert(mbmi->pb_mv_precision == MV_PRECISION_ONE_PEL);
 #endif
 
+#if CONFIG_SEP_COMP_DRL
+    int_mv dv_ref = mbmi_ext_frame->ref_mv_stack[0][0].this_mv;
+#else
     int_mv dv_ref = mbmi_ext_frame->ref_mv_stack[0].this_mv;
+#endif  // CONFIG_SEP_COMP_DRL
 
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
     aom_write_symbol(w, mbmi->intrabc_mode, ec_ctx->intrabc_mode_cdf, 2);
     write_intrabc_drl_idx(MAX_REF_BV_STACK_SIZE, ec_ctx, mbmi, mbmi_ext_frame,
                           w);
@@ -2196,7 +2487,7 @@
       av1_encode_dv(w, &mbmi->mv[0].as_mv, &dv_ref.as_mv, &ec_ctx->ndvc);
 #else
     av1_encode_dv(w, &mbmi->mv[0].as_mv, &dv_ref.as_mv, &ec_ctx->ndvc);
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
   }
 }
 
@@ -2212,8 +2503,25 @@
   if (seg->segid_preskip && seg->update_map)
     write_segment_id(cpi, mbmi, w, seg, segp, 0);
 
-  const int skip = write_skip(cm, xd, mbmi->segment_id, mbmi, w);
+#if CONFIG_SKIP_TXFM_OPT
+  if (av1_allow_intrabc(cm) && xd->tree_type != CHROMA_PART) {
+    const int use_intrabc = is_intrabc_block(mbmi, xd->tree_type);
+    if (xd->tree_type == CHROMA_PART) assert(use_intrabc == 0);
+#if CONFIG_NEW_CONTEXT_MODELING
+    const int intrabc_ctx = get_intrabc_ctx(xd);
+    aom_write_symbol(w, use_intrabc, ec_ctx->intrabc_cdf[intrabc_ctx], 2);
+#else
+    aom_write_symbol(w, use_intrabc, ec_ctx->intrabc_cdf, 2);
+#endif  // CONFIG_NEW_CONTEXT_MODELING
+  }
 
+  int skip = 0;
+  if (is_intrabc_block(mbmi, xd->tree_type)) {
+    skip = write_skip(cm, xd, mbmi->segment_id, mbmi, w);
+  }
+#else
+  const int skip = write_skip(cm, xd, mbmi->segment_id, mbmi, w);
+#endif  // CONFIG_SKIP_TXFM_OPT
   if (!seg->segid_preskip && seg->update_map)
     write_segment_id(cpi, mbmi, w, seg, segp, skip);
 
@@ -2550,11 +2858,11 @@
       av1_get_block_dimensions(mbmi->sb_type[plane], plane, xd, NULL, NULL,
                                &rows, &cols);
       assert(*tok < tok_end);
-#if CONFIG_NEW_COLOR_MAP_CODING
+#if CONFIG_PALETTE_IMPROVEMENTS
       pack_map_tokens(w, tok, palette_size_plane, cols, rows);
 #else
       pack_map_tokens(w, tok, palette_size_plane, rows * cols);
-#endif  // CONFIG_NEW_COLOR_MAP_CODING
+#endif  // CONFIG_PALETTE_IMPROVEMENTS
     }
   }
 
@@ -2597,7 +2905,8 @@
     write_tokens_b(cpi, w, tok, tok_end);
   }
 #if CONFIG_PC_WIENER
-  else {
+  else if (!is_global_intrabc_allowed(cm) && !cm->features.coded_lossless) {
+    // Assert only when LR is enabled.
     assert(1 == av1_get_txk_skip(cm, xd->mi_row, xd->mi_col, 0, 0, 0));
   }
 #endif  // CONFIG_PC_WIENER
@@ -2614,36 +2923,28 @@
                                        const PARTITION_TREE *ptree_luma,
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
                                        aom_writer *w) {
-  if (!is_partition_point(bsize)) {
-    return;
-  }
-
   const int plane = xd->tree_type == CHROMA_PART;
-  if (bsize == BLOCK_8X8 && plane > 0) {
-    return;
-  }
+#if !CONFIG_EXT_RECUR_PARTITIONS
+  if (!is_partition_point(bsize)) return;
+  if (bsize == BLOCK_8X8 && plane > 0) return;
+#endif  // !CONFIG_EXT_RECUR_PARTITIONS
 
 #if CONFIG_EXT_RECUR_PARTITIONS
   const int ssx = cm->seq_params.subsampling_x;
   const int ssy = cm->seq_params.subsampling_y;
-  if (is_luma_chroma_share_same_partition(xd->tree_type, ptree_luma, bsize)) {
-    assert(p ==
-           sdp_chroma_part_from_luma(bsize, ptree_luma->partition, ssx, ssy));
-    return;
-  }
-
-  PARTITION_TYPE implied_partition;
-  const bool is_part_implied = is_partition_implied_at_boundary(
-      &cm->mi_params, xd->tree_type, ssx, ssy, mi_row, mi_col, bsize,
-      &ptree->chroma_ref_info, &implied_partition);
-  if (is_part_implied) {
-    assert(p == implied_partition);
+  const PARTITION_TYPE derived_partition =
+      av1_get_normative_forced_partition_type(
+          &cm->mi_params, xd->tree_type, ssx, ssy, mi_row, mi_col, bsize,
+          ptree_luma, &ptree->chroma_ref_info);
+  if (derived_partition != PARTITION_INVALID) {
+    assert(p == derived_partition);
     return;
   }
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
 
   const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
   FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
+
 #if CONFIG_EXT_RECUR_PARTITIONS
   const bool do_split = p != PARTITION_NONE;
   aom_write_symbol(w, do_split, ec_ctx->do_split_cdf[plane][ctx], 2);
@@ -2658,20 +2959,42 @@
                      ec_ctx->do_square_split_cdf[plane][square_split_ctx], 2);
   }
   if (do_square_split) {
+    assert(p == PARTITION_SPLIT);
     return;
   }
   RECT_PART_TYPE rect_type = get_rect_part_type(p);
   if (rect_type_implied_by_bsize(bsize, xd->tree_type) == RECT_INVALID) {
-    aom_write_symbol(w, rect_type, ec_ctx->rect_type_cdf[plane][ctx], 2);
+    aom_write_symbol(w, rect_type, ec_ctx->rect_type_cdf[plane][ctx],
+                     NUM_RECT_PARTS);
   }
-  const bool disable_ext_part = !cm->seq_params.enable_ext_partitions;
   const bool ext_partition_allowed =
-      !disable_ext_part &&
+      cm->seq_params.enable_ext_partitions &&
       is_ext_partition_allowed(bsize, rect_type, xd->tree_type);
   if (ext_partition_allowed) {
     const bool do_ext_partition = (p >= PARTITION_HORZ_3);
     aom_write_symbol(w, do_ext_partition,
                      ec_ctx->do_ext_partition_cdf[plane][rect_type][ctx], 2);
+#if CONFIG_UNEVEN_4WAY
+    if (do_ext_partition) {
+      const bool uneven_4way_partition_allowed =
+          is_uneven_4way_partition_allowed(bsize, rect_type, xd->tree_type);
+      if (uneven_4way_partition_allowed) {
+        const bool do_uneven_4way_partition = (p >= PARTITION_HORZ_4A);
+        aom_write_symbol(
+            w, do_uneven_4way_partition,
+            ec_ctx->do_uneven_4way_partition_cdf[plane][rect_type][ctx], 2);
+        if (do_uneven_4way_partition) {
+          const UNEVEN_4WAY_PART_TYPE uneven_4way_type =
+              (p == PARTITION_HORZ_4A || p == PARTITION_VERT_4A) ? UNEVEN_4A
+                                                                 : UNEVEN_4B;
+          aom_write_symbol(
+              w, uneven_4way_type,
+              ec_ctx->uneven_4way_partition_type_cdf[plane][rect_type][ctx],
+              NUM_UNEVEN_4WAY_PARTS);
+        }
+      }
+    }
+#endif  // CONFIG_UNEVEN_4WAY
   }
 #else   // CONFIG_EXT_RECUR_PARTITIONS
   const int hbs_w = mi_size_wide[bsize] / 2;
@@ -2728,13 +3051,18 @@
   assert(bsize < BLOCK_SIZES_ALL);
   const int hbs_w = mi_size_wide[bsize] / 2;
   const int hbs_h = mi_size_high[bsize] / 2;
-#if !CONFIG_H_PARTITION
+#if CONFIG_UNEVEN_4WAY
+  const int ebs_w = mi_size_wide[bsize] / 8;
+  const int ebs_h = mi_size_high[bsize] / 8;
+#endif  // CONFIG_UNEVEN_4WAY
+#if !CONFIG_EXT_RECUR_PARTITIONS
   const int qbs_w = mi_size_wide[bsize] / 4;
   const int qbs_h = mi_size_high[bsize] / 4;
-#endif  // !CONFIG_H_PARTITION
+#endif  // !CONFIG_EXT_RECUR_PARTITIONS
   assert(ptree);
   const PARTITION_TYPE partition = ptree->partition;
   const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
+  if (subsize == BLOCK_INVALID) return;
 
   if (mi_row >= mi_params->mi_rows || mi_col >= mi_params->mi_cols) return;
 
@@ -2743,7 +3071,12 @@
       get_partition_plane_end(xd->tree_type, av1_num_planes(cm));
   for (int plane = plane_start; plane < plane_end; ++plane) {
     int rcol0, rcol1, rrow0, rrow1;
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+    if ((cm->rst_info[plane].frame_restoration_type != RESTORE_NONE ||
+         cm->rst_info[plane].frame_cross_restoration_type != RESTORE_NONE) &&
+#else
     if (cm->rst_info[plane].frame_restoration_type != RESTORE_NONE &&
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
         av1_loop_restoration_corners_in_sb(cm, plane, mi_row, mi_col, bsize,
                                            &rcol0, &rcol1, &rrow0, &rrow1)) {
       const int rstride = cm->rst_info[plane].horz_units_per_tile;
@@ -2762,9 +3095,12 @@
 #if CONFIG_EXT_RECUR_PARTITIONS
   write_partition(cm, xd, mi_row, mi_col, partition, bsize, ptree, ptree_luma,
                   w);
-  if (!is_luma_chroma_share_same_partition(xd->tree_type, ptree_luma, bsize)) {
+  const int track_ptree_luma =
+      is_luma_chroma_share_same_partition(xd->tree_type, ptree_luma, bsize);
+  if (!track_ptree_luma) {
     ptree_luma = NULL;
   }
+  assert(IMPLIES(track_ptree_luma, ptree_luma));
 #else
   write_partition(cm, xd, mi_row, mi_col, partition, bsize, w);
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
@@ -2805,7 +3141,96 @@
 #endif
       break;
 #if CONFIG_EXT_RECUR_PARTITIONS
-#if CONFIG_H_PARTITION
+#if CONFIG_UNEVEN_4WAY
+    case PARTITION_HORZ_4A: {
+      const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_HORZ);
+      const BLOCK_SIZE bsize_med =
+          get_partition_subsize(bsize_big, PARTITION_HORZ);
+      assert(subsize == get_partition_subsize(bsize_med, PARTITION_HORZ));
+      write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[0],
+                     get_partition_subtree_const(ptree_luma, 0), mi_row, mi_col,
+                     subsize);
+      if (mi_row + ebs_h >= mi_params->mi_rows) break;
+      write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[1],
+                     get_partition_subtree_const(ptree_luma, 1), mi_row + ebs_h,
+                     mi_col, bsize_med);
+      if (mi_row + 3 * ebs_h >= mi_params->mi_rows) break;
+      write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[2],
+                     get_partition_subtree_const(ptree_luma, 2),
+                     mi_row + 3 * ebs_h, mi_col, bsize_big);
+      if (mi_row + 7 * ebs_h >= mi_params->mi_rows) break;
+      write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[3],
+                     get_partition_subtree_const(ptree_luma, 3),
+                     mi_row + 7 * ebs_h, mi_col, subsize);
+      break;
+    }
+    case PARTITION_HORZ_4B: {
+      const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_HORZ);
+      const BLOCK_SIZE bsize_med =
+          get_partition_subsize(bsize_big, PARTITION_HORZ);
+      assert(subsize == get_partition_subsize(bsize_med, PARTITION_HORZ));
+      write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[0],
+                     get_partition_subtree_const(ptree_luma, 0), mi_row, mi_col,
+                     subsize);
+      if (mi_row + ebs_h >= mi_params->mi_rows) break;
+      write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[1],
+                     get_partition_subtree_const(ptree_luma, 1), mi_row + ebs_h,
+                     mi_col, bsize_big);
+      if (mi_row + 5 * ebs_h >= mi_params->mi_rows) break;
+      write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[2],
+                     get_partition_subtree_const(ptree_luma, 2),
+                     mi_row + 5 * ebs_h, mi_col, bsize_med);
+      if (mi_row + 7 * ebs_h >= mi_params->mi_rows) break;
+      write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[3],
+                     get_partition_subtree_const(ptree_luma, 3),
+                     mi_row + 7 * ebs_h, mi_col, subsize);
+      break;
+    }
+    case PARTITION_VERT_4A: {
+      const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_VERT);
+      const BLOCK_SIZE bsize_med =
+          get_partition_subsize(bsize_big, PARTITION_VERT);
+      assert(subsize == get_partition_subsize(bsize_med, PARTITION_VERT));
+      write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[0],
+                     get_partition_subtree_const(ptree_luma, 0), mi_row, mi_col,
+                     subsize);
+      if (mi_col + ebs_w >= mi_params->mi_cols) break;
+      write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[1],
+                     get_partition_subtree_const(ptree_luma, 1), mi_row,
+                     mi_col + ebs_w, bsize_med);
+      if (mi_col + 3 * ebs_w >= mi_params->mi_cols) break;
+      write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[2],
+                     get_partition_subtree_const(ptree_luma, 2), mi_row,
+                     mi_col + 3 * ebs_w, bsize_big);
+      if (mi_col + 7 * ebs_w >= mi_params->mi_cols) break;
+      write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[3],
+                     get_partition_subtree_const(ptree_luma, 3), mi_row,
+                     mi_col + 7 * ebs_w, subsize);
+      break;
+    }
+    case PARTITION_VERT_4B: {
+      const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_VERT);
+      const BLOCK_SIZE bsize_med =
+          get_partition_subsize(bsize_big, PARTITION_VERT);
+      assert(subsize == get_partition_subsize(bsize_med, PARTITION_VERT));
+      write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[0],
+                     get_partition_subtree_const(ptree_luma, 0), mi_row, mi_col,
+                     subsize);
+      if (mi_col + ebs_w >= mi_params->mi_cols) break;
+      write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[1],
+                     get_partition_subtree_const(ptree_luma, 1), mi_row,
+                     mi_col + ebs_w, bsize_big);
+      if (mi_col + 5 * ebs_w >= mi_params->mi_cols) break;
+      write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[2],
+                     get_partition_subtree_const(ptree_luma, 2), mi_row,
+                     mi_col + 5 * ebs_w, bsize_med);
+      if (mi_col + 7 * ebs_w >= mi_params->mi_cols) break;
+      write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[3],
+                     get_partition_subtree_const(ptree_luma, 3), mi_row,
+                     mi_col + 7 * ebs_w, subsize);
+      break;
+    }
+#endif  // CONFIG_UNEVEN_4WAY
     case PARTITION_HORZ_3:
     case PARTITION_VERT_3:
       for (int i = 0; i < 4; ++i) {
@@ -2829,36 +3254,6 @@
                        this_mi_col, this_bsize);
       }
       break;
-#else
-    case PARTITION_HORZ_3:
-      write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[0],
-                     get_partition_subtree_const(ptree_luma, 0), mi_row, mi_col,
-                     subsize);
-      if (mi_row + qbs_h >= mi_params->mi_rows) break;
-      write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[1],
-                     get_partition_subtree_const(ptree_luma, 1), mi_row,
-                     mi_row + qbs_h, mi_col,
-                     get_partition_subsize(bsize, PARTITION_HORZ));
-      if (mi_row + 3 * qbs_h >= mi_params->mi_rows) break;
-      write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[2],
-                     get_partition_subtree_const(ptree_luma, 2), mi_row,
-                     mi_row + 3 * qbs_h, mi_col, subsize);
-      break;
-    case PARTITION_VERT_3:
-      write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[0],
-                     get_partition_subtree_const(ptree_luma, 0), mi_row, mi_col,
-                     subsize);
-      if (mi_col + qbs_w >= mi_params->mi_cols) break;
-      write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[1],
-                     get_partition_subtree_const(ptree_luma, 1), mi_row,
-                     mi_col + qbs_w,
-                     get_partition_subsize(bsize, PARTITION_VERT));
-      if (mi_col + 3 * qbs_w >= mi_params->mi_cols) break;
-      write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[2],
-                     get_partition_subtree_const(ptree_luma, 2), mi_row,
-                     mi_col + 3 * qbs_w, subsize);
-      break;
-#endif  // CONFIG_H_PARTITION
     case PARTITION_SPLIT:
       write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[0],
                      get_partition_subtree_const(ptree_luma, 0), mi_row, mi_col,
@@ -2873,7 +3268,7 @@
                      get_partition_subtree_const(ptree_luma, 3), mi_row + hbs_h,
                      mi_col + hbs_w, subsize);
       break;
-#else
+#else   // CONFIG_EXT_RECUR_PARTITIONS
     case PARTITION_SPLIT:
       write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[0], mi_row,
                      mi_col, subsize);
@@ -3024,11 +3419,25 @@
   if (!cm->seq_params.enable_restoration) return;
   if (is_global_intrabc_allowed(cm)) return;
   const int num_planes = av1_num_planes(cm);
+#if CONFIG_FLEXIBLE_RU_SIZE
+  int luma_none = 1, chroma_none = 1;
+#else
   int all_none = 1, chroma_none = 1;
+#endif  // CONFIG_FLEXIBLE_RU_SIZE
   for (int p = 0; p < num_planes; ++p) {
     RestorationInfo *rsi = &cm->rst_info[p];
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+    if (rsi->frame_restoration_type != RESTORE_NONE ||
+        rsi->frame_cross_restoration_type != RESTORE_NONE) {
+      if (p == 0) assert(rsi->frame_cross_restoration_type == RESTORE_NONE);
+#else
     if (rsi->frame_restoration_type != RESTORE_NONE) {
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+#if CONFIG_FLEXIBLE_RU_SIZE
+      luma_none &= p > 0;
+#else
       all_none = 0;
+#endif  // CONFIG_FLEXIBLE_RU_SIZE
       chroma_none &= p == 0;
     }
 #if CONFIG_LR_FLEX_SYNTAX
@@ -3046,11 +3455,17 @@
         aom_wb_write_bit(wb, 0);
       } else {
         aom_wb_write_bit(wb, 1);
+        int tools_count = cm->features.lr_tools_count[p];
         for (int i = 1; i < RESTORE_SWITCHABLE_TYPES; ++i) {
           if (!(plane_lr_tools_disable_mask & (1 << i))) {
-            aom_wb_write_bit(wb, ((sw_lr_tools_disable_mask >> i) & 1));
+            const int disable_tool = (sw_lr_tools_disable_mask >> i) & 1;
+            aom_wb_write_bit(wb, disable_tool);
             plane_lr_tools_disable_mask |=
                 (sw_lr_tools_disable_mask & (1 << i));
+            tools_count -= disable_tool;
+            // If tools_count becomes 2, break from the loop, since we
+            // do not allow any other tool to be disabled.
+            if (tools_count == 2) break;
           }
         }
         av1_set_lr_tools(plane_lr_tools_disable_mask, p, &cm->features);
@@ -3107,7 +3522,29 @@
                                              ? NUM_WIENERNS_CLASS_INIT_LUMA
                                              : NUM_WIENERNS_CLASS_INIT_CHROMA));
 #endif  // CONFIG_WIENER_NONSEP
+
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+    if (p > 0) {
+      aom_wb_write_bit(wb, rsi->frame_cross_restoration_type != RESTORE_NONE);
+    }
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
   }
+#if CONFIG_FLEXIBLE_RU_SIZE
+  int size = cm->rst_info[0].max_restoration_unit_size;
+  if (!luma_none) {
+    aom_wb_write_bit(wb, cm->rst_info[0].restoration_unit_size == size >> 1);
+    if (cm->rst_info[0].restoration_unit_size != size >> 1)
+      aom_wb_write_bit(wb, cm->rst_info[0].restoration_unit_size == size);
+  }
+  if (!chroma_none) {
+    size = cm->rst_info[1].max_restoration_unit_size;
+    aom_wb_write_bit(wb, cm->rst_info[1].restoration_unit_size == size >> 1);
+    if (cm->rst_info[1].restoration_unit_size != size >> 1)
+      aom_wb_write_bit(wb, cm->rst_info[1].restoration_unit_size == size);
+    assert(cm->rst_info[2].restoration_unit_size ==
+           cm->rst_info[1].restoration_unit_size);
+  }
+#else
   if (!all_none) {
 #if CONFIG_BLOCK_256
     assert(cm->sb_size == BLOCK_64X64 || cm->sb_size == BLOCK_128X128 ||
@@ -3161,6 +3598,7 @@
              cm->rst_info[1].restoration_unit_size);
     }
   }
+#endif  // CONFIG_FLEXIBLE_RU_SIZE
 }
 
 static AOM_INLINE void write_wiener_filter(MACROBLOCKD *xd, int wiener_win,
@@ -3313,8 +3751,9 @@
     aom_write_literal(wb, match, 1);
     if (match) break;
   }
-  assert(
-      IMPLIES(!match, ref == bank->bank_size_for_class[wiener_class_id] - 1));
+  assert(IMPLIES(
+      !match,
+      ref == AOMMAX(0, bank->bank_size_for_class[wiener_class_id] - 1)));
   return exact_match;
 }
 #endif  // CONFIG_LR_MERGE_COEFFS
@@ -3323,7 +3762,12 @@
     MACROBLOCKD *xd, int plane, const WienerNonsepInfo *wienerns_info,
     WienerNonsepInfoBank *bank, aom_writer *wb) {
   const WienernsFilterParameters *nsfilter_params =
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+      get_wienerns_parameters(xd->current_base_qindex, plane != AOM_PLANE_Y,
+                              wienerns_info->is_cross_filter);
+#else
       get_wienerns_parameters(xd->current_base_qindex, plane != AOM_PLANE_Y);
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
   int skip_filter_write_for_class[WIENERNS_MAX_CLASSES] = { 0 };
   int ref_for_class[WIENERNS_MAX_CLASSES] = { 0 };
 #if CONFIG_LR_MERGE_COEFFS
@@ -3344,7 +3788,6 @@
   for (int c_id = 0; c_id < num_classes; ++c_id) {
     if (skip_filter_write_for_class[c_id]) continue;
     const int ref = ref_for_class[c_id];
-
     const WienerNonsepInfo *ref_wienerns_info =
         av1_constref_from_wienerns_bank(bank, ref, c_id);
     const int16_t *wienerns_info_nsfilter =
@@ -3437,13 +3880,25 @@
     aom_writer *const w, int plane, FRAME_COUNTS *counts) {
   const RestorationInfo *rsi = cm->rst_info + plane;
   RestorationType frame_rtype = rsi->frame_restoration_type;
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+  RestorationType frame_cross_rtype = rsi->frame_cross_restoration_type;
+  RestorationType unit_cross_rtype = rui->cross_restoration_type;
+  assert(frame_rtype != RESTORE_NONE || frame_cross_rtype != RESTORE_NONE);
+#else
   assert(frame_rtype != RESTORE_NONE);
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
 
   (void)counts;
   assert(!cm->features.all_lossless);
 
   const int wiener_win = (plane > 0) ? WIENER_WIN_CHROMA : WIENER_WIN;
   RestorationType unit_rtype = rui->restoration_type;
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+  WienerNonsepInfo *info = (WienerNonsepInfo *)&rui->wienerns_info;
+  info->is_cross_filter = 0;
+  info = (WienerNonsepInfo *)&rui->wienerns_cross_info;
+  info->is_cross_filter = 1;
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
 #if CONFIG_LR_FLEX_SYNTAX
   assert(((cm->features.lr_tools_disable_mask[plane] >> rui->restoration_type) &
           1) == 0);
@@ -3533,6 +3988,19 @@
     }
 #endif  // CONFIG_PC_WIENER
   }
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+  if (frame_cross_rtype == RESTORE_WIENER_NONSEP) {
+    aom_write_symbol(w, unit_cross_rtype != RESTORE_NONE,
+                     xd->tile_ctx->wienerns_restore_cdf, 2);
+#if CONFIG_ENTROPY_STATS
+    ++counts->wienerns_restore[unit_cross_rtype != RESTORE_NONE];
+#endif  // CONFIG_ENTROPY_STATS
+    if (unit_cross_rtype != RESTORE_NONE) {
+      write_wienerns_filter(xd, plane, &rui->wienerns_cross_info,
+                            &xd->wienerns_cross_info[plane], w);
+    }
+  }
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
 }
 
 static AOM_INLINE void encode_loopfilter(AV1_COMMON *cm,
@@ -4378,7 +4846,7 @@
     aom_wb_write_bit(wb, uv_neq_y);
     if (uv_neq_y) {
       for (int i = 1; i < RESTORE_SWITCHABLE_TYPES; ++i) {
-        if (DEF_UV_LR_TOOLS_DISABLE_MASK | (1 << i)) continue;
+        if (DEF_UV_LR_TOOLS_DISABLE_MASK & (1 << i)) continue;
         aom_wb_write_bit(wb, (seq_params->lr_tools_disable_mask[1] >> i) & 1);
       }
     }
@@ -4392,6 +4860,10 @@
   aom_wb_write_bit(wb, seq_params->enable_refmvbank);
 #endif  // CONFIG_REF_MV_BANK
   aom_wb_write_bit(wb, seq_params->explicit_ref_frame_map);
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+  // 0 : show_existing_frame, 1: implicit derivation
+  aom_wb_write_bit(wb, seq_params->enable_frame_output_order);
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
   // A bit is sent here to indicate if the max number of references is 7. If
   // this bit is 0, then two more bits are sent to indicate the exact number
   // of references allowed (range: 3 to 6).
@@ -4416,6 +4888,12 @@
 #if CONFIG_BAWP
   aom_wb_write_bit(wb, seq_params->enable_bawp);
 #endif  // CONFIG_BAWP
+#if CONFIG_CWP
+  aom_wb_write_bit(wb, seq_params->enable_cwp);
+#endif  // CONFIG_CWP
+#if CONFIG_D071_IMP_MSK_BLD
+  aom_wb_write_bit(wb, seq_params->enable_imp_msk_bld);
+#endif  // CONFIG_D071_IMP_MSK_BLD
   aom_wb_write_bit(wb, seq_params->enable_fsc);
 #if CONFIG_CCSO
   aom_wb_write_bit(wb, seq_params->enable_ccso);
@@ -4426,6 +4904,9 @@
 #if CONFIG_ORIP
   aom_wb_write_bit(wb, seq_params->enable_orip);
 #endif
+#if CONFIG_IDIF
+  aom_wb_write_bit(wb, seq_params->enable_idif);
+#endif  // CONFIG_IDIF
 #if CONFIG_OPTFLOW_REFINEMENT
   if (seq_params->order_hint_info.enable_order_hint)
     aom_wb_write_literal(wb, seq_params->enable_opfl_refine, 2);
@@ -4435,6 +4916,10 @@
   aom_wb_write_bit(wb, seq_params->enable_adaptive_mvd);
 #endif  // CONFIG_ADAPTIVE_MVD
 
+#if CONFIG_REFINEMV
+  aom_wb_write_bit(wb, seq_params->enable_refinemv);
+#endif  // CONFIG_REFINEMV
+
 #if CONFIG_FLEX_MVRES
   aom_wb_write_bit(wb, seq_params->enable_flex_mvres);
 #endif  // CONFIG_FLEX_MVRES
@@ -4449,22 +4934,41 @@
 #if CONFIG_EXT_RECUR_PARTITIONS
   aom_wb_write_bit(wb, seq_params->enable_ext_partitions);
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+  if (seq_params->reduced_still_picture_hdr) {
+    assert(seq_params->enable_global_motion == 0);
+  } else {
+    aom_wb_write_bit(wb, seq_params->enable_global_motion);
+  }
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
 }
 
 static AOM_INLINE void write_global_motion_params(
     const WarpedMotionParams *params, const WarpedMotionParams *ref_params,
 #if !CONFIG_FLEX_MVRES
     struct aom_write_bit_buffer *wb, int allow_hp) {
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+  (void)allow_hp;
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
 #else
     struct aom_write_bit_buffer *wb, MvSubpelPrecision precision) {
   const int precision_loss = get_gm_precision_loss(precision);
-#endif
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+  (void)precision_loss;
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
+#endif  // !CONFIG_FLEX_MVRES
   const TransformationType type = params->wmtype;
 
   aom_wb_write_bit(wb, type != IDENTITY);
   if (type != IDENTITY) {
     aom_wb_write_bit(wb, type == ROTZOOM);
-    if (type != ROTZOOM) aom_wb_write_bit(wb, type == TRANSLATION);
+    if (type != ROTZOOM) {
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+      assert(type == AFFINE);
+#else
+      aom_wb_write_bit(wb, type == TRANSLATION);
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
+    }
   }
 
   if (type >= ROTZOOM) {
@@ -4492,6 +4996,10 @@
   }
 
   if (type >= TRANSLATION) {
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+    const int trans_prec_diff = GM_TRANS_PREC_DIFF;
+    const int trans_max = GM_TRANS_MAX;
+#else
 #if CONFIG_FLEX_MVRES
     const int trans_bits = (type == TRANSLATION)
                                ? GM_ABS_TRANS_ONLY_BITS - precision_loss
@@ -4506,14 +5014,16 @@
     const int trans_prec_diff = (type == TRANSLATION)
                                     ? GM_TRANS_ONLY_PREC_DIFF + !allow_hp
                                     : GM_TRANS_PREC_DIFF;
-#endif
+#endif  // CONFIG_FLEX_MVRES
+    const int trans_max = (1 << trans_bits);
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
 
     aom_wb_write_signed_primitive_refsubexpfin(
-        wb, (1 << trans_bits) + 1, SUBEXPFIN_K,
+        wb, trans_max + 1, SUBEXPFIN_K,
         (ref_params->wmmat[0] >> trans_prec_diff),
         (params->wmmat[0] >> trans_prec_diff));
     aom_wb_write_signed_primitive_refsubexpfin(
-        wb, (1 << trans_bits) + 1, SUBEXPFIN_K,
+        wb, trans_max + 1, SUBEXPFIN_K,
         (ref_params->wmmat[1] >> trans_prec_diff),
         (params->wmmat[1] >> trans_prec_diff));
   }
@@ -4522,11 +5032,88 @@
 static AOM_INLINE void write_global_motion(AV1_COMP *cpi,
                                            struct aom_write_bit_buffer *wb) {
   AV1_COMMON *const cm = &cpi->common;
+  int num_total_refs = cm->ref_frames_info.num_total_refs;
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+  assert(cm->cur_frame->num_ref_frames == num_total_refs);
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
   int frame;
-  for (frame = 0; frame < cm->ref_frames_info.num_total_refs; ++frame) {
+
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+  const SequenceHeader *const seq_params = &cm->seq_params;
+  if (!seq_params->enable_global_motion) {
+    return;
+  }
+
+  bool use_global_motion = false;
+  for (frame = 0; frame < num_total_refs; ++frame) {
+    if (cm->global_motion[frame].wmtype != IDENTITY) {
+      use_global_motion = true;
+      break;
+    }
+  }
+
+  aom_wb_write_bit(wb, use_global_motion);
+  if (!use_global_motion) {
+    return;
+  }
+
+  int our_ref = cpi->gm_info.base_model_our_ref;
+  int their_ref = cpi->gm_info.base_model_their_ref;
+  aom_wb_write_primitive_quniform(wb, num_total_refs + 1, our_ref);
+  if (our_ref >= num_total_refs) {
+    // Special case: Use IDENTITY model
+    // Nothing more to code
+    assert(their_ref == -1);
+  } else {
+    RefCntBuffer *buf = get_ref_frame_buf(cm, our_ref);
+    assert(buf);
+    int their_num_refs = buf->num_ref_frames;
+    if (their_num_refs == 0) {
+      // Special case: if an intra/key frame is used as a ref, use an
+      // IDENTITY model
+      // Nothing more to code
+      assert(their_ref == -1);
+    } else {
+      aom_wb_write_primitive_quniform(wb, their_num_refs, their_ref);
+    }
+  }
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
+
+  for (frame = 0; frame < num_total_refs; ++frame) {
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+    int temporal_distance;
+    if (seq_params->order_hint_info.enable_order_hint) {
+      const RefCntBuffer *const ref_buf = get_ref_frame_buf(cm, frame);
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+      const int ref_order_hint = ref_buf->display_order_hint;
+      const int cur_order_hint = cm->cur_frame->display_order_hint;
+#else
+        const int ref_order_hint = ref_buf->order_hint;
+        const int cur_order_hint = cm->cur_frame->order_hint;
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+      temporal_distance = get_relative_dist(&seq_params->order_hint_info,
+                                            cur_order_hint, ref_order_hint);
+    } else {
+      temporal_distance = 1;
+    }
+
+    if (temporal_distance == 0) {
+      // Don't code global motion for frames at the same temporal instant
+      assert(cm->global_motion[frame].wmtype == IDENTITY);
+      continue;
+    }
+
+    WarpedMotionParams ref_params_;
+    av1_scale_warp_model(&cm->base_global_motion_model,
+                         cm->base_global_motion_distance, &ref_params_,
+                         temporal_distance);
+    WarpedMotionParams *ref_params = &ref_params_;
+#else
     const WarpedMotionParams *ref_params =
         cm->prev_frame ? &cm->prev_frame->global_motion[frame]
                        : &default_warp_params;
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
+
     write_global_motion_params(&cm->global_motion[frame], ref_params, wb,
 #if !CONFIG_FLEX_MVRES
                                cm->features.allow_high_precision_mv);
@@ -4610,11 +5197,7 @@
     }
   }
   aom_wb_write_bit(wb, features->disable_cdf_update);
-#if DS_FRAME_LEVEL
-  if (current_frame->frame_type == KEY_FRAME) {
-    aom_wb_write_literal(wb, features->ds_filter_type, 2);
-  }
-#endif  // DS_FRAME_LEVEL
+
   if (seq_params->force_screen_content_tools == 2) {
     aom_wb_write_bit(wb, features->allow_screen_content_tools);
   } else {
@@ -4734,11 +5317,11 @@
       if (features->allow_global_intrabc) {
         aom_wb_write_bit(wb, features->allow_local_intrabc);
       }
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
       aom_wb_write_primitive_quniform(
           wb, MAX_MAX_DRL_BITS - MIN_MAX_DRL_BITS + 1,
           features->max_drl_bits - MIN_MAX_DRL_BITS);
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
     }
 #endif  // CONFIG_IBC_SR_EXT
   } else {
@@ -4753,11 +5336,11 @@
         if (features->allow_global_intrabc) {
           aom_wb_write_bit(wb, features->allow_local_intrabc);
         }
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
         aom_wb_write_primitive_quniform(
             wb, MAX_MAX_DRL_BITS - MIN_MAX_DRL_BITS + 1,
             features->max_drl_bits - MIN_MAX_DRL_BITS);
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
       }
 #endif  // CONFIG_IBC_SR_EXT
     } else if (current_frame->frame_type == INTER_FRAME ||
@@ -4997,6 +5580,15 @@
     aom_wb_write_bit(wb, features->enable_bawp);
 #endif  // CONFIG_BAWP
 
+#if CONFIG_CWG_D067_IMPROVED_WARP
+  if (!frame_is_intra_only(cm) &&
+      (features->enabled_motion_modes & (1 << WARP_DELTA)) != 0) {
+    aom_wb_write_bit(wb, features->allow_warpmv_mode);
+  } else {
+    assert(IMPLIES(!frame_is_intra_only(cm), !features->allow_warpmv_mode));
+  }
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+
   aom_wb_write_bit(wb, features->reduced_tx_set_used);
 
   if (!frame_is_intra_only(cm)) write_global_motion(cpi, wb);
@@ -5881,7 +6473,16 @@
   }
 
   const int write_frame_header =
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+      (cpi->num_tg > 1 ||
+       (encode_show_existing_frame(cm) &&
+        (!cm->seq_params.order_hint_info.enable_order_hint ||
+         !cm->seq_params.enable_frame_output_order)) ||
+       (encode_show_existing_frame(cm) &&
+        cm->cur_frame->frame_type == KEY_FRAME)
+#else   // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
       (cpi->num_tg > 1 || encode_show_existing_frame(cm)
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
 #if CONFIG_TIP
        || (cm->features.tip_frame_mode == TIP_FRAME_AS_OUTPUT)
 #endif  // CONFIG_TIP
@@ -5907,7 +6508,19 @@
     data += fh_info.total_length;
   }
 
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+  // When enable_frame_output_order == 1, the OBU packet of show_existing_frame
+  // is not signaled for non-error-resilient mode.
+  // For error-resilient mode, an OBU is still signaled.
+  if ((cm->seq_params.order_hint_info.enable_order_hint &&
+       cm->seq_params.enable_frame_output_order && cm->show_existing_frame &&
+       !cm->features.error_resilient_mode) ||
+      ((!cm->seq_params.order_hint_info.enable_order_hint ||
+        !cm->seq_params.enable_frame_output_order) &&
+       encode_show_existing_frame(cm))
+#else   // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
   if (encode_show_existing_frame(cm)
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
 #if CONFIG_TIP
       || (cm->features.tip_frame_mode == TIP_FRAME_AS_OUTPUT)
 #endif  // CONFIG_TIP
@@ -5916,7 +6529,7 @@
   } else {
     // Since length_field is determined adaptively after frame header
     // encoding, saved_wb must be adjusted accordingly.
-    saved_wb.bit_buffer += length_field;
+    if (saved_wb.bit_buffer) saved_wb.bit_buffer += length_field;
 
     //  Each tile group obu will be preceded by 4-byte size of the tile group
     //  obu
diff --git a/av1/encoder/bitstream.h b/av1/encoder/bitstream.h
index 7257870..7148835 100644
--- a/av1/encoder/bitstream.h
+++ b/av1/encoder/bitstream.h
@@ -49,7 +49,12 @@
                            aom_writer *w);
 
 void av1_write_tx_type(const AV1_COMMON *const cm, const MACROBLOCKD *xd,
-                       TX_TYPE tx_type, TX_SIZE tx_size, aom_writer *w);
+                       TX_TYPE tx_type, TX_SIZE tx_size, aom_writer *w
+#if CONFIG_ATC_DCTX_ALIGNED
+                       ,
+                       const int plane, const int eob, const int dc_skip
+#endif  // CONFIG_ATC_DCTX_ALIGNED
+);
 
 #if CONFIG_CROSS_CHROMA_TX
 void av1_write_cctx_type(const AV1_COMMON *const cm, const MACROBLOCKD *xd,
diff --git a/av1/encoder/block.h b/av1/encoder/block.h
index 3051f9d..ac9663e 100644
--- a/av1/encoder/block.h
+++ b/av1/encoder/block.h
@@ -113,6 +113,10 @@
   tran_low_t *coeff;
   //! Location of the end of qcoeff (end of block).
   uint16_t *eobs;
+#if CONFIG_ATC_DCTX_ALIGNED
+  //! Location of the beginning of qcoeff (beginning of block).
+  uint16_t *bobs;
+#endif  // CONFIG_ATC_DCTX_ALIGNED
   //! Contexts used to code the transform coefficients.
   uint8_t *txb_entropy_ctx;
   //! A buffer containing the source frame.
@@ -154,7 +158,7 @@
   //! Cost to skip txfm for the current AOM_PLANE_V txfm block.
   int v_txb_skip_cost[V_TXB_SKIP_CONTEXTS][2];
 #endif  // CONFIG_CONTEXT_DERIVATION
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
   //! Cost for encoding the base_eob level of a low-frequency coefficient
   int base_lf_eob_cost[SIG_COEF_CONTEXTS_EOB][LF_BASE_SYMBOLS - 1];
   //! Cost for encoding the base level of a low-frequency coefficient
@@ -162,7 +166,7 @@
   //! Cost for encoding an increment to the low-frequency coefficient
   int lps_lf_cost[LF_LEVEL_CONTEXTS]
                  [COEFF_BASE_RANGE + 1 + COEFF_BASE_RANGE + 1];
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 #if CONFIG_PAR_HIDING
   //! Cost for encoding the base level of a parity-hidden coefficient
   int base_ph_cost[COEFF_BASE_PH_CONTEXTS][4];
@@ -202,13 +206,24 @@
   //! Cost for encoding an increment to the coefficient for IDTX blocks
   int lps_cost_skip[IDTX_LEVEL_CONTEXTS]
                    [COEFF_BASE_RANGE + 1 + COEFF_BASE_RANGE + 1];
+#if CONFIG_ATC_DCTX_ALIGNED
+  /*! \brief Cost for encoding the base_bob of a level for IDTX blocks.
+   *
+   * Decoder uses base_bob to derive the base_level as base_bob := base_bob+1.
+   */
+  int base_bob_cost[SIG_COEF_CONTEXTS_BOB][3];
+#endif  // CONFIG_ATC_DCTX_ALIGNED
 } LV_MAP_COEFF_COST;
 
 /*! \brief Costs for encoding the eob.
  */
 typedef struct {
   //! eob_cost.
+#if CONFIG_ATC_DCTX_ALIGNED
+  int eob_cost[EOB_MAX_SYMS];
+#else
   int eob_cost[2][11];
+#endif  // CONFIG_ATC_DCTX_ALIGNED
 } LV_MAP_EOB_COST;
 
 /*! \brief Stores the transforms coefficients for the whole superblock.
@@ -218,6 +233,10 @@
   tran_low_t tcoeff[MAX_MB_PLANE][MAX_SB_SQUARE];
   //! Where the transformed coefficients end.
   uint16_t eobs[MAX_MB_PLANE][MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
+#if CONFIG_ATC_DCTX_ALIGNED
+  //! Where the transformed coefficients begin.
+  uint16_t bobs[MAX_MB_PLANE][MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
+#endif  // CONFIG_ATC_DCTX_ALIGNED
   /*! \brief Transform block entropy contexts.
    *
    * Each element is used as a bit field.
@@ -241,7 +260,7 @@
   //! Global mvs
   int_mv global_mvs[INTER_REFS_PER_FRAME];
   //! skip_mvp_candidate_list is the MVP list for skip mode.
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
   SKIP_MODE_MVP_LIST skip_mvp_candidate_list;
 #endif
 
@@ -265,14 +284,23 @@
  * memory.
  */
 typedef struct {
+#if CONFIG_SEP_COMP_DRL
+  //! \copydoc MB_MODE_INFO_EXT::ref_mv_stack
+  CANDIDATE_MV ref_mv_stack[2][USABLE_REF_MV_STACK_SIZE];
+  //! \copydoc MB_MODE_INFO_EXT::weight
+  uint16_t weight[2][USABLE_REF_MV_STACK_SIZE];
+  //! \copydoc MB_MODE_INFO_EXT::ref_mv_count
+  uint8_t ref_mv_count[2];
+#else
   //! \copydoc MB_MODE_INFO_EXT::ref_mv_stack
   CANDIDATE_MV ref_mv_stack[USABLE_REF_MV_STACK_SIZE];
   //! \copydoc MB_MODE_INFO_EXT::weight
   uint16_t weight[USABLE_REF_MV_STACK_SIZE];
   //! \copydoc MB_MODE_INFO_EXT::ref_mv_count
   uint8_t ref_mv_count;
+#endif  // CONFIG_SEP_COMP_DRL
   //! skip_mvp_candidate_list is the MVP list for skip mode.
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
   SKIP_MODE_MVP_LIST skip_mvp_candidate_list;
 #endif
   // TODO(Ravi/Remya): Reduce the buffer size of global_mvs
@@ -339,6 +367,10 @@
   int rate;
   //! Location of the end of non-zero entries.
   uint16_t eob;
+#if CONFIG_ATC_DCTX_ALIGNED
+  //! Location of the beginning of non-zero entries.
+  uint16_t bob;
+#endif  // CONFIG_ATC_DCTX_ALIGNED
   //! Transform type used on the current block.
   TX_TYPE tx_type;
   //! Unknown usage
@@ -390,11 +422,19 @@
   //! Current interpolation filter.
   InterpFilter interp_fltr;
   //! Refmv index in the drl.
+#if CONFIG_SEP_COMP_DRL
+  int ref_mv_idx[2];
+#else
   int ref_mv_idx;
+#endif  // CONFIG_SEP_COMP_DRL
   //! Whether the predictors are GLOBALMV.
   int is_global[2];
   //! Current parameters for interinter mode.
   INTERINTER_COMPOUND_DATA interinter_comp;
+#if CONFIG_CWP
+  //! Index for compound weighted prediction parameters.
+  int cwp_idx;
+#endif  // CONFIG_CWP
 } COMP_RD_STATS;
 
 /*! \brief Contains buffers used to speed up rdopt for obmc.
@@ -495,19 +535,37 @@
 
 /*!\cond */
 #if CONFIG_BLOCK_256
+
 #define BLOCK_256_COUNT 1
 #define BLOCK_128_COUNT 3
 #define BLOCK_64_COUNT 7
+
+#if CONFIG_UNEVEN_4WAY
+#define BLOCK_32_COUNT 31
+#define BLOCK_16_COUNT 63
+#define BLOCK_8_COUNT 64
+#else
 #define BLOCK_32_COUNT 15
 #define BLOCK_16_COUNT 31
-#define BLOCK_8_COUNT 127
-#define BLOCK_4_COUNT 128
+#define BLOCK_8_COUNT 63
+#endif  // CONFIG_UNEVEN_4WAY
+
+#define BLOCK_4_COUNT 64
+
 #else
 #define BLOCK_128_COUNT 1
 #define BLOCK_64_COUNT 3
+
+#if CONFIG_UNEVEN_4WAY
+#define BLOCK_32_COUNT 15
+#define BLOCK_16_COUNT 31
+#define BLOCK_8_COUNT 32
+#else
 #define BLOCK_32_COUNT 7
 #define BLOCK_16_COUNT 15
 #define BLOCK_8_COUNT 31
+#endif  // CONFIG_UNEVEN_4WAY
+
 #define BLOCK_4_COUNT 32
 #endif  // CONFIG_BLOCK_256
 
@@ -768,6 +826,15 @@
   /*! Cost for sending do_ext_partition token. */
   int do_ext_partition_cost[PARTITION_STRUCTURE_NUM][NUM_RECT_PARTS]
                            [PARTITION_CONTEXTS][2];
+#if CONFIG_UNEVEN_4WAY
+  /*! Cost for sending do_uneven_4way_partition token. */
+  int do_uneven_4way_partition_cost[PARTITION_STRUCTURE_NUM][NUM_RECT_PARTS]
+                                   [PARTITION_CONTEXTS][2];
+  /*! Cost for sending uneven_4way_partition_type token. */
+  int uneven_4way_partition_type_cost[PARTITION_STRUCTURE_NUM][NUM_RECT_PARTS]
+                                     [PARTITION_CONTEXTS]
+                                     [NUM_UNEVEN_4WAY_PARTS];
+#endif  // CONFIG_UNEVEN_4WAY
   //! Cost for coding the partition.
   int partition_cost[PARTITION_STRUCTURE_NUM][PARTITION_CONTEXTS]
                     [ALL_PARTITION_TYPES];
@@ -799,7 +866,11 @@
                       [2 * MAX_ANGLE_DELTA + 1];
 
   //! mrl_index_cost
+#if CONFIG_EXT_DIR
+  int mrl_index_cost[MRL_INDEX_CONTEXTS][MRL_LINE_NUMBER];
+#else
   int mrl_index_cost[MRL_LINE_NUMBER];
+#endif  // CONFIG_EXT_DIR
   //! Cost of signaling the forward skip coding mode
   int fsc_cost[FSC_MODE_CONTEXTS][FSC_BSIZE_CONTEXTS][FSC_MODES];
 #if CONFIG_IMPROVED_CFL
@@ -834,12 +905,12 @@
 #else
   int intrabc_cost[2];
 #endif  // CONFIG_NEW_CONTEXT_MODELING
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
   //! intrabc_mode_cost
   int intrabc_mode_cost[2];
   //! intrabc_drl_idx_cost
   int intrabc_drl_idx_cost[MAX_REF_BV_STACK_SIZE - 1][2];
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
 
   //! palette_y_size_cost
   int palette_y_size_cost[PALATTE_BSIZE_CTXS][PALETTE_SIZES];
@@ -855,12 +926,12 @@
   int palette_y_mode_cost[PALATTE_BSIZE_CTXS][PALETTE_Y_MODE_CONTEXTS][2];
   //! palette_uv_mode_cost
   int palette_uv_mode_cost[PALETTE_UV_MODE_CONTEXTS][2];
-#if CONFIG_NEW_COLOR_MAP_CODING
+#if CONFIG_PALETTE_IMPROVEMENTS
   //! palette_y_row_flag_cost
   int palette_y_row_flag_cost[PALETTE_ROW_FLAG_CONTEXTS][2];
   //! palette_uv_row_flag_cost
   int palette_uv_row_flag_cost[PALETTE_ROW_FLAG_CONTEXTS][2];
-#endif  // CONFIG_NEW_COLOR_MAP_CODING
+#endif  // CONFIG_PALETTE_IMPROVEMENTS
   /**@}*/
 
   /*****************************************************************************
@@ -887,10 +958,10 @@
   int pb_block_mv_precision_costs[MV_PREC_DOWN_CONTEXTS][FLEX_MV_COSTS_SIZE]
                                  [NUM_MV_PRECISIONS];
 #endif
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
   //! skip_drl_mode_cost
   int skip_drl_mode_cost[3][2];
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
   /**@}*/
 
   /*****************************************************************************
@@ -924,11 +995,11 @@
    ****************************************************************************/
   /**@{*/
   //! intra_inter_cost
-#if CONFIG_CONTEXT_DERIVATION
+#if CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT
   int intra_inter_cost[INTRA_INTER_SKIP_TXFM_CONTEXTS][INTRA_INTER_CONTEXTS][2];
 #else
   int intra_inter_cost[INTRA_INTER_CONTEXTS][2];
-#endif  // CONFIG_CONTEXT_DERIVATION
+#endif  // CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT
   //! inter_compound_mode_cost
 #if CONFIG_OPTFLOW_REFINEMENT
   /*! use_optflow_cost */
@@ -941,6 +1012,10 @@
   int inter_compound_mode_cost[INTER_COMPOUND_MODE_CONTEXTS]
                               [INTER_COMPOUND_MODES];
 #endif  // CONFIG_OPTFLOW_REFINEMENT
+#if CONFIG_CWP
+  //! cwp_idx_cost for compound weighted prediction
+  int cwp_idx_cost[MAX_CWP_CONTEXTS][MAX_CWP_NUM - 1][2];
+#endif  // CONFIG_CWP
 #if CONFIG_IMPROVED_JMVD && CONFIG_JOINT_MVD
   //! jmvd_scale_mode_cost for JOINT_NEWMV
   int jmvd_scale_mode_cost[JOINT_NEWMV_SCALE_FACTOR_CNT];
@@ -997,11 +1072,20 @@
   int warped_causal_warpmv_cost[BLOCK_SIZES_ALL][2];
 #endif  // CONFIG_WARPMV
 
+#if CONFIG_REFINEMV
+  //! refinemv_flag_cost
+  int refinemv_flag_cost[NUM_REFINEMV_CTX][REFINEMV_NUM_MODES];
+#endif  // CONFIG_REFINEMV
+
   //! warp_delta_param_cost
   int warp_delta_param_cost[2][WARP_DELTA_NUM_SYMBOLS];
 #if CONFIG_WARP_REF_LIST
   //! warp_ref_idx_cost
   int warp_ref_idx_cost[3][WARP_REF_CONTEXTS][2];
+#if CONFIG_CWG_D067_IMPROVED_WARP
+  //! warpmv_with_mvd_flag_cost
+  int warpmv_with_mvd_flag_cost[BLOCK_SIZES_ALL][2];
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
 #endif  // CONFIG_WARP_REF_LIST
   //! warp_extend_cost
   int warp_extend_cost[WARP_EXTEND_CTXS1][WARP_EXTEND_CTXS2][2];
@@ -1045,7 +1129,12 @@
   int txfm_partition_cost[TXFM_PARTITION_CONTEXTS][2];
 #endif  // CONFIG_NEW_TX_PARTITION
   //! inter_tx_type_costs
+#if CONFIG_ATC_DCTX_ALIGNED
+  int inter_tx_type_costs[EXT_TX_SETS_INTER][EOB_TX_CTXS][EXT_TX_SIZES]
+                         [TX_TYPES];
+#else
   int inter_tx_type_costs[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES];
+#endif  // CONFIG_ATC_DCTX_ALIGNED
   //! intra_tx_type_costs
   int intra_tx_type_costs[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES]
                          [TX_TYPES];
@@ -1144,7 +1233,7 @@
   int *amvd_nmv_cost[2];
 #endif  // CONFIG_ADAPTIVE_MVD
 
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
   /*! Costs for coding the zero components of dv cost. */
   int *dv_joint_cost;
 
@@ -1226,7 +1315,7 @@
 } IntraBCMvCosts;
 #endif
 
-#if CONFIG_BVCOST_UPDATE && !CONFIG_FLEX_MVRES
+#if CONFIG_IBC_BV_IMPROVEMENT && !CONFIG_FLEX_MVRES
 /*! \brief Holds mv costs for intrabc.
  */
 typedef struct {
@@ -1395,7 +1484,7 @@
   //! multipliers for motion search.
 #if CONFIG_FLEX_MVRES
   IntraBCMvCosts dv_costs;
-#elif CONFIG_BVCOST_UPDATE
+#elif CONFIG_IBC_BV_IMPROVEMENT
   IntraBCMVCosts dv_costs;
 #endif
 
diff --git a/av1/encoder/compound_type.c b/av1/encoder/compound_type.c
index 9b7c1dc..5cf2f22 100644
--- a/av1/encoder/compound_type.c
+++ b/av1/encoder/compound_type.c
@@ -38,6 +38,10 @@
   // Check if interp filter matches with previous case
   if (st->interp_fltr != mi->interp_fltr) return 0;
 
+#if CONFIG_CWP
+  if (st->cwp_idx != mi->cwp_idx) return 0;
+#endif  // CONFIG_CWP
+
   const MACROBLOCKD *const xd = &x->e_mbd;
   // Match MV and reference indices
   for (int i = 0; i < 2; ++i) {
@@ -87,6 +91,10 @@
                                         int32_t *comp_model_rate,
                                         int64_t *comp_model_dist, int *comp_rs2,
                                         int *match_index) {
+#if CONFIG_CWP
+  if (mbmi->cwp_idx != CWP_EQUAL) return 0;
+#endif  // CONFIG_CWP
+
   for (int j = 0; j < x->comp_rd_stats_idx; ++j) {
     if (is_comp_rd_match(cpi, x, &x->comp_rd_stats[j], mbmi, comp_rate,
                          comp_dist, comp_model_rate, comp_model_dist,
@@ -904,19 +912,23 @@
     COMPOUND_TYPE *valid_comp_types) {
   int valid_type_count = 0;
   int comp_type, valid_check;
-#if CONFIG_OPTFLOW_REFINEMENT
+#if CONFIG_OPTFLOW_REFINEMENT || CONFIG_REFINEMV
   MACROBLOCKD *xd = &x->e_mbd;
   MB_MODE_INFO *mbmi = xd->mi[0];
   const PREDICTION_MODE this_mode = mbmi->mode;
   // For implementation simplicity, set compound type to COMPOUND_AVERAGE for
   // now to avoid compound type RD search. In practice, dist_wtd will always
   // be applied instead.
-  if (this_mode >= NEAR_NEARMV_OPTFLOW) {
+  if (this_mode >= NEAR_NEARMV_OPTFLOW
+#if CONFIG_REFINEMV
+      || (mbmi->refinemv_flag && switchable_refinemv_flag(&cpi->common, mbmi))
+#endif  // CONFIG_REFINEMV
+  ) {
     *try_average_and_distwtd_comp = 0;
     valid_comp_types[0] = COMPOUND_AVERAGE;
     return 1;
   }
-#endif  // CONFIG_OPTFLOW_REFINEMENT
+#endif  // CONFIG_OPTFLOW_REFINEMENT || CONFIG_REFINEMV
   int8_t enable_masked_type[MASKED_COMPOUND_TYPES] = { 0, 0 };
 
   const int try_average_comp = (mode_search_mask & (1 << COMPOUND_AVERAGE));
@@ -976,6 +988,9 @@
                                                  COMPOUND_TYPE cur_type) {
   mbmi->interinter_comp.type = cur_type;
   mbmi->comp_group_idx = (cur_type >= COMPOUND_WEDGE);
+#if CONFIG_CWP
+  mbmi->cwp_idx = (cur_type == COMPOUND_AVERAGE) ? mbmi->cwp_idx : CWP_EQUAL;
+#endif  // CONFIG_CWP
 }
 
 // When match is found, populate the compound type data
@@ -1010,6 +1025,9 @@
   best_type_stats->comp_best_model_rd = comp_model_rd_cur;
   best_type_stats->best_compound_data = mbmi->interinter_comp;
   best_type_stats->best_compmode_interinter_cost = rs2;
+#if CONFIG_CWP
+  best_type_stats->cwp_idx = mbmi->cwp_idx;
+#endif  // CONFIG_CWP
 }
 
 // Updates best_mv for masked compound types
@@ -1034,6 +1052,9 @@
     MACROBLOCK *x, const MB_MODE_INFO *const mbmi, const int32_t *comp_rate,
     const int64_t *comp_dist, const int32_t *comp_model_rate,
     const int64_t *comp_model_dist, const int_mv *cur_mv, const int *comp_rs2) {
+#if CONFIG_CWP
+  if (mbmi->cwp_idx != CWP_EQUAL) return;
+#endif  // CONFIG_CWP
   const int offset = x->comp_rd_stats_idx;
   if (offset < MAX_COMP_RD_STATS) {
     COMP_RD_STATS *const rd_stats = x->comp_rd_stats + offset;
@@ -1046,7 +1067,15 @@
     memcpy(rd_stats->ref_frames, mbmi->ref_frame, sizeof(rd_stats->ref_frames));
     rd_stats->mode = mbmi->mode;
     rd_stats->interp_fltr = mbmi->interp_fltr;
+#if CONFIG_SEP_COMP_DRL
+    rd_stats->ref_mv_idx[0] = mbmi->ref_mv_idx[0];
+    rd_stats->ref_mv_idx[1] = mbmi->ref_mv_idx[1];
+#else
     rd_stats->ref_mv_idx = mbmi->ref_mv_idx;
+#endif  // CONFIG_SEP_COMP_DRL
+#if CONFIG_CWP
+    rd_stats->cwp_idx = mbmi->cwp_idx;
+#endif  // CONFIG_CWP
     const MACROBLOCKD *const xd = &x->e_mbd;
     for (int i = 0; i < 2; ++i) {
       const WarpedMotionParams *const wm =
@@ -1324,6 +1353,10 @@
   best_type_stats.best_compmode_interinter_cost = 0;
   best_type_stats.comp_best_model_rd = INT64_MAX;
 
+#if CONFIG_CWP
+  best_type_stats.cwp_idx = CWP_EQUAL;
+#endif  // CONFIG_CWP
+
   int tmp_rate_mv;
   const int num_pix = 1 << num_pels_log2_lookup[bsize];
   const int mask_len = 2 * num_pix * sizeof(uint8_t);
@@ -1369,6 +1402,11 @@
     av1_zero_array(masked_type_cost, COMPOUND_TYPES);
   else
 #endif  // CONFIG_OPTFLOW_REFINEMENT
+#if CONFIG_REFINEMV
+      if (mbmi->refinemv_flag && switchable_refinemv_flag(cm, mbmi))
+    av1_zero_array(masked_type_cost, COMPOUND_TYPES);
+  else
+#endif  // CONFIG_REFINEMV
     // Populates masked_type_cost local array for the 4 compound types
     calc_masked_type_cost(&x->mode_costs, bsize, comp_group_idx_ctx,
                           masked_compound_used, masked_type_cost);
@@ -1384,6 +1422,9 @@
 #if CONFIG_OPTFLOW_REFINEMENT
       this_mode < NEAR_NEARMV_OPTFLOW &&
 #endif  // CONFIG_OPTFLOW_REFINEMENT
+#if CONFIG_REFINEMV
+      (!mbmi->refinemv_flag || !switchable_refinemv_flag(cm, mbmi)) &&
+#endif  // CONFIG_REFINEMV
       cpi->sf.inter_sf.reuse_compound_type_decision) {
     return populate_reuse_comp_type_data(x, mbmi, &best_type_stats, cur_mv,
                                          comp_rate, comp_dist, comp_rs2,
@@ -1396,6 +1437,7 @@
   // Loop over valid compound types
   for (int i = 0; i < valid_type_count; i++) {
     cur_type = valid_comp_types[i];
+
     comp_model_rd_cur = INT64_MAX;
     tmp_rate_mv = *rate_mv;
     best_rd_cur = INT64_MAX;
@@ -1404,6 +1446,13 @@
     if (cur_type < COMPOUND_WEDGE) {
       update_mbmi_for_compound_type(mbmi, cur_type);
       rs2 = masked_type_cost[cur_type];
+
+#if CONFIG_CWP
+      if (cm->features.enable_cwp && is_cwp_allowed(mbmi) && !mbmi->skip_mode) {
+        rs2 += av1_get_cwp_idx_cost(mbmi->cwp_idx, cm, x);
+      }
+#endif  // CONFIG_CWP
+
       const int64_t mode_rd = RDCOST(x->rdmult, rs2 + rd_stats->rate, 0);
       if (mode_rd < ref_best_rd) {
         // Reuse data if matching record is found
@@ -1506,6 +1555,11 @@
     mbmi->interinter_comp = best_type_stats.best_compound_data;
     memcpy(xd->seg_mask, buffers->tmp_best_mask_buf, mask_len);
   }
+#if CONFIG_CWP
+  // Write the best cwp_idx recorded in best_type_stats back to mbmi.
+  mbmi->cwp_idx = best_type_stats.cwp_idx;
+#endif  // CONFIG_CWP
+
   if (have_newmv_in_inter_mode(this_mode)) {
     mbmi->mv[0].as_int = best_mv[0].as_int;
     mbmi->mv[1].as_int = best_mv[1].as_int;
diff --git a/av1/encoder/compound_type.h b/av1/encoder/compound_type.h
index 7543aa0..eb66e4f 100644
--- a/av1/encoder/compound_type.h
+++ b/av1/encoder/compound_type.h
@@ -25,6 +25,10 @@
   INTERINTER_COMPOUND_DATA best_compound_data;
   int64_t comp_best_model_rd;
   int best_compmode_interinter_cost;
+#if CONFIG_CWP
+  // Index for the weighting factor of compound weighted prediction
+  int8_t cwp_idx;
+#endif  // CONFIG_CWP
 } BEST_COMP_TYPE_STATS;
 
 #define IGNORE_MODE -1
diff --git a/av1/encoder/context_tree.c b/av1/encoder/context_tree.c
index bce1223..a057bc7 100644
--- a/av1/encoder/context_tree.c
+++ b/av1/encoder/context_tree.c
@@ -31,6 +31,7 @@
   dst_ctx->mbmi_ext_best = src_ctx->mbmi_ext_best;
 
   dst_ctx->num_4x4_blk = src_ctx->num_4x4_blk;
+  dst_ctx->num_4x4_blk_chroma = src_ctx->num_4x4_blk_chroma;
   dst_ctx->skippable = src_ctx->skippable;
 
   memcpy(dst_ctx->blk_skip, src_ctx->blk_skip,
@@ -52,8 +53,11 @@
   const int num_pix = src_ctx->num_4x4_blk * 16;
   if (num_pix <= MAX_PALETTE_SQUARE) {
     for (int i = 0; i < 2; ++i) {
+      const int num_blk =
+          (i == 0) ? src_ctx->num_4x4_blk : src_ctx->num_4x4_blk_chroma;
+      const int color_map_size = num_blk * 16;
       memcpy(dst_ctx->color_index_map[i], src_ctx->color_index_map[i],
-             sizeof(src_ctx->color_index_map[i][0]) * num_pix);
+             sizeof(src_ctx->color_index_map[i][0]) * color_map_size);
     }
   }
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
@@ -83,8 +87,9 @@
   }
 }
 
-PICK_MODE_CONTEXT *av1_alloc_pmc(const AV1_COMMON *cm, int mi_row, int mi_col,
-                                 BLOCK_SIZE bsize, PC_TREE *parent,
+PICK_MODE_CONTEXT *av1_alloc_pmc(const AV1_COMMON *cm, TREE_TYPE tree_type,
+                                 int mi_row, int mi_col, BLOCK_SIZE bsize,
+                                 PC_TREE *parent,
                                  PARTITION_TYPE parent_partition, int index,
                                  int subsampling_x, int subsampling_y,
                                  PC_TREE_SHARED_BUFFERS *shared_bufs) {
@@ -95,7 +100,8 @@
   ctx->rd_mode_is_ready = 0;
   ctx->parent = parent;
   ctx->index = index;
-  set_chroma_ref_info(mi_row, mi_col, index, bsize, &ctx->chroma_ref_info,
+  set_chroma_ref_info(tree_type, mi_row, mi_col, index, bsize,
+                      &ctx->chroma_ref_info,
                       parent ? &parent->chroma_ref_info : NULL,
                       parent ? parent->block_size : BLOCK_INVALID,
                       parent_partition, subsampling_x, subsampling_y);
@@ -105,6 +111,19 @@
   const int num_pix = block_size_wide[bsize] * block_size_high[bsize];
   const int num_blk = num_pix / 16;
 
+#if CONFIG_UNEVEN_4WAY
+  // Biggest chroma block covering multiple luma blocks is of size 8X16 / 16X8,
+  // when a 16X32 / 32X16 block uses a HORZ_4A/4B / VERT_4A/4B partition.
+  const int num_pix_chroma = AOMMAX(num_pix, 16 * 8);
+#else
+  // Biggest chroma block covering multiple luma blocks is of size 8X8,
+  // when a 16X16 block uses a HORZ_3 / VERT_3 partition.
+  // However, we don't explicitly need to allocate that minimum, because palette
+  // is only allowed for bsize >= BLOCK_8X8, and all these block sizes have at
+  // least 64 pixels.
+  const int num_pix_chroma = num_pix;
+#endif  // CONFIG_UNEVEN_4WAY
+
   AOM_CHECK_MEM_ERROR(&error, ctx->blk_skip,
                       aom_calloc(num_blk, sizeof(*ctx->blk_skip)));
   AOM_CHECK_MEM_ERROR(&error, ctx->tx_type_map,
@@ -119,6 +138,7 @@
                       aom_calloc(num_blk, sizeof(*ctx->cctx_type_map)));
 #endif  // CONFIG_CROSS_CHROMA_TX
   ctx->num_4x4_blk = num_blk;
+  ctx->num_4x4_blk_chroma = num_pix_chroma / 16;
 
   for (int i = 0; i < num_planes; ++i) {
     ctx->coeff[i] = shared_bufs->coeff_buf[i];
@@ -126,6 +146,10 @@
     ctx->dqcoeff[i] = shared_bufs->dqcoeff_buf[i];
     AOM_CHECK_MEM_ERROR(&error, ctx->eobs[i],
                         aom_memalign(32, num_blk * sizeof(*ctx->eobs[i])));
+#if CONFIG_ATC_DCTX_ALIGNED
+    AOM_CHECK_MEM_ERROR(&error, ctx->bobs[i],
+                        aom_memalign(32, num_blk * sizeof(*ctx->bobs[i])));
+#endif  // CONFIG_ATC_DCTX_ALIGNED
     AOM_CHECK_MEM_ERROR(
         &error, ctx->txb_entropy_ctx[i],
         aom_memalign(32, num_blk * sizeof(*ctx->txb_entropy_ctx[i])));
@@ -133,9 +157,10 @@
 
   if (num_pix <= MAX_PALETTE_SQUARE) {
     for (int i = 0; i < 2; ++i) {
+      const int color_map_size = (i == 0) ? num_pix : num_pix_chroma;
       AOM_CHECK_MEM_ERROR(
           &error, ctx->color_index_map[i],
-          aom_memalign(32, num_pix * sizeof(*ctx->color_index_map[i])));
+          aom_memalign(32, color_map_size * sizeof(*ctx->color_index_map[i])));
     }
   }
   av1_invalid_rd_stats(&ctx->rd_stats);
@@ -162,6 +187,10 @@
     ctx->dqcoeff[i] = NULL;
     aom_free(ctx->eobs[i]);
     ctx->eobs[i] = NULL;
+#if CONFIG_ATC_DCTX_ALIGNED
+    aom_free(ctx->bobs[i]);
+    ctx->bobs[i] = NULL;
+#endif  // CONFIG_ATC_DCTX_ALIGNED
     aom_free(ctx->txb_entropy_ctx[i]);
     ctx->txb_entropy_ctx[i] = NULL;
   }
@@ -174,8 +203,8 @@
   aom_free(ctx);
 }
 
-PC_TREE *av1_alloc_pc_tree_node(int mi_row, int mi_col, BLOCK_SIZE bsize,
-                                PC_TREE *parent,
+PC_TREE *av1_alloc_pc_tree_node(TREE_TYPE tree_type, int mi_row, int mi_col,
+                                BLOCK_SIZE bsize, PC_TREE *parent,
                                 PARTITION_TYPE parent_partition, int index,
                                 int is_last, int subsampling_x,
                                 int subsampling_y) {
@@ -196,7 +225,8 @@
   av1_invalid_rd_stats(&pc_tree->none_rd);
   pc_tree->skippable = false;
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
-  set_chroma_ref_info(mi_row, mi_col, index, bsize, &pc_tree->chroma_ref_info,
+  set_chroma_ref_info(tree_type, mi_row, mi_col, index, bsize,
+                      &pc_tree->chroma_ref_info,
                       parent ? &parent->chroma_ref_info : NULL,
                       parent ? parent->block_size : BLOCK_INVALID,
                       parent_partition, subsampling_x, subsampling_y);
@@ -207,19 +237,20 @@
     pc_tree->vertical[i] = NULL;
   }
 #if CONFIG_EXT_RECUR_PARTITIONS
-#if CONFIG_H_PARTITION
+#if CONFIG_UNEVEN_4WAY
+  for (int i = 0; i < 4; ++i) {
+    pc_tree->horizontal4a[i] = NULL;
+    pc_tree->horizontal4b[i] = NULL;
+    pc_tree->vertical4a[i] = NULL;
+    pc_tree->vertical4b[i] = NULL;
+  }
+#endif  // CONFIG_UNEVEN_4WAY
   for (int i = 0; i < 4; ++i) {
     pc_tree->horizontal3[i] = NULL;
     pc_tree->vertical3[i] = NULL;
   }
 #else
   for (int i = 0; i < 3; ++i) {
-    pc_tree->horizontal3[i] = NULL;
-    pc_tree->vertical3[i] = NULL;
-  }
-#endif  // CONFIG_H_PARTITION
-#else
-  for (int i = 0; i < 3; ++i) {
     pc_tree->horizontala[i] = NULL;
     pc_tree->horizontalb[i] = NULL;
     pc_tree->verticala[i] = NULL;
@@ -272,11 +303,45 @@
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
   }
 #if CONFIG_EXT_RECUR_PARTITIONS
-#if CONFIG_H_PARTITION
+
+#if CONFIG_UNEVEN_4WAY
+  if (!keep_best || (partition != PARTITION_HORZ_4A)) {
+    for (int i = 0; i < 4; ++i) {
+      if (pc_tree->horizontal4a[i] != NULL) {
+        av1_free_pc_tree_recursive(pc_tree->horizontal4a[i], num_planes, 0, 0);
+        pc_tree->horizontal4a[i] = NULL;
+      }
+    }
+  }
+
+  if (!keep_best || (partition != PARTITION_HORZ_4B)) {
+    for (int i = 0; i < 4; ++i) {
+      if (pc_tree->horizontal4b[i] != NULL) {
+        av1_free_pc_tree_recursive(pc_tree->horizontal4b[i], num_planes, 0, 0);
+        pc_tree->horizontal4b[i] = NULL;
+      }
+    }
+  }
+
+  if (!keep_best || (partition != PARTITION_VERT_4A)) {
+    for (int i = 0; i < 4; ++i) {
+      if (pc_tree->vertical4a[i] != NULL) {
+        av1_free_pc_tree_recursive(pc_tree->vertical4a[i], num_planes, 0, 0);
+        pc_tree->vertical4a[i] = NULL;
+      }
+    }
+  }
+
+  if (!keep_best || (partition != PARTITION_VERT_4B)) {
+    for (int i = 0; i < 4; ++i) {
+      if (pc_tree->vertical4b[i] != NULL) {
+        av1_free_pc_tree_recursive(pc_tree->vertical4b[i], num_planes, 0, 0);
+        pc_tree->vertical4b[i] = NULL;
+      }
+    }
+  }
+#endif  // CONFIG_UNEVEN_4WAY
   for (int i = 0; i < 4; ++i) {
-#else
-  for (int i = 0; i < 3; ++i) {
-#endif  // CONFIG_H_PARTITION
     if ((!keep_best || (partition != PARTITION_HORZ_3)) &&
         pc_tree->horizontal3[i] != NULL) {
       av1_free_pc_tree_recursive(pc_tree->horizontal3[i], num_planes, 0, 0);
@@ -323,13 +388,20 @@
 void av1_copy_pc_tree_recursive(const AV1_COMMON *cm, PC_TREE *dst,
                                 PC_TREE *src, int ss_x, int ss_y,
                                 PC_TREE_SHARED_BUFFERS *shared_bufs,
-                                int num_planes) {
+                                TREE_TYPE tree_type, int num_planes) {
   // Copy the best partition type. For basic information like bsize and index,
   // we assume they have been set properly when initializing the dst PC_TREE
   dst->partitioning = src->partitioning;
   dst->rd_cost = src->rd_cost;
   dst->none_rd = src->none_rd;
   dst->skippable = src->skippable;
+#if CONFIG_MVP_IMPROVEMENT
+  dst->ref_mv_bank = src->ref_mv_bank;
+#endif  // CONFIG_MVP_IMPROVEMENT
+#if WARP_CU_BANK
+  dst->warp_param_bank = src->warp_param_bank;
+#endif  // WARP_CU_BANK
+
   const BLOCK_SIZE bsize = dst->block_size;
   const BLOCK_SIZE subsize = get_partition_subsize(bsize, src->partitioning);
   const int mi_row = src->mi_row;
@@ -341,7 +413,7 @@
       if (dst->none) av1_free_pmc(dst->none, num_planes);
       dst->none = NULL;
       if (src->none) {
-        dst->none = av1_alloc_pmc(cm, mi_row, mi_col, bsize, dst,
+        dst->none = av1_alloc_pmc(cm, tree_type, mi_row, mi_col, bsize, dst,
                                   PARTITION_NONE, 0, ss_x, ss_y, shared_bufs);
         av1_copy_tree_context(dst->none, src->none);
       }
@@ -358,10 +430,11 @@
             const int x_idx = (i & 1) * (mi_size_wide[bsize] >> 1);
             const int y_idx = (i >> 1) * (mi_size_high[bsize] >> 1);
             dst->split[i] = av1_alloc_pc_tree_node(
-                mi_row + y_idx, mi_col + x_idx, subsize, dst, PARTITION_SPLIT,
-                i, i == 3, ss_x, ss_y);
+                tree_type, mi_row + y_idx, mi_col + x_idx, subsize, dst,
+                PARTITION_SPLIT, i, i == 3, ss_x, ss_y);
             av1_copy_pc_tree_recursive(cm, dst->split[i], src->split[i], ss_x,
-                                       ss_y, shared_bufs, num_planes);
+                                       ss_y, shared_bufs, tree_type,
+                                       num_planes);
           }
         }
       }
@@ -376,12 +449,12 @@
           }
           if (src->horizontal[i]) {
             const int this_mi_row = mi_row + i * (mi_size_high[bsize] >> 1);
-            dst->horizontal[i] =
-                av1_alloc_pc_tree_node(this_mi_row, mi_col, subsize, dst,
-                                       PARTITION_HORZ, i, i == 1, ss_x, ss_y);
+            dst->horizontal[i] = av1_alloc_pc_tree_node(
+                tree_type, this_mi_row, mi_col, subsize, dst, PARTITION_HORZ, i,
+                i == 1, ss_x, ss_y);
             av1_copy_pc_tree_recursive(cm, dst->horizontal[i],
                                        src->horizontal[i], ss_x, ss_y,
-                                       shared_bufs, num_planes);
+                                       shared_bufs, tree_type, num_planes);
           }
         }
       }
@@ -396,16 +469,135 @@
           }
           if (src->vertical[i]) {
             const int this_mi_col = mi_col + i * (mi_size_wide[bsize] >> 1);
-            dst->vertical[i] =
-                av1_alloc_pc_tree_node(mi_row, this_mi_col, subsize, dst,
-                                       PARTITION_VERT, i, i == 1, ss_x, ss_y);
+            dst->vertical[i] = av1_alloc_pc_tree_node(
+                tree_type, mi_row, this_mi_col, subsize, dst, PARTITION_VERT, i,
+                i == 1, ss_x, ss_y);
             av1_copy_pc_tree_recursive(cm, dst->vertical[i], src->vertical[i],
-                                       ss_x, ss_y, shared_bufs, num_planes);
+                                       ss_x, ss_y, shared_bufs, tree_type,
+                                       num_planes);
           }
         }
       }
       break;
-#if CONFIG_H_PARTITION
+#if CONFIG_UNEVEN_4WAY
+    // PARTITION_HORZ_4A
+    case PARTITION_HORZ_4A:
+      if (is_partition_valid(bsize, PARTITION_HORZ_4A)) {
+        const int ebh = (mi_size_high[bsize] >> 3);
+        const int mi_rows[4] = { mi_row, mi_row + ebh, mi_row + ebh * 3,
+                                 mi_row + ebh * 7 };
+        const BLOCK_SIZE bsize_big =
+            get_partition_subsize(bsize, PARTITION_HORZ);
+        const BLOCK_SIZE bsize_med =
+            get_partition_subsize(bsize_big, PARTITION_HORZ);
+        assert(subsize == get_partition_subsize(bsize_med, PARTITION_HORZ));
+        const BLOCK_SIZE subsizes[4] = { subsize, bsize_med, bsize_big,
+                                         subsize };
+        for (int i = 0; i < 4; ++i) {
+          if (dst->horizontal4a[i]) {
+            av1_free_pc_tree_recursive(dst->horizontal4a[i], num_planes, 0, 0);
+            dst->horizontal4a[i] = NULL;
+          }
+          if (src->horizontal4a[i]) {
+            dst->horizontal4a[i] = av1_alloc_pc_tree_node(
+                tree_type, mi_rows[i], mi_col, subsizes[i], dst,
+                PARTITION_HORZ_4A, i, i == 3, ss_x, ss_y);
+            av1_copy_pc_tree_recursive(cm, dst->horizontal4a[i],
+                                       src->horizontal4a[i], ss_x, ss_y,
+                                       shared_bufs, tree_type, num_planes);
+          }
+        }
+      }
+      break;
+    // PARTITION_HORZ_4B
+    case PARTITION_HORZ_4B:
+      if (is_partition_valid(bsize, PARTITION_HORZ_4B)) {
+        const int ebh = (mi_size_high[bsize] >> 3);
+        const int mi_rows[4] = { mi_row, mi_row + ebh, mi_row + ebh * 5,
+                                 mi_row + ebh * 7 };
+        const BLOCK_SIZE bsize_big =
+            get_partition_subsize(bsize, PARTITION_HORZ);
+        const BLOCK_SIZE bsize_med =
+            get_partition_subsize(bsize_big, PARTITION_HORZ);
+        assert(subsize == get_partition_subsize(bsize_med, PARTITION_HORZ));
+        const BLOCK_SIZE subsizes[4] = { subsize, bsize_big, bsize_med,
+                                         subsize };
+        for (int i = 0; i < 4; ++i) {
+          if (dst->horizontal4b[i]) {
+            av1_free_pc_tree_recursive(dst->horizontal4b[i], num_planes, 0, 0);
+            dst->horizontal4b[i] = NULL;
+          }
+          if (src->horizontal4b[i]) {
+            dst->horizontal4b[i] = av1_alloc_pc_tree_node(
+                tree_type, mi_rows[i], mi_col, subsizes[i], dst,
+                PARTITION_HORZ_4B, i, i == 3, ss_x, ss_y);
+            av1_copy_pc_tree_recursive(cm, dst->horizontal4b[i],
+                                       src->horizontal4b[i], ss_x, ss_y,
+                                       shared_bufs, tree_type, num_planes);
+          }
+        }
+      }
+      break;
+    // PARTITION_VERT_4A
+    case PARTITION_VERT_4A:
+      if (is_partition_valid(bsize, PARTITION_VERT_4A)) {
+        const int ebw = (mi_size_wide[bsize] >> 3);
+        const int mi_cols[4] = { mi_col, mi_col + ebw, mi_col + ebw * 3,
+                                 mi_col + ebw * 7 };
+        const BLOCK_SIZE bsize_big =
+            get_partition_subsize(bsize, PARTITION_VERT);
+        const BLOCK_SIZE bsize_med =
+            get_partition_subsize(bsize_big, PARTITION_VERT);
+        assert(subsize == get_partition_subsize(bsize_med, PARTITION_VERT));
+        const BLOCK_SIZE subsizes[4] = { subsize, bsize_med, bsize_big,
+                                         subsize };
+        for (int i = 0; i < 4; ++i) {
+          if (dst->vertical4a[i]) {
+            av1_free_pc_tree_recursive(dst->vertical4a[i], num_planes, 0, 0);
+            dst->vertical4a[i] = NULL;
+          }
+          if (src->vertical4a[i]) {
+            dst->vertical4a[i] = av1_alloc_pc_tree_node(
+                tree_type, mi_row, mi_cols[i], subsizes[i], dst,
+                PARTITION_VERT_4A, i, i == 3, ss_x, ss_y);
+            av1_copy_pc_tree_recursive(cm, dst->vertical4a[i],
+                                       src->vertical4a[i], ss_x, ss_y,
+                                       shared_bufs, tree_type, num_planes);
+          }
+        }
+      }
+      break;
+    // PARTITION_VERT_4B
+    case PARTITION_VERT_4B:
+      if (is_partition_valid(bsize, PARTITION_VERT_4B)) {
+        const int ebw = (mi_size_wide[bsize] >> 3);
+        const int mi_cols[4] = { mi_col, mi_col + ebw, mi_col + ebw * 5,
+                                 mi_col + ebw * 7 };
+        const BLOCK_SIZE bsize_big =
+            get_partition_subsize(bsize, PARTITION_VERT);
+        const BLOCK_SIZE bsize_med =
+            get_partition_subsize(bsize_big, PARTITION_VERT);
+        assert(subsize == get_partition_subsize(bsize_med, PARTITION_VERT));
+        const BLOCK_SIZE subsizes[4] = { subsize, bsize_big, bsize_med,
+                                         subsize };
+        for (int i = 0; i < 4; ++i) {
+          if (dst->vertical4b[i]) {
+            av1_free_pc_tree_recursive(dst->vertical4b[i], num_planes, 0, 0);
+            dst->vertical4b[i] = NULL;
+          }
+          if (src->vertical4b[i]) {
+            dst->vertical4b[i] = av1_alloc_pc_tree_node(
+                tree_type, mi_row, mi_cols[i], subsizes[i], dst,
+                PARTITION_VERT_4B, i, i == 3, ss_x, ss_y);
+            av1_copy_pc_tree_recursive(cm, dst->vertical4b[i],
+                                       src->vertical4b[i], ss_x, ss_y,
+                                       shared_bufs, tree_type, num_planes);
+          }
+        }
+      }
+      break;
+#endif  // CONFIG_UNEVEN_4WAY
+
     // PARTITION_HORZ_3
     case PARTITION_HORZ_3:
       if (is_partition_valid(bsize, PARTITION_HORZ_3)) {
@@ -423,11 +615,11 @@
           }
           if (src->horizontal3[i]) {
             dst->horizontal3[i] = av1_alloc_pc_tree_node(
-                mi_row + offset_mr, mi_col + offset_mc, this_subsize, dst,
-                PARTITION_HORZ_3, i, i == 3, ss_x, ss_y);
+                tree_type, mi_row + offset_mr, mi_col + offset_mc, this_subsize,
+                dst, PARTITION_HORZ_3, i, i == 3, ss_x, ss_y);
             av1_copy_pc_tree_recursive(cm, dst->horizontal3[i],
                                        src->horizontal3[i], ss_x, ss_y,
-                                       shared_bufs, num_planes);
+                                       shared_bufs, tree_type, num_planes);
           }
         }
       }
@@ -449,65 +641,15 @@
           }
           if (src->vertical3[i]) {
             dst->vertical3[i] = av1_alloc_pc_tree_node(
-                mi_row + offset_mr, mi_col + offset_mc, this_subsize, dst,
-                PARTITION_VERT_3, i, i == 3, ss_x, ss_y);
+                tree_type, mi_row + offset_mr, mi_col + offset_mc, this_subsize,
+                dst, PARTITION_VERT_3, i, i == 3, ss_x, ss_y);
             av1_copy_pc_tree_recursive(cm, dst->vertical3[i], src->vertical3[i],
-                                       ss_x, ss_y, shared_bufs, num_planes);
+                                       ss_x, ss_y, shared_bufs, tree_type,
+                                       num_planes);
           }
         }
       }
       break;
-#else
-    // PARTITION_HORZ_3
-    case PARTITION_HORZ_3:
-      if (is_partition_valid(bsize, PARTITION_HORZ_3)) {
-        const int mi_rows[3] = { mi_row, mi_row + (mi_size_high[bsize] >> 2),
-                                 mi_row + (mi_size_high[bsize] >> 2) * 3 };
-        const BLOCK_SIZE subsizes[3] = {
-          subsize, get_partition_subsize(bsize, PARTITION_HORZ), subsize
-        };
-
-        for (int i = 0; i < 3; ++i) {
-          if (dst->horizontal3[i]) {
-            av1_free_pc_tree_recursive(dst->horizontal3[i], num_planes, 0, 0);
-            dst->horizontal3[i] = NULL;
-          }
-          if (src->horizontal3[i]) {
-            dst->horizontal3[i] =
-                av1_alloc_pc_tree_node(mi_rows[i], mi_col, subsizes[i], dst,
-                                       PARTITION_HORZ_3, i, i == 2, ss_x, ss_y);
-            av1_copy_pc_tree_recursive(cm, dst->horizontal3[i],
-                                       src->horizontal3[i], ss_x, ss_y,
-                                       shared_bufs, num_planes);
-          }
-        }
-      }
-      break;
-    // PARTITION_VERT_3
-    case PARTITION_VERT_3:
-      if (is_partition_valid(bsize, PARTITION_VERT_3)) {
-        const int mi_cols[3] = { mi_col, mi_col + (mi_size_wide[bsize] >> 2),
-                                 mi_col + (mi_size_wide[bsize] >> 2) * 3 };
-        const BLOCK_SIZE subsizes[3] = {
-          subsize, get_partition_subsize(bsize, PARTITION_VERT), subsize
-        };
-
-        for (int i = 0; i < 3; ++i) {
-          if (dst->vertical3[i]) {
-            av1_free_pc_tree_recursive(dst->vertical3[i], num_planes, 0, 0);
-            dst->vertical3[i] = NULL;
-          }
-          if (src->vertical3[i]) {
-            dst->vertical3[i] =
-                av1_alloc_pc_tree_node(mi_row, mi_cols[i], subsizes[i], dst,
-                                       PARTITION_VERT_3, i, i == 2, ss_x, ss_y);
-            av1_copy_pc_tree_recursive(cm, dst->vertical3[i], src->vertical3[i],
-                                       ss_x, ss_y, shared_bufs, num_planes);
-          }
-        }
-      }
-      break;
-#endif  // CONFIG_H_PARTITION
     default: assert(0 && "Not a valid partition."); break;
   }
 }
@@ -647,11 +789,20 @@
   if (result) return result;
   result = look_for_counterpart_helper(pc_tree->vertical[0], target);
   if (result) return result;
+#if CONFIG_UNEVEN_4WAY
+  result = look_for_counterpart_helper(pc_tree->horizontal4a[0], target);
+  if (result) return result;
+  result = look_for_counterpart_helper(pc_tree->horizontal4b[0], target);
+  if (result) return result;
+  result = look_for_counterpart_helper(pc_tree->vertical4a[0], target);
+  if (result) return result;
+  result = look_for_counterpart_helper(pc_tree->vertical4b[0], target);
+  if (result) return result;
+#endif  // CONFIG_UNEVEN_4WAY
   result = look_for_counterpart_helper(pc_tree->horizontal3[0], target);
   if (result) return result;
   result = look_for_counterpart_helper(pc_tree->vertical3[0], target);
   if (result) return result;
-
   return NULL;
 }
 
diff --git a/av1/encoder/context_tree.h b/av1/encoder/context_tree.h
index 280b3e9..d99cb98 100644
--- a/av1/encoder/context_tree.h
+++ b/av1/encoder/context_tree.h
@@ -46,6 +46,9 @@
   tran_low_t *qcoeff[MAX_MB_PLANE];
   tran_low_t *dqcoeff[MAX_MB_PLANE];
   uint16_t *eobs[MAX_MB_PLANE];
+#if CONFIG_ATC_DCTX_ALIGNED
+  uint16_t *bobs[MAX_MB_PLANE];
+#endif  // CONFIG_ATC_DCTX_ALIGNED
   uint8_t *txb_entropy_ctx[MAX_MB_PLANE];
   TX_TYPE *tx_type_map;
 #if CONFIG_CROSS_CHROMA_TX
@@ -53,6 +56,7 @@
 #endif  // CONFIG_CROSS_CHROMA_TX
 
   int num_4x4_blk;
+  int num_4x4_blk_chroma;
   // For current partition, only if all Y, U, and V transform blocks'
   // coefficients are quantized to 0, skippable is set to 1.
   int skippable;
@@ -76,14 +80,15 @@
 #if CONFIG_EXT_RECUR_PARTITIONS
   struct PC_TREE *horizontal[2];
   struct PC_TREE *vertical[2];
-#if CONFIG_H_PARTITION
+#if CONFIG_UNEVEN_4WAY
+  struct PC_TREE *horizontal4a[4];
+  struct PC_TREE *horizontal4b[4];
+  struct PC_TREE *vertical4a[4];
+  struct PC_TREE *vertical4b[4];
+#endif  // CONFIG_UNEVEN_4WAY
   struct PC_TREE *horizontal3[4];
   struct PC_TREE *vertical3[4];
 #else
-  struct PC_TREE *horizontal3[3];
-  struct PC_TREE *vertical3[3];
-#endif  // CONFIG_H_PARTITION
-#else
   PICK_MODE_CONTEXT *horizontal[2];
   PICK_MODE_CONTEXT *vertical[2];
   PICK_MODE_CONTEXT *horizontala[3];
@@ -104,9 +109,9 @@
 #if CONFIG_EXT_RECUR_PARTITIONS
   RD_STATS none_rd;
   bool skippable;
-#if CONFIG_C043_MVP_IMPROVEMENTS
+#if CONFIG_MVP_IMPROVEMENT
   REF_MV_BANK ref_mv_bank;
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
+#endif  // CONFIG_MVP_IMPROVEMENT
 #if WARP_CU_BANK
   WARP_PARAM_BANK warp_param_bank;
 #endif  // WARP_CU_BANK
@@ -134,8 +139,8 @@
                                    PC_TREE_SHARED_BUFFERS *shared_bufs);
 void av1_free_shared_coeff_buffer(PC_TREE_SHARED_BUFFERS *shared_bufs);
 
-PC_TREE *av1_alloc_pc_tree_node(int mi_row, int mi_col, BLOCK_SIZE bsize,
-                                PC_TREE *parent,
+PC_TREE *av1_alloc_pc_tree_node(TREE_TYPE tree_type, int mi_row, int mi_col,
+                                BLOCK_SIZE bsize, PC_TREE *parent,
                                 PARTITION_TYPE parent_partition, int index,
                                 int is_last, int subsampling_x,
                                 int subsampling_y);
@@ -145,11 +150,12 @@
 void av1_copy_pc_tree_recursive(const AV1_COMMON *cm, PC_TREE *dst,
                                 PC_TREE *src, int ss_x, int ss_y,
                                 PC_TREE_SHARED_BUFFERS *shared_bufs,
-                                int num_planes);
+                                TREE_TYPE tree_type, int num_planes);
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
 
-PICK_MODE_CONTEXT *av1_alloc_pmc(const AV1_COMMON *cm, int mi_row, int mi_col,
-                                 BLOCK_SIZE bsize, PC_TREE *parent,
+PICK_MODE_CONTEXT *av1_alloc_pmc(const AV1_COMMON *cm, TREE_TYPE tree_type,
+                                 int mi_row, int mi_col, BLOCK_SIZE bsize,
+                                 PC_TREE *parent,
                                  PARTITION_TYPE parent_partition, int index,
                                  int subsampling_x, int subsampling_y,
                                  PC_TREE_SHARED_BUFFERS *shared_bufs);
diff --git a/av1/encoder/encode_strategy.c b/av1/encoder/encode_strategy.c
index a931f84..06809d7 100644
--- a/av1/encoder/encode_strategy.c
+++ b/av1/encoder/encode_strategy.c
@@ -863,6 +863,12 @@
                                         source_buffer->metadata);
     }
   }
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+  // When enable_frame_output_order == 1, the show_existing mechanism is
+  // used for alt_ref internally on the encoder side, but the OBU with
+  // show_existing_frame == 1 is not signaled in the bitstream.
+  if (cm->seq_params.enable_frame_output_order) show_existing_alt_ref = 1;
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
   set_show_existing_alt_ref(&cpi->gf_group, apply_filtering,
                             oxcf->algo_cfg.enable_overlay,
                             show_existing_alt_ref);
@@ -1105,6 +1111,9 @@
         AOMMIN(cm->seq_params.num_same_ref_compound,
                cm->ref_frames_info.num_total_refs);
 #endif  // CONFIG_ALLOW_SAME_REF_COMPOUND
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+    cm->cur_frame->num_ref_frames = cm->ref_frames_info.num_total_refs;
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
 
     // ref_frame_flags is defined based on the external flag
     // max-reference-frames.
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index e1dca4f..ecd0ee6 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -572,8 +572,9 @@
                              : (loop_idx == 0 ? LUMA_PART : CHROMA_PART));
     init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row, mi_col,
                       1);
-    PC_TREE *const pc_root = av1_alloc_pc_tree_node(
-        mi_row, mi_col, sb_size, NULL, PARTITION_NONE, 0, 1, ss_x, ss_y);
+    PC_TREE *const pc_root =
+        av1_alloc_pc_tree_node(xd->tree_type, mi_row, mi_col, sb_size, NULL,
+                               PARTITION_NONE, 0, 1, ss_x, ss_y);
 #if CONFIG_EXT_RECUR_PARTITIONS
     const PARTITION_TREE *template_tree =
         multi_pass_params ? multi_pass_params->template_tree : NULL;
@@ -615,9 +616,9 @@
   // First pass
   SB_FIRST_PASS_STATS sb_fp_stats;
   av1_backup_sb_state(&sb_fp_stats, cpi, td, tile_data, mi_row, mi_col);
-#if CONFIG_C043_MVP_IMPROVEMENTS
+#if CONFIG_MVP_IMPROVEMENT
   REF_MV_BANK stored_mv_bank = td->mb.e_mbd.ref_mv_bank;
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
+#endif  // CONFIG_MVP_IMPROVEMENT
 #if WARP_CU_BANK
   WARP_PARAM_BANK stored_warp_bank = td->mb.e_mbd.warp_param_bank;
 #endif  // WARP_CU_BANK
@@ -632,9 +633,9 @@
   av1_reset_simple_motion_tree_partition(sms_root, sb_size);
 
   av1_restore_sb_state(&sb_fp_stats, cpi, td, tile_data, mi_row, mi_col);
-#if CONFIG_C043_MVP_IMPROVEMENTS
+#if CONFIG_MVP_IMPROVEMENT
   td->mb.e_mbd.ref_mv_bank = stored_mv_bank;
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
+#endif  // CONFIG_MVP_IMPROVEMENT
 #if WARP_CU_BANK
   td->mb.e_mbd.warp_param_bank = stored_warp_bank;
 #endif  // WARP_CU_BANK
@@ -666,13 +667,14 @@
     case PARTITION_NONE: num_subtrees = 0; break;
     case PARTITION_HORZ:
     case PARTITION_VERT: num_subtrees = 2; break;
-#if CONFIG_H_PARTITION
+#if CONFIG_UNEVEN_4WAY
+    case PARTITION_HORZ_4A:
+    case PARTITION_HORZ_4B:
+    case PARTITION_VERT_4A:
+    case PARTITION_VERT_4B: num_subtrees = 4; break;
+#endif  // CONFIG_UNEVEN_4WAY
     case PARTITION_HORZ_3:
     case PARTITION_VERT_3: num_subtrees = 4; break;
-#else
-    case PARTITION_HORZ_3:
-    case PARTITION_VERT_3: num_subtrees = 3; break;
-#endif  // CONFIG_H_PARTITION
     case PARTITION_SPLIT: num_subtrees = 4; break;
     default:
       assert(0 && "Invalid partition type in set_min_none_to_invalid!");
@@ -797,8 +799,9 @@
           cm, xd->tree_type, mi_row, mi_col, bsize,
           xd->sbi->ptree_root[av1_get_sdp_idx(xd->tree_type)]);
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
-      PC_TREE *const pc_root = av1_alloc_pc_tree_node(
-          mi_row, mi_col, sb_size, NULL, PARTITION_NONE, 0, 1, ss_x, ss_y);
+      PC_TREE *const pc_root =
+          av1_alloc_pc_tree_node(xd->tree_type, mi_row, mi_col, sb_size, NULL,
+                                 PARTITION_NONE, 0, 1, ss_x, ss_y);
       av1_rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
                            &dummy_rate, &dummy_dist, 1,
 #if CONFIG_EXT_RECUR_PARTITIONS
@@ -825,8 +828,9 @@
                                : (loop_idx == 0 ? LUMA_PART : CHROMA_PART));
       init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row,
                         mi_col, 1);
-      PC_TREE *const pc_root = av1_alloc_pc_tree_node(
-          mi_row, mi_col, sb_size, NULL, PARTITION_NONE, 0, 1, ss_x, ss_y);
+      PC_TREE *const pc_root =
+          av1_alloc_pc_tree_node(xd->tree_type, mi_row, mi_col, sb_size, NULL,
+                                 PARTITION_NONE, 0, 1, ss_x, ss_y);
 #if CONFIG_EXT_RECUR_PARTITIONS
       av1_reset_ptree_in_sbi(xd->sbi, xd->tree_type);
       av1_build_partition_tree_fixed_partitioning(
@@ -1139,7 +1143,7 @@
        mi_row += cm->mib_size) {
 #if CONFIG_REF_MV_BANK
     av1_zero(td->mb.e_mbd.ref_mv_bank);
-#if !CONFIG_C043_MVP_IMPROVEMENTS
+#if !CONFIG_MVP_IMPROVEMENT
     td->mb.e_mbd.ref_mv_bank_pt = &td->mb.e_mbd.ref_mv_bank;
 #endif
 #endif  // CONFIG_REF_MV_BANK
@@ -1237,8 +1241,13 @@
       get_ref_frame_buf(cm, skip_mode_info->ref_frame_idx_1);
   assert(buf_0 != NULL && buf_1 != NULL);
 
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+  ref_order_hint[0] = buf_0->display_order_hint;
+  ref_order_hint[1] = buf_1->display_order_hint;
+#else
   ref_order_hint[0] = buf_0->order_hint;
   ref_order_hint[1] = buf_1->order_hint;
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
 }
 
 static int check_skip_mode_enabled(AV1_COMP *const cpi) {
@@ -1247,9 +1256,13 @@
   av1_setup_skip_mode_allowed(cm);
   if (!cm->current_frame.skip_mode_info.skip_mode_allowed) return 0;
 
-  // Turn off skip mode if the temporal distances of the reference pair to the
-  // current frame are different by more than 1 frame.
+    // Turn off skip mode if the temporal distances of the reference pair to the
+    // current frame are different by more than 1 frame.
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+  const int cur_offset = (int)cm->current_frame.display_order_hint;
+#else
   const int cur_offset = (int)cm->current_frame.order_hint;
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
   int ref_offset[2];
   get_skip_mode_ref_offsets(cm, ref_offset);
   const int cur_to_ref0 = get_relative_dist(&cm->seq_params.order_hint_info,
@@ -1291,6 +1304,19 @@
 #endif  // CONFIG_OPTFLOW_REFINEMENT
     uint16_t **mc_buf, uint16_t **pre, SubpelParams *subpel_params,
     int *src_stride) {
+
+#if CONFIG_REFINEMV
+  if (inter_pred_params->use_ref_padding) {
+    tip_common_calc_subpel_params_and_extend(
+        src_mv, inter_pred_params, xd, mi_x, mi_y, ref,
+#if CONFIG_OPTFLOW_REFINEMENT
+        use_optflow_refinement,
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+        mc_buf, pre, subpel_params, src_stride);
+    return;
+  }
+#endif  // CONFIG_REFINEMV
+
   // These are part of the function signature to use this function through a
   // function pointer. See typedef of 'CalcSubpelParamsFunc'.
   (void)xd;
@@ -1338,11 +1364,29 @@
     subpel_params->subpel_y = pos_y & SCALE_SUBPEL_MASK;
     subpel_params->xs = sf->x_step_q4;
     subpel_params->ys = sf->y_step_q4;
+
+#if CONFIG_D071_IMP_MSK_BLD
+    if (inter_pred_params->border_data.enable_bacp) {
+      // Get reference block top left coordinate.
+      subpel_params->x0 = pos_x >> SCALE_SUBPEL_BITS;
+      subpel_params->y0 = pos_y >> SCALE_SUBPEL_BITS;
+      // Get reference block bottom right coordinate.
+      subpel_params->x1 = subpel_params->x0 + inter_pred_params->block_width;
+      subpel_params->y1 = subpel_params->y0 + inter_pred_params->block_height;
+    }
+#endif  // CONFIG_D071_IMP_MSK_BLD
+
     *pre = pre_buf->buf0 + (pos_y >> SCALE_SUBPEL_BITS) * pre_buf->stride +
            (pos_x >> SCALE_SUBPEL_BITS);
   } else {
     int pos_x = inter_pred_params->pix_col << SUBPEL_BITS;
     int pos_y = inter_pred_params->pix_row << SUBPEL_BITS;
+
+#if CONFIG_REFINEMV
+    const int bw = inter_pred_params->original_pu_width;
+    const int bh = inter_pred_params->original_pu_height;
+
+#else
 #if CONFIG_OPTFLOW_REFINEMENT
     // Use original block size to clamp MV and to extend block boundary
     const int bw = use_optflow_refinement ? inter_pred_params->orig_block_width
@@ -1353,6 +1397,8 @@
     const int bw = inter_pred_params->block_width;
     const int bh = inter_pred_params->block_height;
 #endif  // CONFIG_OPTFLOW_REFINEMENT
+
+#endif  // CONFIG_REFINEMV
     const MV mv_q4 = tip_clamp_mv_to_umv_border_sb(
         inter_pred_params, src_mv, bw, bh,
 #if CONFIG_OPTFLOW_REFINEMENT
@@ -1365,6 +1411,18 @@
     subpel_params->subpel_y = (mv_q4.row & SUBPEL_MASK) << SCALE_EXTRA_BITS;
     pos_x += mv_q4.col;
     pos_y += mv_q4.row;
+
+#if CONFIG_D071_IMP_MSK_BLD
+    if (inter_pred_params->border_data.enable_bacp) {
+      // Get reference block top left coordinate.
+      subpel_params->x0 = pos_x >> SUBPEL_BITS;
+      subpel_params->y0 = pos_y >> SUBPEL_BITS;
+      // Get reference block bottom right coordinate.
+      subpel_params->x1 = subpel_params->x0 + inter_pred_params->block_width;
+      subpel_params->y1 = subpel_params->y0 + inter_pred_params->block_height;
+    }
+#endif  // CONFIG_D071_IMP_MSK_BLD
+
     *pre = pre_buf->buf0 + (pos_y >> SUBPEL_BITS) * pre_buf->stride +
            (pos_x >> SUBPEL_BITS);
   }
@@ -1479,6 +1537,19 @@
   }
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
 
+#if CONFIG_CWG_D067_IMPROVED_WARP
+  features->allow_warpmv_mode =
+      (features->enabled_motion_modes & (1 << WARP_DELTA)) != 0;
+  if (features->allow_warpmv_mode &&
+      cpi->sf.inter_sf.prune_warpmv_prob_thresh > 0) {
+    const FRAME_UPDATE_TYPE update_type = get_frame_update_type(&cpi->gf_group);
+    if (frame_probs->warped_probs[update_type] <
+        cpi->sf.inter_sf.prune_warpmv_prob_thresh) {
+      features->allow_warpmv_mode = 0;
+    }
+  }
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+
   int hash_table_created = 0;
   if (!is_stat_generation_stage(cpi) && av1_use_hash_me(cpi)) {
     // TODO(any): move this outside of the recoding loop to avoid recalculating
@@ -1643,10 +1714,10 @@
   start_timing(cpi, av1_setup_motion_field_time);
 #endif
   if (features->allow_ref_frame_mvs) av1_setup_motion_field(cm);
-#if CONFIG_SMVP_IMPROVEMENT
+#if CONFIG_MVP_IMPROVEMENT
   else
     av1_setup_ref_frame_sides(cm);
-#endif  // CONFIG_SMVP_IMPROVEMENT
+#endif  // CONFIG_MVP_IMPROVEMENT
 #if CONFIG_COLLECT_COMPONENT_TIMING
   end_timing(cpi, av1_setup_motion_field_time);
 #endif
@@ -1766,6 +1837,17 @@
         (frame_probs->warped_probs[update_type] + new_prob) >> 1;
   }
 
+#if CONFIG_CWG_D067_IMPROVED_WARP
+  if (cpi->sf.inter_sf.prune_warpmv_prob_thresh > 0) {
+    const FRAME_UPDATE_TYPE update_type = get_frame_update_type(&cpi->gf_group);
+    int sum = 0;
+    for (i = 0; i < 2; i++) sum += cpi->td.rd_counts.warped_used[i];
+    const int new_prob = sum ? 128 * cpi->td.rd_counts.warped_used[1] / sum : 0;
+    frame_probs->warped_probs[update_type] =
+        (frame_probs->warped_probs[update_type] + new_prob) >> 1;
+  }
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+
   if ((!is_stat_generation_stage(cpi) && av1_use_hash_me(cpi)) ||
       hash_table_created) {
     av1_hash_table_destroy(&intrabc_hash_info->intrabc_hash_table);
diff --git a/av1/encoder/encodeframe_utils.c b/av1/encoder/encodeframe_utils.c
index e7589aa..4258a28 100644
--- a/av1/encoder/encodeframe_utils.c
+++ b/av1/encoder/encodeframe_utils.c
@@ -196,13 +196,42 @@
 // MB_MODE_INFO_EXT_FRAME to MB_MODE_INFO_EXT.
 static INLINE void copy_mbmi_ext_frame_to_mbmi_ext(
     MB_MODE_INFO_EXT *mbmi_ext,
-    const MB_MODE_INFO_EXT_FRAME *const mbmi_ext_best, uint8_t ref_frame_type) {
+    const MB_MODE_INFO_EXT_FRAME *const mbmi_ext_best, uint8_t ref_frame_type
+#if CONFIG_SEP_COMP_DRL
+    ,
+    PREDICTION_MODE this_mode
+#endif  // CONFIG_SEP_COMP_DRL
+) {
+#if CONFIG_SEP_COMP_DRL
+  MV_REFERENCE_FRAME rf[2];
+  av1_set_ref_frame(rf, ref_frame_type);
+  if (has_second_drl_by_mode(this_mode, rf)) {
+    memcpy(mbmi_ext->ref_mv_stack[rf[0]], mbmi_ext_best->ref_mv_stack[0],
+           sizeof(mbmi_ext->ref_mv_stack[USABLE_REF_MV_STACK_SIZE]));
+    memcpy(mbmi_ext->weight[rf[0]], mbmi_ext_best->weight[0],
+           sizeof(mbmi_ext->weight[USABLE_REF_MV_STACK_SIZE]));
+    mbmi_ext->ref_mv_count[rf[0]] = mbmi_ext_best->ref_mv_count[0];
+    memcpy(mbmi_ext->ref_mv_stack[rf[1]], mbmi_ext_best->ref_mv_stack[1],
+           sizeof(mbmi_ext->ref_mv_stack[USABLE_REF_MV_STACK_SIZE]));
+    memcpy(mbmi_ext->weight[rf[1]], mbmi_ext_best->weight[1],
+           sizeof(mbmi_ext->weight[USABLE_REF_MV_STACK_SIZE]));
+    mbmi_ext->ref_mv_count[rf[1]] = mbmi_ext_best->ref_mv_count[1];
+  } else {
+    memcpy(mbmi_ext->ref_mv_stack[ref_frame_type],
+           mbmi_ext_best->ref_mv_stack[0],
+           sizeof(mbmi_ext->ref_mv_stack[USABLE_REF_MV_STACK_SIZE]));
+    memcpy(mbmi_ext->weight[ref_frame_type], mbmi_ext_best->weight[0],
+           sizeof(mbmi_ext->weight[USABLE_REF_MV_STACK_SIZE]));
+    mbmi_ext->ref_mv_count[ref_frame_type] = mbmi_ext_best->ref_mv_count[0];
+  }
+#else
   memcpy(mbmi_ext->ref_mv_stack[ref_frame_type], mbmi_ext_best->ref_mv_stack,
          sizeof(mbmi_ext->ref_mv_stack[USABLE_REF_MV_STACK_SIZE]));
   memcpy(mbmi_ext->weight[ref_frame_type], mbmi_ext_best->weight,
          sizeof(mbmi_ext->weight[USABLE_REF_MV_STACK_SIZE]));
-  mbmi_ext->mode_context[ref_frame_type] = mbmi_ext_best->mode_context;
   mbmi_ext->ref_mv_count[ref_frame_type] = mbmi_ext_best->ref_mv_count;
+#endif  // CONFIG_SEP_COMP_DRL
+  mbmi_ext->mode_context[ref_frame_type] = mbmi_ext_best->mode_context;
   memcpy(mbmi_ext->global_mvs, mbmi_ext_best->global_mvs,
          sizeof(mbmi_ext->global_mvs));
 
@@ -240,12 +269,18 @@
   assert(mi->sb_type[xd->tree_type == CHROMA_PART] == bsize);
 
   *mi_addr = *mi;
+  mi_addr->chroma_ref_info = ctx->chroma_ref_info;
 #if CONFIG_C071_SUBBLK_WARPMV
   if (is_warp_mode(mi->motion_mode)) update_submi(xd, cm, ctx->submic, bsize);
 #endif  // CONFIG_C071_SUBBLK_WARPMV
   if (xd->tree_type != CHROMA_PART)
     copy_mbmi_ext_frame_to_mbmi_ext(x->mbmi_ext, &ctx->mbmi_ext_best,
-                                    av1_ref_frame_type(ctx->mic.ref_frame));
+                                    av1_ref_frame_type(ctx->mic.ref_frame)
+#if CONFIG_SEP_COMP_DRL
+                                        ,
+                                    ctx->mic.mode
+#endif  // CONFIG_SEP_COMP_DRL
+    );
 
   memcpy(txfm_info->blk_skip, ctx->blk_skip,
          sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
@@ -344,6 +379,9 @@
     p[i].qcoeff = ctx->qcoeff[i];
     p[i].dqcoeff = ctx->dqcoeff[i];
     p[i].eobs = ctx->eobs[i];
+#if CONFIG_ATC_DCTX_ALIGNED
+    p[i].bobs = ctx->bobs[i];
+#endif  // CONFIG_ATC_DCTX_ALIGNED
     p[i].txb_entropy_ctx = ctx->txb_entropy_ctx[i];
   }
   for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i];
@@ -367,6 +405,12 @@
           xd->mi[x_idx + y * mis]->cfl_alpha_signs = mi_addr->cfl_alpha_signs;
           xd->mi[x_idx + y * mis]->cfl_alpha_idx = mi_addr->cfl_alpha_idx;
           xd->mi[x_idx + y * mis]->partition = mi_addr->partition;
+#if CONFIG_EXT_RECUR_PARTITIONS
+          xd->mi[x_idx + y * mis]->chroma_mi_row_start =
+              mi_addr->chroma_mi_row_start;
+          xd->mi[x_idx + y * mis]->chroma_mi_col_start =
+              mi_addr->chroma_mi_col_start;
+#endif  // CONFIG_EXT_RECUR_PARTITIONS
           xd->mi[x_idx + y * mis]
               ->palette_mode_info.palette_size[PLANE_TYPE_UV] =
               mi_addr->palette_mode_info.palette_size[PLANE_TYPE_UV];
@@ -386,12 +430,6 @@
   if (dry_run) return;
 
   if (mi_addr->ref_frame[0] != INTRA_FRAME) {
-    if (is_inter_block(mi_addr, xd->tree_type)) {
-      // TODO(sarahparker): global motion stats need to be handled per-tile
-      // to be compatible with tile-based threading.
-      update_global_motion_used(mi_addr->mode, bsize, mi_addr, rdc);
-    }
-
     if (cm->features.interp_filter == SWITCHABLE &&
         !is_warp_mode(mi_addr->motion_mode) &&
         !is_nontrans_global_motion(xd, xd->mi[0])) {
@@ -561,10 +599,18 @@
                    intraonly);
 #endif  // CONFIG_AIMC
     if (cm->seq_params.enable_mrls && av1_is_directional_mode(mbmi->mode)) {
+#if CONFIG_EXT_DIR
+      int mrl_ctx = get_mrl_index_ctx(xd->neighbors[0], xd->neighbors[1]);
+      update_cdf(fc->mrl_index_cdf[mrl_ctx], mbmi->mrl_index, MRL_LINE_NUMBER);
+#if CONFIG_ENTROPY_STATS
+      ++counts->mrl_index[mrl_ctx][mbmi->mrl_index];
+#endif  // CONFIG_ENTROPY_STATS
+#else
+      update_cdf(fc->mrl_index_cdf, mbmi->mrl_index, MRL_LINE_NUMBER);
 #if CONFIG_ENTROPY_STATS
       ++counts->mrl_index[mbmi->mrl_index];
-#endif
-      update_cdf(fc->mrl_index_cdf, mbmi->mrl_index, MRL_LINE_NUMBER);
+#endif  // CONFIG_ENTROPY_STATS
+#endif  // CONFIG_EXT_DIR
     }
     if (av1_filter_intra_allowed(cm, mbmi)) {
       const int use_filter_intra_mode =
@@ -1193,21 +1239,31 @@
   AVERAGE_CDF(ctx_left->v_dc_sign_cdf, ctx_tr->v_dc_sign_cdf, 2);
   AVERAGE_CDF(ctx_left->v_ac_sign_cdf, ctx_tr->v_ac_sign_cdf, 2);
 #endif  // CONFIG_CONTEXT_DERIVATION
-  AVERAGE_CDF(ctx_left->eob_flag_cdf16, ctx_tr->eob_flag_cdf16, 5);
-  AVERAGE_CDF(ctx_left->eob_flag_cdf32, ctx_tr->eob_flag_cdf32, 6);
-  AVERAGE_CDF(ctx_left->eob_flag_cdf64, ctx_tr->eob_flag_cdf64, 7);
-  AVERAGE_CDF(ctx_left->eob_flag_cdf128, ctx_tr->eob_flag_cdf128, 8);
-  AVERAGE_CDF(ctx_left->eob_flag_cdf256, ctx_tr->eob_flag_cdf256, 9);
-  AVERAGE_CDF(ctx_left->eob_flag_cdf512, ctx_tr->eob_flag_cdf512, 10);
-  AVERAGE_CDF(ctx_left->eob_flag_cdf1024, ctx_tr->eob_flag_cdf1024, 11);
+  AVERAGE_CDF(ctx_left->eob_flag_cdf16, ctx_tr->eob_flag_cdf16,
+              EOB_MAX_SYMS - 6);
+  AVERAGE_CDF(ctx_left->eob_flag_cdf32, ctx_tr->eob_flag_cdf32,
+              EOB_MAX_SYMS - 5);
+  AVERAGE_CDF(ctx_left->eob_flag_cdf64, ctx_tr->eob_flag_cdf64,
+              EOB_MAX_SYMS - 4);
+  AVERAGE_CDF(ctx_left->eob_flag_cdf128, ctx_tr->eob_flag_cdf128,
+              EOB_MAX_SYMS - 3);
+  AVERAGE_CDF(ctx_left->eob_flag_cdf256, ctx_tr->eob_flag_cdf256,
+              EOB_MAX_SYMS - 2);
+  AVERAGE_CDF(ctx_left->eob_flag_cdf512, ctx_tr->eob_flag_cdf512,
+              EOB_MAX_SYMS - 1);
+  AVERAGE_CDF(ctx_left->eob_flag_cdf1024, ctx_tr->eob_flag_cdf1024,
+              EOB_MAX_SYMS);
   AVERAGE_CDF(ctx_left->coeff_base_eob_cdf, ctx_tr->coeff_base_eob_cdf, 3);
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC_DCTX_ALIGNED
+  AVERAGE_CDF(ctx_left->coeff_base_bob_cdf, ctx_tr->coeff_base_bob_cdf, 3);
+#endif  // CONFIG_ATC_DCTX_ALIGNED
+#if CONFIG_ATC
   AVERAGE_CDF(ctx_left->coeff_base_lf_cdf, ctx_tr->coeff_base_lf_cdf,
               LF_BASE_SYMBOLS);
   AVERAGE_CDF(ctx_left->coeff_base_lf_eob_cdf, ctx_tr->coeff_base_lf_eob_cdf,
               LF_BASE_SYMBOLS - 1);
   AVERAGE_CDF(ctx_left->coeff_br_lf_cdf, ctx_tr->coeff_br_lf_cdf, BR_CDF_SIZE);
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
   AVERAGE_CDF(ctx_left->coeff_base_cdf, ctx_tr->coeff_base_cdf, 4);
   AVERAGE_CDF(ctx_left->idtx_sign_cdf, ctx_tr->idtx_sign_cdf, 2);
   AVERAGE_CDF(ctx_left->coeff_base_cdf_idtx, ctx_tr->coeff_base_cdf_idtx, 4);
@@ -1221,12 +1277,17 @@
   AVERAGE_CDF(ctx_left->inter_warp_mode_cdf, ctx_tr->inter_warp_mode_cdf, 2);
 #endif  // CONFIG_WARPMV
 
+#if CONFIG_REFINEMV
+  AVERAGE_CDF(ctx_left->refinemv_flag_cdf, ctx_tr->refinemv_flag_cdf,
+              REFINEMV_NUM_MODES);
+#endif  // CONFIG_REFINEMV
+
   AVERAGE_CDF(ctx_left->drl_cdf[0], ctx_tr->drl_cdf[0], 2);
   AVERAGE_CDF(ctx_left->drl_cdf[1], ctx_tr->drl_cdf[1], 2);
   AVERAGE_CDF(ctx_left->drl_cdf[2], ctx_tr->drl_cdf[2], 2);
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
   AVERAGE_CDF(ctx_left->skip_drl_cdf, ctx_tr->skip_drl_cdf, 2);
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
 #if CONFIG_OPTFLOW_REFINEMENT
   AVERAGE_CDF(ctx_left->inter_compound_mode_cdf,
               ctx_tr->inter_compound_mode_cdf, INTER_COMPOUND_REF_TYPES);
@@ -1234,6 +1295,9 @@
   AVERAGE_CDF(ctx_left->inter_compound_mode_cdf,
               ctx_tr->inter_compound_mode_cdf, INTER_COMPOUND_MODES);
 #endif  // CONFIG_OPTFLOW_REFINEMENT
+#if CONFIG_CWP
+  AVERAGE_CDF(ctx_left->cwp_idx_cdf, ctx_tr->cwp_idx_cdf, 2);
+#endif  // CONFIG_CWP
 #if CONFIG_IMPROVED_JMVD
   AVERAGE_CDF(ctx_left->jmvd_scale_mode_cdf, ctx_tr->jmvd_scale_mode_cdf,
               JOINT_NEWMV_SCALE_FACTOR_CNT);
@@ -1273,6 +1337,10 @@
   AVERAGE_CDF(ctx_left->warp_ref_idx_cdf[0], ctx_tr->warp_ref_idx_cdf[0], 2);
   AVERAGE_CDF(ctx_left->warp_ref_idx_cdf[1], ctx_tr->warp_ref_idx_cdf[1], 2);
   AVERAGE_CDF(ctx_left->warp_ref_idx_cdf[2], ctx_tr->warp_ref_idx_cdf[2], 2);
+#if CONFIG_CWG_D067_IMPROVED_WARP
+  AVERAGE_CDF(ctx_left->warpmv_with_mvd_flag_cdf,
+              ctx_tr->warpmv_with_mvd_flag_cdf, 2);
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
 #endif  // CONFIG_WARP_REF_LIST
   AVERAGE_CDF(ctx_left->warp_extend_cdf, ctx_tr->warp_extend_cdf, 2);
 #else
@@ -1316,19 +1384,19 @@
   AVERAGE_CDF(ctx_left->comp_group_idx_cdf, ctx_tr->comp_group_idx_cdf, 2);
   AVERAGE_CDF(ctx_left->skip_mode_cdfs, ctx_tr->skip_mode_cdfs, 2);
   AVERAGE_CDF(ctx_left->skip_txfm_cdfs, ctx_tr->skip_txfm_cdfs, 2);
-#if CONFIG_CONTEXT_DERIVATION
+#if CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT
   AVERAGE_CDF(ctx_left->intra_inter_cdf[0], ctx_tr->intra_inter_cdf[0], 2);
   AVERAGE_CDF(ctx_left->intra_inter_cdf[1], ctx_tr->intra_inter_cdf[1], 2);
 #else
   AVERAGE_CDF(ctx_left->intra_inter_cdf, ctx_tr->intra_inter_cdf, 2);
-#endif  // CONFIG_CONTEXT_DERIVATION
+#endif  // CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT
   avg_nmv(&ctx_left->nmvc, &ctx_tr->nmvc, wt_left, wt_tr);
   avg_nmv(&ctx_left->ndvc, &ctx_tr->ndvc, wt_left, wt_tr);
   AVERAGE_CDF(ctx_left->intrabc_cdf, ctx_tr->intrabc_cdf, 2);
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
   AVERAGE_CDF(ctx_left->intrabc_mode_cdf, ctx_tr->intrabc_mode_cdf, 2);
   AVERAGE_CDF(ctx_left->intrabc_drl_idx_cdf, ctx_tr->intrabc_drl_idx_cdf, 2);
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
   AVERAGE_CDF(ctx_left->seg.tree_cdf, ctx_tr->seg.tree_cdf, MAX_SEGMENTS);
   AVERAGE_CDF(ctx_left->seg.pred_cdf, ctx_tr->seg.pred_cdf, 2);
   AVERAGE_CDF(ctx_left->seg.spatial_pred_seg_cdf,
@@ -1383,6 +1451,7 @@
   AVG_CDF_STRIDE(ctx_left->uv_mode_cdf[0], ctx_tr->uv_mode_cdf[0],
                  UV_INTRA_MODES - 1, CDF_SIZE(UV_INTRA_MODES));
   AVERAGE_CDF(ctx_left->uv_mode_cdf[1], ctx_tr->uv_mode_cdf[1], UV_INTRA_MODES);
+
 #if CONFIG_EXT_RECUR_PARTITIONS
   for (int plane_index = 0; plane_index < PARTITION_STRUCTURE_NUM;
        plane_index++) {
@@ -1411,6 +1480,15 @@
       for (RECT_PART_TYPE rect = 0; rect < NUM_RECT_PARTS; rect++) {
         AVERAGE_CDF(ctx_left->do_ext_partition_cdf[plane_index][rect][i],
                     ctx_tr->do_ext_partition_cdf[plane_index][rect][i], 2);
+#if CONFIG_UNEVEN_4WAY
+        AVERAGE_CDF(
+            ctx_left->do_uneven_4way_partition_cdf[plane_index][rect][i],
+            ctx_tr->do_uneven_4way_partition_cdf[plane_index][rect][i], 2);
+        AVERAGE_CDF(
+            ctx_left->uneven_4way_partition_type_cdf[plane_index][rect][i],
+            ctx_tr->uneven_4way_partition_type_cdf[plane_index][rect][i],
+            NUM_UNEVEN_4WAY_PARTS);
+#endif  // CONFIG_UNEVEN_4WAY
       }
     }
   }
@@ -1466,10 +1544,10 @@
   }
   AVG_CDF_STRIDE(ctx_left->intra_ext_tx_cdf[1], ctx_tr->intra_ext_tx_cdf[1],
                  INTRA_TX_SET1, CDF_SIZE(TX_TYPES));
-#if !(CONFIG_ATC_NEWTXSETS && !CONFIG_ATC_REDUCED_TXSET)
+#if !(CONFIG_ATC && !CONFIG_ATC_REDUCED_TXSET)
   AVG_CDF_STRIDE(ctx_left->intra_ext_tx_cdf[2], ctx_tr->intra_ext_tx_cdf[2],
                  INTRA_TX_SET2, CDF_SIZE(TX_TYPES));
-#endif  // !(CONFIG_ATC_NEWTXSETS && !CONFIG_ATC_REDUCED_TXSET)
+#endif  // !(CONFIG_ATC && !CONFIG_ATC_REDUCED_TXSET)
   AVG_CDF_STRIDE(ctx_left->inter_ext_tx_cdf[1], ctx_tr->inter_ext_tx_cdf[1], 16,
                  CDF_SIZE(TX_TYPES));
   AVG_CDF_STRIDE(ctx_left->inter_ext_tx_cdf[2], ctx_tr->inter_ext_tx_cdf[2], 12,
@@ -1590,9 +1668,9 @@
   const int alloc_mi_idx = get_alloc_mi_idx(&cm->mi_params, mi_row, mi_col);
   sb_fp_stats->current_qindex =
       cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex;
-#if CONFIG_C043_MVP_IMPROVEMENTS
+#if CONFIG_MVP_IMPROVEMENT
   sb_fp_stats->ref_mv_bank = td->mb.e_mbd.ref_mv_bank;
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
+#endif  // CONFIG_MVP_IMPROVEMENT
 #if WARP_CU_BANK
   sb_fp_stats->warp_param_bank = td->mb.e_mbd.warp_param_bank;
 #endif  // WARP_CU_BANK
@@ -1626,9 +1704,9 @@
   const int alloc_mi_idx = get_alloc_mi_idx(&cm->mi_params, mi_row, mi_col);
   cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex =
       sb_fp_stats->current_qindex;
-#if CONFIG_C043_MVP_IMPROVEMENTS
+#if CONFIG_MVP_IMPROVEMENT
   x->e_mbd.ref_mv_bank = sb_fp_stats->ref_mv_bank;
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
+#endif  // CONFIG_MVP_IMPROVEMENT
 #if WARP_CU_BANK
   x->e_mbd.warp_param_bank = sb_fp_stats->warp_param_bank;
 #endif  // WARP_CU_BANK
@@ -1693,7 +1771,7 @@
 #else
                         cm->features.allow_high_precision_mv, &x->mv_costs);
 #endif
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
       if (cm->features.allow_intrabc) {
 #if CONFIG_FLEX_MVRES
         fill_dv_costs(&x->dv_costs, xd->tile_ctx, &x->mv_costs);
@@ -1701,7 +1779,7 @@
         av1_fill_dv_costs(xd->tile_ctx, &x->dv_costs);
 #endif
       }
-#endif  // CONFIG_BVCOST_UPDATE
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
       break;
     default: assert(0);
   }
diff --git a/av1/encoder/encodeframe_utils.h b/av1/encoder/encodeframe_utils.h
index 4da6144..3da084f 100644
--- a/av1/encoder/encodeframe_utils.h
+++ b/av1/encoder/encodeframe_utils.h
@@ -40,6 +40,19 @@
   TXFM_CONTEXT *p_tl;
   TXFM_CONTEXT ta[MAX_MIB_SIZE];
   TXFM_CONTEXT tl[MAX_MIB_SIZE];
+#if CONFIG_MVP_IMPROVEMENT
+  //! The current level bank, used to restore the level bank in MACROBLOCKD.
+  REF_MV_BANK curr_level_bank;
+  //! The best level bank from the rdopt process.
+  REF_MV_BANK best_level_bank;
+#endif  // CONFIG_MVP_IMPROVEMENT
+#if WARP_CU_BANK
+  //! The current warp level bank, used to restore the warp level bank in
+  //! MACROBLOCKD.
+  WARP_PARAM_BANK curr_level_warp_bank;
+  //! The best warp level bank from the rdopt process.
+  WARP_PARAM_BANK best_level_warp_bank;
+#endif  // WARP_CU_BANK
 } RD_SEARCH_MACROBLOCK_CONTEXT;
 
 // This struct is used to store the statistics used by sb-level multi-pass
@@ -54,9 +67,9 @@
   InterModeRdModel inter_mode_rd_models[BLOCK_SIZES_ALL];
   int thresh_freq_fact[BLOCK_SIZES_ALL][MB_MODE_COUNT];
   int current_qindex;
-#if CONFIG_C043_MVP_IMPROVEMENTS
+#if CONFIG_MVP_IMPROVEMENT
   REF_MV_BANK ref_mv_bank;
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
+#endif  // CONFIG_MVP_IMPROVEMENT
 #if WARP_CU_BANK
   WARP_PARAM_BANK warp_param_bank;
 #endif  // WARP_CU_BANK
@@ -99,10 +112,16 @@
   int bsize_at_least_8x8;
 #endif  // !CONFIG_EXT_RECUR_PARTITIONS
 
-  // Indicates edge blocks in frame.
+  // Indicates if at least half of the rows / cols of this block are within the
+  // frame.
   int has_rows;
   int has_cols;
 
+  // Indicates if at least 7/8th of the rows / cols of this block are within the
+  // frame. Used by HORZ/VERT_4A/4B partitions.
+  int has_7_8th_rows;
+  int has_7_8th_cols;
+
   // Block size of current partition.
   BLOCK_SIZE bsize;
 
@@ -130,14 +149,13 @@
   // RD cost summed across all blocks of partition type.
   RD_STATS sum_rdc;
 
+#if !CONFIG_EXT_RECUR_PARTITIONS
   // Array holding partition type cost.
   int tmp_partition_cost[PARTITION_TYPES];
-#if CONFIG_EXT_RECUR_PARTITIONS
-  int partition_cost_table[ALL_PARTITION_TYPES];
-#endif
+#endif  // !CONFIG_EXT_RECUR_PARTITIONS
 
   // Pointer to partition cost buffer
-  int *partition_cost;
+  const int *partition_cost;
 
   // RD costs for different partition types.
   int64_t none_rd;
@@ -164,7 +182,24 @@
 #if !CONFIG_EXT_RECUR_PARTITIONS
   int do_square_split;
 #endif  // !CONFIG_EXT_RECUR_PARTITIONS
-  int prune_rect_part[NUM_RECT_PARTS];
+#if CONFIG_EXT_RECUR_PARTITIONS
+  bool prune_partition_none;
+  bool ext_partition_allowed;
+  bool partition_3_allowed[NUM_RECT_PARTS];
+  bool prune_partition_3[NUM_RECT_PARTS];
+#if CONFIG_UNEVEN_4WAY
+  bool partition_4a_allowed[NUM_RECT_PARTS];
+  bool partition_4b_allowed[NUM_RECT_PARTS];
+  bool prune_partition_4a[NUM_RECT_PARTS];
+  bool prune_partition_4b[NUM_RECT_PARTS];
+#endif  // CONFIG_UNEVEN_4WAY
+  PARTITION_TYPE forced_partition;
+  // Pointer to an array that traces out the current best partition boundary.
+  // Used by prune_part_h_with_partition_boundary and
+  // prune_part_4_with_partition_boundary.
+  bool *partition_boundaries;
+#endif  // CONFIG_EXT_RECUR_PARTITIONS
+  bool prune_rect_part[NUM_RECT_PARTS];
   int is_block_splittable;
 
   // Chroma subsampling in x and y directions.
@@ -178,19 +213,6 @@
   bool found_best_partition;
 } PartitionSearchState;
 
-static AOM_INLINE void update_global_motion_used(PREDICTION_MODE mode,
-                                                 BLOCK_SIZE bsize,
-                                                 const MB_MODE_INFO *mbmi,
-                                                 RD_COUNTS *rdc) {
-  if (mode == GLOBALMV || mode == GLOBAL_GLOBALMV) {
-    const int num_4x4s = mi_size_wide[bsize] * mi_size_high[bsize];
-    int ref;
-    for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
-      rdc->global_motion_used[mbmi->ref_frame[ref]] += num_4x4s;
-    }
-  }
-}
-
 #if CONFIG_WEDGE_MOD_EXT
 static AOM_INLINE void update_wedge_mode_cdf(FRAME_CONTEXT *fc,
                                              const BLOCK_SIZE bsize,
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index a78753b..33b8cc2 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -466,6 +466,32 @@
 }
 #endif  // CONFIG_CROSS_CHROMA_TX
 
+#if CONFIG_ATC_DCTX_ALIGNED
+// Finds the first nonzero coefficient in scan order (BOB) within the block's
+// eob and stores av1_get_max_eob(tx_size) minus its scan index in p->bobs.
+// This rotation keeps BOB statistically similar to EOB for arithmetic coding.
+void set_bob(MACROBLOCK *x, int plane, int block, TX_SIZE tx_size,
+             TX_TYPE tx_type) {
+  const struct macroblock_plane *const p = &x->plane[plane];
+  const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type);
+  const int block_offset = BLOCK_OFFSET(block);
+  tran_low_t *const qcoeff = p->qcoeff + block_offset;
+  uint16_t *const eob = &p->eobs[block];
+  uint16_t *const bob_ptr = &p->bobs[block];
+  int bob = 0;
+  for (int c = 0; c < *eob; ++c) {
+    const int pos = scan_order->scan[c];
+    const tran_low_t v = qcoeff[pos];
+    const tran_low_t level = abs(v);
+    if (level != 0) {
+      break;  // bob now equals the number of leading zero coefficients
+    }
+    bob++;
+  }
+  *bob_ptr = av1_get_max_eob(tx_size) - bob;
+}
+#endif  // CONFIG_ATC_DCTX_ALIGNED
+
 void av1_quant(MACROBLOCK *x, int plane, int block, TxfmParam *txfm_param,
                QUANT_PARAM *qparam) {
   const struct macroblock_plane *const p = &x->plane[plane];
@@ -487,6 +513,10 @@
     }
   }
 
+#if CONFIG_ATC_DCTX_ALIGNED
+  set_bob(x, plane, block, txfm_param->tx_size, txfm_param->tx_type);
+#endif  // CONFIG_ATC_DCTX_ALIGNED
+
 #if CONFIG_CONTEXT_DERIVATION
   MACROBLOCKD *const xd = &x->e_mbd;
   const int16_t *const scan = scan_order->scan;
@@ -687,10 +717,17 @@
     }
 #endif
 #if CONFIG_CROSS_CHROMA_TX
+#if CONFIG_ATC_DCTX_ALIGNED
+    const int skip_cctx = is_inter ? 0 : (p->eobs[block] == 1);
+#endif  // CONFIG_ATC_DCTX_ALIGNED
     // Since eob can be updated here, make sure cctx_type is always CCTX_NONE
     // when eob of U is 0.
     if (is_cctx_allowed(cm, xd) && plane == AOM_PLANE_U &&
+#if CONFIG_ATC_DCTX_ALIGNED
+        (p->eobs[block] == 0 || skip_cctx)) {
+#else
         p->eobs[block] == 0) {
+#endif  // CONFIG_ATC_DCTX_ALIGNED
       // In dry run, cctx type will not be referenced by neighboring blocks, so
       // there is no need to fill in the whole chroma region. In addition,
       // ctx->cctx_type_map size in dry run may not be aligned with actual
@@ -709,6 +746,9 @@
                         &p->eobs[block]);
 #endif  // CONFIG_CROSS_CHROMA_TX && CCTX_C2_DROPPED
     p->eobs[block] = 0;
+#if CONFIG_ATC_DCTX_ALIGNED
+    p->bobs[block] = 0;
+#endif  // CONFIG_ATC_DCTX_ALIGNED
     p->txb_entropy_ctx[block] = 0;
   }
 
@@ -733,7 +773,13 @@
              .buf[(blk_row * pd_c1->dst.stride + blk_col) << MI_SIZE_LOG2];
     int eob_c1 = p_c1->eobs[block];
     int eob_c2 = x->plane[AOM_PLANE_V].eobs[block];
+#if CONFIG_ATC_DCTX_ALIGNED
+    const int is_inter = is_inter_block(mbmi, xd->tree_type);
+    const int skip_cctx = is_inter ? 0 : (p->eobs[block] == 1);
+    recon_with_cctx = (eob_c1 || eob_c2) && !skip_cctx;
+#else
     recon_with_cctx = eob_c1 || eob_c2;
+#endif  // CONFIG_ATC_DCTX_ALIGNED
     max_chroma_eob = AOMMAX(eob_c1, eob_c2);
     if (recon_with_cctx) {
       av1_inv_cross_chroma_tx_block(dqcoeff_c1, dqcoeff, tx_size, cctx_type);
@@ -751,8 +797,14 @@
     av1_inverse_transform_block(
         xd, dqcoeff, plane, tx_type, tx_size, dst, pd->dst.stride,
 #if CONFIG_CROSS_CHROMA_TX
+#if CONFIG_ATC_DCTX_ALIGNED
+        (plane == 0 || !is_cctx_allowed(cm, xd) || !recon_with_cctx)
+            ? p->eobs[block]
+            : max_chroma_eob,
+#else
         (plane == 0 || !is_cctx_allowed(cm, xd)) ? p->eobs[block]
                                                  : max_chroma_eob,
+#endif  // CONFIG_ATC_DCTX_ALIGNED
 #else
         p->eobs[block],
 #endif
@@ -816,11 +868,20 @@
           &pd_c1->dst
                .buf[(blk_row * pd_c1->dst.stride + blk_col) << MI_SIZE_LOG2];
       mismatch_record_block_tx(dst_c1, pd_c1->dst.stride,
-                               cm->current_frame.order_hint, AOM_PLANE_U,
-                               pixel_c, pixel_r, blk_w, blk_h);
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+                               cm->current_frame.display_order_hint,
+#else
+                               cm->current_frame.order_hint,
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+                               AOM_PLANE_U, pixel_c, pixel_r, blk_w, blk_h);
     }
 #endif  // CONFIG_CROSS_CHROMA_TX
-    mismatch_record_block_tx(dst, pd->dst.stride, cm->current_frame.order_hint,
+    mismatch_record_block_tx(dst, pd->dst.stride,
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+                             cm->current_frame.display_order_hint,
+#else
+                             cm->current_frame.order_hint,
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
                              plane, pixel_c, pixel_r, blk_w, blk_h);
   }
 #endif  // CONFIG_MISMATCH_DEBUG
@@ -1136,6 +1197,9 @@
   tran_low_t *dqcoeff = p->dqcoeff + BLOCK_OFFSET(block);
   PLANE_TYPE plane_type = get_plane_type(plane);
   uint16_t *eob = &p->eobs[block];
+#if CONFIG_ATC_DCTX_ALIGNED
+  uint16_t *bob_code = &p->bobs[block];
+#endif  // CONFIG_ATC_DCTX_ALIGNED
   const int dst_stride = pd->dst.stride;
   uint16_t *dst =
       &pd->dst.buf[(blk_row * dst_stride + blk_col) << MI_SIZE_LOG2];
@@ -1155,9 +1219,14 @@
       mi_to_pixel_loc(&pixel_c, &pixel_r, xd->mi_col, xd->mi_row, blk_col,
                       blk_row, pd->subsampling_x, pd->subsampling_y);
     }
-    mismatch_record_block_pre(
-        pd->dst.buf, pd->dst.stride, cm->current_frame.order_hint, plane,
-        pixel_c, pixel_r, tx_size_wide[tx_size], tx_size_high[tx_size]);
+    mismatch_record_block_pre(pd->dst.buf, pd->dst.stride,
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+                              cm->current_frame.display_order_hint,
+#else
+                              cm->current_frame.order_hint,
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+                              plane, pixel_c, pixel_r, tx_size_wide[tx_size],
+                              tx_size_high[tx_size]);
   }
 #endif  // CONFIG_MISMATCH_DEBUG
 
@@ -1175,6 +1244,9 @@
   if (plane == 0 && is_blk_skip(x->txfm_search_info.blk_skip, plane,
                                 blk_row * bw + blk_col)) {
     *eob = 0;
+#if CONFIG_ATC_DCTX_ALIGNED
+    *bob_code = 0;
+#endif  // CONFIG_ATC_DCTX_ALIGNED
     p->txb_entropy_ctx[block] = 0;
 #if DEBUG_EXTQUANT
     if (args->dry_run == OUTPUT_ENABLED) {
@@ -1260,6 +1332,43 @@
       av1_dropout_qcoeff(x, plane, block, tx_size, tx_type,
                          cm->quant_params.base_qindex);
     }
+#if CONFIG_ATC_DCTX_ALIGNED
+    // make sure recon is correct at the encoder
+    if (*eob == 1 && tx_type != 0 && plane == 0) {
+      xd->tx_type_map[blk_row * xd->tx_type_map_stride + blk_col] = DCT_DCT;
+      tx_type = av1_get_tx_type(xd, plane_type, blk_row, blk_col, tx_size,
+                                cm->features.reduced_tx_set_used);
+      av1_setup_xform(cm, x, plane, tx_size, tx_type,
+#if CONFIG_CROSS_CHROMA_TX
+                      CCTX_NONE,
+#endif  // CONFIG_CROSS_CHROMA_TX
+                      &txfm_param);
+      av1_setup_quant(tx_size, use_trellis, quant_idx,
+                      cpi->oxcf.q_cfg.quant_b_adapt, &quant_param);
+      av1_setup_qmatrix(&cm->quant_params, xd, plane, tx_size, tx_type,
+                        &quant_param);
+      av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize,
+                      &txfm_param, &quant_param);
+      if (quant_param.use_optimize_b && do_trellis) {
+        TXB_CTX txb_ctx;
+        get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx,
+                    mbmi->fsc_mode[xd->tree_type == CHROMA_PART]);
+        av1_optimize_b(args->cpi, x, plane, block, tx_size, tx_type,
+#if CONFIG_CROSS_CHROMA_TX
+                       CCTX_NONE,
+#endif  // CONFIG_CROSS_CHROMA_TX
+                       &txb_ctx, &dummy_rate_cost);
+      }
+      if (do_dropout && !fsc_mode
+#if CONFIG_PAR_HIDING
+          && !enable_parity_hiding
+#endif  // CONFIG_PAR_HIDING
+      ) {
+        av1_dropout_qcoeff(x, plane, block, tx_size, tx_type,
+                           cm->quant_params.base_qindex);
+      }
+    }
+#endif  // CONFIG_ATC_DCTX_ALIGNED
 #if CONFIG_PAR_HIDING
     if (!quant_param.use_optimize_b && enable_parity_hiding) {
       parity_hiding_trellis_off(cpi, x, plane, block, tx_size, tx_type);
@@ -1312,7 +1421,12 @@
       mi_to_pixel_loc(&pixel_c, &pixel_r, xd->mi_col, xd->mi_row, blk_col,
                       blk_row, pd->subsampling_x, pd->subsampling_y);
     }
-    mismatch_record_block_tx(dst, pd->dst.stride, cm->current_frame.order_hint,
+    mismatch_record_block_tx(dst, pd->dst.stride,
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+                             cm->current_frame.display_order_hint,
+#else
+                             cm->current_frame.order_hint,
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
                              plane, pixel_c, pixel_r, blk_w, blk_h);
   }
 #endif  // CONFIG_MISMATCH_DEBUG
@@ -1323,11 +1437,7 @@
   if (plane == AOM_PLANE_Y && xd->cfl.store_y && xd->tree_type == SHARED_PART) {
 #if CONFIG_ADAPTIVE_DS_FILTER
     cfl_store_tx(xd, blk_row, blk_col, tx_size,
-#if DS_FRAME_LEVEL
-                 cm->features.ds_filter_type);
-#else
                  cm->seq_params.enable_cfl_ds_filter);
-#endif  // DS_FRAME_LEVEL
 #else
     cfl_store_tx(xd, blk_row, blk_col, tx_size);
 #endif  // CONFIG_ADAPTIVE_DS_FILTER
@@ -1402,13 +1512,21 @@
                     xd->mi[0]->chroma_ref_info.mi_row_chroma_base, blk_col,
                     blk_row, pd_c1->subsampling_x, pd_c1->subsampling_y);
     mismatch_record_block_pre(pd_c1->dst.buf, pd_c1->dst.stride,
-                              cm->current_frame.order_hint, AOM_PLANE_U,
-                              pixel_c, pixel_r, tx_size_wide[tx_size],
-                              tx_size_high[tx_size]);
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+                              cm->current_frame.display_order_hint,
+#else
+                              cm->current_frame.order_hint,
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+                              AOM_PLANE_U, pixel_c, pixel_r,
+                              tx_size_wide[tx_size], tx_size_high[tx_size]);
     mismatch_record_block_pre(pd_c2->dst.buf, pd_c2->dst.stride,
-                              cm->current_frame.order_hint, AOM_PLANE_V,
-                              pixel_c, pixel_r, tx_size_wide[tx_size],
-                              tx_size_high[tx_size]);
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+                              cm->current_frame.display_order_hint,
+#else
+                              cm->current_frame.order_hint,
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+                              AOM_PLANE_V, pixel_c, pixel_r,
+                              tx_size_wide[tx_size], tx_size_high[tx_size]);
   }
 #endif  // CONFIG_MISMATCH_DEBUG
 
@@ -1447,6 +1565,14 @@
                            (INTRA_BLOCK_OPT_TYPE == DROPOUT_OPT ||
                             INTRA_BLOCK_OPT_TYPE == TRELLIS_DROPOUT_OPT));
   for (int plane = AOM_PLANE_U; plane <= AOM_PLANE_V; plane++) {
+#if CONFIG_ATC_DCTX_ALIGNED
+    int skip_cctx = !is_inter_block(xd->mi[0], xd->tree_type) && *eob_c1 == 1;
+    if (plane == AOM_PLANE_V && skip_cctx) {
+      update_cctx_array(xd, blk_row, blk_col, 0, 0,
+                        args->dry_run ? TX_4X4 : tx_size, CCTX_NONE);
+      cctx_type = av1_get_cctx_type(xd, blk_row, blk_col);
+    }
+#endif  // CONFIG_ATC_DCTX_ALIGNED
     // Since eob can be updated here, make sure cctx_type is always CCTX_NONE
     // when eob of U is 0.
     if (plane == AOM_PLANE_V && *eob_c1 == 0) {
@@ -1486,6 +1612,33 @@
       av1_dropout_qcoeff(x, plane, block, tx_size, tx_type,
                          cm->quant_params.base_qindex);
     }
+#if CONFIG_ATC_DCTX_ALIGNED
+    skip_cctx = !is_inter_block(xd->mi[0], xd->tree_type) && *eob_c1 == 1;
+    if (plane == AOM_PLANE_V && skip_cctx) {
+      update_cctx_array(xd, blk_row, blk_col, 0, 0,
+                        args->dry_run ? TX_4X4 : tx_size, CCTX_NONE);
+      cctx_type = av1_get_cctx_type(xd, blk_row, blk_col);
+      av1_setup_qmatrix(&cm->quant_params, xd, plane, tx_size, tx_type,
+                        &quant_param);
+      av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize,
+                      &txfm_param, &quant_param);
+      if (quant_param.use_optimize_b && do_trellis) {
+        const ENTROPY_CONTEXT *a =
+            &args->ta[blk_col + (plane - AOM_PLANE_U) * MAX_MIB_SIZE];
+        const ENTROPY_CONTEXT *l =
+            &args->tl[blk_row + (plane - AOM_PLANE_U) * MAX_MIB_SIZE];
+        TXB_CTX txb_ctx;
+        get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx,
+                    xd->mi[0]->fsc_mode[xd->tree_type == CHROMA_PART]);
+        av1_optimize_b(args->cpi, x, plane, block, tx_size, tx_type, cctx_type,
+                       &txb_ctx, &dummy_rate_cost);
+      }
+      if (do_dropout) {
+        av1_dropout_qcoeff(x, plane, block, tx_size, tx_type,
+                           cm->quant_params.base_qindex);
+      }
+    }
+#endif  // CONFIG_ATC_DCTX_ALIGNED
   }
 
   if (*eob_c1 || *eob_c2) {
@@ -1522,11 +1675,19 @@
                     xd->mi[0]->chroma_ref_info.mi_row_chroma_base, blk_col,
                     blk_row, pd_c1->subsampling_x, pd_c1->subsampling_y);
     mismatch_record_block_tx(dst_c1, pd_c1->dst.stride,
-                             cm->current_frame.order_hint, AOM_PLANE_U, pixel_c,
-                             pixel_r, blk_w, blk_h);
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+                             cm->current_frame.display_order_hint,
+#else
+                             cm->current_frame.order_hint,
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+                             AOM_PLANE_U, pixel_c, pixel_r, blk_w, blk_h);
     mismatch_record_block_tx(dst_c2, pd_c2->dst.stride,
-                             cm->current_frame.order_hint, AOM_PLANE_V, pixel_c,
-                             pixel_r, blk_w, blk_h);
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+                             cm->current_frame.display_order_hint,
+#else
+                             cm->current_frame.order_hint,
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+                             AOM_PLANE_V, pixel_c, pixel_r, blk_w, blk_h);
   }
 #endif  // CONFIG_MISMATCH_DEBUG
 
diff --git a/av1/encoder/encodemb.h b/av1/encoder/encodemb.h
index 3d0a27c..f6be3e4 100644
--- a/av1/encoder/encodemb.h
+++ b/av1/encoder/encodemb.h
@@ -105,6 +105,12 @@
                                     CctxType cctx_type);
 #endif  // CONFIG_CROSS_CHROMA_TX
 
+#if CONFIG_ATC_DCTX_ALIGNED
+// This function sets the first position index in a TU.
+void set_bob(MACROBLOCK *x, int plane, int block, TX_SIZE tx_size,
+             TX_TYPE tx_type);
+#endif  // CONFIG_ATC_DCTX_ALIGNED
+
 void av1_quant(MACROBLOCK *x, int plane, int block, TxfmParam *txfm_param,
                QUANT_PARAM *qparam);
 
diff --git a/av1/encoder/encodemv.c b/av1/encoder/encodemv.c
index 369054d..6e7a524 100644
--- a/av1/encoder/encodemv.c
+++ b/av1/encoder/encodemv.c
@@ -875,15 +875,31 @@
 
     int_mv av1_get_ref_mv_from_stack(
         int ref_idx, const MV_REFERENCE_FRAME *ref_frame, int ref_mv_idx,
-        const MB_MODE_INFO_EXT *mbmi_ext) {
+        const MB_MODE_INFO_EXT *mbmi_ext
+#if CONFIG_SEP_COMP_DRL
+        ,
+        const MB_MODE_INFO *mbmi
+#endif  // CONFIG_SEP_COMP_DRL
+    ) {
       const int8_t ref_frame_type = av1_ref_frame_type(ref_frame);
+#if CONFIG_SEP_COMP_DRL
       const CANDIDATE_MV *curr_ref_mv_stack =
-          mbmi_ext->ref_mv_stack[ref_frame_type];
+          has_second_drl(mbmi) ? mbmi_ext->ref_mv_stack[ref_frame[ref_idx]]
+                               : mbmi_ext->ref_mv_stack[ref_frame_type];
+#else
+  const CANDIDATE_MV *curr_ref_mv_stack =
+      mbmi_ext->ref_mv_stack[ref_frame_type];
+#endif  // CONFIG_SEP_COMP_DRL
 
       if (is_inter_ref_frame(ref_frame[1])) {
         assert(ref_idx == 0 || ref_idx == 1);
-        return ref_idx ? curr_ref_mv_stack[ref_mv_idx].comp_mv
-                       : curr_ref_mv_stack[ref_mv_idx].this_mv;
+#if CONFIG_SEP_COMP_DRL
+        return ref_idx && !has_second_drl(mbmi)
+                   ? curr_ref_mv_stack[ref_mv_idx].comp_mv
+#else
+    return ref_idx ? curr_ref_mv_stack[ref_mv_idx].comp_mv
+#endif  // CONFIG_SEP_COMP_DRL
+                   : curr_ref_mv_stack[ref_mv_idx].this_mv;
       }
 
       assert(ref_idx == 0);
@@ -910,8 +926,14 @@
       if (have_nearmv_newmv_in_inter_mode(mbmi->mode)) {
         assert(has_second_ref(mbmi));
       }
-      return av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame,
-                                       mbmi->ref_mv_idx, x->mbmi_ext);
+#if CONFIG_SEP_COMP_DRL
+      const int ref_mv_idx = get_ref_mv_idx(mbmi, ref_idx);
+      return av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame, ref_mv_idx,
+                                       x->mbmi_ext, mbmi);
+#else
+  return av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame, mbmi->ref_mv_idx,
+                                   x->mbmi_ext);
+#endif  // CONFIG_SEP_COMP_DRL
     }
 
 /**
@@ -927,6 +949,9 @@
  */
 #if CONFIG_FLEX_MVRES
     int_mv av1_find_best_ref_mv_from_stack(const MB_MODE_INFO_EXT *mbmi_ext,
+#if CONFIG_SEP_COMP_DRL
+                                           const MB_MODE_INFO *mbmi,
+#endif  // CONFIG_SEP_COMP_DRL
                                            MV_REFERENCE_FRAME ref_frame,
                                            MvSubpelPrecision precision) {
 #else
@@ -941,7 +966,11 @@
       int range =
           AOMMIN(mbmi_ext->ref_mv_count[ref_frame], MAX_REF_MV_STACK_SIZE);
       for (int i = 0; i < range; i++) {
-        mv = av1_get_ref_mv_from_stack(0, ref_frames, i, mbmi_ext);
+#if CONFIG_SEP_COMP_DRL
+        mv = av1_get_ref_mv_from_stack(0, ref_frames, i, mbmi_ext, mbmi);
+#else
+    mv = av1_get_ref_mv_from_stack(0, ref_frames, i, mbmi_ext);
+#endif  // CONFIG_SEP_COMP_DRL
         if (mv.as_int != 0 && mv.as_int != INVALID_MV) {
           found_ref_mv = true;
           break;
@@ -969,7 +998,16 @@
       int_mv mv;
       const int ref_idx = 0;
       MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, NONE_FRAME };
-      mv = av1_get_ref_mv_from_stack(ref_idx, ref_frames, 0, mbmi_ext);
+#if CONFIG_SEP_COMP_DRL
+      // this function is not called in this software.
+      MB_MODE_INFO mbmi;
+      mbmi.skip_mode = 0;
+      mbmi.mode = NEWMV;
+      mbmi.ref_frame[0] = ref_frame;
+      mv = av1_get_ref_mv_from_stack(ref_idx, ref_frames, 0, mbmi_ext, &mbmi);
+#else
+  mv = av1_get_ref_mv_from_stack(ref_idx, ref_frames, 0, mbmi_ext);
+#endif  // CONFIG_SEP_COMP_DRL
 #if CONFIG_FLEX_MVRES
       lower_mv_precision(&mv.as_mv, precision);
 #else
diff --git a/av1/encoder/encodemv.h b/av1/encoder/encodemv.h
index 39cdf7d..119107d 100644
--- a/av1/encoder/encodemv.h
+++ b/av1/encoder/encodemv.h
@@ -57,12 +57,20 @@
 int_mv av1_get_ref_mv_from_stack(int ref_idx,
                                  const MV_REFERENCE_FRAME *ref_frame,
                                  int ref_mv_idx,
-                                 const MB_MODE_INFO_EXT *mbmi_ext);
+                                 const MB_MODE_INFO_EXT *mbmi_ext
+#if CONFIG_SEP_COMP_DRL
+                                 ,
+                                 const MB_MODE_INFO *mbmi
+#endif  // CONFIG_SEP_COMP_DRL
+);
 #if CONFIG_FLEX_MVRES
 int_mv av1_find_first_ref_mv_from_stack(const MB_MODE_INFO_EXT *mbmi_ext,
                                         MV_REFERENCE_FRAME ref_frame,
                                         MvSubpelPrecision precision);
 int_mv av1_find_best_ref_mv_from_stack(const MB_MODE_INFO_EXT *mbmi_ext,
+#if CONFIG_SEP_COMP_DRL
+                                       const MB_MODE_INFO *mbmi,
+#endif  // CONFIG_SEP_COMP_DRL
                                        MV_REFERENCE_FRAME ref_frame,
                                        MvSubpelPrecision precision);
 #else
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c
index 543f671..378e9d6 100644
--- a/av1/encoder/encoder.c
+++ b/av1/encoder/encoder.c
@@ -204,6 +204,9 @@
   av1_init_me_luts();
   av1_rc_init_minq_luts();
   av1_init_wedge_masks();
+#if CONFIG_CWP
+  init_cwp_masks();
+#endif  // CONFIG_CWP
 }
 
 static void update_reference_segmentation_map(AV1_COMP *cpi) {
@@ -371,6 +374,10 @@
           ? DEFAULT_EXPLICIT_ORDER_HINT_BITS - 1
           : -1;
   seq->explicit_ref_frame_map = oxcf->ref_frm_cfg.explicit_ref_frame_map;
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+  // Set 0 for multi-layer coding
+  seq->enable_frame_output_order = oxcf->ref_frm_cfg.enable_frame_output_order;
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
   seq->max_reference_frames = oxcf->ref_frm_cfg.max_reference_frames;
 #if CONFIG_ALLOW_SAME_REF_COMPOUND
   seq->num_same_ref_compound = SAME_REF_COMPOUND_PRUNE;
@@ -414,6 +421,12 @@
 #if CONFIG_BAWP
   seq->enable_bawp = tool_cfg->enable_bawp;
 #endif  // CONFIG_BAWP
+#if CONFIG_CWP
+  seq->enable_cwp = tool_cfg->enable_cwp;
+#endif  // CONFIG_CWP
+#if CONFIG_D071_IMP_MSK_BLD
+  seq->enable_imp_msk_bld = tool_cfg->enable_imp_msk_bld;
+#endif  // CONFIG_D071_IMP_MSK_BLD
 #if CONFIG_EXTENDED_WARP_PREDICTION
   seq->seq_enabled_motion_modes =
       oxcf->motion_mode_cfg.seq_enabled_motion_modes;
@@ -434,6 +447,9 @@
 #if CONFIG_ORIP
   seq->enable_orip = oxcf->intra_mode_cfg.enable_orip;
 #endif
+#if CONFIG_IDIF
+  seq->enable_idif = oxcf->intra_mode_cfg.enable_idif;
+#endif  // CONFIG_IDIF
   seq->enable_ist = oxcf->txfm_cfg.enable_ist;
 #if CONFIG_CROSS_CHROMA_TX
   seq->enable_cctx = oxcf->txfm_cfg.enable_cctx;
@@ -451,6 +467,9 @@
 #if CONFIG_JOINT_MVD
   seq->enable_joint_mvd = tool_cfg->enable_joint_mvd;
 #endif  // CONFIG_JOINT_MVD
+#if CONFIG_REFINEMV
+  seq->enable_refinemv = tool_cfg->enable_refinemv;
+#endif  // CONFIG_REFINEMV
   set_bitstream_level_tier(seq, cm, frm_dim_cfg->width, frm_dim_cfg->height,
                            oxcf->input_cfg.init_framerate);
 
@@ -495,6 +514,13 @@
 #if CONFIG_PAR_HIDING
   seq->enable_parity_hiding = tool_cfg->enable_parity_hiding;
 #endif  // CONFIG_PAR_HIDING
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+  // TODO(rachelbarker): Check if cpi->sf.gm_sf.gm_search_type is set by this
+  // point, and set to 0 if cpi->sf.gm_sf.gm_search_type == GM_DISABLE_SEARCH
+  // if possible
+  seq->enable_global_motion =
+      tool_cfg->enable_global_motion && !seq->reduced_still_picture_hdr;
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
 }
 
 static void init_config(struct AV1_COMP *cpi, AV1EncoderConfig *oxcf) {
@@ -575,7 +601,7 @@
   // set sb size before allocations
   const BLOCK_SIZE sb_size = av1_select_sb_size(cpi);
   set_sb_size(cm, sb_size);
-  cpi->td.sb_size = sb_size;
+  cpi->td.sb_size = cm->sb_size;
   alloc_compressor_data(cpi);
 
   av1_update_film_grain_parameters(cpi, oxcf);
@@ -1469,7 +1495,14 @@
   PSNR_STATS psnr;
   const uint32_t in_bit_depth = cpi->oxcf.input_cfg.input_bit_depth;
   const uint32_t bit_depth = cpi->td.mb.e_mbd.bd;
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+  // To match the PSNR results between encoder log and VMAF results,
+  // the same reference sources (unfiltered source) need to be used.
+  aom_calc_highbd_psnr(cpi->unfiltered_source, &cpi->common.cur_frame->buf,
+                       &psnr,
+#else   // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
   aom_calc_highbd_psnr(cpi->source, &cpi->common.cur_frame->buf, &psnr,
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
                        bit_depth, in_bit_depth);
 
   for (i = 0; i < 4; ++i) {
@@ -1622,9 +1655,13 @@
     dst += CFL_BUF_LINE;
   }
 }
-
+#if CONFIG_CFL_IMPROVEMENTS
+static int64_t compute_sad(const uint16_t *src, uint16_t *src2, int width,
+                           int height, int round_offset, int src2_stride) {
+#else
 static int compute_sad(const uint16_t *src, uint16_t *src2, int width,
                        int height, int round_offset, int src2_stride) {
+#endif  // CONFIG_CFL_IMPROVEMENTS
   int sad = round_offset;
   for (int j = 0; j < height; ++j) {
     for (int i = 0; i < width; ++i) {
@@ -1633,7 +1670,11 @@
     src += CFL_BUF_LINE;
     src2 += src2_stride;
   }
+#if CONFIG_CFL_IMPROVEMENTS
+  return sad;
+#else
   return (sad / (height * width));
+#endif  // CONFIG_CFL_IMPROVEMENTS
 }
 
 static void cfl_predict_hbd_pre_analysis(const int16_t *ac_buf_q3,
@@ -1688,11 +1729,7 @@
   }
 }
 
-#if DS_FRAME_LEVEL
-void av1_set_downsample_filter_options(AV1_COMP *cpi, FeatureFlags *features) {
-#else
 void av1_set_downsample_filter_options(AV1_COMP *cpi) {
-#endif  // DS_FRAME_LEVE
   AV1_COMMON *cm = &cpi->common;
   const uint16_t *src = cpi->unfiltered_source->y_buffer;
   uint16_t *src_chroma_u = cpi->unfiltered_source->u_buffer;
@@ -1707,14 +1744,34 @@
   const int subsampling_x = cpi->unfiltered_source->subsampling_x;
   const int subsampling_y = cpi->unfiltered_source->subsampling_y;
 
+#if CONFIG_ADPTIVE_DS_422
+  if (subsampling_x == 0 && subsampling_y == 0) {
+    cm->seq_params.enable_cfl_ds_filter =
+        0;  // For 4:4:4 chroma format, downsampling filter is not used. There
+            // is a redundancy: the filter index is still signalled for
+            // 4:4:4. Should the index signalling for 4:4:4 be removed with
+            // this MR?
+    return;
+  }
+#endif  // CONFIG_ADPTIVE_DS_422
+
+#if CONFIG_CFL_IMPROVEMENTS
+  const int blk_w = 16;
+  const int blk_h = 16;
+#else
   const int blk_w = 32;
   const int blk_h = 32;
+#endif  // CONFIG_CFL_IMPROVEMENTS
 
   uint16_t recon_buf_q3[CFL_BUF_SQUARE];
   uint16_t dc_buf_q3[CFL_BUF_SQUARE];
   // Q3 AC contributions (reconstructed luma pixels - tx block avg)
   int16_t ac_buf_q3[CFL_BUF_SQUARE];
+#if CONFIG_CFL_IMPROVEMENTS
+  int64_t cost[3] = { 0, 0, 0 };
+#else
   int cost[3] = { 0, 0, 0 };
+#endif  // CONFIG_CFL_IMPROVEMENTS
   for (int filter_type = 0; filter_type < 3; ++filter_type) {
     for (int comp = 0; comp < 2; comp++) {
       for (int r = 2; r + blk_h <= height - 2; r += blk_h) {
@@ -1730,7 +1787,14 @@
           }
 
           int alpha = 0;
+#if CONFIG_ADPTIVE_DS_422
+          if (subsampling_x == 1 && subsampling_y == 0) {
+            cfl_adaptive_luma_subsampling_422_hbd_c(
+                this_src, stride, recon_buf_q3, blk_w, blk_h, filter_type);
+          } else if (filter_type == 1) {
+#else
           if (filter_type == 1) {
+#endif  // CONFIG_ADPTIVE_DS_422
             cfl_luma_subsampling_420_hbd_121_c(this_src, stride, recon_buf_q3,
                                                blk_w, blk_h);
           } else if (filter_type == 2) {
@@ -1740,6 +1804,30 @@
             cfl_luma_subsampling_420_hbd_c(this_src, stride, recon_buf_q3,
                                            blk_w, blk_h);
           }
+#if CONFIG_ADPTIVE_DS_422
+          cfl_derive_block_implicit_scaling_factor(
+              recon_buf_q3, this_src_chroma, blk_w >> subsampling_x,
+              blk_h >> subsampling_y, CFL_BUF_LINE, chroma_stride, &alpha);
+          subtract_average_c(
+              recon_buf_q3, ac_buf_q3, blk_w >> subsampling_x,
+              blk_h >> subsampling_y, 4,
+              (blk_w >> subsampling_x) * (blk_h >> subsampling_y));
+          cfl_predict_hbd_dc(this_src_chroma - chroma_stride, dc_buf_q3,
+                             chroma_stride, blk_w >> subsampling_x,
+                             blk_h >> subsampling_y);
+          cfl_predict_hbd_pre_analysis(ac_buf_q3, dc_buf_q3, CFL_BUF_LINE,
+                                       alpha, bd, blk_w >> subsampling_x,
+                                       blk_h >> subsampling_y);
+#if CONFIG_CFL_IMPROVEMENTS
+          int64_t filter_cost =
+              compute_sad(dc_buf_q3, this_src_chroma, blk_w >> 1, blk_h >> 1, 2,
+                          chroma_stride);
+#else
+          int filter_cost =
+              compute_sad(dc_buf_q3, this_src_chroma, blk_w >> subsampling_x,
+                          blk_h >> subsampling_y, 2, chroma_stride);
+#endif  // CONFIG_CFL_IMPROVEMENTS
+#else
           cfl_derive_block_implicit_scaling_factor(
               recon_buf_q3, this_src_chroma, blk_w >> 1, blk_h >> 1,
               CFL_BUF_LINE, chroma_stride, &alpha);
@@ -1749,23 +1837,29 @@
                              chroma_stride, blk_w >> 1, blk_h >> 1);
           cfl_predict_hbd_pre_analysis(ac_buf_q3, dc_buf_q3, CFL_BUF_LINE,
                                        alpha, bd, blk_w >> 1, blk_h >> 1);
+#if CONFIG_CFL_IMPROVEMENTS
+          int64_t filter_cost =
+              compute_sad(dc_buf_q3, this_src_chroma, blk_w >> 1, blk_h >> 1, 2,
+                          chroma_stride);
+#else
           int filter_cost = compute_sad(dc_buf_q3, this_src_chroma, blk_w >> 1,
                                         blk_h >> 1, 2, chroma_stride);
+#endif  // CONFIG_CFL_IMPROVEMENTS
+#endif  // CONFIG_ADPTIVE_DS_422
           cost[filter_type] = cost[filter_type] + filter_cost;
         }
       }
     }
   }
-
+#if CONFIG_CFL_IMPROVEMENTS
+  int64_t min_cost = INT64_MAX;
+#else
   int min_cost = INT_MAX;
+#endif  // CONFIG_CFL_IMPROVEMENTS
   for (int i = 0; i < 3; ++i) {
     if (cost[i] < min_cost) {
       min_cost = cost[i];
-#if DS_FRAME_LEVEL
-      features->ds_filter_type = i;
-#else
       cm->seq_params.enable_cfl_ds_filter = i;
-#endif  // DS_FRAME_LEVEL
     }
   }
 }
@@ -1911,6 +2005,29 @@
   }
 }
 
+#define COUPLED_CHROMA_FROM_LUMA_RESTORATION 0
+#if !CONFIG_FLEXIBLE_RU_SIZE
+static void set_restoration_unit_size(int width, int height, int sx, int sy,
+                                      RestorationInfo *rst) {
+  (void)width;
+  (void)height;
+  (void)sx;
+  (void)sy;
+#if COUPLED_CHROMA_FROM_LUMA_RESTORATION
+  int s = AOMMIN(sx, sy);
+#else
+  int s = 0;
+#endif  // COUPLED_CHROMA_FROM_LUMA_RESTORATION
+
+  if (width * height > 352 * 288)
+    rst[0].restoration_unit_size = RESTORATION_UNITSIZE_MAX;
+  else
+    rst[0].restoration_unit_size = (RESTORATION_UNITSIZE_MAX >> 1);
+  rst[1].restoration_unit_size = rst[0].restoration_unit_size >> s;
+  rst[2].restoration_unit_size = rst[1].restoration_unit_size;
+}
+#endif  // !CONFIG_FLEXIBLE_RU_SIZE
+
 static void init_ref_frame_bufs(AV1_COMP *cpi) {
   AV1_COMMON *const cm = &cpi->common;
   int i;
@@ -2049,11 +2166,15 @@
 
   const int frame_width = cm->superres_upscaled_width;
   const int frame_height = cm->superres_upscaled_height;
-  av1_set_restoration_unit_size(
-      frame_width, frame_height, seq_params->subsampling_x,
-      seq_params->subsampling_y, cm->rst_info, cm->sb_size);
+  set_restoration_unit_size(frame_width, frame_height,
+                            seq_params->subsampling_x,
+                            seq_params->subsampling_y, cm->rst_info);
   for (int i = 0; i < num_planes; ++i)
     cm->rst_info[i].frame_restoration_type = RESTORE_NONE;
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+  for (int i = 0; i < num_planes; ++i)
+    cm->rst_info[i].frame_cross_restoration_type = RESTORE_NONE;
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
 
   av1_alloc_restoration_buffers(cm);
   if (!is_stat_generation_stage(cpi)) alloc_util_frame_buffers(cpi);
@@ -2094,6 +2215,30 @@
   set_ref_ptrs(cm, xd, 0, 0);
 }
 
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+static void save_pre_filter_frame(AV1_COMP *cpi, AV1_COMMON *cm) {
+  (void)cpi;
+  YV12_BUFFER_CONFIG *frame = &cm->cur_frame->buf;
+  YV12_BUFFER_CONFIG *pre_filter_frame = &cm->pre_rst_frame;
+
+  const SequenceHeader *const seq_params = &cm->seq_params;
+
+  const int frame_width = frame->crop_widths[0];
+  const int frame_height = frame->crop_heights[0];
+
+  if (aom_realloc_frame_buffer(
+          pre_filter_frame, frame_width, frame_height,
+          seq_params->subsampling_x, seq_params->subsampling_y,
+          AOM_RESTORATION_FRAME_BORDER, cm->features.byte_alignment, NULL, NULL,
+          NULL) < 0)
+    aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
+                       "Failed to allocate restoration dst buffer");
+
+  const int num_planes = av1_num_planes(cm);
+  aom_yv12_copy_frame(frame, pre_filter_frame, num_planes);
+}
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+
 /*!\brief Select and apply cdef filters and switchable restoration filters
  *
  * \ingroup high_level_algo
@@ -2235,9 +2380,32 @@
   if (use_restoration) {
     av1_loop_restoration_save_boundary_lines(&cm->cur_frame->buf, cm, 1);
     av1_pick_filter_restoration(cpi->source, cpi);
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+    save_pre_filter_frame(cpi, cm);
+    if (num_workers > 1)
+      av1_loop_restoration_filter_frame_mt(
+          &cm->cur_frame->buf, cm, 0, mt_info->workers, num_workers,
+          &mt_info->lr_row_sync, &cpi->lr_ctxt);
+    else
+      av1_loop_restoration_filter_frame(&cm->cur_frame->buf, cm, 0,
+                                        &cpi->lr_ctxt);
+
+    // restore luma component of the frame
+    aom_yv12_copy_y(&cm->pre_rst_frame, &cm->cur_frame->buf);
+    av1_pick_cross_filter_restoration(cpi->source, cpi);
+    // restore chroma components of the frame
+    aom_yv12_copy_u(&cm->pre_rst_frame, &cm->cur_frame->buf);
+    aom_yv12_copy_v(&cm->pre_rst_frame, &cm->cur_frame->buf);
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
     if (cm->rst_info[0].frame_restoration_type != RESTORE_NONE ||
         cm->rst_info[1].frame_restoration_type != RESTORE_NONE ||
-        cm->rst_info[2].frame_restoration_type != RESTORE_NONE) {
+        cm->rst_info[2].frame_restoration_type != RESTORE_NONE
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+        || cm->rst_info[0].frame_cross_restoration_type != RESTORE_NONE ||
+        cm->rst_info[1].frame_cross_restoration_type != RESTORE_NONE ||
+        cm->rst_info[2].frame_cross_restoration_type != RESTORE_NONE
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+    ) {
       if (num_workers > 1)
         av1_loop_restoration_filter_frame_mt(
             &cm->cur_frame->buf, cm, 0, mt_info->workers, num_workers,
@@ -2250,6 +2418,11 @@
     cm->rst_info[0].frame_restoration_type = RESTORE_NONE;
     cm->rst_info[1].frame_restoration_type = RESTORE_NONE;
     cm->rst_info[2].frame_restoration_type = RESTORE_NONE;
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+    cm->rst_info[0].frame_cross_restoration_type = RESTORE_NONE;
+    cm->rst_info[1].frame_cross_restoration_type = RESTORE_NONE;
+    cm->rst_info[2].frame_cross_restoration_type = RESTORE_NONE;
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
   }
 #if CONFIG_COLLECT_COMPONENT_TIMING
   end_timing(cpi, loop_restoration_time);
@@ -2724,13 +2897,6 @@
       loop = 0;
     }
 
-    if (allow_recode && !cpi->sf.gm_sf.gm_disable_recode &&
-        av1_recode_loop_test_global_motion(cm->global_motion,
-                                           cpi->td.rd_counts.global_motion_used,
-                                           gm_info->params_cost)) {
-      loop = 1;
-    }
-
     if (loop) {
       ++loop_count;
 
@@ -2837,6 +3003,11 @@
     cm->rst_info[0].frame_restoration_type = RESTORE_NONE;
     cm->rst_info[1].frame_restoration_type = RESTORE_NONE;
     cm->rst_info[2].frame_restoration_type = RESTORE_NONE;
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+    cm->rst_info[0].frame_cross_restoration_type = RESTORE_NONE;
+    cm->rst_info[1].frame_cross_restoration_type = RESTORE_NONE;
+    cm->rst_info[2].frame_cross_restoration_type = RESTORE_NONE;
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
 
     for (int i = 0; i < INTER_REFS_PER_FRAME; ++i) {
       cm->global_motion[i] = default_warp_params;
@@ -2967,6 +3138,11 @@
     cm->rst_info[0].frame_restoration_type = RESTORE_NONE;
     cm->rst_info[1].frame_restoration_type = RESTORE_NONE;
     cm->rst_info[2].frame_restoration_type = RESTORE_NONE;
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+    cm->rst_info[0].frame_cross_restoration_type = RESTORE_NONE;
+    cm->rst_info[1].frame_cross_restoration_type = RESTORE_NONE;
+    cm->rst_info[2].frame_cross_restoration_type = RESTORE_NONE;
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
   }
 
 #if CONFIG_TIP
@@ -3218,13 +3394,8 @@
   }
 #endif  // CONFIG_IBC_SR_EXT
 #if CONFIG_ADAPTIVE_DS_FILTER
-#if DS_FRAME_LEVEL
-  if (cm->current_frame.frame_type == KEY_FRAME) {
-    av1_set_downsample_filter_options(cpi, features);
-#else
-  if (cpi->common.current_frame.absolute_poc == 0) {
+  if (cpi->common.current_frame.frame_type == KEY_FRAME) {
     av1_set_downsample_filter_options(cpi);
-#endif  // DS_FRAME_LEVEL
   }
 #endif  // CONFIG_ADAPTIVE_DS_FILTER
   // frame type has been decided outside of this function call
@@ -3243,11 +3414,20 @@
   features->allow_warped_motion = oxcf->motion_mode_cfg.allow_warped_motion &&
                                   frame_might_allow_warped_motion(cm);
 #endif  // !CONFIG_EXTENDED_WARP_PREDICTION
-
+#if CONFIG_CWG_D067_IMPROVED_WARP
+  features->allow_warpmv_mode = features->enabled_motion_modes;
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
   // temporal set of frame level enable_bawp flag.
 #if CONFIG_BAWP
   features->enable_bawp = seq_params->enable_bawp;
 #endif
+#if CONFIG_CWP
+  features->enable_cwp = seq_params->enable_cwp;
+#endif  // CONFIG_CWP
+
+#if CONFIG_D071_IMP_MSK_BLD
+  features->enable_imp_msk_bld = seq_params->enable_imp_msk_bld;
+#endif  // CONFIG_D071_IMP_MSK_BLD
 
   cpi->last_frame_type = current_frame->frame_type;
 
@@ -3715,7 +3895,11 @@
 #endif
   cpi->bytes += frame_bytes;
   if (cm->show_frame) {
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+    const YV12_BUFFER_CONFIG *orig = cpi->unfiltered_source;
+#else   // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
     const YV12_BUFFER_CONFIG *orig = cpi->source;
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
     const YV12_BUFFER_CONFIG *recon = &cpi->common.cur_frame->buf;
     double y, u, v, frame_all;
 
@@ -3814,10 +3998,18 @@
   aom_usec_timer_mark(&cmptimer);
   cpi->time_compress_data += aom_usec_timer_elapsed(&cmptimer);
 #endif  // CONFIG_INTERNAL_STATS
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+  if (cpi->b_calculate_psnr && *size > 0) {
+    if ((cm->showable_frame && cm->seq_params.enable_frame_output_order) ||
+        (cm->show_existing_frame &&
+         !cm->seq_params.enable_frame_output_order) ||
+        (!is_stat_generation_stage(cpi) && cm->show_frame)) {
+#else
   // Note *size = 0 indicates a dropeed frame for which psnr is not calculated
   if (cpi->b_calculate_psnr && *size > 0) {
     if (cm->show_existing_frame ||
         (!is_stat_generation_stage(cpi) && cm->show_frame)) {
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
       generate_psnr_packet(cpi);
     }
   }
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h
index a0a16b0..e365151 100644
--- a/av1/encoder/encoder.h
+++ b/av1/encoder/encoder.h
@@ -288,6 +288,12 @@
    */
   bool enable_orip;
 #endif
+#if CONFIG_IDIF
+  /*!
+   * Flag to indicate if IDIF should be enabled
+   */
+  bool enable_idif;
+#endif
   /*!
    * Flag to indicate if IBP should be enabled
    */
@@ -678,6 +684,10 @@
   // Indicates if one-sided compound should be enabled.
   bool enable_onesided_comp;
   bool explicit_ref_frame_map;
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+  // Indicates if the implicit frame order derivation is enabled.
+  bool enable_frame_output_order;
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
 } RefFrameCfg;
 
 typedef struct {
@@ -864,6 +874,10 @@
   // Indicates if joint mvd coding should be enabled.
   bool enable_joint_mvd;
 #endif  // CONFIG_JOINT_MVD
+#if CONFIG_REFINEMV
+  // Indicates if refineMV mode should be enabled.
+  bool enable_refinemv;
+#endif  // CONFIG_REFINEMV
 #if CONFIG_TIP
   // enable temporal interpolated prediction
   int enable_tip;
@@ -872,6 +886,14 @@
   // enable block adaptive weighted prediction
   int enable_bawp;
 #endif  // CONFIG_BAWP
+#if CONFIG_CWP
+  // enable compound weighted prediction
+  int enable_cwp;
+#endif  // CONFIG_CWP
+#if CONFIG_D071_IMP_MSK_BLD
+  // enable implicit masked blending
+  bool enable_imp_msk_bld;
+#endif  // CONFIG_D071_IMP_MSK_BLD
   // When enabled, video mode should be used even for single frame input.
   bool force_video_mode;
   // Indicates if the error resiliency features should be enabled.
@@ -1220,7 +1242,11 @@
   unsigned int uv_mode[CFL_ALLOWED_TYPES][INTRA_MODES][UV_INTRA_MODES];
 #endif
   unsigned int fsc_mode[FSC_MODE_CONTEXTS][FSC_BSIZE_CONTEXTS][FSC_MODES];
+#if CONFIG_EXT_DIR
+  unsigned int mrl_index[MRL_INDEX_CONTEXTS][MRL_LINE_NUMBER];
+#else
   unsigned int mrl_index[MRL_LINE_NUMBER];
+#endif  // CONFIG_EXT_DIR
 #if CONFIG_IMPROVED_CFL
   unsigned int cfl_index[CFL_TYPE_COUNT];
 #endif
@@ -1243,6 +1269,13 @@
   unsigned int rect_type[PARTITION_STRUCTURE_NUM][PARTITION_CONTEXTS][2];
   unsigned int do_ext_partition[PARTITION_STRUCTURE_NUM][NUM_RECT_PARTS]
                                [PARTITION_CONTEXTS][2];
+#if CONFIG_UNEVEN_4WAY
+  unsigned int do_uneven_4way_partition[PARTITION_STRUCTURE_NUM][NUM_RECT_PARTS]
+                                       [PARTITION_CONTEXTS][2];
+  unsigned int uneven_4way_partition_type[PARTITION_STRUCTURE_NUM]
+                                         [NUM_RECT_PARTS][PARTITION_CONTEXTS]
+                                         [NUM_UNEVEN_4WAY_PARTS];
+#endif  // CONFIG_UNEVEN_4WAY
 #else
   unsigned int partition[PARTITION_STRUCTURE_NUM][PARTITION_CONTEXTS]
                         [EXT_PARTITION_TYPES];
@@ -1268,6 +1301,17 @@
   unsigned int coeff_lps[TX_SIZES][PLANE_TYPES][BR_CDF_SIZE - 1][LEVEL_CONTEXTS]
                         [2];
   unsigned int eob_flag[TX_SIZES][PLANE_TYPES][EOB_COEF_CONTEXTS][2];
+#if CONFIG_ATC_DCTX_ALIGNED
+  unsigned int coeff_base_bob_multi[TOKEN_CDF_Q_CTXS][SIG_COEF_CONTEXTS_BOB]
+                                   [NUM_BASE_LEVELS + 1];
+  unsigned int eob_multi16[TOKEN_CDF_Q_CTXS][PLANE_TYPES][EOB_MAX_SYMS - 6];
+  unsigned int eob_multi32[TOKEN_CDF_Q_CTXS][PLANE_TYPES][EOB_MAX_SYMS - 5];
+  unsigned int eob_multi64[TOKEN_CDF_Q_CTXS][PLANE_TYPES][EOB_MAX_SYMS - 4];
+  unsigned int eob_multi128[TOKEN_CDF_Q_CTXS][PLANE_TYPES][EOB_MAX_SYMS - 3];
+  unsigned int eob_multi256[TOKEN_CDF_Q_CTXS][PLANE_TYPES][EOB_MAX_SYMS - 2];
+  unsigned int eob_multi512[TOKEN_CDF_Q_CTXS][PLANE_TYPES][EOB_MAX_SYMS - 1];
+  unsigned int eob_multi1024[TOKEN_CDF_Q_CTXS][PLANE_TYPES][EOB_MAX_SYMS];
+#else
   unsigned int eob_multi16[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][5];
   unsigned int eob_multi32[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][6];
   unsigned int eob_multi64[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][7];
@@ -1275,7 +1319,8 @@
   unsigned int eob_multi256[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][9];
   unsigned int eob_multi512[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][10];
   unsigned int eob_multi1024[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][11];
-#if CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC_DCTX_ALIGNED
+#if CONFIG_ATC
   unsigned int coeff_lps_lf[PLANE_TYPES][BR_CDF_SIZE - 1][LF_LEVEL_CONTEXTS][2];
   unsigned int coeff_base_lf_multi[TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES]
                                   [LF_SIG_COEF_CONTEXTS][LF_BASE_SYMBOLS];
@@ -1289,7 +1334,7 @@
 #else
   unsigned int coeff_lps_multi[TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES]
                               [LEVEL_CONTEXTS][BR_CDF_SIZE];
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 #if CONFIG_PAR_HIDING
   unsigned int coeff_base_ph_multi[TOKEN_CDF_Q_CTXS][COEFF_BASE_PH_CONTEXTS]
                                   [NUM_BASE_LEVELS + 2];
@@ -1304,9 +1349,9 @@
   unsigned int inter_single_mode[INTER_SINGLE_MODE_CONTEXTS]
                                 [INTER_SINGLE_MODES];
   unsigned int drl_mode[3][DRL_MODE_CONTEXTS][2];
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
   unsigned int skip_drl_mode[3][2];
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
 #if CONFIG_OPTFLOW_REFINEMENT
   unsigned int use_optflow[INTER_COMPOUND_MODE_CONTEXTS][2];
   unsigned int inter_compound_mode[INTER_COMPOUND_MODE_CONTEXTS]
@@ -1338,12 +1383,15 @@
   unsigned int motion_mode[BLOCK_SIZES_ALL][MOTION_MODES];
   unsigned int obmc[BLOCK_SIZES_ALL][2];
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
-#if CONFIG_CONTEXT_DERIVATION
+#if CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT
   unsigned int intra_inter[INTRA_INTER_SKIP_TXFM_CONTEXTS][INTRA_INTER_CONTEXTS]
                           [2];
 #else
   unsigned int intra_inter[INTRA_INTER_CONTEXTS][2];
-#endif
+#endif  // CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT
+#if CONFIG_CWP
+  int8_t cwp_idx[MAX_CWP_NUM - 1][2];
+#endif  // CONFIG_CWP
 #if CONFIG_BAWP
   unsigned int bawp[2];
 #endif  // CONFIG_BAWP
@@ -1360,7 +1408,7 @@
 #else
   unsigned int intrabc[2];
 #endif  // CONFIG_NEW_CONTEXT_MODELING
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
   unsigned int intrabc_mode[2];
   unsigned int intrabc_drl_idx[MAX_REF_BV_STACK_SIZE - 1][2];
 #endif
@@ -1380,7 +1428,12 @@
   unsigned int delta_lf_multi[FRAME_LF_COUNT][DELTA_LF_PROBS][2];
   unsigned int delta_lf[DELTA_LF_PROBS][2];
 
+#if CONFIG_ATC_DCTX_ALIGNED
+  unsigned int inter_ext_tx[EXT_TX_SETS_INTER][EOB_TX_CTXS][EXT_TX_SIZES]
+                           [TX_TYPES];
+#else
   unsigned int inter_ext_tx[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES];
+#endif  // CONFIG_ATC_DCTX_ALIGNED
   unsigned int intra_ext_tx[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES]
                            [TX_TYPES];
 #if CONFIG_CROSS_CHROMA_TX
@@ -1625,8 +1678,6 @@
 
 typedef struct RD_COUNTS {
   int64_t comp_pred_diff[REFERENCE_MODES];
-  // Stores number of 4x4 blocks using global motion per reference frame.
-  int global_motion_used[INTER_REFS_PER_FRAME];
   int compound_ref_used_flag;
   int skip_mode_used_flag;
   int tx_type_used[TX_SIZES_ALL][TX_TYPES];
@@ -1944,19 +1995,6 @@
  */
 typedef struct {
   /*!
-   * Array to store the cost for signalling each global motion model.
-   * gmtype_cost[i] stores the cost of signalling the ith Global Motion model.
-   */
-  int type_cost[TRANS_TYPES];
-
-  /*!
-   * Array to store the cost for signalling a particular global motion model for
-   * each reference frame. gmparams_cost[i] stores the cost of signalling global
-   * motion for the ith reference frame.
-   */
-  int params_cost[INTER_REFS_PER_FRAME];
-
-  /*!
    * Flag to indicate if global motion search needs to be rerun.
    */
   bool search_done;
@@ -2007,6 +2045,25 @@
    * the y co-ordinate of the ith corner point detected.
    */
   int src_corners[2 * MAX_CORNERS];
+
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+  /*!
+   * \brief Error ratio for each selected global motion model
+   *
+   * This is used to help decide which models will actually be used,
+   * because that decision has to be deferred until we actually select a
+   * base model to use
+   */
+  double erroradvantage[INTER_REFS_PER_FRAME];
+
+  /**
+   * \name Reference path for selected base model
+   */
+  /**@{*/
+  int base_model_our_ref;   /*!< which of our ref frames to copy from */
+  int base_model_their_ref; /*!< which model to copy from that frame */
+  /**@}*/
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
 } GlobalMotionInfo;
 
 /*!
@@ -2743,7 +2800,7 @@
   /*!
    * Tables to calculate IntraBC MV cost.
    */
-#if !CONFIG_FLEX_MVRES && !CONFIG_BVCOST_UPDATE
+#if !CONFIG_FLEX_MVRES && !CONFIG_IBC_BV_IMPROVEMENT
   IntraBCMVCosts dv_costs;
 #endif
 
@@ -2896,6 +2953,14 @@
    * Number of frames left to be encoded, is 0 if limit is not set.
    */
   int frames_left;
+
+  /*!
+   * Indicates if a valid global motion model has been found in the different
+   * frame update types of a GF group.
+   * valid_gm_model_found[i] indicates if valid global motion model has been
+   * found in the frame update type with enum value equal to i
+   */
+  int valid_gm_model_found[FRAME_UPDATE_TYPES];
 } AV1_COMP;
 
 /*!
@@ -3077,11 +3142,7 @@
 
 int av1_convert_sect5obus_to_annexb(uint8_t *buffer, size_t *input_size);
 
-#if DS_FRAME_LEVEL
-void av1_set_downsample_filter_options(AV1_COMP *cpi, FeatureFlags *features);
-#else
 void av1_set_downsample_filter_options(AV1_COMP *cpi);
-#endif  // DS_FRAME_LEVEl
 
 // Set screen content options.
 // This function estimates whether to use screen content tools, by counting
@@ -3289,8 +3350,18 @@
 // frame. An exception can be made for a forward keyframe since it has no
 // previous dependencies.
 static INLINE int encode_show_existing_frame(const AV1_COMMON *cm) {
-  return cm->show_existing_frame && (!cm->features.error_resilient_mode ||
-                                     cm->current_frame.frame_type == KEY_FRAME);
+  if (!cm->show_existing_frame) return 0;
+
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+  // When enable_frame_output_order == 1, show_existing_frame can be equal to 1
+  // only for a forward key frame
+  if (cm->seq_params.enable_frame_output_order)
+    return (!cm->features.error_resilient_mode &&
+            cm->current_frame.frame_type == KEY_FRAME);
+  else
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+    return (!cm->features.error_resilient_mode ||
+            cm->current_frame.frame_type == KEY_FRAME);
 }
 
 // Get index into the 'cpi->mbmi_ext_info.frame_base' array for the given
@@ -3358,6 +3429,16 @@
   return AOMMIN(max_allowed_refs_for_given_speed, max_reference_frames);
 }
 
+#if CONFIG_SEP_COMP_DRL
+/*!\brief Return whether the current coding block has two separate DRLs,
+ * using the prediction mode and reference frames as inputs. */
+static INLINE int has_second_drl_by_mode(const PREDICTION_MODE mode,
+                                         const MV_REFERENCE_FRAME *ref_frame) {
+  return (mode == NEAR_NEARMV || mode == NEAR_NEWMV) &&
+         !is_tip_ref_frame(ref_frame[0]);
+}
+#endif  // CONFIG_SEP_COMP_DRL
+
 // Enforce the number of references for each arbitrary frame based on user
 // options and speed.
 static AOM_INLINE void enforce_max_ref_frames(AV1_COMP *cpi,
diff --git a/av1/encoder/encoder_alloc.h b/av1/encoder/encoder_alloc.h
index 8ae62cd..2b6e702 100644
--- a/av1/encoder/encoder_alloc.h
+++ b/av1/encoder/encoder_alloc.h
@@ -80,7 +80,7 @@
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
 
   cpi->td.firstpass_ctx =
-      av1_alloc_pmc(cm, 0, 0, BLOCK_16X16, NULL, PARTITION_NONE, 0,
+      av1_alloc_pmc(cm, SHARED_PART, 0, 0, BLOCK_16X16, NULL, PARTITION_NONE, 0,
                     cm->seq_params.subsampling_x, cm->seq_params.subsampling_y,
                     &cpi->td.shared_coeff_buf);
 }
diff --git a/av1/encoder/encoder_utils.c b/av1/encoder/encoder_utils.c
index c6cdc29..9b6842d 100644
--- a/av1/encoder/encoder_utils.c
+++ b/av1/encoder/encoder_utils.c
@@ -740,9 +740,6 @@
     return AOMMIN(cm->width, cm->height) > 480 ? BLOCK_128X128 : BLOCK_64X64;
   }
 #if CONFIG_BLOCK_256
-  if (cm->features.allow_intrabc) {
-    return BLOCK_128X128;
-  }
   return AOMMIN(oxcf->frm_dim_cfg.width, oxcf->frm_dim_cfg.height) >= 720
              ? BLOCK_256X256
              : BLOCK_128X128;
@@ -779,9 +776,9 @@
   const int frame_width = cm->superres_upscaled_width;
   const int frame_height = cm->superres_upscaled_height;
 
-  av1_set_restoration_unit_size(
-      frame_width, frame_height, seq_params->subsampling_x,
-      seq_params->subsampling_y, cm->rst_info, seq_params->sb_size);
+  set_restoration_unit_size(frame_width, frame_height,
+                            seq_params->subsampling_x,
+                            seq_params->subsampling_y, cm->rst_info);
 
   if (old_restoration_unit_size != cm->rst_info[0].restoration_unit_size) {
     for (int i = 0; i < num_planes; ++i)
@@ -1006,28 +1003,6 @@
   cpi->sf.part_sf.fixed_partition_size = fixed_partition_block_size_orig;
 }
 
-#define GM_RECODE_LOOP_NUM4X4_FACTOR 192
-int av1_recode_loop_test_global_motion(WarpedMotionParams *const global_motion,
-                                       const int *const global_motion_used,
-                                       int *const gm_params_cost) {
-  int i;
-  int recode = 0;
-  for (i = 0; i < INTER_REFS_PER_FRAME; ++i) {
-    if (global_motion[i].wmtype != IDENTITY &&
-        global_motion_used[i] * GM_RECODE_LOOP_NUM4X4_FACTOR <
-            gm_params_cost[i]) {
-      global_motion[i] = default_warp_params;
-      assert(global_motion[i].wmtype == IDENTITY);
-      gm_params_cost[i] = 0;
-      recode = 1;
-      // TODO(sarahparker): The earlier condition for recoding here was:
-      // "recode |= (rdc->global_motion_used[i] > 0);". Can we bring something
-      // similar to that back to speed up global motion?
-    }
-  }
-  return recode;
-}
-
 static void fix_interp_filter(InterpFilter *const interp_filter,
                               const FRAME_COUNTS *const counts) {
   if (*interp_filter == SWITCHABLE) {
diff --git a/av1/encoder/encoder_utils.h b/av1/encoder/encoder_utils.h
index 37f5cef..5399797 100644
--- a/av1/encoder/encoder_utils.h
+++ b/av1/encoder/encoder_utils.h
@@ -921,7 +921,11 @@
       cpi->sf.inter_sf.prune_obmc_prob_thresh > 0) {
     av1_copy(frame_probs->obmc_probs, default_obmc_probs);
   }
-  if (cpi->sf.inter_sf.prune_warped_prob_thresh > 0) {
+  if (cpi->sf.inter_sf.prune_warped_prob_thresh > 0
+#if CONFIG_CWG_D067_IMPROVED_WARP
+      || cpi->sf.inter_sf.prune_warpmv_prob_thresh > 0
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+  ) {
     av1_copy(frame_probs->warped_probs, default_warped_probs);
   }
 }
@@ -1071,10 +1075,6 @@
 
 void av1_determine_sc_tools_with_encoding(AV1_COMP *cpi, const int q_orig);
 
-int av1_recode_loop_test_global_motion(WarpedMotionParams *const global_motion,
-                                       const int *const global_motion_used,
-                                       int *const gm_params_cost);
-
 void av1_set_size_dependent_vars(AV1_COMP *cpi, int *q, int *bottom_index,
                                  int *top_index);
 
@@ -1136,31 +1136,6 @@
   }
   av1_calculate_tile_rows(cm, mi_params->mi_rows, tiles);
 }
-
-#define COUPLED_CHROMA_FROM_LUMA_RESTORATION 0
-static AOM_INLINE void av1_set_restoration_unit_size(int width, int height,
-                                                     int sx, int sy,
-                                                     RestorationInfo *rst,
-                                                     BLOCK_SIZE sb_size) {
-  (void)width;
-  (void)height;
-  (void)sx;
-  (void)sy;
-#if COUPLED_CHROMA_FROM_LUMA_RESTORATION
-  int s = AOMMIN(sx, sy);
-#else
-  int s = 0;
-#endif  // !COUPLED_CHROMA_FROM_LUMA_RESTORATION
-
-  if (width * height > 352 * 288)
-    rst[0].restoration_unit_size = RESTORATION_UNITSIZE_MAX;
-  else
-    rst[0].restoration_unit_size =
-        AOMMAX((RESTORATION_UNITSIZE_MAX >> 1), block_size_wide[sb_size]);
-  rst[1].restoration_unit_size = rst[0].restoration_unit_size >> s;
-  rst[2].restoration_unit_size = rst[1].restoration_unit_size;
-}
-
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/av1/encoder/encodetxb.c b/av1/encoder/encodetxb.c
index 1c36d25..f1f51e6 100644
--- a/av1/encoder/encodetxb.c
+++ b/av1/encoder/encodetxb.c
@@ -174,11 +174,16 @@
 
 #if CONFIG_ENTROPY_STATS
 void av1_update_eob_context(int cdf_idx, int eob, TX_SIZE tx_size,
-                            TX_CLASS tx_class, PLANE_TYPE plane,
-                            FRAME_CONTEXT *ec_ctx, FRAME_COUNTS *counts,
-                            uint8_t allow_update_cdf) {
+#if !CONFIG_ATC_DCTX_ALIGNED
+                            TX_CLASS tx_class,
+#endif  // !CONFIG_ATC_DCTX_ALIGNED
+                            PLANE_TYPE plane, FRAME_CONTEXT *ec_ctx,
+                            FRAME_COUNTS *counts, uint8_t allow_update_cdf) {
 #else
-void av1_update_eob_context(int eob, TX_SIZE tx_size, TX_CLASS tx_class,
+void av1_update_eob_context(int eob, TX_SIZE tx_size,
+#if !CONFIG_ATC_DCTX_ALIGNED
+                            TX_CLASS tx_class,
+#endif  // !CONFIG_ATC_DCTX_ALIGNED
                             PLANE_TYPE plane, FRAME_CONTEXT *ec_ctx,
                             uint8_t allow_update_cdf) {
 #endif
@@ -187,6 +192,67 @@
   TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
 
   const int eob_multi_size = txsize_log2_minus4[tx_size];
+#if CONFIG_ATC_DCTX_ALIGNED
+  switch (eob_multi_size) {
+    case 0:
+#if CONFIG_ENTROPY_STATS
+      ++counts->eob_multi16[cdf_idx][plane][eob_pt - 1];
+#endif
+      if (allow_update_cdf)
+        update_cdf(ec_ctx->eob_flag_cdf16[plane], eob_pt - 1, EOB_MAX_SYMS - 6);
+      break;
+    case 1:
+#if CONFIG_ENTROPY_STATS
+      ++counts->eob_multi32[cdf_idx][plane][eob_pt - 1];
+#endif
+      if (allow_update_cdf)
+        update_cdf(ec_ctx->eob_flag_cdf32[plane], eob_pt - 1, EOB_MAX_SYMS - 5);
+      break;
+    case 2:
+#if CONFIG_ENTROPY_STATS
+      ++counts->eob_multi64[cdf_idx][plane][eob_pt - 1];
+#endif
+      if (allow_update_cdf)
+        update_cdf(ec_ctx->eob_flag_cdf64[plane], eob_pt - 1, EOB_MAX_SYMS - 4);
+      break;
+    case 3:
+#if CONFIG_ENTROPY_STATS
+      ++counts->eob_multi128[cdf_idx][plane][eob_pt - 1];
+#endif
+      if (allow_update_cdf) {
+        update_cdf(ec_ctx->eob_flag_cdf128[plane], eob_pt - 1,
+                   EOB_MAX_SYMS - 3);
+      }
+      break;
+    case 4:
+#if CONFIG_ENTROPY_STATS
+      ++counts->eob_multi256[cdf_idx][plane][eob_pt - 1];
+#endif
+      if (allow_update_cdf) {
+        update_cdf(ec_ctx->eob_flag_cdf256[plane], eob_pt - 1,
+                   EOB_MAX_SYMS - 2);
+      }
+      break;
+    case 5:
+#if CONFIG_ENTROPY_STATS
+      ++counts->eob_multi512[cdf_idx][plane][eob_pt - 1];
+#endif
+      if (allow_update_cdf) {
+        update_cdf(ec_ctx->eob_flag_cdf512[plane], eob_pt - 1,
+                   EOB_MAX_SYMS - 1);
+      }
+      break;
+    case 6:
+    default:
+#if CONFIG_ENTROPY_STATS
+      ++counts->eob_multi1024[cdf_idx][plane][eob_pt - 1];
+#endif
+      if (allow_update_cdf) {
+        update_cdf(ec_ctx->eob_flag_cdf1024[plane], eob_pt - 1, EOB_MAX_SYMS);
+      }
+      break;
+  }
+#else
   const int eob_multi_ctx = (tx_class == TX_CLASS_2D) ? 0 : 1;
 
   switch (eob_multi_size) {
@@ -249,6 +315,7 @@
       }
       break;
   }
+#endif  // CONFIG_ATC_DCTX_ALIGNED
 
   if (av1_eob_offset_bits[eob_pt] > 0) {
     int eob_ctx = eob_pt - 3;
@@ -263,13 +330,21 @@
 }
 
 static int get_eob_cost(int eob, const LV_MAP_EOB_COST *txb_eob_costs,
-                        const LV_MAP_COEFF_COST *txb_costs, TX_CLASS tx_class) {
+                        const LV_MAP_COEFF_COST *txb_costs
+#if !CONFIG_ATC_DCTX_ALIGNED
+                        ,
+                        TX_CLASS tx_class
+#endif  // !CONFIG_ATC_DCTX_ALIGNED
+) {
   int eob_extra;
   const int eob_pt = get_eob_pos_token(eob, &eob_extra);
   int eob_cost = 0;
+#if CONFIG_ATC_DCTX_ALIGNED
+  eob_cost = txb_eob_costs->eob_cost[eob_pt - 1];
+#else
   const int eob_multi_ctx = (tx_class == TX_CLASS_2D) ? 0 : 1;
   eob_cost = txb_eob_costs->eob_cost[eob_multi_ctx][eob_pt - 1];
-
+#endif  // CONFIG_ATC_DCTX_ALIGNED
   if (av1_eob_offset_bits[eob_pt] > 0) {
     const int eob_ctx = eob_pt - 3;
     const int eob_shift = av1_eob_offset_bits[eob_pt] - 1;
@@ -301,7 +376,7 @@
   return 0;
 }
 
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
 // Golomb cost of coding bypass coded level values in the
 // low-frequency region.
 static INLINE int get_golomb_cost_lf(int abs_qc) {
@@ -342,7 +417,7 @@
   }
   return coeff_lps[base_range] + golomb_bits;
 }
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 
 static INLINE int get_br_cost_with_diff(tran_low_t level, const int *coeff_lps,
                                         int *diff) {
@@ -374,23 +449,23 @@
                                  const int coeff_idx, const int bwl,
                                  const int height, const int scan_idx,
                                  const int is_eob,
-#if !CONFIG_ATC_COEFCODING
+#if !CONFIG_ATC
                                  const TX_SIZE tx_size,
-#endif  // !CONFIG_ATC_COEFCODING
+#endif  // !CONFIG_ATC
                                  const TX_CLASS tx_class
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
                                  ,
                                  const int plane) {
 #else
 ) {
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
   if (is_eob) {
     if (scan_idx == 0) return 0;
     if (scan_idx <= (height << bwl) / 8) return 1;
     if (scan_idx <= (height << bwl) / 4) return 2;
     return 3;
   }
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
   int stats = 0;
   const int row = coeff_idx >> bwl;
   const int col = coeff_idx - (row << bwl);
@@ -407,11 +482,20 @@
   const int stats =
       get_nz_mag(levels + get_padded_idx(coeff_idx, bwl), bwl, tx_class);
   return get_nz_map_ctx_from_stats(stats, coeff_idx, bwl, tx_size, tx_class);
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 }
 
 static INLINE int get_nz_map_ctx_skip(const uint8_t *const levels,
+#if CONFIG_ATC_DCTX_ALIGNED
+                                      const int height, const int scan_idx,
+                                      const int is_bob, const int coeff_idx,
+                                      const int bwl) {
+  if (is_bob) {
+    return get_lower_levels_ctx_bob(bwl, height, scan_idx);
+  }
+#else
                                       const int coeff_idx, const int bwl) {
+#endif  // CONFIG_ATC_DCTX_ALIGNED
   const int stats =
       get_nz_mag_skip(levels + get_padded_idx_left(coeff_idx, bwl), bwl);
   return get_nz_map_ctx_from_stats_skip(stats, coeff_idx, bwl);
@@ -492,39 +576,114 @@
                                const int16_t *const scan, const uint16_t eob,
                                const TX_SIZE tx_size, const TX_CLASS tx_class,
                                int8_t *const coeff_contexts
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
                                ,
                                const int plane) {
 #else
 ) {
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
   const int bwl = get_txb_bwl(tx_size);
   const int height = get_txb_high(tx_size);
   for (int i = 0; i < eob; ++i) {
     const int pos = scan[i];
     coeff_contexts[pos] =
         get_nz_map_ctx(levels, pos, bwl, height, i, i == eob - 1,
-#if !CONFIG_ATC_COEFCODING
+#if !CONFIG_ATC
                        tx_size,
-#endif  // !CONFIG_ATC_COEFCODING
+#endif  // !CONFIG_ATC
                        tx_class
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
                        ,
                        plane);
 #else
         );
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
   }
 }
 
+#if CONFIG_ATC_DCTX_ALIGNED
+// Writes eob as a position token (per-tx-size CDF) plus its offset bits.
+static INLINE void code_eob(MACROBLOCK *const x, aom_writer *w, int plane,
+                            TX_SIZE tx_size, const int eob) {
+  MACROBLOCKD *xd = &x->e_mbd;
+  FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
+  const PLANE_TYPE plane_type = get_plane_type(plane);
+  const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
+
+  int eob_extra;
+  const int eob_pt = get_eob_pos_token(eob, &eob_extra);
+  const int eob_multi_size = txsize_log2_minus4[tx_size];
+  switch (eob_multi_size) {  // selects the CDF and its symbol count
+    case 0:
+      aom_write_symbol(w, eob_pt - 1, ec_ctx->eob_flag_cdf16[plane_type],
+                       EOB_MAX_SYMS - 6);
+      break;
+    case 1:
+      aom_write_symbol(w, eob_pt - 1, ec_ctx->eob_flag_cdf32[plane_type],
+                       EOB_MAX_SYMS - 5);
+      break;
+    case 2:
+      aom_write_symbol(w, eob_pt - 1, ec_ctx->eob_flag_cdf64[plane_type],
+                       EOB_MAX_SYMS - 4);
+      break;
+    case 3:
+      aom_write_symbol(w, eob_pt - 1, ec_ctx->eob_flag_cdf128[plane_type],
+                       EOB_MAX_SYMS - 3);
+      break;
+    case 4:
+      aom_write_symbol(w, eob_pt - 1, ec_ctx->eob_flag_cdf256[plane_type],
+                       EOB_MAX_SYMS - 2);
+      break;
+    case 5:
+      aom_write_symbol(w, eob_pt - 1, ec_ctx->eob_flag_cdf512[plane_type],
+                       EOB_MAX_SYMS - 1);
+      break;
+    default:  // every remaining eob_multi_size value
+      aom_write_symbol(w, eob_pt - 1, ec_ctx->eob_flag_cdf1024[plane_type],
+                       EOB_MAX_SYMS);
+      break;
+  }
+  const int eob_offset_bits = av1_eob_offset_bits[eob_pt];
+  if (eob_offset_bits > 0) {
+    const int eob_ctx = eob_pt - 3;
+    int eob_shift = eob_offset_bits - 1;
+    int bit = (eob_extra & (1 << eob_shift)) ? 1 : 0;  // MSB of the offset
+    aom_write_symbol(w, bit,
+                     ec_ctx->eob_extra_cdf[txs_ctx][plane_type][eob_ctx], 2);
+#if CONFIG_BYPASS_IMPROVEMENT
+    // Zero out top bit; write (eob_offset_bits - 1) lsb bits.
+    eob_extra &= (1 << (eob_offset_bits - 1)) - 1;
+    aom_write_literal(w, eob_extra, eob_offset_bits - 1);
+#else
+    for (int i = 1; i < eob_offset_bits; i++) {  // remaining bits, MSB first
+      eob_shift = eob_offset_bits - 1 - i;
+      bit = (eob_extra & (1 << eob_shift)) ? 1 : 0;
+      aom_write_bit(w, bit);
+    }
+#endif
+  }
+}
+#endif  // CONFIG_ATC_DCTX_ALIGNED
+
 void av1_get_nz_map_contexts_skip_c(const uint8_t *const levels,
                                     const int16_t *const scan,
+#if CONFIG_ATC_DCTX_ALIGNED
+                                    const uint16_t bob,
+#endif  // CONFIG_ATC_DCTX_ALIGNED
                                     const uint16_t eob, const TX_SIZE tx_size,
                                     int8_t *const coeff_contexts) {
   const int bwl = get_txb_bwl(tx_size);
+#if CONFIG_ATC_DCTX_ALIGNED
+  const int height = get_txb_high(tx_size);
+  for (int i = bob; i < eob; ++i) {  // only scan indices [bob, eob)
+    const int pos = scan[i];
+    coeff_contexts[pos] =
+        get_nz_map_ctx_skip(levels, height, i, bob == i, pos, bwl);
+#else
   for (int i = 0; i < eob; ++i) {
     const int pos = scan[i];
     coeff_contexts[pos] = get_nz_map_ctx_skip(levels, pos, bwl);
+#endif  // CONFIG_ATC_DCTX_ALIGNED
   }
 }
 
@@ -545,6 +704,10 @@
 
   const uint16_t *eob_txb = cb_coef_buff->eobs[plane] + txb_offset;
   const uint16_t eob = eob_txb[block];
+#if CONFIG_ATC_DCTX_ALIGNED
+  const uint16_t *bob_txb = cb_coef_buff->bobs[plane] + txb_offset;
+  const uint16_t bob_code = bob_txb[block];
+#endif  // CONFIG_ATC_DCTX_ALIGNED
   const uint8_t *entropy_ctx = cb_coef_buff->entropy_ctx[plane] + txb_offset;
 
 #if CONFIG_CONTEXT_DERIVATION
@@ -563,6 +726,12 @@
   const TX_TYPE tx_type =
       av1_get_tx_type(xd, plane_type, blk_row, blk_col, tx_size,
                       cm->features.reduced_tx_set_used);
+#if CONFIG_ATC_DCTX_ALIGNED
+  const int is_inter = is_inter_block(xd->mi[0], xd->tree_type);
+  const int is_fsc = (xd->mi[0]->fsc_mode[xd->tree_type == CHROMA_PART] &&
+                      plane == PLANE_TYPE_Y) ||
+                     use_inter_fsc(cm, plane, tx_type, is_inter);
+#endif  // CONFIG_ATC_DCTX_ALIGNED
 
 #if CONFIG_CROSS_CHROMA_TX && CCTX_C2_DROPPED
   if (plane == AOM_PLANE_V && is_cctx_allowed(cm, xd)) {
@@ -585,17 +754,32 @@
   aom_write_symbol(w, eob == 0, ec_ctx->txb_skip_cdf[txs_ctx][txb_skip_ctx], 2);
 #endif  // CONFIG_CONTEXT_DERIVATION
 
-#if CONFIG_CROSS_CHROMA_TX
+#if CONFIG_CROSS_CHROMA_TX && !CONFIG_ATC_DCTX_ALIGNED
   if (plane == AOM_PLANE_U && is_cctx_allowed(cm, xd)) {
     CctxType cctx_type = av1_get_cctx_type(xd, blk_row, blk_col);
     if (eob > 0) av1_write_cctx_type(cm, xd, cctx_type, tx_size, w);
   }
-#endif  // CONFIG_CROSS_CHROMA_TX
+#endif  // CONFIG_CROSS_CHROMA_TX && !CONFIG_ATC_DCTX_ALIGNED
 
   if (eob == 0) return 0;
+#if CONFIG_ATC_DCTX_ALIGNED
+  int esc_eob = is_fsc ? bob_code : eob;
+  const int dc_skip = (eob == 1) && !is_inter;
+  code_eob(x, w, plane, tx_size, esc_eob);
+  av1_write_tx_type(cm, xd, tx_type, tx_size, w, plane, esc_eob, dc_skip);
+#if CONFIG_CROSS_CHROMA_TX
+  if (plane == AOM_PLANE_U && is_cctx_allowed(cm, xd)) {
+    const int skip_cctx = is_inter ? 0 : (eob == 1);
+    CctxType cctx_type = av1_get_cctx_type(xd, blk_row, blk_col);
+    if (eob > 0 && !skip_cctx)
+      av1_write_cctx_type(cm, xd, cctx_type, tx_size, w);
+  }
+#endif  // CONFIG_CROSS_CHROMA_TX
+#else
   if (plane == 0) {  // Only y plane's tx_type is transmitted
     av1_write_tx_type(cm, xd, tx_type, tx_size, w);
   }
+#endif
   return 1;
 }
 
@@ -625,15 +809,39 @@
   const int bwl = get_txb_bwl(tx_size);
 
   DECLARE_ALIGNED(16, int8_t, coeff_contexts[MAX_TX_SQUARE]);
+#if CONFIG_ATC_DCTX_ALIGNED
+  const int txb_offset =
+      x->mbmi_ext_frame->cb_offset[plane] / (TX_SIZE_W_MIN * TX_SIZE_H_MIN);
+  const uint16_t *bob_txb = cb_coef_buff->bobs[plane] + txb_offset;
+  const int bob_code = bob_txb[block];
+  int bob = av1_get_max_eob(tx_size) - bob_code;
+  av1_get_nz_map_contexts_skip_c(levels, scan, bob, eob, tx_size,
+                                 coeff_contexts);
+#else
   av1_get_nz_map_contexts_skip(levels, scan, eob, tx_size, coeff_contexts);
+#endif  // CONFIG_ATC_DCTX_ALIGNED
 
+#if CONFIG_ATC_DCTX_ALIGNED
+  for (int c = bob; c < eob; ++c) {
+#else
   for (int c = 0; c < eob; ++c) {
+#endif  // CONFIG_ATC_DCTX_ALIGNED
     const int pos = scan[c];
     const int coeff_ctx = coeff_contexts[pos];
     const tran_low_t v = tcoeff[pos];
     const tran_low_t level = abs(v);
+#if CONFIG_ATC_DCTX_ALIGNED
+    if (c == bob) {
+      aom_write_symbol(w, AOMMIN(level, 3) - 1,
+                       ec_ctx->coeff_base_bob_cdf[coeff_ctx], 3);
+    } else {
+      aom_write_symbol(w, AOMMIN(level, 3),
+                       ec_ctx->coeff_base_cdf_idtx[coeff_ctx], 4);
+    }
+#else
     aom_write_symbol(w, AOMMIN(level, 3),
                      ec_ctx->coeff_base_cdf_idtx[coeff_ctx], 4);
+#endif  // CONFIG_ATC_DCTX_ALIGNED
     if (level > NUM_BASE_LEVELS) {
       // level is above 1.
       const int base_range = level - 1 - NUM_BASE_LEVELS;
@@ -705,8 +913,11 @@
   const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
   FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
 #if CONFIG_PC_WIENER
-  assert((eob == 0) ==
-         av1_get_txk_skip(cm, xd->mi_row, xd->mi_col, plane, blk_row, blk_col));
+  if (!is_global_intrabc_allowed(cm) && !cm->features.coded_lossless) {
+    // Assert only when LR is enabled.
+    assert((eob == 0) == av1_get_txk_skip(cm, xd->mi_row, xd->mi_col, plane,
+                                          blk_row, blk_col));
+  }
 #endif  // CONFIG_PC_WIENER
   if (eob == 0) return;
 
@@ -719,7 +930,7 @@
   fprintf(cm->fEncCoeffLog, "\nblk_row=%d,blk_col=%d,plane=%d,tx_size=%d",
           blk_row, blk_col, plane, tx_size);
 #endif
-
+#if !CONFIG_ATC_DCTX_ALIGNED
   int eob_extra;
   const int eob_pt = get_eob_pos_token(eob, &eob_extra);
   const int eob_multi_size = txsize_log2_minus4[tx_size];
@@ -775,10 +986,17 @@
     }
 #endif
   }
+#else
+  const TX_CLASS tx_class = tx_type_to_class[get_primary_tx_type(tx_type)];
+#endif  // !CONFIG_ATC_DCTX_ALIGNED
 
   // write sec_tx_type here
   // Only y plane's sec_tx_type is transmitted
+#if CONFIG_ATC_DCTX_ALIGNED
+  if ((plane == AOM_PLANE_Y) && (cm->seq_params.enable_ist) && eob != 1) {
+#else
   if ((plane == AOM_PLANE_Y) && (cm->seq_params.enable_ist)) {
+#endif  // CONFIG_ATC_DCTX_ALIGNED
     av1_write_sec_tx_type(cm, xd, tx_type, tx_size, eob, w);
   }
 
@@ -800,10 +1018,10 @@
   const int16_t *const scan = scan_order->scan;
   DECLARE_ALIGNED(16, int8_t, coeff_contexts[MAX_TX_SQUARE]);
   av1_get_nz_map_contexts(levels, scan, eob, tx_size, tx_class, coeff_contexts
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
                           ,
                           plane
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
   );
 
   const int bwl = get_txb_bwl(tx_size);
@@ -822,7 +1040,7 @@
     const tran_low_t v = tcoeff[pos];
     const tran_low_t level = abs(v);
 
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
     if (c == eob - 1) {
       const int row = pos >> bwl;
       const int col = pos - (row << bwl);
@@ -860,9 +1078,9 @@
       aom_write_symbol(w, AOMMIN(level, 3),
                        ec_ctx->coeff_base_cdf[txs_ctx][plane_type][coeff_ctx],
                        4);
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
     }
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
     const int row = pos >> bwl;
     const int col = pos - (row << bwl);
     int limits = get_lf_limits(row, col, tx_class, plane);
@@ -903,7 +1121,7 @@
         if (k < BR_CDF_SIZE - 1) break;
       }
     }
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
   }
 
 #if CONFIG_PAR_HIDING
@@ -929,7 +1147,7 @@
     const tran_low_t v = tcoeff[pos];
     const tran_low_t level = abs(v);
 
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
     if (c == eob - 1) {
       const int row = pos >> bwl;
       const int col = pos - (row << bwl);
@@ -967,9 +1185,9 @@
       aom_write_symbol(w, AOMMIN(level, 3),
                        ec_ctx->coeff_base_cdf[txs_ctx][plane_type][coeff_ctx],
                        4);
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
     }
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
     const int row = pos >> bwl;
     const int col = pos - (row << bwl);
     int limits = get_lf_limits(row, col, tx_class, plane);
@@ -1010,7 +1228,7 @@
         if (k < BR_CDF_SIZE - 1) break;
       }
     }
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
   }
 #endif  // CONFIG_PAR_HIDING
 
@@ -1059,7 +1277,7 @@
 #endif  // CONFIG_CONTEXT_DERIVATION
       }
 #if CONFIG_PAR_HIDING
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
       if (is_hidden && c == 0) {
         int q_index = level >> 1;
         if (q_index > COEFF_BASE_RANGE + NUM_BASE_LEVELS)
@@ -1086,9 +1304,9 @@
         if (level > COEFF_BASE_RANGE + NUM_BASE_LEVELS)
           write_golomb(w, level - COEFF_BASE_RANGE - 1 - NUM_BASE_LEVELS);
       }
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 #else
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
       const int pos = scan[c];
       const int row = pos >> bwl;
       const int col = pos - (row << bwl);
@@ -1103,7 +1321,7 @@
 #else
       if (level > COEFF_BASE_RANGE + NUM_BASE_LEVELS)
         write_golomb(w, level - COEFF_BASE_RANGE - 1 - NUM_BASE_LEVELS);
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 #endif  // CONFIG_PAR_HIDING
     }
   }
@@ -1202,8 +1420,17 @@
 int get_cctx_type_cost(const AV1_COMMON *cm, const MACROBLOCK *x,
                        const MACROBLOCKD *xd, int plane, TX_SIZE tx_size,
                        int block, CctxType cctx_type) {
+#if CONFIG_ATC_DCTX_ALIGNED
+  const int skip_cctx = is_inter_block(xd->mi[0], xd->tree_type)
+                            ? 0
+                            : (x->plane[plane].eobs[block] == 1);
+#endif  // CONFIG_ATC_DCTX_ALIGNED
   if (plane == AOM_PLANE_U && x->plane[plane].eobs[block] &&
-      is_cctx_allowed(cm, xd)) {
+      is_cctx_allowed(cm, xd)
+#if CONFIG_ATC_DCTX_ALIGNED
+      && !skip_cctx
+#endif  // CONFIG_ATC_DCTX_ALIGNED
+  ) {
     const TX_SIZE square_tx_size = txsize_sqr_map[tx_size];
     int above_cctx, left_cctx;
 #if CONFIG_EXT_RECUR_PARTITIONS
@@ -1222,7 +1449,12 @@
 // TODO(angiebird): use this function whenever it's possible
 static int get_tx_type_cost(const MACROBLOCK *x, const MACROBLOCKD *xd,
                             int plane, TX_SIZE tx_size, TX_TYPE tx_type,
-                            int reduced_tx_set_used, int eob) {
+                            int reduced_tx_set_used, int eob
+#if CONFIG_ATC_DCTX_ALIGNED
+                            ,
+                            int bob_code, int is_fsc
+#endif  // CONFIG_ATC_DCTX_ALIGNED
+) {
   if (plane > 0) return 0;
 
   const TX_SIZE square_tx_size = txsize_sqr_map[tx_size];
@@ -1239,8 +1471,18 @@
         get_ext_tx_set(tx_size, is_inter, reduced_tx_set_used);
     if (is_inter) {
       if (ext_tx_set > 0)
+#if CONFIG_ATC_DCTX_ALIGNED
+      {
+        const int esc_eob = is_fsc ? bob_code : eob;
+        const int eob_tx_ctx =
+            get_lp2tx_ctx(tx_size, get_txb_bwl(tx_size), esc_eob);
+        return x->mode_costs.inter_tx_type_costs[ext_tx_set][eob_tx_ctx]
+                                                [square_tx_size][tx_type];
+      }
+#else
         return x->mode_costs
             .inter_tx_type_costs[ext_tx_set][square_tx_size][tx_type];
+#endif  // CONFIG_ATC_DCTX_ALIGNED
     } else {
       if (ext_tx_set > 0) {
         PREDICTION_MODE intra_dir;
@@ -1250,6 +1492,20 @@
         else
           intra_dir = mbmi->mode;
         TX_TYPE primary_tx_type = get_primary_tx_type(tx_type);
+#if CONFIG_ATC_DCTX_ALIGNED
+        int tx_type_cost = 0;
+        if (eob != 1) {
+          tx_type_cost =
+              x->mode_costs.intra_tx_type_costs[ext_tx_set][square_tx_size]
+                                               [intra_dir][primary_tx_type];
+        }
+        if (block_signals_sec_tx_type(xd, tx_size, tx_type, eob) &&
+            xd->enable_ist) {
+          tx_type_cost +=
+              x->mode_costs.stx_flag_cost[square_tx_size]
+                                         [get_secondary_tx_type(tx_type)];
+        }
+#else
         int tx_type_cost =
             x->mode_costs.intra_tx_type_costs[ext_tx_set][square_tx_size]
                                              [intra_dir][primary_tx_type];
@@ -1259,6 +1515,7 @@
               x->mode_costs.stx_flag_cost[square_tx_size]
                                          [get_secondary_tx_type(tx_type)];
         }
+#endif  // CONFIG_ATC_DCTX_ALIGNED
         return tx_type_cost;
       }
     }
@@ -1304,9 +1561,9 @@
 }
 
 static AOM_FORCE_INLINE int warehouse_efficients_txb_skip(
-#if CONFIG_CROSS_CHROMA_TX
+#if CONFIG_CROSS_CHROMA_TX || CONFIG_ATC_DCTX_ALIGNED
     const AV1_COMMON *cm,
-#endif  // CONFIG_CROSS_CHROMA_TX
+#endif  // CONFIG_CROSS_CHROMA_TX || CONFIG_ATC_DCTX_ALIGNED
     const MACROBLOCK *x, const int plane, const int block,
     const TX_SIZE tx_size, const TXB_CTX *const txb_ctx,
     const struct macroblock_plane *p, const int eob,
@@ -1329,22 +1586,61 @@
   int8_t signs_buf[TX_PAD_2D];
   int8_t *const signs = set_signs(signs_buf, width);
   av1_txb_init_levels_signs(qcoeff, width, height, levels_buf, signs_buf);
-  cost += get_tx_type_cost(x, xd, plane, tx_size, tx_type, reduced_tx_set_used,
-                           eob);
+#if CONFIG_ATC_DCTX_ALIGNED
+  const int bob_code = p->bobs[block];
+  const int bob = av1_get_max_eob(tx_size) - bob_code;
+  const int is_inter = is_inter_block(xd->mi[0], xd->tree_type);
+  const int is_fsc = (xd->mi[0]->fsc_mode[xd->tree_type == CHROMA_PART] &&
+                      plane == PLANE_TYPE_Y) ||
+                     use_inter_fsc(cm, plane, tx_type, is_inter);
+#endif  // CONFIG_ATC_DCTX_ALIGNED
+  cost +=
+      get_tx_type_cost(x, xd, plane, tx_size, tx_type, reduced_tx_set_used, eob
+#if CONFIG_ATC_DCTX_ALIGNED
+                       ,
+                       bob_code, is_fsc
+#endif  // CONFIG_ATC_DCTX_ALIGNED
+      );
+
+#if CONFIG_ATC_DCTX_ALIGNED
+  const int eob_multi_size = txsize_log2_minus4[tx_size];
+  const LV_MAP_EOB_COST *const eob_costs =
+      &x->coeff_costs.eob_costs[eob_multi_size][PLANE_TYPE_Y];
+  cost += get_eob_cost(bob_code, eob_costs, coeff_costs);
+#endif  // CONFIG_ATC_DCTX_ALIGNED
+
 #if CONFIG_CROSS_CHROMA_TX
   cost += get_cctx_type_cost(cm, x, xd, plane, tx_size, block, cctx_type);
 #endif  // CONFIG_CROSS_CHROMA_TX
   DECLARE_ALIGNED(16, int8_t, coeff_contexts[MAX_TX_SQUARE]);
+#if CONFIG_ATC_DCTX_ALIGNED
+  av1_get_nz_map_contexts_skip_c(levels, scan, bob, eob, tx_size,
+                                 coeff_contexts);
+#else
   av1_get_nz_map_contexts_skip(levels, scan, eob, tx_size, coeff_contexts);
+#endif  // CONFIG_ATC_DCTX_ALIGNED
   const int(*lps_cost)[COEFF_BASE_RANGE + 1 + COEFF_BASE_RANGE + 1] =
       coeff_costs->lps_cost_skip;
   const int(*base_cost)[8] = coeff_costs->idtx_base_cost;
+
+#if CONFIG_ATC_DCTX_ALIGNED
+  for (int c = bob; c < eob; c++) {
+#else
   for (int c = 0; c < eob; c++) {
+#endif  // CONFIG_ATC_DCTX_ALIGNED
     const int pos = scan[c];
     const int coeff_ctx = coeff_contexts[pos];
     const tran_low_t v = qcoeff[pos];
     const int level = abs(v);
+#if CONFIG_ATC_DCTX_ALIGNED
+    if (c == bob) {
+      cost += coeff_costs->base_bob_cost[coeff_ctx][AOMMIN(level, 3) - 1];
+    } else {
+      cost += base_cost[coeff_ctx][AOMMIN(level, 3)];
+    }
+#else
     cost += base_cost[coeff_ctx][AOMMIN(level, 3)];
+#endif  // CONFIG_ATC_DCTX_ALIGNED
     if (v) {
       if (level > NUM_BASE_LEVELS) {
         const int ctx = get_br_ctx_skip(levels, pos, bwl);
@@ -1352,7 +1648,11 @@
       }
     }
   }
+#if CONFIG_ATC_DCTX_ALIGNED
+  for (int c = eob - 1; c >= bob; --c) {
+#else
   for (int c = eob - 1; c >= 0; --c) {
+#endif  // CONFIG_ATC_DCTX_ALIGNED
     const int pos = scan[c];
     const tran_low_t v = qcoeff[pos];
     const tran_low_t level = abs(v);
@@ -1366,9 +1666,9 @@
 }
 
 static AOM_FORCE_INLINE int warehouse_efficients_txb(
-#if CONFIG_CROSS_CHROMA_TX
+#if CONFIG_CROSS_CHROMA_TX || CONFIG_ATC_DCTX_ALIGNED
     const AV1_COMMON *cm,
-#endif  // CONFIG_CROSS_CHROMA_TX
+#endif  // CONFIG_CROSS_CHROMA_TX || CONFIG_ATC_DCTX_ALIGNED
     const MACROBLOCK *x, const int plane, const int block,
     const TX_SIZE tx_size, const TXB_CTX *const txb_ctx,
     const struct macroblock_plane *p, const int eob,
@@ -1417,27 +1717,43 @@
 
   av1_txb_init_levels(qcoeff, width, height, levels);
 
-  cost += get_tx_type_cost(x, xd, plane, tx_size, tx_type, reduced_tx_set_used,
-                           eob);
+#if CONFIG_ATC_DCTX_ALIGNED
+  const int bob_code = p->bobs[block];
+  const int is_inter = is_inter_block(xd->mi[0], xd->tree_type);
+  const int is_fsc = (xd->mi[0]->fsc_mode[xd->tree_type == CHROMA_PART] &&
+                      plane == PLANE_TYPE_Y) ||
+                     use_inter_fsc(cm, plane, tx_type, is_inter);
+#endif  // CONFIG_ATC_DCTX_ALIGNED
+
+  cost +=
+      get_tx_type_cost(x, xd, plane, tx_size, tx_type, reduced_tx_set_used, eob
+#if CONFIG_ATC_DCTX_ALIGNED
+                       ,
+                       bob_code, is_fsc
+#endif  // CONFIG_ATC_DCTX_ALIGNED
+      );
 #if CONFIG_CROSS_CHROMA_TX
   cost += get_cctx_type_cost(cm, x, xd, plane, tx_size, block, cctx_type);
 #endif  // CONFIG_CROSS_CHROMA_TX
-
+#if CONFIG_ATC_DCTX_ALIGNED
+  cost += get_eob_cost(eob, eob_costs, coeff_costs);
+#else
   cost += get_eob_cost(eob, eob_costs, coeff_costs, tx_class);
+#endif  // CONFIG_ATC_DCTX_ALIGNED
 
   av1_get_nz_map_contexts(levels, scan, eob, tx_size, tx_class, coeff_contexts
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
                           ,
                           plane
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
   );
 
   const int(*lps_cost)[COEFF_BASE_RANGE + 1 + COEFF_BASE_RANGE + 1] =
       coeff_costs->lps_cost;
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
   const int(*lps_lf_cost)[COEFF_BASE_RANGE + 1 + COEFF_BASE_RANGE + 1] =
       coeff_costs->lps_lf_cost;
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
   int c = eob - 1;
   {
     const int pos = scan[c];
@@ -1445,7 +1761,7 @@
     const int sign = AOMSIGN(v);
     const int level = (v ^ sign) - sign;
     const int coeff_ctx = coeff_contexts[pos];
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
     const int row = pos >> bwl;
     const int col = pos - (row << bwl);
     int limits = get_lf_limits(row, col, tx_class, plane);
@@ -1458,11 +1774,11 @@
     }
 #else
     cost += coeff_costs->base_eob_cost[coeff_ctx][AOMMIN(level, 3) - 1];
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 
     if (v) {
       // sign bit cost
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
       if (limits) {
         if (level > LF_NUM_BASE_LEVELS) {
           const int ctx = get_br_ctx_lf_eob(pos, tx_class);
@@ -1479,7 +1795,7 @@
         const int ctx = get_br_ctx_eob(pos, bwl, tx_class);
         cost += get_br_cost(level, lps_cost[ctx]);
       }
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
       if (c) {
 #if CONFIG_CONTEXT_DERIVATION
         if (plane == AOM_PLANE_V) {
@@ -1508,16 +1824,16 @@
       }
     }
   }
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
   const int(*base_lf_cost)[LF_BASE_SYMBOLS * 2] = coeff_costs->base_lf_cost;
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
   const int(*base_cost)[8] = coeff_costs->base_cost;
   for (c = eob - 2; c >= 1; --c) {
     const int pos = scan[c];
     const int coeff_ctx = coeff_contexts[pos];
     const tran_low_t v = qcoeff[pos];
     const int level = abs(v);
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
     const int row = pos >> bwl;
     const int col = pos - (row << bwl);
     int limits = get_lf_limits(row, col, tx_class, plane);
@@ -1528,7 +1844,7 @@
     }
 #else
     cost += base_cost[coeff_ctx][AOMMIN(level, 3)];
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
     if (v) {
       // sign bit cost
 #if CONFIG_CONTEXT_DERIVATION
@@ -1542,7 +1858,7 @@
 #else
       cost += av1_cost_literal(1);
 #endif  // CONFIG_CONTEXT_DERIVATION
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
       if (limits) {
         if (level > LF_NUM_BASE_LEVELS) {
           const int ctx = get_br_lf_ctx(levels, pos, bwl, tx_class);
@@ -1559,7 +1875,7 @@
         const int ctx = get_br_ctx(levels, pos, bwl, tx_class);
         cost += get_br_cost(level, lps_cost[ctx]);
       }
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
     }
   }
   // c == 0 after previous loop
@@ -1595,7 +1911,7 @@
     const int coeff_ctx = coeff_contexts[pos];
     const int sign = AOMSIGN(v);
     const int level = (v ^ sign) - sign;
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
     const int row = pos >> bwl;
     const int col = pos - (row << bwl);
     int limits = get_lf_limits(row, col, tx_class, plane);
@@ -1606,7 +1922,7 @@
     }
 #else
     cost += base_cost[coeff_ctx][AOMMIN(level, 3)];
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 
     if (v) {
       // sign bit cost
@@ -1622,7 +1938,7 @@
 #else
       cost += coeff_costs->dc_sign_cost[dc_sign_ctx][sign01];
 #endif  // CONFIG_CONTEXT_DERIVATION
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
       if (limits) {
         if (level > LF_NUM_BASE_LEVELS) {
           const int ctx = get_br_lf_ctx(levels, pos, bwl, tx_class);
@@ -1639,7 +1955,7 @@
         const int ctx = get_br_ctx(levels, pos, bwl, tx_class);
         cost += get_br_cost(level, lps_cost[ctx]);
       }
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
     }
   }
   return cost;
@@ -1653,7 +1969,10 @@
 #if CONFIG_CROSS_CHROMA_TX
     const CctxType cctx_type,
 #endif  // CONFIG_CROSS_CHROMA_TX
-    const TX_CLASS tx_class, int reduced_tx_set_used) {
+#if !CONFIG_ATC_DCTX_ALIGNED
+    const TX_CLASS tx_class,
+#endif  // !CONFIG_ATC_DCTX_ALIGNED
+    int reduced_tx_set_used) {
 #if CONFIG_CONTEXT_DERIVATION
   int txb_skip_ctx = txb_ctx->txb_skip_ctx;
   if (plane == AOM_PLANE_V) {
@@ -1678,8 +1997,21 @@
   int cost = coeff_costs->txb_skip_cost[txb_skip_ctx][0];
 #endif  // CONFIG_CONTEXT_DERIVATION
 
-  cost += get_tx_type_cost(x, xd, plane, tx_size, tx_type, reduced_tx_set_used,
-                           eob);
+#if CONFIG_ATC_DCTX_ALIGNED
+  const int bob_code = x->plane[plane].bobs[block];
+  const int is_inter = is_inter_block(xd->mi[0], xd->tree_type);
+  const int is_fsc = (xd->mi[0]->fsc_mode[xd->tree_type == CHROMA_PART] &&
+                      plane == PLANE_TYPE_Y) ||
+                     use_inter_fsc(cm, plane, tx_type, is_inter);
+#endif  // CONFIG_ATC_DCTX_ALIGNED
+
+  cost +=
+      get_tx_type_cost(x, xd, plane, tx_size, tx_type, reduced_tx_set_used, eob
+#if CONFIG_ATC_DCTX_ALIGNED
+                       ,
+                       bob_code, is_fsc
+#endif  // CONFIG_ATC_DCTX_ALIGNED
+      );
 #if CONFIG_CROSS_CHROMA_TX
   cost += get_cctx_type_cost(cm, x, xd, plane, tx_size, block, cctx_type);
 #endif  // CONFIG_CROSS_CHROMA_TX
@@ -1691,7 +2023,11 @@
     cost +=
         av1_cost_coeffs_txb_skip_estimate(x, plane, block, tx_size, tx_type);
   } else {
+#if CONFIG_ATC_DCTX_ALIGNED
+    cost += get_eob_cost(eob, eob_costs, coeff_costs);
+#else
     cost += get_eob_cost(eob, eob_costs, coeff_costs, tx_class);
+#endif  // CONFIG_ATC_DCTX_ALIGNED
     cost += av1_cost_coeffs_txb_estimate(x, plane, block, tx_size, tx_type);
   }
   return cost;
@@ -1816,9 +2152,9 @@
        get_primary_tx_type(tx_type) == IDTX && plane == PLANE_TYPE_Y) ||
       use_inter_fsc(cm, plane, tx_type, is_inter_block(mbmi, xd->tree_type))) {
     return warehouse_efficients_txb_skip(
-#if CONFIG_CROSS_CHROMA_TX
+#if CONFIG_CROSS_CHROMA_TX || CONFIG_ATC_DCTX_ALIGNED
         cm,
-#endif  // CONFIG_CROSS_CHROMA_TX
+#endif  // CONFIG_CROSS_CHROMA_TX || CONFIG_ATC_DCTX_ALIGNED
         x, plane, block, tx_size, txb_ctx, p, eob, coeff_costs, xd, tx_type,
 #if CONFIG_CROSS_CHROMA_TX
         cctx_type,
@@ -1826,9 +2162,9 @@
         reduced_tx_set_used);
   } else {
     return warehouse_efficients_txb(
-#if CONFIG_CROSS_CHROMA_TX
+#if CONFIG_CROSS_CHROMA_TX || CONFIG_ATC_DCTX_ALIGNED
         cm,
-#endif  // CONFIG_CROSS_CHROMA_TX
+#endif  // CONFIG_CROSS_CHROMA_TX || CONFIG_ATC_DCTX_ALIGNED
         x, plane, block, tx_size, txb_ctx, p, eob, plane_type, coeff_costs, xd,
         tx_type,
 #if CONFIG_CROSS_CHROMA_TX
@@ -1892,7 +2228,9 @@
     return skip_cost;
   }
 
+#if !CONFIG_ATC_DCTX_ALIGNED
   const TX_CLASS tx_class = tx_type_to_class[get_primary_tx_type(tx_type)];
+#endif  // !CONFIG_ATC_DCTX_ALIGNED
 
   return warehouse_efficients_txb_laplacian(cm, x, plane, block, tx_size,
                                             txb_ctx, eob, plane_type,
@@ -1900,20 +2238,23 @@
 #if CONFIG_CROSS_CHROMA_TX
                                             cctx_type,
 #endif  // CONFIG_CROSS_CHROMA_TX
-                                            tx_class, reduced_tx_set_used);
+#if !CONFIG_ATC_DCTX_ALIGNED
+                                            tx_class,
+#endif  // !CONFIG_ATC_DCTX_ALIGNED
+                                            reduced_tx_set_used);
 }
 
 static AOM_FORCE_INLINE int get_two_coeff_cost_simple(
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
     int plane,
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
     int ci, tran_low_t abs_qc, int coeff_ctx,
     const LV_MAP_COEFF_COST *txb_costs, int bwl, TX_CLASS tx_class,
     const uint8_t *levels, int *cost_low) {
   // this simple version assumes the coeff's scan_idx is not DC (scan_idx != 0)
   // and not the last (scan_idx != eob - 1)
   assert(ci > 0);
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
   const int row = ci >> bwl;
   const int col = ci - (row << bwl);
   int cost = 0;
@@ -1926,9 +2267,9 @@
   }
 #else
   int cost = txb_costs->base_cost[coeff_ctx][AOMMIN(abs_qc, 3)];
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
   int diff = 0;
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
   if (limits) {
     if (abs_qc <= (LF_BASE_SYMBOLS - 1)) {
       if (abs_qc == 0) {
@@ -1968,10 +2309,10 @@
   }
 #else
   if (abs_qc <= 3) diff = txb_costs->base_cost[coeff_ctx][abs_qc + 4];
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
   if (abs_qc) {
     cost += av1_cost_literal(1);
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
     if (limits) {
       if (abs_qc > LF_NUM_BASE_LEVELS) {
         const int br_ctx = get_br_lf_ctx(levels, ci, bwl, tx_class);
@@ -1997,7 +2338,7 @@
                                     &brcost_diff);
       diff += brcost_diff;
     }
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
   }
   *cost_low = cost - diff;
 
@@ -2012,13 +2353,13 @@
                                      ,
                                      int32_t *tmp_sign
 #endif  // CONFIG_CONTEXT_DERIVATION
-#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING
+#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC
                                      ,
                                      int plane
-#endif  // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING
+#endif  // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC
 ) {
   int cost = 0;
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
   const int row = ci >> bwl;
   const int col = ci - (row << bwl);
   int limits = get_lf_limits(row, col, tx_class, plane);
@@ -2031,7 +2372,7 @@
   }
 #else
   cost += txb_costs->base_eob_cost[coeff_ctx][AOMMIN(abs_qc, 3) - 1];
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
   if (abs_qc != 0) {
     if (ci == 0) {
 #if CONFIG_CONTEXT_DERIVATION
@@ -2052,7 +2393,7 @@
       cost += av1_cost_literal(1);
 #endif  // CONFIG_CONTEXT_DERIVATION
     }
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
     if (limits) {
       if (abs_qc > LF_NUM_BASE_LEVELS) {
         int br_ctx;
@@ -2071,7 +2412,7 @@
       br_ctx = get_br_ctx_eob(ci, bwl, tx_class);
       cost += get_br_cost(abs_qc, txb_costs->lps_cost[br_ctx]);
     }
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
   }
   return cost;
 }
@@ -2086,14 +2427,14 @@
                                          ,
                                          int32_t *tmp_sign
 #endif  // CONFIG_CONTEXT_DERIVATION
-#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING
+#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC
                                          ,
                                          int plane
-#endif  // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING
+#endif  // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC
 ) {
   int cost = 0;
   if (is_last) {
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
     const int row = ci >> bwl;
     const int col = ci - (row << bwl);
     int limits = get_lf_limits(row, col, tx_class, plane);
@@ -2106,9 +2447,9 @@
     }
 #else
     cost += txb_costs->base_eob_cost[coeff_ctx][AOMMIN(abs_qc, 3) - 1];
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
   } else {
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
     const int row = ci >> bwl;
     const int col = ci - (row << bwl);
     int limits = get_lf_limits(row, col, tx_class, plane);
@@ -2121,7 +2462,7 @@
     }
 #else
     cost += txb_costs->base_cost[coeff_ctx][AOMMIN(abs_qc, 3)];
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
   }
   if (abs_qc != 0) {
     if (ci == 0) {
@@ -2143,7 +2484,7 @@
       cost += av1_cost_literal(1);
 #endif  // CONFIG_CONTEXT_DERIVATION
     }
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
     const int row = ci >> bwl;
     const int col = ci - (row << bwl);
     int limits = get_lf_limits(row, col, tx_class, plane);
@@ -2175,7 +2516,7 @@
         br_ctx = get_br_ctx(levels, ci, bwl, tx_class);
       cost += get_br_cost(abs_qc, txb_costs->lps_cost[br_ctx]);
     }
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
   }
   return cost;
 }
@@ -2198,9 +2539,9 @@
 
 static INLINE void update_coeff_general(
     int *accu_rate, int64_t *accu_dist, int si, int eob,
-#if !CONFIG_ATC_COEFCODING
+#if !CONFIG_ATC
     TX_SIZE tx_size,
-#endif  // !CONFIG_ATC_COEFCODING
+#endif  // !CONFIG_ATC
     TX_CLASS tx_class, int bwl, int height, int64_t rdmult, int shift,
     int dc_sign_ctx, const int32_t *dequant, const int16_t *scan,
     const LV_MAP_COEFF_COST *txb_costs, const tran_low_t *tcoeff,
@@ -2210,10 +2551,10 @@
     ,
     int32_t *tmp_sign
 #endif  // CONFIG_CONTEXT_DERIVATION
-#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING
+#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC
     ,
     int plane
-#endif  // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING
+#endif  // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC
 #if CONFIG_PAR_HIDING
     ,
     coeff_info *coef_info, bool enable_parity_hiding
@@ -2225,17 +2566,17 @@
   const int is_last = si == (eob - 1);
   const int coeff_ctx =
       get_lower_levels_ctx_general(is_last, si, bwl, height, levels, ci,
-#if !CONFIG_ATC_COEFCODING
+#if !CONFIG_ATC
                                    tx_size,
-#endif  // !CONFIG_ATC_COEFCODING
+#endif  // !CONFIG_ATC
                                    tx_class
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
                                    ,
                                    plane
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
       );
   if (qc == 0) {
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
     const int row = ci >> bwl;
     const int col = ci - (row << bwl);
     int limits = get_lf_limits(row, col, tx_class, plane);
@@ -2246,7 +2587,7 @@
     }
 #else
     *accu_rate += txb_costs->base_cost[coeff_ctx][0];
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
   } else {
     const int sign = (qc < 0) ? 1 : 0;
     const tran_low_t abs_qc = abs(qc);
@@ -2261,10 +2602,10 @@
                                ,
                                tmp_sign
 #endif  // CONFIG_CONTEXT_DERIVATION
-#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING
+#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC
                                ,
                                plane
-#endif  // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING
+#endif  // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC
         );
     const int64_t rd = RDCOST(rdmult, rate, dist);
 
@@ -2275,7 +2616,7 @@
     if (abs_qc == 1) {
       abs_qc_low = qc_low = dqc_low = 0;
       dist_low = dist0;
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
       const int row = ci >> bwl;
       const int col = ci - (row << bwl);
       int limits = get_lf_limits(row, col, tx_class, plane);
@@ -2286,7 +2627,7 @@
       }
 #else
       rate_low = txb_costs->base_cost[coeff_ctx][0];
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
     } else {
       get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low);
       abs_qc_low = abs_qc - 1;
@@ -2298,10 +2639,10 @@
                                  ,
                                  tmp_sign
 #endif  // CONFIG_CONTEXT_DERIVATION
-#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING
+#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC
                                  ,
                                  plane
-#endif  // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING
+#endif  // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC
           );
     }
 
@@ -2331,9 +2672,9 @@
 
 static AOM_FORCE_INLINE void update_coeff_simple(
     int *accu_rate, int si, int eob,
-#if !CONFIG_ATC_COEFCODING
+#if !CONFIG_ATC
     TX_SIZE tx_size,
-#endif  // !CONFIG_ATC_COEFCODING
+#endif  // !CONFIG_ATC
     TX_CLASS tx_class, int bwl, int64_t rdmult, int shift,
     const int32_t *dequant, const int16_t *scan,
     const LV_MAP_COEFF_COST *txb_costs, const tran_low_t *tcoeff,
@@ -2343,12 +2684,12 @@
     ,
     coeff_info *coef_info, bool enable_parity_hiding
 #endif  // CONFIG_PAR_HIDING
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
     ,
     int plane) {
 #else
 ) {
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
   const int dqv = get_dqv(dequant, scan[si], iqmatrix);
   (void)eob;
   // this simple version assumes the coeff's scan_idx is not DC (scan_idx != 0)
@@ -2357,7 +2698,7 @@
   assert(si > 0);
   const int ci = scan[si];
   const tran_low_t qc = qcoeff[ci];
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
   const int row = ci >> bwl;
   const int col = ci - (row << bwl);
 
@@ -2371,9 +2712,9 @@
 #else
   const int coeff_ctx =
       get_lower_levels_ctx(levels, ci, bwl, tx_size, tx_class);
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
   if (qc == 0) {
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
     if (limits) {
       *accu_rate += txb_costs->base_lf_cost[coeff_ctx][0];
     } else {
@@ -2381,16 +2722,16 @@
     }
 #else
     *accu_rate += txb_costs->base_cost[coeff_ctx][0];
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
   } else {
     const tran_low_t abs_qc = abs(qc);
     const tran_low_t abs_tqc = abs(tcoeff[ci]);
     const tran_low_t abs_dqc = abs(dqcoeff[ci]);
     int rate_low = 0;
     const int rate = get_two_coeff_cost_simple(
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
         plane,
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
         ci, abs_qc, coeff_ctx, txb_costs, bwl, tx_class, levels, &rate_low);
     if (abs_dqc < abs_tqc) {
       *accu_rate += rate;
@@ -2440,9 +2781,9 @@
 static AOM_FORCE_INLINE void update_coeff_eob(
     int *accu_rate, int64_t *accu_dist, int *eob, int *nz_num, int *nz_ci,
     int si,
-#if !CONFIG_ATC_COEFCODING
+#if !CONFIG_ATC
     TX_SIZE tx_size,
-#endif  // !CONFIG_ATC_COEFCODING
+#endif  // !CONFIG_ATC
     TX_CLASS tx_class, int bwl, int height, int dc_sign_ctx, int64_t rdmult,
     int shift, const int32_t *dequant, const int16_t *scan,
     const LV_MAP_EOB_COST *txb_eob_costs, const LV_MAP_COEFF_COST *txb_costs,
@@ -2452,10 +2793,10 @@
     ,
     int32_t *tmp_sign
 #endif  // CONFIG_CONTEXT_DERIVATION
-#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING
+#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC
     ,
     int plane
-#endif  // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING
+#endif  // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC
 #if CONFIG_PAR_HIDING
     ,
     coeff_info *coef_info, bool enable_parity_hiding
@@ -2465,7 +2806,7 @@
   assert(si != *eob - 1);
   const int ci = scan[si];
   const tran_low_t qc = qcoeff[ci];
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
   const int row = ci >> bwl;
   const int col = ci - (row << bwl);
   int limits = get_lf_limits(row, col, tx_class, plane);
@@ -2478,9 +2819,9 @@
 #else
   const int coeff_ctx =
       get_lower_levels_ctx(levels, ci, bwl, tx_size, tx_class);
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
   if (qc == 0) {
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
     if (limits) {
       *accu_rate += txb_costs->base_lf_cost[coeff_ctx][0];
     } else {
@@ -2488,7 +2829,7 @@
     }
 #else
     *accu_rate += txb_costs->base_cost[coeff_ctx][0];
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
   } else {
 #if CONFIG_PAR_HIDING
     int64_t rd_eob_low = INT64_MAX >> 1;
@@ -2508,10 +2849,10 @@
                                ,
                                tmp_sign
 #endif  // CONFIG_CONTEXT_DERIVATION
-#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING
+#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC
                                ,
                                plane
-#endif  // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING
+#endif  // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC
         );
     int64_t rd = RDCOST(rdmult, *accu_rate + rate, *accu_dist + dist);
 
@@ -2523,7 +2864,7 @@
       abs_qc_low = 0;
       dqc_low = qc_low = 0;
       dist_low = 0;
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
       if (limits) {
         rate_low = txb_costs->base_lf_cost[coeff_ctx][0];
       } else {
@@ -2531,7 +2872,7 @@
       }
 #else
       rate_low = txb_costs->base_cost[coeff_ctx][0];
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
       rd_low = RDCOST(rdmult, *accu_rate + rate_low, *accu_dist);
     } else {
       get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low);
@@ -2544,10 +2885,10 @@
                                  ,
                                  tmp_sign
 #endif  // CONFIG_CONTEXT_DERIVATION
-#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING
+#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC
                                  ,
                                  plane
-#endif  // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING
+#endif  // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC
           );
       rd_low = RDCOST(rdmult, *accu_rate + rate_low, *accu_dist + dist_low);
     }
@@ -2559,7 +2900,11 @@
     const int new_eob = si + 1;
     const int coeff_ctx_new_eob = get_lower_levels_ctx_eob(bwl, height, si);
     const int new_eob_cost =
+#if CONFIG_ATC_DCTX_ALIGNED
+        get_eob_cost(new_eob, txb_eob_costs, txb_costs);
+#else
         get_eob_cost(new_eob, txb_eob_costs, txb_costs, tx_class);
+#endif  // CONFIG_ATC_DCTX_ALIGNED
     int rate_coeff_eob =
         new_eob_cost + get_coeff_cost_eob(ci, abs_qc, sign, coeff_ctx_new_eob,
                                           dc_sign_ctx, txb_costs, bwl, tx_class
@@ -2567,10 +2912,10 @@
                                           ,
                                           tmp_sign
 #endif  // CONFIG_CONTEXT_DERIVATION
-#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING
+#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC
                                           ,
                                           plane
-#endif  // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING
+#endif  // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC
                        );
     int64_t dist_new_eob = dist;
     int64_t rd_new_eob = RDCOST(rdmult, rate_coeff_eob, dist_new_eob);
@@ -2587,10 +2932,10 @@
                                             ,
                                             tmp_sign
 #endif  // CONFIG_CONTEXT_DERIVATION
-#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING
+#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC
                                             ,
                                             plane
-#endif  // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING
+#endif  // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC
                          );
       const int64_t dist_new_eob_low = dist_low;
       const int64_t rd_new_eob_low =
@@ -2682,14 +3027,14 @@
                                       tran_low_t level, int bwl, int pos,
                                       uint8_t *levels, int dc_sign_ctx,
                                       TX_CLASS tx_class, int *rate
-#if !CONFIG_ATC_COEFCODING
+#if !CONFIG_ATC
                                       ,
                                       TX_SIZE tx_size
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // !CONFIG_ATC
 ) {
   tran_low_t abslevel = abs(level), q_index = abslevel >> 1;
   int sign = level < 0;
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
   const int row = pos >> bwl;
   const int col = pos - (row << bwl);
   int limits = get_lf_limits(row, col, tx_class, 0);
@@ -2702,17 +3047,17 @@
 #else
   const int coeff_ctx =
       get_lower_levels_ctx(levels, pos, bwl, tx_size, tx_class);
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
   *rate = get_coeff_cost_general(0, pos, abslevel, level < 0, coeff_ctx,
                                  dc_sign_ctx, txb_costs, bwl, tx_class, levels
 #if CONFIG_CONTEXT_DERIVATION
                                  ,
                                  0
 #endif  // CONFIG_CONTEXT_DERIVATION
-#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING
+#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC
                                  ,
                                  0
-#endif  // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING
+#endif  // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC
   );
 
   const int base_ctx_ph = get_base_ctx_ph(levels, pos, bwl, tx_class);
@@ -2898,10 +3243,10 @@
     const qm_val_t *iqmatrix, int dc_sign_ctx, const TX_CLASS tx_class,
     tran_low_t *qcoeff, tran_low_t *dqcoeff, const tran_low_t *tcoeff,
     coeff_info *coef_info, int *accu_rate
-#if !CONFIG_ATC_COEFCODING
+#if !CONFIG_ATC
     ,
     TX_SIZE tx_size
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // !CONFIG_ATC
 ) {
   int nzsbb = 0, sum_abs1 = 0;
   for (int scan_idx = eob - 1; scan_idx > 0; --scan_idx) {
@@ -2922,10 +3267,10 @@
   const int ratesaving =
       rate_save(txb_costs, txb_costs_ph, qcoeff[hidepos], bwl, hidepos, levels,
                 dc_sign_ctx, tx_class, &rate_cur
-#if !CONFIG_ATC_COEFCODING
+#if !CONFIG_ATC
                 ,
                 tx_size
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // !CONFIG_ATC
       );
 
   if (!needtune && nzsbb >= PHTHRESH) {
@@ -3009,6 +3354,12 @@
   const int height = get_txb_high(tx_size);
   assert(width == (1 << bwl));
   const int is_inter = is_inter_block(mbmi, xd->tree_type);
+#if CONFIG_ATC_DCTX_ALIGNED
+  const int bob_code = p->bobs[block];
+  const int is_fsc = (xd->mi[0]->fsc_mode[xd->tree_type == CHROMA_PART] &&
+                      plane == PLANE_TYPE_Y) ||
+                     use_inter_fsc(&cpi->common, plane, tx_type, is_inter);
+#endif  // CONFIG_ATC_DCTX_ALIGNED
   const LV_MAP_COEFF_COST *txb_costs =
       &coeff_costs->coeff_costs[txs_ctx][plane_type];
   const int eob_multi_size = txsize_log2_minus4[tx_size];
@@ -3068,7 +3419,11 @@
   const int non_skip_cost = txb_costs->txb_skip_cost[txb_ctx->txb_skip_ctx][0];
   const int skip_cost = txb_costs->txb_skip_cost[txb_ctx->txb_skip_ctx][1];
 #endif  // CONFIG_CONTEXT_DERIVATION
+#if CONFIG_ATC_DCTX_ALIGNED
+  const int eob_cost = get_eob_cost(eob, txb_eob_costs, txb_costs);
+#else
   const int eob_cost = get_eob_cost(eob, txb_eob_costs, txb_costs, tx_class);
+#endif  // CONFIG_ATC_DCTX_ALIGNED
   int accu_rate = eob_cost;
   int64_t accu_dist = 0;
   int si = eob - 1;
@@ -3081,9 +3436,9 @@
   int nz_ci[3] = { ci, 0, 0 };
   if (abs_qc >= 2) {
     update_coeff_general(&accu_rate, &accu_dist, si, eob,
-#if !CONFIG_ATC_COEFCODING
+#if !CONFIG_ATC
                          tx_size,
-#endif  // !CONFIG_ATC_COEFCODING
+#endif  // !CONFIG_ATC
                          tx_class, bwl, height, rdmult, shift,
                          txb_ctx->dc_sign_ctx, dequant, scan, txb_costs, tcoeff,
                          qcoeff, dqcoeff, levels, iqmatrix
@@ -3091,10 +3446,10 @@
                          ,
                          xd->tmp_sign
 #endif  // CONFIG_CONTEXT_DERIVATION
-#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING
+#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC
                          ,
                          plane
-#endif  // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING
+#endif  // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC
 #if CONFIG_PAR_HIDING
                          ,
                          coef_info, enable_parity_hiding
@@ -3111,10 +3466,10 @@
                            ,
                            xd->tmp_sign
 #endif  // CONFIG_CONTEXT_DERIVATION
-#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING
+#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC
                            ,
                            plane
-#endif  // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING
+#endif  // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC
         );
     const tran_low_t tqc = tcoeff[ci];
     const tran_low_t dqc = dqcoeff[ci];
@@ -3126,9 +3481,9 @@
 #if CONFIG_PAR_HIDING
   for (; si >= 0 && nz_num <= max_nz_num; --si) {
     update_coeff_eob(&accu_rate, &accu_dist, &eob, &nz_num, nz_ci, si,
-#if !CONFIG_ATC_COEFCODING
+#if !CONFIG_ATC
                      tx_size,
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // !CONFIG_ATC
                      tx_class, bwl, height, txb_ctx->dc_sign_ctx, rdmult, shift,
                      dequant, scan, txb_eob_costs, txb_costs, tcoeff, qcoeff,
                      dqcoeff, levels, sharpness, iqmatrix
@@ -3136,16 +3491,16 @@
                      ,
                      xd->tmp_sign
 #endif
-#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING
+#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC
                      ,
                      plane
-#endif  // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING
+#endif  // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC
                      ,
                      coef_info, enable_parity_hiding);
   }
 #else
 #if CONFIG_CONTEXT_DERIVATION
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
 #define UPDATE_COEFF_EOB_CASE(tx_class_literal)                                \
   case tx_class_literal:                                                       \
     for (; si >= 0 && nz_num <= max_nz_num; --si) {                            \
@@ -3167,7 +3522,7 @@
                        levels, sharpness, iqmatrix, xd->tmp_sign, plane);  \
     }                                                                      \
     break;
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 #else
 #define UPDATE_COEFF_EOB_CASE(tx_class_literal)                            \
   case tx_class_literal:                                                   \
@@ -3197,20 +3552,20 @@
 #if CONFIG_PAR_HIDING
   for (; si >= 1; --si) {
     update_coeff_simple(&accu_rate, si, eob,
-#if !CONFIG_ATC_COEFCODING
+#if !CONFIG_ATC
                         tx_size,
-#endif  // !CONFIG_ATC_COEFCODING
+#endif  // !CONFIG_ATC
                         tx_class, bwl, rdmult, shift, dequant, scan, txb_costs,
                         tcoeff, qcoeff, dqcoeff, levels, iqmatrix, coef_info,
                         enable_parity_hiding
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
                         ,
                         plane
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
     );
   }
 #else
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
 #define UPDATE_COEFF_SIMPLE_CASE(tx_class_literal)                            \
   case tx_class_literal:                                                      \
     for (; si >= 1; --si) {                                                   \
@@ -3242,7 +3597,7 @@
 #undef UPDATE_COEFF_SIMPLE_CASE
     default: assert(false);
   }
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 #endif  // CONFIG_PAR_HIDING
 
   // DC position
@@ -3250,9 +3605,9 @@
     // no need to update accu_dist because it's not used after this point
     int64_t dummy_dist = 0;
     update_coeff_general(&accu_rate, &dummy_dist, si, eob,
-#if !CONFIG_ATC_COEFCODING
+#if !CONFIG_ATC
                          tx_size,
-#endif  // !CONFIG_ATC_COEFCODING
+#endif  // !CONFIG_ATC
                          tx_class, bwl, height, rdmult, shift,
                          txb_ctx->dc_sign_ctx, dequant, scan, txb_costs, tcoeff,
                          qcoeff, dqcoeff, levels, iqmatrix
@@ -3260,10 +3615,10 @@
                          ,
                          xd->tmp_sign
 #endif  // CONFIG_CONTEXT_DERIVATION
-#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING
+#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC
                          ,
                          plane
-#endif  // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING
+#endif  // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC
 #if CONFIG_PAR_HIDING
                          ,
                          coef_info, enable_parity_hiding
@@ -3276,16 +3631,30 @@
     parity_hide_tb(eob, scan, levels, bwl, rdmult, shift, txb_costs,
                    txb_costs_ph, dequant, iqmatrix, txb_ctx->dc_sign_ctx,
                    tx_class, qcoeff, dqcoeff, tcoeff, coef_info, &accu_rate
-#if !CONFIG_ATC_COEFCODING
+#if !CONFIG_ATC
                    ,
                    tx_size
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // !CONFIG_ATC
     );
   }
 
   aom_free(coef_info);
 #endif  // CONFIG_PAR_HIDING
 
+#if CONFIG_ATC_DCTX_ALIGNED
+  set_bob(x, plane, block, tx_size, tx_type);
+#endif  // CONFIG_ATC_DCTX_ALIGNED
+
+#if CONFIG_ATC_DCTX_ALIGNED
+  if (eob == 0) {
+    accu_rate += skip_cost;
+  } else {
+    const int tx_type_cost = get_tx_type_cost(x, xd, plane, tx_size, tx_type,
+                                              cm->features.reduced_tx_set_used,
+                                              eob, bob_code, is_fsc);
+    accu_rate += non_skip_cost + tx_type_cost;
+  }
+#else
   const int tx_type_cost = get_tx_type_cost(
       x, xd, plane, tx_size, tx_type, cm->features.reduced_tx_set_used, eob);
 
@@ -3293,6 +3662,7 @@
     accu_rate += skip_cost;
   else
     accu_rate += non_skip_cost + tx_type_cost;
+#endif  // CONFIG_ATC_DCTX_ALIGNED
 
   p->eobs[block] = eob;
   p->txb_entropy_ctx[block] =
@@ -3360,7 +3730,12 @@
                                  MACROBLOCKD *xd, int blk_row, int blk_col,
                                  int plane, TX_SIZE tx_size,
                                  FRAME_COUNTS *counts, uint8_t allow_update_cdf,
-                                 int eob) {
+                                 int eob
+#if CONFIG_ATC_DCTX_ALIGNED
+                                 ,
+                                 int bob_code, int is_fsc
+#endif  // CONFIG_ATC_DCTX_ALIGNED
+) {
   MB_MODE_INFO *mbmi = xd->mi[0];
   int is_inter = is_inter_block(mbmi, xd->tree_type);
   const int reduced_tx_set_used = cm->features.reduced_tx_set_used;
@@ -3397,19 +3772,39 @@
       const TxSetType tx_set_type =
           av1_get_ext_tx_set_type(tx_size, is_inter, reduced_tx_set_used);
       if (is_inter) {
+#if CONFIG_ATC_DCTX_ALIGNED
+        const int esc_eob = is_fsc ? bob_code : eob;
+        const int eob_tx_ctx =
+            get_lp2tx_ctx(tx_size, get_txb_bwl(tx_size), esc_eob);
+        if (allow_update_cdf) {
+          update_cdf(
+              fc->inter_ext_tx_cdf[eset][eob_tx_ctx][txsize_sqr_map[tx_size]],
+              av1_ext_tx_ind[tx_set_type][tx_type],
+              av1_num_ext_tx_set[tx_set_type]);
+        }
+#else
         if (allow_update_cdf) {
           update_cdf(fc->inter_ext_tx_cdf[eset][txsize_sqr_map[tx_size]],
                      av1_ext_tx_ind[tx_set_type][tx_type],
                      av1_num_ext_tx_set[tx_set_type]);
         }
+#endif  // CONFIG_ATC_DCTX_ALIGNED
 #if CONFIG_ENTROPY_STATS
+#if CONFIG_ATC_DCTX_ALIGNED
+        ++counts->inter_ext_tx[eset][eob_tx_ctx][txsize_sqr_map[tx_size]]
+                              [av1_ext_tx_ind[tx_set_type][tx_type]];
+#else
         ++counts->inter_ext_tx[eset][txsize_sqr_map[tx_size]]
                               [av1_ext_tx_ind[tx_set_type][tx_type]];
+#endif  // CONFIG_ATC_DCTX_ALIGNED
 #endif  // CONFIG_ENTROPY_STATS
       } else {
         if (mbmi->fsc_mode[xd->tree_type == CHROMA_PART] && allow_update_cdf) {
           return;
         }
+#if CONFIG_ATC_DCTX_ALIGNED
+        if (eob == 1 && allow_update_cdf) return;
+#endif  // CONFIG_ATC_DCTX_ALIGNED
         PREDICTION_MODE intra_dir;
         if (mbmi->filter_intra_mode_info.use_filter_intra)
           intra_dir = fimode_to_intradir[mbmi->filter_intra_mode_info
@@ -3418,7 +3813,7 @@
           intra_dir = mbmi->mode;
 #if CONFIG_ENTROPY_STATS
         const TX_TYPE primary_tx_type = get_primary_tx_type(tx_type);
-#if CONFIG_ATC_NEWTXSETS
+#if CONFIG_ATC
         ++counts->intra_ext_tx[eset][txsize_sqr_map[tx_size]][intra_dir]
                               [av1_tx_type_to_idx(primary_tx_type, tx_set_type,
                                                   intra_dir,
@@ -3427,7 +3822,7 @@
         ++counts->intra_ext_tx[eset][txsize_sqr_map[tx_size]][intra_dir]
                               [av1_ext_tx_ind_intra[tx_set_type]
                                                    [primary_tx_type]];
-#endif  // CONFIG_ATC_NEWTXSETS
+#endif  // CONFIG_ATC
 #endif  // CONFIG_ENTROPY_STATS
         if (allow_update_cdf) {
           update_cdf(
@@ -3437,12 +3832,12 @@
 #else
               fc->intra_ext_tx_cdf[eset][txsize_sqr_map[tx_size]][intra_dir],
 #endif  // CONFIG_ATC_REDUCED_TXSET
-#if CONFIG_ATC_NEWTXSETS
+#if CONFIG_ATC
               av1_tx_type_to_idx(get_primary_tx_type(tx_type), tx_set_type,
                                  intra_dir, av1_size_class[tx_size]),
 #else
               av1_ext_tx_ind_intra[tx_set_type][get_primary_tx_type(tx_type)],
-#endif  // CONFIG_ATC_NEWTXSETS
+#endif  // CONFIG_ATC
 #if CONFIG_ATC_REDUCED_TXSET
               cm->features.reduced_tx_set_used
                   ? av1_num_reduced_tx_set
@@ -3465,6 +3860,9 @@
            !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) &&
            cm->seq_params.enable_ist &&
            block_signals_sec_tx_type(xd, tx_size, tx_type, eob)) {
+#if CONFIG_ATC_DCTX_ALIGNED
+    if (eob == 1 && allow_update_cdf) return;
+#endif  // CONFIG_ATC_DCTX_ALIGNED
     if (allow_update_cdf)
       update_cdf(fc->stx_cdf[txsize_sqr_map[tx_size]],
                  (int8_t)get_secondary_tx_type(tx_type), STX_TYPES);
@@ -3483,6 +3881,9 @@
   struct macroblock_plane *p = &x->plane[plane];
   struct macroblockd_plane *pd = &xd->plane[plane];
   const int eob = p->eobs[block];
+#if CONFIG_ATC_DCTX_ALIGNED
+  const int bob_code = p->bobs[block];
+#endif  // CONFIG_ATC_DCTX_ALIGNED
   const int block_offset = BLOCK_OFFSET(block);
   tran_low_t *qcoeff = p->qcoeff + block_offset;
   const PLANE_TYPE plane_type = pd->plane_type;
@@ -3520,6 +3921,11 @@
     uint8_t *const entropy_ctx = cb_coef_buff->entropy_ctx[plane] + txb_offset;
     entropy_ctx[block] = txb_ctx.txb_skip_ctx;
     eob_txb[block] = eob;
+#if CONFIG_ATC_DCTX_ALIGNED
+    uint16_t *bob_txb = cb_coef_buff->bobs[plane] + txb_offset;
+    bob_txb[block] = bob_code;
+#endif  // CONFIG_ATC_DCTX_ALIGNED
+
     if (eob == 0) {
       av1_set_entropy_contexts(xd, pd, plane, plane_bsize, tx_size, 0, blk_col,
                                blk_row);
@@ -3539,23 +3945,67 @@
     int8_t *const signs = set_signs(signs_buf, width);
     av1_txb_init_levels_signs(tcoeff, width, height, levels_buf, signs_buf);
     update_tx_type_count(cpi, cm, xd, blk_row, blk_col, plane, tx_size,
-                         td->counts, allow_update_cdf, eob);
+                         td->counts, allow_update_cdf, eob
+#if CONFIG_ATC_DCTX_ALIGNED
+                         ,
+                         bob_code, 1 /* is_fsc */
+#endif                               // CONFIG_ATC_DCTX_ALIGNED
+    );
     const int16_t *const scan = scan_order->scan;
     // record tx type usage
     td->rd_counts.tx_type_used[tx_size][get_primary_tx_type(tx_type)]++;
+#if CONFIG_ATC_DCTX_ALIGNED
+    int bob = av1_get_max_eob(tx_size) - bob_code;
+#if CONFIG_ENTROPY_STATS
+    av1_update_eob_context(cdf_idx, bob_code, tx_size, plane_type, ec_ctx,
+                           td->counts, allow_update_cdf);
+#else
+    av1_update_eob_context(bob_code, tx_size, plane_type, ec_ctx,
+                           allow_update_cdf);
+#endif  // CONFIG_ENTROPY_STATS
+#endif  // CONFIG_ATC_DCTX_ALIGNED
     DECLARE_ALIGNED(16, int8_t, coeff_contexts[MAX_TX_SQUARE]);
+#if CONFIG_ATC_DCTX_ALIGNED
+    av1_get_nz_map_contexts_skip_c(levels, scan, bob, eob, tx_size,
+                                   coeff_contexts);
+#else
     av1_get_nz_map_contexts_skip(levels, scan, eob, tx_size, coeff_contexts);
+#endif  // CONFIG_ATC_DCTX_ALIGNED
+#if CONFIG_ATC_DCTX_ALIGNED
+    for (int c = bob; c < eob; ++c) {
+#else
     for (int c = 0; c < eob; c++) {
+#endif  // CONFIG_ATC_DCTX_ALIGNED
       const int pos = scan[c];
       const int coeff_ctx = coeff_contexts[pos];
       const tran_low_t v = qcoeff[pos];
       const tran_low_t level = abs(v);
       if (allow_update_cdf) {
+#if CONFIG_ATC_DCTX_ALIGNED
+        if (c == bob) {
+          update_cdf(ec_ctx->coeff_base_bob_cdf[coeff_ctx],
+                     AOMMIN(level, 3) - 1, 3);
+        } else {
+          update_cdf(ec_ctx->coeff_base_cdf_idtx[coeff_ctx], AOMMIN(level, 3),
+                     4);
+        }
+#else
         update_cdf(ec_ctx->coeff_base_cdf_idtx[coeff_ctx], AOMMIN(level, 3), 4);
+#endif  // CONFIG_ATC_DCTX_ALIGNED
       }
 #if CONFIG_ENTROPY_STATS
+#if CONFIG_ATC_DCTX_ALIGNED
+      if (c == bob) {
+        ++td->counts
+              ->coeff_base_bob_multi[cdf_idx][coeff_ctx][AOMMIN(level, 3) - 1];
+      } else {
+        ++td->counts
+              ->coeff_base_multi_skip[cdf_idx][coeff_ctx][AOMMIN(level, 3)];
+      }
+#else
       ++td->counts->coeff_base_multi_skip[cdf_idx][coeff_ctx][AOMMIN(level, 3)];
-#endif  // CONFIG_ENTROPY_STATS
+#endif  // CONFIG_ATC_DCTX_ALIGNED
+#endif  // CONFIG_ENTROPY_STATS
       if (level > NUM_BASE_LEVELS) {
         const int base_range = level - 1 - NUM_BASE_LEVELS;
         const int br_ctx = get_br_ctx_skip(levels, pos, bwl);
@@ -3650,9 +4100,19 @@
   struct macroblock_plane *p = &x->plane[plane];
   struct macroblockd_plane *pd = &xd->plane[plane];
   const int eob = p->eobs[block];
+#if CONFIG_ATC_DCTX_ALIGNED
+  const int bob_code = p->bobs[block];
+#endif  // CONFIG_ATC_DCTX_ALIGNED
   const int block_offset = BLOCK_OFFSET(block);
   tran_low_t *qcoeff = p->qcoeff + block_offset;
   const PLANE_TYPE plane_type = pd->plane_type;
+#if CONFIG_ATC_DCTX_ALIGNED
+  const int is_inter = is_inter_block(xd->mi[0], xd->tree_type);
+  if (eob == 1 && plane_type == 0 &&
+      !xd->mi[0]->fsc_mode[xd->tree_type == CHROMA_PART] && !is_inter) {
+    update_txk_array(xd, blk_row, blk_col, tx_size, DCT_DCT);
+  }
+#endif  // CONFIG_ATC_DCTX_ALIGNED
   const TX_TYPE tx_type =
       av1_get_tx_type(xd, plane_type, blk_row, blk_col, tx_size,
                       cm->features.reduced_tx_set_used);
@@ -3737,9 +4197,22 @@
     uint8_t *const entropy_ctx = cb_coef_buff->entropy_ctx[plane] + txb_offset;
     entropy_ctx[block] = txb_ctx.txb_skip_ctx;
     eob_txb[block] = eob;
+#if CONFIG_ATC_DCTX_ALIGNED
+    uint16_t *bob_txb = cb_coef_buff->bobs[plane] + txb_offset;
+    bob_txb[block] = bob_code;
+#endif  // CONFIG_ATC_DCTX_ALIGNED
 
 #if CONFIG_CROSS_CHROMA_TX
-    if (is_cctx_allowed(cm, xd) && plane == AOM_PLANE_U && eob > 0)
+#if CONFIG_ATC_DCTX_ALIGNED
+    const int skip_cctx = is_inter ? 0 : (eob == 1);
+#endif  // CONFIG_ATC_DCTX_ALIGNED
+    if (is_cctx_allowed(cm, xd) && plane == AOM_PLANE_U &&
+#if CONFIG_ATC_DCTX_ALIGNED
+        !skip_cctx && eob > 0
+#else
+        eob > 0
+#endif  // CONFIG_ATC_DCTX_ALIGNED
+    )
       update_cctx_type_count(cm, xd, blk_row, blk_col, tx_size, td->counts,
                              allow_update_cdf);
 #endif  // CONFIG_CROSS_CHROMA_TX
@@ -3759,7 +4232,12 @@
     uint8_t *const levels = set_levels(levels_buf, width);
     av1_txb_init_levels(tcoeff, width, height, levels);
     update_tx_type_count(cpi, cm, xd, blk_row, blk_col, plane, tx_size,
-                         td->counts, allow_update_cdf, eob);
+                         td->counts, allow_update_cdf, eob
+#if CONFIG_ATC_DCTX_ALIGNED
+                         ,
+                         bob_code, 0 /* is_fsc */
+#endif                               // CONFIG_ATC_DCTX_ALIGNED
+    );
 
     const TX_CLASS tx_class = tx_type_to_class[get_primary_tx_type(tx_type)];
     const int16_t *const scan = scan_order->scan;
@@ -3768,19 +4246,25 @@
     td->rd_counts.tx_type_used[tx_size][get_primary_tx_type(tx_type)]++;
 
 #if CONFIG_ENTROPY_STATS
-    av1_update_eob_context(cdf_idx, eob, tx_size, tx_class, plane_type, ec_ctx,
-                           td->counts, allow_update_cdf);
+    av1_update_eob_context(cdf_idx, eob, tx_size,
+#if !CONFIG_ATC_DCTX_ALIGNED
+                           tx_class,
+#endif  // !CONFIG_ATC_DCTX_ALIGNED
+                           plane_type, ec_ctx, td->counts, allow_update_cdf);
 #else
-    av1_update_eob_context(eob, tx_size, tx_class, plane_type, ec_ctx,
-                           allow_update_cdf);
+    av1_update_eob_context(eob, tx_size,
+#if !CONFIG_ATC_DCTX_ALIGNED
+                           tx_class,
+#endif  // !CONFIG_ATC_DCTX_ALIGNED
+                           plane_type, ec_ctx, allow_update_cdf);
 #endif
 
     DECLARE_ALIGNED(16, int8_t, coeff_contexts[MAX_TX_SQUARE]);
     av1_get_nz_map_contexts(levels, scan, eob, tx_size, tx_class, coeff_contexts
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
                             ,
                             plane
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
     );
 #if CONFIG_PAR_HIDING
     bool enable_parity_hiding = cm->features.allow_parity_hiding &&
@@ -3799,7 +4283,7 @@
       if (allow_update_cdf) {
         if (c == eob - 1) {
           assert(coeff_ctx < 4);
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
           const int row = pos >> bwl;
           const int col = pos - (row << bwl);
           int limits = get_lf_limits(row, col, tx_class, plane);
@@ -3818,9 +4302,9 @@
           update_cdf(
               ec_ctx->coeff_base_eob_cdf[txsize_ctx][plane_type][coeff_ctx],
               AOMMIN(level, 3) - 1, 3);
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
         } else {
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
           const int row = pos >> bwl;
           const int col = pos - (row << bwl);
           int limits = get_lf_limits(row, col, tx_class, plane);
@@ -3836,14 +4320,14 @@
 #else
           update_cdf(ec_ctx->coeff_base_cdf[txsize_ctx][plane_type][coeff_ctx],
                      AOMMIN(level, 3), 4);
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
         }
       }
       if (c == eob - 1) {
         assert(coeff_ctx < 4);
         assert(level > 0);
 #if CONFIG_ENTROPY_STATS
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
         const int row = pos >> bwl;
         const int col = pos - (row << bwl);
         int limits = get_lf_limits(row, col, tx_class, plane);
@@ -3858,9 +4342,9 @@
 #else
         ++td->counts->coeff_base_eob_multi[cdf_idx][txsize_ctx][plane_type]
                                           [coeff_ctx][AOMMIN(level, 3) - 1];
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
       } else {
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
         const int row = pos >> bwl;
         const int col = pos - (row << bwl);
         int limits = get_lf_limits(row, col, tx_class, plane);
@@ -3875,10 +4359,10 @@
 #else
         ++td->counts->coeff_base_multi[cdf_idx][txsize_ctx][plane_type]
                                       [coeff_ctx][AOMMIN(level, 3)];
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 #endif
       }
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
       const int row = pos >> bwl;
       const int col = pos - (row << bwl);
       int limits = get_lf_limits(row, col, tx_class, plane);
@@ -3953,7 +4437,7 @@
           if (k < BR_CDF_SIZE - 1) break;
         }
       }
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
     }
 
 #if CONFIG_PAR_HIDING
@@ -3986,7 +4470,7 @@
       if (allow_update_cdf) {
         if (c == eob - 1) {
           assert(coeff_ctx < 4);
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
           const int row = pos >> bwl;
           const int col = pos - (row << bwl);
           int limits = get_lf_limits(row, col, tx_class, plane);
@@ -4004,9 +4488,9 @@
           update_cdf(
               ec_ctx->coeff_base_eob_cdf[txsize_ctx][plane_type][coeff_ctx],
               AOMMIN(level, 3) - 1, 3);
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
         } else {
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
           const int row = pos >> bwl;
           const int col = pos - (row << bwl);
           int limits = get_lf_limits(row, col, tx_class, plane);
@@ -4022,13 +4506,13 @@
 #else
           update_cdf(ec_ctx->coeff_base_cdf[txsize_ctx][plane_type][coeff_ctx],
                      AOMMIN(level, 3), 4);
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
         }
       }
       if (c == eob - 1) {
         assert(coeff_ctx < 4);
 #if CONFIG_ENTROPY_STATS
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
         const int row = pos >> bwl;
         const int col = pos - (row << bwl);
         int limits = get_lf_limits(row, col, tx_class, plane);
@@ -4043,9 +4527,9 @@
 #else
         ++td->counts->coeff_base_eob_multi[cdf_idx][txsize_ctx][plane_type]
                                           [coeff_ctx][AOMMIN(level, 3) - 1];
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
       } else {
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
         const int row = pos >> bwl;
         const int col = pos - (row << bwl);
         int limits = get_lf_limits(row, col, tx_class, plane);
@@ -4060,10 +4544,10 @@
 #else
         ++td->counts->coeff_base_multi[cdf_idx][txsize_ctx][plane_type]
                                       [coeff_ctx][AOMMIN(level, 3)];
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
 #endif
       }
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
       const int row = pos >> bwl;
       const int col = pos - (row << bwl);
       int limits = get_lf_limits(row, col, tx_class, plane);
@@ -4138,7 +4622,7 @@
           if (k < BR_CDF_SIZE - 1) break;
         }
       }
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
     }
 #endif  // CONFIG_PAR_HIDING
 
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c
index 6c669ac..a1d0de8 100644
--- a/av1/encoder/ethread.c
+++ b/av1/encoder/ethread.c
@@ -27,10 +27,6 @@
   for (int i = 0; i < REFERENCE_MODES; i++)
     td->rd_counts.comp_pred_diff[i] += td_t->rd_counts.comp_pred_diff[i];
 
-  for (int i = 0; i < INTER_REFS_PER_FRAME; i++)
-    td->rd_counts.global_motion_used[i] +=
-        td_t->rd_counts.global_motion_used[i];
-
   td->rd_counts.compound_ref_used_flag |=
       td_t->rd_counts.compound_ref_used_flag;
   td->rd_counts.skip_mode_used_flag |= td_t->rd_counts.skip_mode_used_flag;
@@ -470,7 +466,7 @@
         &td->mb.txfm_search_info.mb_rd_record.crc_calculator);
 #if CONFIG_REF_MV_BANK
     av1_zero(td->mb.e_mbd.ref_mv_bank);
-#if !CONFIG_C043_MVP_IMPROVEMENTS
+#if !CONFIG_MVP_IMPROVEMENT
     td->mb.e_mbd.ref_mv_bank_pt = &td->mb.e_mbd.ref_mv_bank;
 #endif
 #endif  // CONFIG_REF_MV_BANK}
@@ -679,7 +675,7 @@
     if (i > 0) {
       // Set up firstpass PICK_MODE_CONTEXT.
       thread_data->td->firstpass_ctx = av1_alloc_pmc(
-          cm, 0, 0, BLOCK_16X16, NULL, PARTITION_NONE, 0,
+          cm, SHARED_PART, 0, 0, BLOCK_16X16, NULL, PARTITION_NONE, 0,
           cm->seq_params.subsampling_x, cm->seq_params.subsampling_y,
           &thread_data->td->shared_coeff_buf);
 
@@ -814,7 +810,7 @@
     }
 #if CONFIG_REF_MV_BANK
     av1_zero(thread_data->td->mb.e_mbd.ref_mv_bank);
-#if !CONFIG_C043_MVP_IMPROVEMENTS
+#if !CONFIG_MVP_IMPROVEMENT
     thread_data->td->mb.e_mbd.ref_mv_bank_pt =
         &thread_data->td->mb.e_mbd.ref_mv_bank;
 
@@ -1431,7 +1427,7 @@
     // source_alt_ref_frame w.r.t. ARF frames.
     if (cpi->sf.gm_sf.prune_ref_frame_for_gm_search &&
         gm_info->reference_frames[cur_dir][ref_frame_idx].distance != 0 &&
-        cpi->common.global_motion[ref_buf_idx].wmtype != ROTZOOM)
+        cpi->common.global_motion[ref_buf_idx].wmtype <= TRANSLATION)
       job_info->early_exit[cur_dir] = 1;
 
 #if CONFIG_MULTITHREAD
diff --git a/av1/encoder/firstpass.c b/av1/encoder/firstpass.c
index cf7332c..9d3f5dc 100644
--- a/av1/encoder/firstpass.c
+++ b/av1/encoder/firstpass.c
@@ -221,7 +221,7 @@
   }
 #if CONFIG_FLEX_MVRES
   const MvSubpelPrecision pb_mv_precision = cm->features.fr_mv_precision;
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
   const int is_ibc_cost = 0;
 #endif
 #endif
@@ -230,7 +230,7 @@
 #if CONFIG_FLEX_MVRES
   av1_make_default_fullpel_ms_params(
       &ms_params, cpi, x, bsize, ref_mv, pb_mv_precision,
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
       is_ibc_cost,
 #endif
       first_pass_search_sites, fine_search_interval);
@@ -688,6 +688,9 @@
     xd->mi[0]->tx_size = TX_4X4;
     xd->mi[0]->ref_frame[0] = get_closest_pastcur_ref_index(cm);
     xd->mi[0]->ref_frame[1] = NONE_FRAME;
+#if CONFIG_CWP
+    xd->mi[0]->cwp_idx = CWP_EQUAL;
+#endif  // CONFIG_CWP
     av1_enc_build_inter_predictor(cm, xd, mb_row * mb_scale, mb_col * mb_scale,
                                   NULL, bsize, AOM_PLANE_Y, AOM_PLANE_Y);
     av1_encode_sby_pass1(cpi, x, bsize);
@@ -984,6 +987,9 @@
     x->plane[i].coeff = ctx->coeff[i];
     x->plane[i].qcoeff = ctx->qcoeff[i];
     x->plane[i].eobs = ctx->eobs[i];
+#if CONFIG_ATC_DCTX_ALIGNED
+    x->plane[i].bobs = ctx->bobs[i];
+#endif  // CONFIG_ATC_DCTX_ALIGNED
     x->plane[i].txb_entropy_ctx = ctx->txb_entropy_ctx[i];
     x->plane[i].dqcoeff = ctx->dqcoeff[i];
   }
@@ -1087,14 +1093,8 @@
     cpi->is_screen_content_type = features->allow_screen_content_tools;
   }
 #if CONFIG_ADAPTIVE_DS_FILTER
-#if DS_FRAME_LEVEL
-  if (cm->current_frame.frame_type == KEY_FRAME) {
-    FeatureFlags *const features = &cm->features;
-    av1_set_downsample_filter_options(cpi, features);
-#else
-  if (cpi->common.current_frame.absolute_poc == 0) {
+  if (cpi->common.current_frame.frame_type == KEY_FRAME) {
     av1_set_downsample_filter_options(cpi);
-#endif  // DS_FRAME_LEVEL
   }
 #endif  // CONFIG_ADAPTIVE_DS_FILTER
   // First pass coding proceeds in raster scan order with unit size of 16x16.
diff --git a/av1/encoder/global_motion.c b/av1/encoder/global_motion.c
index 1f07c20..c3495e1 100644
--- a/av1/encoder/global_motion.c
+++ b/av1/encoder/global_motion.c
@@ -72,7 +72,9 @@
 
 static void convert_to_params(const double *params, int32_t *model) {
   int i;
+#if !CONFIG_IMPROVED_GLOBAL_MOTION
   int alpha_present = 0;
+#endif  // !CONFIG_IMPROVED_GLOBAL_MOTION
   model[0] = (int32_t)floor(params[0] * (1 << GM_TRANS_PREC_BITS) + 0.5);
   model[1] = (int32_t)floor(params[1] * (1 << GM_TRANS_PREC_BITS) + 0.5);
   model[0] = (int32_t)clamp(model[0], GM_TRANS_MIN, GM_TRANS_MAX) *
@@ -85,22 +87,28 @@
     model[i] = (int32_t)floor(params[i] * (1 << GM_ALPHA_PREC_BITS) + 0.5);
     model[i] =
         (int32_t)clamp(model[i] - diag_value, GM_ALPHA_MIN, GM_ALPHA_MAX);
+#if !CONFIG_IMPROVED_GLOBAL_MOTION
     alpha_present |= (model[i] != 0);
+#endif  // !CONFIG_IMPROVED_GLOBAL_MOTION
     model[i] = (model[i] + diag_value) * GM_ALPHA_DECODE_FACTOR;
   }
   for (; i < 8; ++i) {
     model[i] = (int32_t)floor(params[i] * (1 << GM_ROW3HOMO_PREC_BITS) + 0.5);
     model[i] = (int32_t)clamp(model[i], GM_ROW3HOMO_MIN, GM_ROW3HOMO_MAX) *
                GM_ROW3HOMO_DECODE_FACTOR;
+#if !CONFIG_IMPROVED_GLOBAL_MOTION
     alpha_present |= (model[i] != 0);
+#endif  // !CONFIG_IMPROVED_GLOBAL_MOTION
   }
 
+#if !CONFIG_IMPROVED_GLOBAL_MOTION
   if (!alpha_present) {
     if (abs(model[0]) < MIN_TRANS_THRESH && abs(model[1]) < MIN_TRANS_THRESH) {
       model[0] = 0;
       model[1] = 0;
     }
   }
+#endif  // !CONFIG_IMPROVED_GLOBAL_MOTION
 }
 
 void av1_convert_model_to_params(const double *params,
diff --git a/av1/encoder/global_motion_facade.c b/av1/encoder/global_motion_facade.c
index 7e28cab..8180bf4 100644
--- a/av1/encoder/global_motion_facade.c
+++ b/av1/encoder/global_motion_facade.c
@@ -30,11 +30,38 @@
                               const WarpedMotionParams *ref_gm,
                               MvSubpelPrecision precision) {
   const int precision_loss = get_gm_precision_loss(precision);
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+  (void)precision_loss;
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
 #else
                               const WarpedMotionParams *ref_gm, int allow_hp) {
-#endif
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+  (void)allow_hp;
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
+#endif  // CONFIG_FLEX_MVRES
   int params_cost = 0;
-  int trans_bits, trans_prec_diff;
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+  const int trans_bits = GM_ABS_TRANS_BITS;
+  const int trans_prec_diff = GM_TRANS_PREC_DIFF;
+  const int trans_max = (1 << trans_bits) - 1;
+#else
+  const int trans_bits = (gm->wmtype == TRANSLATION)
+#if CONFIG_FLEX_MVRES
+                             ? GM_ABS_TRANS_ONLY_BITS - precision_loss
+#else
+                             ? GM_ABS_TRANS_ONLY_BITS - !allow_hp
+#endif
+                             : GM_ABS_TRANS_BITS;
+  const int trans_prec_diff = (gm->wmtype == TRANSLATION)
+#if CONFIG_FLEX_MVRES
+                                  ? GM_TRANS_ONLY_PREC_DIFF + precision_loss
+#else
+                                  ? GM_TRANS_ONLY_PREC_DIFF + !allow_hp
+#endif
+                                  : GM_TRANS_PREC_DIFF;
+  const int trans_max = (1 << trans_bits);
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
+
   switch (gm->wmtype) {
     case AFFINE:
     case ROTZOOM:
@@ -57,29 +84,11 @@
                 (1 << GM_ALPHA_PREC_BITS),
             (gm->wmmat[5] >> GM_ALPHA_PREC_DIFF) - (1 << GM_ALPHA_PREC_BITS));
       }
-      AOM_FALLTHROUGH_INTENDED;
-    case TRANSLATION:
-      trans_bits = (gm->wmtype == TRANSLATION)
-#if CONFIG_FLEX_MVRES
-                       ? GM_ABS_TRANS_ONLY_BITS - precision_loss
-#else
-                       ? GM_ABS_TRANS_ONLY_BITS - !allow_hp
-#endif
-                       : GM_ABS_TRANS_BITS;
-      trans_prec_diff = (gm->wmtype == TRANSLATION)
-#if CONFIG_FLEX_MVRES
-                            ? GM_TRANS_ONLY_PREC_DIFF + precision_loss
-#else
-                            ? GM_TRANS_ONLY_PREC_DIFF + !allow_hp
-#endif
-                            : GM_TRANS_PREC_DIFF;
       params_cost += aom_count_signed_primitive_refsubexpfin(
-          (1 << trans_bits) + 1, SUBEXPFIN_K,
-          (ref_gm->wmmat[0] >> trans_prec_diff),
+          trans_max + 1, SUBEXPFIN_K, (ref_gm->wmmat[0] >> trans_prec_diff),
           (gm->wmmat[0] >> trans_prec_diff));
       params_cost += aom_count_signed_primitive_refsubexpfin(
-          (1 << trans_bits) + 1, SUBEXPFIN_K,
-          (ref_gm->wmmat[1] >> trans_prec_diff),
+          trans_max + 1, SUBEXPFIN_K, (ref_gm->wmmat[1] >> trans_prec_diff),
           (gm->wmmat[1] >> trans_prec_diff));
       AOM_FALLTHROUGH_INTENDED;
     case IDENTITY: break;
@@ -95,16 +104,28 @@
 
 // For the given reference frame, computes the global motion parameters for
 // different motion models and finds the best.
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+static AOM_INLINE void compute_global_motion_for_ref_frame(
+    AV1_COMP *cpi, YV12_BUFFER_CONFIG *ref_buf[INTER_REFS_PER_FRAME], int frame,
+    int num_src_corners, int *src_corners, unsigned char *src_buffer,
+    MotionModel *params_by_motion, uint8_t *segment_map,
+    const int segment_map_w, const int segment_map_h) {
+#else
 static AOM_INLINE void compute_global_motion_for_ref_frame(
     AV1_COMP *cpi, YV12_BUFFER_CONFIG *ref_buf[INTER_REFS_PER_FRAME], int frame,
     int num_src_corners, int *src_corners, unsigned char *src_buffer,
     MotionModel *params_by_motion, uint8_t *segment_map,
     const int segment_map_w, const int segment_map_h,
     const WarpedMotionParams *ref_params) {
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
   ThreadData *const td = &cpi->td;
   MACROBLOCK *const x = &td->mb;
   AV1_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &x->e_mbd;
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+  GlobalMotionInfo *const gm_info = &cpi->gm_info;
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
+
   int i;
   int src_width = cpi->source->y_width;
   int src_height = cpi->source->y_height;
@@ -125,16 +146,21 @@
   // TODO(sarahparker, debargha): Explore do_adaptive_gm_estimation = 1
   const int do_adaptive_gm_estimation = 0;
 
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+  const int ref_frame_dist = get_relative_dist(
+      &cm->seq_params.order_hint_info, cm->current_frame.display_order_hint,
+      cm->cur_frame->ref_display_order_hint[frame]);
+#else
   const int ref_frame_dist = get_relative_dist(
       &cm->seq_params.order_hint_info, cm->current_frame.order_hint,
       cm->cur_frame->ref_order_hints[frame]);
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
   const GlobalMotionEstimationType gm_estimation_type =
       cm->seq_params.order_hint_info.enable_order_hint &&
               abs(ref_frame_dist) <= 2 && do_adaptive_gm_estimation
           ? GLOBAL_MOTION_DISFLOW_BASED
           : GLOBAL_MOTION_FEATURE_BASED;
   for (model = ROTZOOM; model < GLOBAL_TRANS_TYPES_ENC; ++model) {
-    int64_t best_warp_error = INT64_MAX;
     // Initially set all params to identity.
     for (i = 0; i < RANSAC_NUM_MOTIONS; ++i) {
       memcpy(params_by_motion[i].params, kIdentityParams,
@@ -147,48 +173,82 @@
                               ref_buf[frame], cpi->common.seq_params.bit_depth,
                               gm_estimation_type, inliers_by_motion,
                               params_by_motion, RANSAC_NUM_MOTIONS);
-    int64_t ref_frame_error = 0;
+
+    int64_t best_ref_frame_error = 0;
+    int64_t best_warp_error = INT64_MAX;
     for (i = 0; i < RANSAC_NUM_MOTIONS; ++i) {
       if (inliers_by_motion[i] == 0) continue;
 
       params_this_motion = params_by_motion[i].params;
       av1_convert_model_to_params(params_this_motion, &tmp_wm_params);
 
-      if (tmp_wm_params.wmtype != IDENTITY) {
-        av1_compute_feature_segmentation_map(
-            segment_map, segment_map_w, segment_map_h,
-            params_by_motion[i].inliers, params_by_motion[i].num_inliers);
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+      // If the found model can be represented as a simple translation,
+      // then reject it. This is because translational motion is cheaper
+      // to signal through the standard MV coding tools, rather than through
+      // global motion
+      if (tmp_wm_params.wmtype <= TRANSLATION) continue;
+#else
+      // For IDENTITY type models, we don't need to evaluate anything because
+      // all the following logic is effectively comparing the estimated model
+      // to an identity model.
+      if (tmp_wm_params.wmtype == IDENTITY) continue;
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
 
-        ref_frame_error = av1_segmented_frame_error(
-            xd->bd, ref_buf[frame]->y_buffer, ref_buf[frame]->y_stride,
-            cpi->source->y_buffer, src_width, src_height, src_stride,
-            segment_map, segment_map_w);
+      av1_compute_feature_segmentation_map(
+          segment_map, segment_map_w, segment_map_h,
+          params_by_motion[i].inliers, params_by_motion[i].num_inliers);
 
-        const int64_t erroradv_threshold =
-            calc_erroradv_threshold(ref_frame_error);
+      const int64_t ref_frame_error = av1_segmented_frame_error(
+          xd->bd, ref_buf[frame]->y_buffer, ref_buf[frame]->y_stride,
+          cpi->source->y_buffer, src_width, src_height, src_stride, segment_map,
+          segment_map_w);
 
-        const int64_t warp_error = av1_refine_integerized_param(
-            &tmp_wm_params, tmp_wm_params.wmtype, xd->bd,
-            ref_buf[frame]->y_buffer, ref_buf[frame]->y_width,
-            ref_buf[frame]->y_height, ref_buf[frame]->y_stride,
-            cpi->source->y_buffer, src_width, src_height, src_stride,
-            GM_REFINEMENT_COUNT, best_warp_error, segment_map, segment_map_w,
-            erroradv_threshold);
+      if (ref_frame_error == 0) continue;
 
-        if (warp_error < best_warp_error) {
-          best_warp_error = warp_error;
-          // Save the wm_params modified by
-          // av1_refine_integerized_param() rather than motion index to
-          // avoid rerunning refine() below.
-          memcpy(&(cm->global_motion[frame]), &tmp_wm_params,
-                 sizeof(WarpedMotionParams));
-        }
+      const int64_t erroradv_threshold =
+          calc_erroradv_threshold(ref_frame_error);
+
+      const int64_t warp_error = av1_refine_integerized_param(
+          &tmp_wm_params, tmp_wm_params.wmtype, xd->bd,
+          ref_buf[frame]->y_buffer, ref_buf[frame]->y_width,
+          ref_buf[frame]->y_height, ref_buf[frame]->y_stride,
+          cpi->source->y_buffer, src_width, src_height, src_stride,
+          GM_REFINEMENT_COUNT, best_warp_error, segment_map, segment_map_w,
+          erroradv_threshold);
+
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+      // av1_refine_integerized_param() can change the wmtype to a simpler
+      // model type than its input. So we need to check again to see if
+      // we have a translational model
+      if (tmp_wm_params.wmtype <= TRANSLATION) continue;
+#else
+      // av1_refine_integerized_param() can return a simpler model type than
+      // its input, so re-check model type here
+      if (tmp_wm_params.wmtype == IDENTITY) continue;
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
+
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+      // Apply initial quality filter, which depends only on the error metrics
+      // and not the model cost
+      if (warp_error >= ref_frame_error * erroradv_tr) continue;
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
+
+      if (warp_error < best_warp_error) {
+        best_ref_frame_error = ref_frame_error;
+        best_warp_error = warp_error;
+        // Save the wm_params modified by
+        // av1_refine_integerized_param() rather than motion index to
+        // avoid rerunning refine() below.
+        memcpy(&(cm->global_motion[frame]), &tmp_wm_params,
+               sizeof(WarpedMotionParams));
       }
     }
     if (cm->global_motion[frame].wmtype <= AFFINE)
       if (!av1_get_shear_params(&cm->global_motion[frame]))
         cm->global_motion[frame] = default_warp_params;
 
+#if !CONFIG_IMPROVED_GLOBAL_MOTION
     if (cm->global_motion[frame].wmtype == TRANSLATION) {
       cm->global_motion[frame].wmmat[0] =
 #if CONFIG_FLEX_MVRES
@@ -207,15 +267,25 @@
                                 cm->global_motion[frame].wmmat[1]) *
           GM_TRANS_ONLY_DECODE_FACTOR;
     }
+#endif  // !CONFIG_IMPROVED_GLOBAL_MOTION
 
     if (cm->global_motion[frame].wmtype == IDENTITY) continue;
 
-    if (ref_frame_error == 0) continue;
+    // Once we get here, best_ref_frame_error must be > 0. This is because
+    // of the logic above, which skips over any models which have
+    // ref_frame_error == 0
+    assert(best_ref_frame_error > 0);
 
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+    gm_info->erroradvantage[frame] =
+        (double)best_warp_error / best_ref_frame_error;
+
+    break;
+#else
     // If the best error advantage found doesn't meet the threshold for
     // this motion type, revert to IDENTITY.
     if (!av1_is_enough_erroradvantage(
-            (double)best_warp_error / ref_frame_error,
+            (double)best_warp_error / best_ref_frame_error,
             gm_get_params_cost(&cm->global_motion[frame], ref_params,
 #if CONFIG_FLEX_MVRES
                                cm->features.fr_mv_precision))) {
@@ -226,6 +296,7 @@
     }
 
     if (cm->global_motion[frame].wmtype != IDENTITY) break;
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
   }
 
   aom_clear_system_state();
@@ -237,8 +308,12 @@
     int num_src_corners, int *src_corners, unsigned char *src_buffer,
     MotionModel *params_by_motion, uint8_t *segment_map, int segment_map_w,
     int segment_map_h) {
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+  compute_global_motion_for_ref_frame(
+      cpi, ref_buf, frame, num_src_corners, src_corners, src_buffer,
+      params_by_motion, segment_map, segment_map_w, segment_map_h);
+#else
   AV1_COMMON *const cm = &cpi->common;
-  GlobalMotionInfo *const gm_info = &cpi->gm_info;
   const WarpedMotionParams *ref_params =
       cm->prev_frame ? &cm->prev_frame->global_motion[frame]
                      : &default_warp_params;
@@ -246,16 +321,7 @@
   compute_global_motion_for_ref_frame(
       cpi, ref_buf, frame, num_src_corners, src_corners, src_buffer,
       params_by_motion, segment_map, segment_map_w, segment_map_h, ref_params);
-
-  gm_info->params_cost[frame] =
-      gm_get_params_cost(&cm->global_motion[frame], ref_params,
-#if !CONFIG_FLEX_MVRES
-                         cm->features.allow_high_precision_mv) +
-#else
-                         cm->features.fr_mv_precision) +
-#endif
-      gm_info->type_cost[cm->global_motion[frame].wmtype] -
-      gm_info->type_cost[IDENTITY];
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
 }
 
 // Loops over valid reference frames and computes global motion estimation.
@@ -282,7 +348,7 @@
     // source_alt_ref_frame w.r.t. ARF frames.
     if (cpi->sf.gm_sf.prune_ref_frame_for_gm_search &&
         reference_frame[frame].distance != 0 &&
-        cm->global_motion[ref_frame].wmtype != ROTZOOM)
+        cm->global_motion[ref_frame].wmtype <= TRANSLATION)
       break;
   }
 }
@@ -299,37 +365,26 @@
   return 0;
 }
 
-// Function to decide if we can skip the global motion parameter computation
-// for a particular ref frame.
-static AOM_INLINE int skip_gm_frame(AV1_COMMON *const cm, int refrank) {
-  const RefCntBuffer *const refbuf = get_ref_frame_buf(cm, refrank);
-  if (refbuf == NULL) return 1;
-  const int d0 = get_dir_rank(cm, refrank, NULL);
-  for (int i = 0; i < refrank; ++i) {
-    const int di = get_dir_rank(cm, i, NULL);
-    if (di == d0 && cm->global_motion[i].wmtype != IDENTITY) {
-      // Same direction higher ranked ref has a non-identity gm.
-      // Allow search if distance is smaller in this case.
-      return (abs(cm->ref_frames_info.ref_frame_distance[i]) >
-              abs(cm->ref_frames_info.ref_frame_distance[refrank]));
-    }
-  }
-  return 0;
-}
+static int disable_gm_search_based_on_stats(const AV1_COMP *const cpi) {
+  const GF_GROUP *gf_group = &cpi->gf_group;
+  int is_gm_present = 1;
 
-// Prunes reference frames for global motion estimation based on the speed
-// feature 'gm_search_type'.
-static int do_gm_search_logic(SPEED_FEATURES *const sf, int refrank) {
-  switch (sf->gm_sf.gm_search_type) {
-    case GM_FULL_SEARCH: return 1;
-    case GM_REDUCED_REF_SEARCH_SKIP_LEV2:
-      return refrank < INTER_REFS_PER_FRAME - 2;
-    case GM_REDUCED_REF_SEARCH_SKIP_LEV3:
-      return refrank < INTER_REFS_PER_FRAME - 4;
-    case GM_DISABLE_SEARCH: return 0;
-    default: assert(0);
+  // Check number of GM models only in GF groups with ARF frames. GM param
+  // estimation is always done in the case of GF groups with no ARF frames (flat
+  // gops)
+  if (gf_group->arf_index > -1) {
+    // valid_gm_model_found is initialized to INT32_MAX in the beginning of
+    // every GF group.
+    // Therefore, GM param estimation is always done for all frames until
+    // at least 1 frame each of ARF_UPDATE, INTNL_ARF_UPDATE and LF_UPDATE are
+    // encoded in a GF group. For subsequent frames, GM param estimation is
+    // disabled if no valid models have been found in any of the three update
+    // types.
+    is_gm_present = (cpi->valid_gm_model_found[ARF_UPDATE] != 0) ||
+                    (cpi->valid_gm_model_found[INTNL_ARF_UPDATE] != 0) ||
+                    (cpi->valid_gm_model_found[LF_UPDATE] != 0);
   }
-  return 1;
+  return !is_gm_present;
 }
 
 // Populates valid reference frames in past/future directions in
@@ -344,6 +399,12 @@
   const GF_GROUP *gf_group = &cpi->gf_group;
   int ref_pruning_enabled = is_frame_eligible_for_ref_pruning(
       gf_group, cpi->sf.inter_sf.selective_ref_frame, 1, gf_group->index);
+  int cur_frame_gm_disabled = 0;
+  int pyr_lvl = cm->cur_frame->pyramid_level;
+
+  if (cpi->sf.gm_sf.disable_gm_search_based_on_stats) {
+    cur_frame_gm_disabled = disable_gm_search_based_on_stats(cpi);
+  }
 
   for (int frame = cm->ref_frames_info.num_total_refs - 1; frame >= 0;
        --frame) {
@@ -358,7 +419,6 @@
     // Skip global motion estimation for invalid ref frames
     if (buf == NULL ||
         (ref_disabled && cpi->sf.hl_sf.recode_loop != DISALLOW_RECODE)) {
-      cpi->gm_info.params_cost[frame] = 0;
       continue;
     } else {
       ref_buf[frame] = &buf->buf;
@@ -367,12 +427,12 @@
     int prune_ref_frames =
         ref_pruning_enabled &&
         prune_ref_by_selective_ref_frame(cpi, NULL, ref_frame);
+    int ref_pyr_lvl = buf->pyramid_level;
 
     if (ref_buf[frame]->y_crop_width == cpi->source->y_crop_width &&
         ref_buf[frame]->y_crop_height == cpi->source->y_crop_height &&
-        do_gm_search_logic(&cpi->sf, ref_frame[0]) &&
-        !(cpi->sf.gm_sf.selective_ref_gm && skip_gm_frame(cm, ref_frame[0])) &&
-        !prune_ref_frames) {
+        frame < cpi->sf.gm_sf.max_ref_frames && !prune_ref_frames &&
+        ref_pyr_lvl <= pyr_lvl && !cur_frame_gm_disabled) {
       assert(ref_buf[frame] != NULL);
       const int relative_frame_dist = av1_encoder_get_relative_dist(
           buf->display_order_hint, cm->cur_frame->display_order_hint);
@@ -420,6 +480,173 @@
   }
 }
 
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+// Chooses the base GM model: identity, or one stored with a reference frame.
+static AOM_INLINE void pick_base_gm_params(AV1_COMP *cpi) {
+  AV1_COMMON *const cm = &cpi->common;
+  const SequenceHeader *const seq_params = &cm->seq_params;
+  GlobalMotionInfo *const gm_info = &cpi->gm_info;
+  int num_total_refs = cm->ref_frames_info.num_total_refs;
+  // Best candidate so far; seeded by the identity evaluation below.
+  int best_our_ref;
+  int best_their_ref;
+  const WarpedMotionParams *best_base_model;
+  int best_temporal_distance;
+  int best_num_models;
+  int best_cost;
+
+  // Bitmask (bit `frame` set) of the models we will actually use if we accept
+  // the current best base model
+  uint8_t best_enable_models;
+
+  // First, evaluate the identity model (default_warp_params) as a base
+  {
+    int this_num_models = 0;
+    int this_cost =
+        aom_count_primitive_quniform(num_total_refs + 1, num_total_refs)
+        << AV1_PROB_COST_SHIFT;
+    uint8_t this_enable_models = 0;
+
+    for (int frame = 0; frame < num_total_refs; frame++) {
+      const WarpedMotionParams *model = &cm->global_motion[frame];
+      if (model->wmtype == IDENTITY) continue;
+
+#if CONFIG_FLEX_MVRES
+      int model_cost = gm_get_params_cost(model, &default_warp_params,
+                                          cm->features.fr_mv_precision);
+#else
+      int model_cost = gm_get_params_cost(model, &default_warp_params,
+                                          cm->features.allow_high_precision_mv);
+#endif  // CONFIG_FLEX_MVRES
+      bool use_model = av1_is_enough_erroradvantage(
+          gm_info->erroradvantage[frame], model_cost);
+
+      if (use_model) {
+        this_num_models += 1;
+        this_cost += model_cost;
+        this_enable_models |= (1 << frame);
+      }
+    }
+
+    // Seed the best candidate with the identity result
+    best_our_ref = cm->ref_frames_info.num_total_refs;
+    best_their_ref = -1;
+    best_base_model = &default_warp_params;
+    best_temporal_distance = 1;
+    best_num_models = this_num_models;
+    best_cost = this_cost;
+    best_enable_models = this_enable_models;
+  }
+
+  // Then try each non-identity model stored in each reference buffer in turn
+  for (int our_ref = 0; our_ref < num_total_refs; ++our_ref) {
+    const int ref_disabled = !(cm->ref_frame_flags & (1 << our_ref));
+    RefCntBuffer *buf = get_ref_frame_buf(cm, our_ref);
+    // Skip looking at invalid ref frames
+    if (buf == NULL ||
+        (ref_disabled && cpi->sf.hl_sf.recode_loop != DISALLOW_RECODE)) {
+      continue;
+    }
+
+    int their_num_refs = buf->num_ref_frames;
+    for (int their_ref = 0; their_ref < their_num_refs; ++their_ref) {
+      const WarpedMotionParams *base_model = &buf->global_motion[their_ref];
+      if (base_model->wmtype == IDENTITY) {
+        continue;
+      }
+
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+      const int our_ref_order_hint = buf->display_order_hint;
+      const int their_ref_order_hint = buf->ref_display_order_hint[their_ref];
+#else
+      const int our_ref_order_hint = buf->order_hint;
+      const int their_ref_order_hint = buf->ref_order_hints[their_ref];
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+      int base_temporal_distance =
+          get_relative_dist(&seq_params->order_hint_info, our_ref_order_hint,
+                            their_ref_order_hint);
+
+      int this_num_models = 0;
+      int this_cost =
+          (aom_count_primitive_quniform(num_total_refs + 1, our_ref) +
+           aom_count_primitive_quniform(their_num_refs, their_ref))
+          << AV1_PROB_COST_SHIFT;
+      uint8_t this_enable_models = 0;
+
+      for (int frame = 0; frame < num_total_refs; frame++) {
+        const WarpedMotionParams *model = &cm->global_motion[frame];
+        if (model->wmtype == IDENTITY) continue;
+
+        int temporal_distance;
+        if (seq_params->order_hint_info.enable_order_hint) {
+          const RefCntBuffer *const ref_buf = get_ref_frame_buf(cm, frame);
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+          const int ref_order_hint = ref_buf->display_order_hint;
+          const int cur_order_hint = cm->cur_frame->display_order_hint;
+#else
+          const int ref_order_hint = ref_buf->order_hint;
+          const int cur_order_hint = cm->cur_frame->order_hint;
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+          temporal_distance = get_relative_dist(&seq_params->order_hint_info,
+                                                cur_order_hint, ref_order_hint);
+        } else {
+          temporal_distance = 1;
+        }
+
+        if (temporal_distance == 0) {
+          // Don't code global motion for frames at the same temporal instant
+          assert(model->wmtype == IDENTITY);
+          continue;
+        }
+
+        WarpedMotionParams ref_params;
+        av1_scale_warp_model(base_model, base_temporal_distance, &ref_params,
+                             temporal_distance);
+
+#if CONFIG_FLEX_MVRES
+        int model_cost = gm_get_params_cost(model, &ref_params,
+                                            cm->features.fr_mv_precision);
+#else
+        int model_cost = gm_get_params_cost(
+            model, &ref_params, cm->features.allow_high_precision_mv);
+#endif  // CONFIG_FLEX_MVRES
+        bool use_model = av1_is_enough_erroradvantage(
+            gm_info->erroradvantage[frame], model_cost);
+
+        if (use_model) {
+          this_num_models += 1;
+          this_cost += model_cost;
+          this_enable_models |= (1 << frame);
+        }
+      }
+      // Prefer more usable models; break ties by lower total cost
+      if (this_num_models > best_num_models ||
+          (this_num_models == best_num_models && this_cost < best_cost)) {
+        best_our_ref = our_ref;
+        best_their_ref = their_ref;
+        best_base_model = base_model;
+        best_temporal_distance = base_temporal_distance;
+        best_num_models = this_num_models;
+        best_cost = this_cost;
+        best_enable_models = this_enable_models;
+      }
+    }
+  }
+
+  gm_info->base_model_our_ref = best_our_ref;
+  gm_info->base_model_their_ref = best_their_ref;
+  cm->base_global_motion_model = *best_base_model;
+  cm->base_global_motion_distance = best_temporal_distance;
+
+  for (int frame = 0; frame < num_total_refs; frame++) {
+    if ((best_enable_models & (1 << frame)) == 0) {
+      // Not selected for the winning base: reset to the default params
+      cm->global_motion[frame] = default_warp_params;
+    }
+  }
+}
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
+
 // Initializes parameters used for computing global motion.
 static AOM_INLINE void setup_global_motion_info_params(AV1_COMP *cpi) {
   GlobalMotionInfo *const gm_info = &cpi->gm_info;
@@ -486,15 +713,45 @@
   dealloc_global_motion_data(params_by_motion, segment_map);
 }
 
+// Marks the stats of every frame update type as "nothing recorded yet".
+static AOM_INLINE void reset_gm_stats(AV1_COMP *cpi) {
+  for (int type = 0; type < FRAME_UPDATE_TYPES; ++type)
+    cpi->valid_gm_model_found[type] = INT32_MAX;
+}
+
+// Folds the current frame's global motion result into the stats kept for
+// its frame update type (first frame after a reset stores, later ones OR in).
+static AOM_INLINE void update_gm_stats(AV1_COMP *cpi) {
+  const GF_GROUP *const gf_group = &cpi->gf_group;
+  const FRAME_UPDATE_TYPE type = gf_group->update_type[gf_group->index];
+
+  // Becomes 1 as soon as one reference has a non-identity model.
+  int found = 0;
+  for (int ref = 0; ref < INTER_REFS_PER_FRAME && !found; ++ref) {
+    found = (cpi->common.global_motion[ref].wmtype != IDENTITY);
+  }
+
+  // INT32_MAX means that no frame of this type has been recorded yet.
+  if (cpi->valid_gm_model_found[type] != INT32_MAX) {
+    cpi->valid_gm_model_found[type] |= found;
+  } else {
+    cpi->valid_gm_model_found[type] = found;
+  }
+}
+
 // Global motion estimation for the current frame is computed.This computation
 // happens once per frame and the winner motion model parameters are stored in
 // cm->cur_frame->global_motion.
 void av1_compute_global_motion_facade(AV1_COMP *cpi) {
   AV1_COMMON *const cm = &cpi->common;
+  const GF_GROUP *gf_group = &cpi->gf_group;
   GlobalMotionInfo *const gm_info = &cpi->gm_info;
 
-  av1_zero(cpi->td.rd_counts.global_motion_used);
-  av1_zero(gm_info->params_cost);
+  // Reset `valid_gm_model_found` at the start of each GOP
+  if (cpi->oxcf.tool_cfg.enable_global_motion &&
+      cpi->sf.gm_sf.disable_gm_search_based_on_stats && gf_group->index == 0) {
+    reset_gm_stats(cpi);
+  }
 
   if (cpi->common.current_frame.frame_type == INTER_FRAME && cpi->source &&
       cpi->oxcf.tool_cfg.enable_global_motion && !gm_info->search_done) {
@@ -503,8 +760,22 @@
       av1_global_motion_estimation_mt(cpi);
     else
       global_motion_estimation(cpi);
+
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+    // Once we have determined the best motion model for each ref frame,
+    // choose the base parameters to minimize the total encoding cost
+    pick_base_gm_params(cpi);
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
+
+    // Check if the current frame has any valid global motion model across its
+    // reference frames
+    if (cpi->sf.gm_sf.disable_gm_search_based_on_stats) {
+      update_gm_stats(cpi);
+    }
+
     gm_info->search_done = 1;
   }
+
   memcpy(cm->cur_frame->global_motion, cm->global_motion,
          sizeof(cm->cur_frame->global_motion));
 }
diff --git a/av1/encoder/interp_search.c b/av1/encoder/interp_search.c
index d3108aa..c355ffc 100644
--- a/av1/encoder/interp_search.c
+++ b/av1/encoder/interp_search.c
@@ -125,6 +125,9 @@
   // to MULTITAP_SHARP, and thus is not switchable.
   assert(x->e_mbd.mi[0]->mode < NEAR_NEARMV_OPTFLOW);
 #endif  // CONFIG_OPTFLOW_REFINEMENT
+#if CONFIG_REFINEMV
+  assert(!x->e_mbd.mi[0]->refinemv_flag);
+#endif  // CONFIG_REFINEMV
   const int inter_filter_cost =
       x->mode_costs.switchable_interp_costs[ctx[0]][interp_fltr];
   return SWITCHABLE_INTERP_RATE_FACTOR * inter_filter_cost;
@@ -188,7 +191,11 @@
   mbmi->interp_fltr = filter_idx;
 #if CONFIG_OPTFLOW_REFINEMENT
   const int tmp_rs =
-      (mbmi->mode >= NEAR_NEARMV_OPTFLOW || use_opfl_refine_all(cm, mbmi))
+      (mbmi->mode >= NEAR_NEARMV_OPTFLOW || use_opfl_refine_all(cm, mbmi)
+#if CONFIG_REFINEMV
+       || mbmi->refinemv_flag
+#endif  // CONFIG_REFINEMV
+       )
           ? 0
           : get_switchable_rate(x, mbmi->interp_fltr, switchable_ctx);
 #else
@@ -439,7 +446,11 @@
   switchable_ctx[1] = av1_get_pred_context_switchable_interp(xd, 1);
 #if CONFIG_OPTFLOW_REFINEMENT
   *switchable_rate =
-      (mbmi->mode >= NEAR_NEARMV_OPTFLOW || use_opfl_refine_all(cm, mbmi))
+      (mbmi->mode >= NEAR_NEARMV_OPTFLOW || use_opfl_refine_all(cm, mbmi)
+#if CONFIG_REFINEMV
+       || mbmi->refinemv_flag
+#endif  // CONFIG_REFINEMV
+       )
           ? 0
           : get_switchable_rate(x, mbmi->interp_fltr, switchable_ctx);
 #else
@@ -475,28 +486,54 @@
   }
   if (!need_search) {
 #if CONFIG_OPTFLOW_REFINEMENT
+#if CONFIG_REFINEMV
+    assert(mbmi->interp_fltr ==
+           ((mbmi->mode >= NEAR_NEARMV_OPTFLOW ||
+             use_opfl_refine_all(cm, mbmi) || mbmi->refinemv_flag)
+                ? MULTITAP_SHARP
+                : EIGHTTAP_REGULAR));
+#else
     assert(mbmi->interp_fltr ==
            ((mbmi->mode >= NEAR_NEARMV_OPTFLOW || use_opfl_refine_all(cm, mbmi))
                 ? MULTITAP_SHARP
                 : EIGHTTAP_REGULAR));
+#endif  // CONFIG_REFINEMV
 #else
     assert(mbmi->interp_fltr == EIGHTTAP_REGULAR);
 #endif  // CONFIG_OPTFLOW_REFINEMENT
     return 0;
   }
   if (args->modelled_rd != NULL) {
+#if CONFIG_REFINEMV
+    int use_default_filter = mbmi->refinemv_flag
+#if CONFIG_OPTFLOW_REFINEMENT
+                             || mbmi->mode >= NEAR_NEARMV_OPTFLOW ||
+                             use_opfl_refine_all(cm, mbmi)
+#endif
+        ;
+    if (has_second_ref(mbmi) && !use_default_filter) {
+#else
 #if CONFIG_OPTFLOW_REFINEMENT
     if (has_second_ref(mbmi) && mbmi->mode < NEAR_NEARMV_OPTFLOW &&
         !use_opfl_refine_all(cm, mbmi)) {
 #else
     if (has_second_ref(mbmi)) {
 #endif  // CONFIG_OPTFLOW_REFINEMENT
+#endif  // CONFIG_REFINEMV
+#if !CONFIG_SEP_COMP_DRL
       const int ref_mv_idx = mbmi->ref_mv_idx;
+#endif  // !CONFIG_SEP_COMP_DRL
       MV_REFERENCE_FRAME *refs = mbmi->ref_frame;
       const int mode0 = compound_ref0_mode(mbmi->mode);
       const int mode1 = compound_ref1_mode(mbmi->mode);
+#if CONFIG_SEP_COMP_DRL
+      const int64_t mrd =
+          AOMMIN(args->modelled_rd[mode0][get_ref_mv_idx(mbmi, 0)][refs[0]],
+                 args->modelled_rd[mode1][get_ref_mv_idx(mbmi, 1)][refs[1]]);
+#else
       const int64_t mrd = AOMMIN(args->modelled_rd[mode0][ref_mv_idx][refs[0]],
                                  args->modelled_rd[mode1][ref_mv_idx][refs[1]]);
+#endif  // CONFIG_SEP_COMP_DRL
 
       if ((*rd >> 1) > mrd && ref_best_rd < INT64_MAX) {
         return INT64_MAX;
diff --git a/av1/encoder/interp_search.h b/av1/encoder/interp_search.h
index 8dfbe2b..d5c0a77 100644
--- a/av1/encoder/interp_search.h
+++ b/av1/encoder/interp_search.h
@@ -153,6 +153,17 @@
    * Index of the last set of saved stats in the interp_filter_stats array.
    */
   int interp_filter_stats_idx;
+#if CONFIG_SKIP_ME_FOR_OPFL_MODES
+  /*!
+   * Saved MV information for opfl off case.
+   */
+  int_mv (*comp_newmv)[4][NUM_MV_PRECISIONS][2];
+  /*!
+   * Valid status of saved MV information for opfl off case.
+   */
+  int (*comp_newmv_valid)[4][NUM_MV_PRECISIONS];
+#endif  // CONFIG_SKIP_ME_FOR_OPFL_MODES
+
 } HandleInterModeArgs;
 
 /*!\cond */
diff --git a/av1/encoder/intra_mode_search.c b/av1/encoder/intra_mode_search.c
index b2d10b5..d31870a 100644
--- a/av1/encoder/intra_mode_search.c
+++ b/av1/encoder/intra_mode_search.c
@@ -10,6 +10,7 @@
  * aomedia.org/license/patent-license/.
  */
 
+#include "av1/common/av1_common_int.h"
 #include "av1/common/reconintra.h"
 
 #include "av1/encoder/intra_mode_search.h"
@@ -790,12 +791,19 @@
   set_mv_precision(mbmi, mbmi->max_mv_precision);
 #endif
 
+#if CONFIG_REFINEMV
+  mbmi->refinemv_flag = 0;
+#endif  // CONFIG_REFINEMV
+
 #if CONFIG_EXTENDED_WARP_PREDICTION
   mbmi->motion_mode = SIMPLE_TRANSLATION;
 #endif
 #if CONFIG_WARP_REF_LIST
   mbmi->warp_ref_idx = 0;
   mbmi->max_num_warp_candidates = 0;
+#if CONFIG_CWG_D067_IMPROVED_WARP
+  mbmi->warpmv_with_mvd_flag = 0;
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
 #endif  // CONFIG_WARP_REF_LIST
   RD_STATS rd_stats_y;
   av1_invalid_rd_stats(&rd_stats_y);
@@ -856,10 +864,15 @@
   if (skippable) {
     rate2 -= rd_stats_y.rate;
     if (num_planes > 1) rate2 -= intra_search_state->rate_uv_tokenonly;
+#if !CONFIG_SKIP_TXFM_OPT
     rate2 += mode_costs->skip_txfm_cost[av1_get_skip_txfm_context(xd)][1];
-  } else {
+#endif  // !CONFIG_SKIP_TXFM_OPT
+  }
+#if !CONFIG_SKIP_TXFM_OPT
+  else {
     rate2 += mode_costs->skip_txfm_cost[av1_get_skip_txfm_context(xd)][0];
   }
+#endif  // !CONFIG_SKIP_TXFM_OPT
   this_rd = RDCOST(x->rdmult, rate2, distortion2);
   this_rd_cost->rate = rate2;
   this_rd_cost->dist = distortion2;
@@ -948,6 +961,9 @@
 #if CONFIG_FLEX_MVRES
   set_mv_precision(mbmi, mbmi->max_mv_precision);
 #endif
+#if CONFIG_REFINEMV
+  mbmi->refinemv_flag = 0;
+#endif  // CONFIG_REFINEMV
   mbmi->motion_mode = SIMPLE_TRANSLATION;
 
   RD_STATS rd_stats_y_fi;
@@ -1026,10 +1042,20 @@
   assert(mbmi->ref_frame[0] == INTRA_FRAME);
   const PREDICTION_MODE mode = mbmi->mode;
   const ModeCosts *mode_costs = &x->mode_costs;
+
+#if CONFIG_EXT_DIR
+  int mrl_ctx = get_mrl_index_ctx(xd->neighbors[0], xd->neighbors[1]);
+  int mrl_idx_cost =
+      (av1_is_directional_mode(mbmi->mode) &&
+       cpi->common.seq_params.enable_mrls)
+          ? x->mode_costs.mrl_index_cost[mrl_ctx][mbmi->mrl_index]
+          : 0;
+#else
   int mrl_idx_cost = (av1_is_directional_mode(mbmi->mode) &&
                       cpi->common.seq_params.enable_mrls)
                          ? x->mode_costs.mrl_index_cost[mbmi->mrl_index]
                          : 0;
+#endif  // CONFIG_EXT_DIR
 #if CONFIG_AIMC
   int mode_cost = 0;
   const int context = get_y_mode_idx_ctx(xd);
@@ -1055,12 +1081,16 @@
   const int intra_cost_penalty = av1_get_intra_cost_penalty(
       cm->quant_params.base_qindex, cm->quant_params.y_dc_delta_q,
       cm->seq_params.base_y_dc_delta_q, cm->seq_params.bit_depth);
+#if !CONFIG_SKIP_TXFM_OPT
   const int skip_ctx = av1_get_skip_txfm_context(xd);
+#endif  // !CONFIG_SKIP_TXFM_OPT
 
   int known_rate = mode_cost;
   if (mode != DC_PRED && mode != PAETH_PRED) known_rate += intra_cost_penalty;
+#if !CONFIG_SKIP_TXFM_OPT
   known_rate += AOMMIN(mode_costs->skip_txfm_cost[skip_ctx][0],
                        mode_costs->skip_txfm_cost[skip_ctx][1]);
+#endif  // !CONFIG_SKIP_TXFM_OPT
   const int64_t known_rd = RDCOST(x->rdmult, known_rate, 0);
   if (known_rd > best_rd) {
     intra_search_state->skip_intra_modes = 1;
@@ -1140,10 +1170,14 @@
 #if !CONFIG_AIMC
     if (intra_search_state->rate_uv_intra == INT_MAX) {
 #endif  // !CONFIG_AIMC
-      // If no good uv-predictor had been found, search for it.
-      const int rate_y = rd_stats_y->skip_txfm
-                             ? mode_costs->skip_txfm_cost[skip_ctx][1]
-                             : rd_stats_y->rate;
+        // If no good uv-predictor had been found, search for it.
+#if CONFIG_SKIP_TXFM_OPT
+      const int rate_y = rd_stats_y->rate;
+#else
+    const int rate_y = rd_stats_y->skip_txfm
+                           ? mode_costs->skip_txfm_cost[skip_ctx][1]
+                           : rd_stats_y->rate;
+#endif  // CONFIG_SKIP_TXFM_OPT
       const int64_t rdy =
           RDCOST(x->rdmult, rate_y + mode_cost_y, rd_stats_y->dist);
       if (best_rd < (INT64_MAX / 2) && rdy > (best_rd + (best_rd >> 2))) {
@@ -1224,8 +1258,10 @@
   // Intra block is always coded as non-skip
   rd_stats->skip_txfm = 0;
   rd_stats->dist = rd_stats_y->dist + rd_stats_uv->dist;
+#if !CONFIG_SKIP_TXFM_OPT
   // Add in the cost of the no skip flag.
   rd_stats->rate += mode_costs->skip_txfm_cost[skip_ctx][0];
+#endif  // !CONFIG_SKIP_TXFM_OPT
   // Calculate the final RD estimate for this mode.
   const int64_t this_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
   // Keep record of best intra rd
@@ -1349,13 +1385,23 @@
         continue;
 
       if (!is_directional_mode && mrl_idx) continue;
+#if !CONFIG_EXT_DIR
       if (best_mbmi->mrl_index == 0 && mbmi->mrl_index > 1 &&
           av1_is_directional_mode(best_mbmi->mode) == 0) {
         continue;
       }
+#endif  // CONFIG_EXT_DIR
+#if CONFIG_EXT_DIR
+      int mrl_ctx = get_mrl_index_ctx(xd->neighbors[0], xd->neighbors[1]);
+      int mrl_idx_cost =
+          (is_directional_mode && enable_mrls_flag)
+              ? x->mode_costs.mrl_index_cost[mrl_ctx][mbmi->mrl_index]
+              : 0;
+#else
       int mrl_idx_cost = (is_directional_mode && enable_mrls_flag)
                              ? x->mode_costs.mrl_index_cost[mbmi->mrl_index]
                              : 0;
+#endif  // CONFIG_EXT_DIR
 #if CONFIG_AIMC
       mode_costs += mrl_idx_cost;
 #endif  // CONFIG_AIMC
@@ -1548,13 +1594,23 @@
         continue;
 
       if (!is_directional_mode && mrl_idx) continue;
+#if !CONFIG_EXT_DIR
       if (best_mbmi.mrl_index == 0 && mbmi->mrl_index > 1 &&
           av1_is_directional_mode(best_mbmi.mode) == 0) {
         continue;
       }
+#endif  // CONFIG_EXT_DIR
+#if CONFIG_EXT_DIR
+      int mrl_ctx = get_mrl_index_ctx(xd->neighbors[0], xd->neighbors[1]);
+      int mrl_idx_cost =
+          (is_directional_mode && enable_mrls_flag)
+              ? x->mode_costs.mrl_index_cost[mrl_ctx][mbmi->mrl_index]
+              : 0;
+#else
       int mrl_idx_cost = (is_directional_mode && enable_mrls_flag)
                              ? x->mode_costs.mrl_index_cost[mbmi->mrl_index]
                              : 0;
+#endif  // CONFIG_EXT_DIR
 #if CONFIG_AIMC
       mode_costs += mrl_idx_cost;
 #endif  // CONFIG_AIMC
diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c
index 572864a..5280a23 100644
--- a/av1/encoder/mcomp.c
+++ b/av1/encoder/mcomp.c
@@ -42,7 +42,7 @@
 #if CONFIG_FLEX_MVRES
                                        ,
                                        MvSubpelPrecision pb_mv_precision
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
                                        ,
                                        const int is_ibc_cost
 #endif
@@ -60,7 +60,7 @@
   mv_cost_params->is_adaptive_mvd = is_adaptive_mvd;
 #endif  // CONFIG_ADAPTIVE_MVD
 
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
   mv_cost_params->is_ibc_cost = is_ibc_cost;
 #endif
 
@@ -119,7 +119,7 @@
     const MACROBLOCK *x, BLOCK_SIZE bsize, const MV *ref_mv,
 #if CONFIG_FLEX_MVRES
     const MvSubpelPrecision pb_mv_precision,
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
     const int is_ibc_cost,
 #endif
 #endif
@@ -136,6 +136,10 @@
       enable_adaptive_mvd_resolution(&cpi->common, mbmi);
 #endif  // CONFIG_ADAPTIVE_MVD
 
+#if CONFIG_CWP
+  ms_params->xd = xd;
+#endif  // CONFIG_CWP
+
   // High level params
   ms_params->bsize = bsize;
   ms_params->vfp = &cpi->fn_ptr[bsize];
@@ -143,12 +147,16 @@
   init_ms_buffers(&ms_params->ms_buffers, x);
 
   SEARCH_METHODS search_method = mv_sf->search_method;
+  const int min_dim = AOMMIN(block_size_wide[bsize], block_size_high[bsize]);
+  const int max_dim = AOMMAX(block_size_wide[bsize], block_size_high[bsize]);
   if (mv_sf->use_bsize_dependent_search_method) {
-    const int min_dim = AOMMIN(block_size_wide[bsize], block_size_high[bsize]);
     if (min_dim >= 32) {
       search_method = get_faster_search_method(search_method);
     }
   }
+  if (max_dim >= 256) {
+    search_method = get_faster_search_method(search_method);
+  }
 #if CONFIG_FLEX_MVRES
   // MV search of flex MV precision is supported only for NSTEP or DIAMOND
   // search
@@ -209,7 +217,7 @@
 #endif  // CONFIG_ADAPTIVE_MVD
 #if CONFIG_FLEX_MVRES
                       ref_mv, pb_mv_precision
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
                       ,
                       is_ibc_cost
 #endif
@@ -237,7 +245,7 @@
   ms_params->allow_hp = cm->features.allow_high_precision_mv;
 #endif
 
-#if CONFIG_BVCOST_UPDATE && CONFIG_FLEX_MVRES
+#if CONFIG_IBC_BV_IMPROVEMENT && CONFIG_FLEX_MVRES
   const int is_ibc_cost = 0;
 #endif
 
@@ -287,7 +295,7 @@
 #if CONFIG_FLEX_MVRES
                       ref_mv, pb_mv_precision
 
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
                       ,
                       is_ibc_cost
 #endif
@@ -303,6 +311,10 @@
   ms_params->var_params.subpel_search_type =
       cpi->sf.mv_sf.use_accurate_subpel_search;
 #endif
+  if (AOMMAX(block_size_wide[bsize], block_size_high[bsize]) >= 256) {
+    ms_params->var_params.subpel_search_type =
+        AOMMIN(ms_params->var_params.subpel_search_type, USE_2_TAPS);
+  }
 
   ms_params->var_params.w = block_size_wide[bsize];
   ms_params->var_params.h = block_size_high[bsize];
@@ -471,7 +483,7 @@
 #if CONFIG_ADAPTIVE_MVD
     const int is_adaptive_mvd,
 #endif
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
     const int is_ibc_cost,
 #endif
     const MvCosts *mv_costs, int weight, int round_bits) {
@@ -480,7 +492,7 @@
   const int *mvjcost =
       is_adaptive_mvd
           ? mv_costs->amvd_nmv_joint_cost
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
           : (is_ibc_cost ? mv_costs->dv_joint_cost : mv_costs->nmv_joint_cost);
 #else
           : mv_costs->nmv_joint_cost;
@@ -488,7 +500,7 @@
   const int *const *mvcost =
       is_adaptive_mvd
           ? CONVERT_TO_CONST_MVCOST(mv_costs->amvd_nmv_cost)
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
           : (is_ibc_cost ? CONVERT_TO_CONST_MVCOST(mv_costs->dv_nmv_cost)
                          : CONVERT_TO_CONST_MVCOST(
                                mv_costs->nmv_costs[pb_mv_precision]));
@@ -497,7 +509,7 @@
 #endif
 
 #else
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
   const int *mvjcost =
       (is_ibc_cost ? mv_costs->dv_joint_cost : mv_costs->nmv_joint_cost);
   const int *const *mvcost =
@@ -556,7 +568,7 @@
                     const int is_adaptive_mvd
 #endif
 ) {
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
   // For ibc block this function should not be called
   const int is_ibc_cost = 0;
 #endif
@@ -565,7 +577,7 @@
 #if CONFIG_ADAPTIVE_MVD
                                     is_adaptive_mvd,
 #endif
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
                                     is_ibc_cost,
 #endif
                                     mv_costs, weight, 7);
@@ -618,7 +630,7 @@
 #if CONFIG_ADAPTIVE_MVD
           mv_cost_params->is_adaptive_mvd,
 #endif
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
           mv_cost_params->is_ibc_cost,
 #endif
           mv_costs, mv_costs->errorperbit,
@@ -677,7 +689,7 @@
 
   const MvCosts *mv_costs = mv_cost_params->mv_costs;
 
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
   const int *mvjcost =
       mv_cost_params->is_ibc_cost
           ? mv_costs->dv_joint_cost
@@ -1062,7 +1074,7 @@
          ((col + range) <= mv_limits->col_max);
 }
 
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
 int av1_get_mv_err_cost(const MV *mv, const MV_COST_PARAMS *mv_cost_params) {
 #if CONFIG_FLEX_MVRES
   return mv_err_cost(*mv, mv_cost_params);
@@ -1073,7 +1085,7 @@
                      mv_cost_params->mv_cost_type);
 #endif
 }
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
 
 static INLINE int get_mvpred_var_cost(
     const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, const FULLPEL_MV *this_mv) {
@@ -1170,6 +1182,17 @@
   return bestsme;
 }
 
+#if CONFIG_CWP
+// Fills the fwd/bck blend weights from the CWP index (swapped if inverted).
+static INLINE void set_cmp_weight(const MB_MODE_INFO *mi, int invert_mask,
+                                  DIST_WTD_COMP_PARAMS *jcp_param) {
+  const int idx = get_cwp_idx(mi);
+  const int fwd = invert_mask ? (1 << CWP_WEIGHT_BITS) - idx : idx;
+  jcp_param->fwd_offset = fwd;
+  jcp_param->bck_offset = (1 << CWP_WEIGHT_BITS) - fwd;
+}
+#endif  // CONFIG_CWP
+
 static INLINE int get_mvpred_compound_sad(
     const FULLPEL_MOTION_SEARCH_PARAMS *ms_params,
     const struct buf_2d *const src, const uint16_t *const ref_address,
@@ -1187,6 +1210,16 @@
     return vfp->msdf(src_buf, src_stride, ref_address, ref_stride, second_pred,
                      mask, mask_stride, invert_mask);
   } else if (second_pred) {
+#if CONFIG_CWP
+    const MB_MODE_INFO *mi = ms_params->xd->mi[0];
+    if (get_cwp_idx(mi) != CWP_EQUAL) {
+      DIST_WTD_COMP_PARAMS jcp_param;
+      set_cmp_weight(mi, invert_mask, &jcp_param);
+
+      return vfp->jsdaf(src_buf, src_stride, ref_address, ref_stride,
+                        second_pred, &jcp_param);
+    }
+#endif  // CONFIG_CWP
     return vfp->sdaf(src_buf, src_stride, ref_address, ref_stride, second_pred);
   } else {
     return ms_params->sdf(src_buf, src_stride, ref_address, ref_stride);
@@ -2804,7 +2837,28 @@
   return var;
 }
 
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_CWP
+// Returns the cost of signalling `cwp_idx`: one binary decision is costed per
+// position until the coding index is reached (the last position is implied).
+int av1_get_cwp_idx_cost(int8_t cwp_idx, const AV1_COMMON *const cm,
+                         const MACROBLOCK *x) {
+  assert(cwp_idx >= CWP_MIN && cwp_idx <= CWP_MAX);
+  const int ctx = 0;
+  MB_MODE_INFO *const mbmi = x->e_mbd.mi[0];
+  const int8_t coding_idx = get_cwp_coding_idx(cwp_idx, 1, cm, mbmi);
+
+  int rate = 0;
+  // `pos` is both the candidate index and the position in the cost table.
+  for (int pos = 0; pos < MAX_CWP_NUM - 1; ++pos) {
+    const int keep_going = (coding_idx != pos);
+    rate += x->mode_costs.cwp_idx_cost[ctx][pos][keep_going];
+    if (!keep_going) break;
+  }
+  return rate;
+}
+#endif  // CONFIG_CWP
+
+#if CONFIG_IBC_BV_IMPROVEMENT
 int av1_get_ref_mvpred_var_cost(const AV1_COMP *cpi, const MACROBLOCKD *xd,
                                 const FULLPEL_MOTION_SEARCH_PARAMS *ms_params) {
   const BLOCK_SIZE bsize = ms_params->bsize;
@@ -2946,7 +3000,7 @@
   }
   return INT_MAX;
 }
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
 
 int av1_intrabc_hash_search(const AV1_COMP *cpi, const MACROBLOCKD *xd,
                             const FULLPEL_MOTION_SEARCH_PARAMS *ms_params,
@@ -2973,13 +3027,13 @@
 
   uint32_t hash_value1, hash_value2;
   int best_hash_cost = INT_MAX;
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
   int best_intrabc_mode = 0;
   int best_intrabc_drl_idx = 0;
   int_mv best_ref_bv;
   best_ref_bv.as_mv = *ms_params->mv_cost_params.ref_mv;
   MB_MODE_INFO *mbmi = xd->mi[0];
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
 
   // for the hashMap
   hash_table *ref_frame_hash = &intrabc_hash_info->intrabc_hash_table;
@@ -3014,7 +3068,7 @@
 #endif
                                   ))
         continue;
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
       int refCost = get_mvpred_var_cost(ms_params, &hash_mv);
       int cur_intrabc_mode = 0;
       int cur_intrabc_drl_idx = 0;
@@ -3035,24 +3089,24 @@
       }
 #else
       const int refCost = get_mvpred_var_cost(ms_params, &hash_mv);
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
       if (refCost < best_hash_cost) {
         best_hash_cost = refCost;
         *best_mv = hash_mv;
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
         best_intrabc_mode = cur_intrabc_mode;
         best_intrabc_drl_idx = cur_intrabc_drl_idx;
         best_ref_bv = cur_ref_bv;
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
       }
     }
   }
 
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
   mbmi->ref_bv = best_ref_bv;
   mbmi->intrabc_drl_idx = best_intrabc_drl_idx;
   mbmi->intrabc_mode = best_intrabc_mode;
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
 
   return best_hash_cost;
 }
@@ -3429,9 +3483,22 @@
           subpel_y_q3, ref, ref_stride, mask, mask_stride, invert_mask, xd->bd,
           subpel_search_type);
     } else {
-      aom_highbd_comp_avg_upsampled_pred(
-          xd, cm, mi_row, mi_col, this_mv, pred, second_pred, w, h, subpel_x_q3,
-          subpel_y_q3, ref, ref_stride, xd->bd, subpel_search_type);
+#if CONFIG_CWP
+      if (get_cwp_idx(xd->mi[0]) != CWP_EQUAL) {
+        DIST_WTD_COMP_PARAMS jcp_param;
+        set_cmp_weight(xd->mi[0], invert_mask, &jcp_param);
+
+        aom_highbd_dist_wtd_comp_avg_upsampled_pred(
+            xd, cm, mi_row, mi_col, this_mv, pred, second_pred, w, h,
+            subpel_x_q3, subpel_y_q3, ref, ref_stride, xd->bd, &jcp_param,
+            subpel_search_type);
+      } else
+#endif  // CONFIG_CWP
+
+        aom_highbd_comp_avg_upsampled_pred(xd, cm, mi_row, mi_col, this_mv,
+                                           pred, second_pred, w, h, subpel_x_q3,
+                                           subpel_y_q3, ref, ref_stride, xd->bd,
+                                           subpel_search_type);
     }
   } else {
     aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, this_mv, pred, w, h,
@@ -3914,18 +3981,11 @@
 #endif  // CONFIG_C071_SUBBLK_WARPMV
     lower_mv_precision(&ref_mv, mbmi->pb_mv_precision);
     // We are not signaling other_mv. So frame level precision should be okay.
-#if !CONFIG_C071_SUBBLK_WARPMV
-  lower_mv_precision(other_mv, cm->features.fr_mv_precision);
-#endif  // !CONFIG_C071_SUBBLK_WARPMV
 #else
-#if !CONFIG_C071_SUBBLK_WARPMV
-  lower_mv_precision(other_mv, allow_hp,
-                     cm->features.cur_frame_force_integer_mv);
-#endif  // !CONFIG_C071_SUBBLK_WARPMV
 #endif
 
-  // How many steps to take. A round of 0 means fullpel search only, 1 means
-  // half-pel, and so on.
+    // How many steps to take. A round of 0 means fullpel search only, 1 means
+    // half-pel, and so on.
 #if CONFIG_FLEX_MVRES
   const int round = (mbmi->pb_mv_precision >= MV_PRECISION_ONE_PEL)
                         ? AOMMIN(FULL_PEL - forced_stop,
@@ -4163,10 +4223,7 @@
   if (mbmi->pb_mv_precision < MV_PRECISION_HALF_PEL)
 #endif
     lower_mv_precision(&ref_mv, mbmi->pb_mv_precision);
-    // We are not signaling other_mv. So frame level precision should be okay.
-#if !CONFIG_C071_SUBBLK_WARPMV
-  lower_mv_precision(other_mv, cm->features.fr_mv_precision);
-#endif  // CONFIG_C071_SUBBLK_WARPMV
+  // We are not signaling other_mv. So frame level precision should be okay.
 
   unsigned int besterr = INT_MAX;
 
@@ -4392,23 +4449,10 @@
   // perform prediction for second MV
   const BLOCK_SIZE bsize = mbmi->sb_type[PLANE_TYPE_Y];
 
-#if CONFIG_FLEX_MVRES
 #if BUGFIX_AMVD_AMVR
   set_amvd_mv_precision(mbmi, mbmi->max_mv_precision);
-#if !CONFIG_C071_SUBBLK_WARPMV
-  lower_mv_precision(other_mv, cm->features.fr_mv_precision);
-#endif  // !CONFIG_C071_SUBBLK_WARPMV
 #else
   assert(mbmi->pb_mv_precision == mbmi->max_mv_precision);
-#if !CONFIG_C071_SUBBLK_WARPMV
-  lower_mv_precision(other_mv, mbmi->pb_mv_precision);
-#endif  // !CONFIG_C071_SUBBLK_WARPMV
-#endif
-#else
-#if !CONFIG_C071_SUBBLK_WARPMV
-  lower_mv_precision(other_mv, allow_hp,
-                     cm->features.cur_frame_force_integer_mv);
-#endif  // !CONFIG_C071_SUBBLK_WARPMV
 #endif
 
   // How many steps to take. A round of 0 means fullpel search only, 1 means
@@ -5581,7 +5625,15 @@
   int mi_row = xd->mi_row;
   int mi_col = xd->mi_col;
 
-  bool can_refine_mv = (mbmi->mode == NEWMV);
+#if CONFIG_CWG_D067_IMPROVED_WARP
+  assert(IMPLIES(mbmi->warpmv_with_mvd_flag, mbmi->mode == WARPMV));
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+
+  bool can_refine_mv = (mbmi->mode == NEWMV
+#if CONFIG_CWG_D067_IMPROVED_WARP
+                        || (mbmi->mode == WARPMV && mbmi->warpmv_with_mvd_flag)
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+  );
   const SubpelMvLimits *mv_limits = &ms_params->mv_limits;
 
   // get the base parameters
diff --git a/av1/encoder/mcomp.h b/av1/encoder/mcomp.h
index e6571b2..2e57f5e 100644
--- a/av1/encoder/mcomp.h
+++ b/av1/encoder/mcomp.h
@@ -92,7 +92,7 @@
 #if CONFIG_ADAPTIVE_MVD
   int is_adaptive_mvd;
 #endif  // CONFIG_ADAPTIVE_MVD
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
   int is_ibc_cost;
 #endif
 #endif
@@ -217,10 +217,10 @@
   int mi_row;
   int mi_col;
 #endif  // CONFIG_IBC_SR_EXT
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
   MACROBLOCK *x;
   int ref_bv_cnt;
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
 
   MSBuffers ms_buffers;
 
@@ -261,7 +261,7 @@
     const MACROBLOCK *x, BLOCK_SIZE bsize, const MV *ref_mv,
 #if CONFIG_FLEX_MVRES
     const MvSubpelPrecision pb_mv_precision,
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
     const int is_ibc_cost,
 #endif
 #endif
@@ -577,9 +577,6 @@
   int row_max =
       av1_lower_mv_limit(GET_MV_SUBPEL(mv_limits->row_max), sub_pel_prec_shift);
 
-  const int mv_low = av1_lower_mv_limit(MV_LOW + 1, sub_pel_prec_shift);
-  const int mv_upp = av1_lower_mv_limit(MV_UPP - 1, sub_pel_prec_shift);
-
   int minc = AOMMAX(col_min, low_prec_ref_mv.col - max_mv);
   int maxc = AOMMIN(col_max, low_prec_ref_mv.col + max_mv);
   int minr = AOMMAX(row_min, low_prec_ref_mv.row - max_mv);
@@ -588,10 +585,10 @@
   maxc = AOMMAX(minc, maxc);
   maxr = AOMMAX(minr, maxr);
 
-  subpel_limits->col_min = AOMMAX(mv_low + (1 << sub_pel_prec_shift), minc);
-  subpel_limits->col_max = AOMMIN(mv_upp - (1 << sub_pel_prec_shift), maxc);
-  subpel_limits->row_min = AOMMAX(mv_low + (1 << sub_pel_prec_shift), minr);
-  subpel_limits->row_max = AOMMIN(mv_upp - (1 << sub_pel_prec_shift), maxr);
+  subpel_limits->col_min = AOMMAX(MV_LOW + (1 << sub_pel_prec_shift), minc);
+  subpel_limits->col_max = AOMMIN(MV_UPP - (1 << sub_pel_prec_shift), maxc);
+  subpel_limits->row_min = AOMMAX(MV_LOW + (1 << sub_pel_prec_shift), minr);
+  subpel_limits->row_max = AOMMIN(MV_UPP - (1 << sub_pel_prec_shift), maxr);
 #else
 
   const int max_mv = GET_MV_SUBPEL(MAX_FULL_PEL_VAL);
@@ -628,7 +625,13 @@
          (mv.row >= mv_limits->row_min) && (mv.row <= mv_limits->row_max);
 }
 
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_CWP
+// Returns the cost for signaling the index of compound weighted prediction
+int av1_get_cwp_idx_cost(int8_t cwp_idx, const AV1_COMMON *const cm,
+                         const MACROBLOCK *x);
+#endif  // CONFIG_CWP
+
+#if CONFIG_IBC_BV_IMPROVEMENT
 // Returns the cost of using the current mv during the motion search
 int av1_get_mv_err_cost(const MV *mv, const MV_COST_PARAMS *mv_cost_params);
 
@@ -654,7 +657,7 @@
 int av1_get_ref_mvpred_var_cost(const struct AV1_COMP *cpi,
                                 const MACROBLOCKD *xd,
                                 const FULLPEL_MOTION_SEARCH_PARAMS *ms_params);
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/av1/encoder/motion_search_facade.c b/av1/encoder/motion_search_facade.c
index 9b72201..b6481a4 100644
--- a/av1/encoder/motion_search_facade.c
+++ b/av1/encoder/motion_search_facade.c
@@ -50,12 +50,23 @@
 void av1_single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
                               BLOCK_SIZE bsize, int ref_idx, int *rate_mv,
                               int search_range, inter_mode_info *mode_info,
-                              int_mv *best_mv) {
+                              int_mv *best_mv
+#if CONFIG_WARPMV && CONFIG_CWG_D067_IMPROVED_WARP
+                              ,
+                              const int_mv *warp_ref_mv
+#endif  // CONFIG_WARPMV && CONFIG_CWG_D067_IMPROVED_WARP
+) {
   MACROBLOCKD *xd = &x->e_mbd;
   const AV1_COMMON *cm = &cpi->common;
   const MotionVectorSearchParams *mv_search_params = &cpi->mv_search_params;
   const int num_planes = av1_num_planes(cm);
   MB_MODE_INFO *mbmi = xd->mi[0];
+#if CONFIG_WARPMV && CONFIG_CWG_D067_IMPROVED_WARP
+  MOTION_MODE backup_motion_mode = mbmi->motion_mode;
+  // Make the motion mode translational, so that translation MS can be used.
+  if (mbmi->mode == WARPMV) mbmi->motion_mode = SIMPLE_TRANSLATION;
+#endif  // CONFIG_WARPMV && CONFIG_CWG_D067_IMPROVED_WARP
+
   struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0, 0, 0, 0 } };
   int bestsme = INT_MAX;
   const int ref = mbmi->ref_frame[ref_idx];
@@ -97,7 +108,11 @@
   }
 
 #if CONFIG_FLEX_MVRES
-  MV ref_mv_low_prec = av1_get_ref_mv(x, ref_idx).as_mv;
+  MV ref_mv_low_prec =
+#if CONFIG_WARPMV && CONFIG_CWG_D067_IMPROVED_WARP
+      (mbmi->mode == WARPMV) ? warp_ref_mv->as_mv :
+#endif  // CONFIG_WARPMV && CONFIG_CWG_D067_IMPROVED_WARP
+                             av1_get_ref_mv(x, ref_idx).as_mv;
 #if CONFIG_C071_SUBBLK_WARPMV
   MV sub_mv_offset = { 0, 0 };
   get_phase_from_mv(ref_mv_low_prec, &sub_mv_offset, mbmi->pb_mv_precision);
@@ -229,7 +244,7 @@
       mv_search_params->search_site_cfg[SS_CFG_SRC];
 #if CONFIG_FLEX_MVRES
   const MvSubpelPrecision pb_mv_precision = mbmi->pb_mv_precision;
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
   const int is_ibc_cost = 0;
 #endif
 #endif
@@ -238,7 +253,7 @@
 #if CONFIG_FLEX_MVRES
   av1_make_default_fullpel_ms_params(&full_ms_params, cpi, x, bsize, &ref_mv,
                                      pb_mv_precision,
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
                                      is_ibc_cost,
 #endif
                                      src_search_sites, fine_search_interval);
@@ -305,6 +320,9 @@
   //     for the other ref_mv.
   if (cpi->sf.inter_sf.skip_repeated_full_newmv &&
       mbmi->motion_mode == SIMPLE_TRANSLATION &&
+#if CONFIG_CWG_D067_IMPROVED_WARP
+      mbmi->mode != WARPMV &&
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
       best_mv->as_int != INVALID_MV) {
     int_mv this_mv;
     this_mv.as_mv = get_mv_from_fullmv(&best_mv->as_fullmv);
@@ -320,7 +338,11 @@
       this_mv.as_mv.col += sub_mv_offset.col;
     }
 #endif  // CONFIG_C071_SUBBLK_WARPMV
+#if CONFIG_SEP_COMP_DRL
+    const int ref_mv_idx = av1_ref_mv_idx_type(mbmi, mbmi->ref_mv_idx);
+#else
     const int ref_mv_idx = mbmi->ref_mv_idx;
+#endif  // CONFIG_SEP_COMP_DRL
 #if CONFIG_FLEX_MVRES
     const int this_mv_rate = av1_mv_bit_cost(
         &this_mv.as_mv, &ref_mv, pb_mv_precision, mv_costs, MV_COST_WEIGHT
@@ -477,6 +499,10 @@
   assert(is_this_mv_precision_compliant(best_mv->as_mv, mbmi->pb_mv_precision));
 #endif  // !CONFIG_C071_SUBBLK_WARPMV
 #endif
+#if CONFIG_WARPMV && CONFIG_CWG_D067_IMPROVED_WARP
+  // Restore the motion mode
+  if (mbmi->mode == WARPMV) mbmi->motion_mode = backup_motion_mode;
+#endif  // CONFIG_WARPMV && CONFIG_CWG_D067_IMPROVED_WARP
 }
 
 #if CONFIG_FLEX_MVRES
@@ -531,13 +557,13 @@
     lower_mv_precision(&ref_mv_low_prec, mbmi->pb_mv_precision);
   const MV ref_mv = ref_mv_low_prec;
 
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
   const int is_ibc_cost = 0;
 #endif
 
   av1_make_default_fullpel_ms_params(&full_ms_params, cpi, x, bsize, &ref_mv,
                                      pb_mv_precision,
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
                                      is_ibc_cost,
 #endif
 
@@ -579,7 +605,11 @@
       this_mv.as_mv.col += sub_mv_offset.col;
     }
 #endif  // CONFIG_C071_SUBBLK_WARPMV
+#if CONFIG_SEP_COMP_DRL
+    const int ref_mv_idx = av1_ref_mv_idx_type(mbmi, mbmi->ref_mv_idx);
+#else
     const int ref_mv_idx = mbmi->ref_mv_idx;
+#endif  // CONFIG_SEP_COMP_DRL
     const int this_mv_rate = av1_mv_bit_cost(
         &this_mv.as_mv, &ref_mv, pb_mv_precision, mv_costs, MV_COST_WEIGHT
 #if CONFIG_ADAPTIVE_MVD
@@ -763,7 +793,7 @@
     // Do full-pixel compound motion search on the current reference frame.
     if (id) xd->plane[plane].pre[0] = ref_yv12[id];
 
-#if CONFIG_FLEX_MVRES && CONFIG_BVCOST_UPDATE
+#if CONFIG_FLEX_MVRES && CONFIG_IBC_BV_IMPROVEMENT
     const int is_ibc_cost = 0;
 #endif
 
@@ -773,7 +803,7 @@
                                        &ref_mv[id].as_mv,
 #if CONFIG_FLEX_MVRES
                                        pb_mv_precision,
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
                                        is_ibc_cost,
 #endif
 #endif
@@ -1020,7 +1050,7 @@
   const MvCosts *mv_costs = &x->mv_costs;
 #if CONFIG_FLEX_MVRES
   MvSubpelPrecision pb_mv_precision = mbmi->pb_mv_precision;
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
   const int is_ibc_cost = 0;
 #endif
 #endif
@@ -1178,7 +1208,7 @@
                                        &ref_mv.as_mv,
 #if CONFIG_FLEX_MVRES
                                        pb_mv_precision,
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
                                        is_ibc_cost,
 #endif
 #endif
@@ -1485,6 +1515,9 @@
   mbmi->use_intrabc[1] = 0;
 #endif  // CONFIG_IBC_SR_EXT
 
+#if CONFIG_CWP
+  mbmi->cwp_idx = CWP_EQUAL;
+#endif  // CONFIG_CWP
 #if CONFIG_FLEX_MVRES
   set_default_max_mv_precision(mbmi, xd->sbi->sb_mv_precision);
   set_mv_precision(mbmi, mbmi->max_mv_precision);
@@ -1500,6 +1533,10 @@
   mbmi->bawp_flag = 0;
 #endif
 
+#if CONFIG_REFINEMV
+  mbmi->refinemv_flag = 0;
+#endif  // CONFIG_REFINEMV
+
   const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, ref);
   const YV12_BUFFER_CONFIG *scaled_ref_frame =
       av1_get_scaled_ref_frame(cpi, ref);
@@ -1530,7 +1567,7 @@
   const int fine_search_interval = use_fine_search_interval(cpi);
 #if CONFIG_FLEX_MVRES
   const MvSubpelPrecision pb_mv_precision = mbmi->pb_mv_precision;
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
   const int is_ibc_cost = 0;
 #endif
 #endif
@@ -1539,7 +1576,7 @@
   av1_make_default_fullpel_ms_params(&full_ms_params, cpi, x, bsize, &ref_mv,
 #if CONFIG_FLEX_MVRES
                                      pb_mv_precision,
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
                                      is_ibc_cost,
 #endif
 #endif
@@ -1677,7 +1714,7 @@
 
 #if CONFIG_FLEX_MVRES
   const MvSubpelPrecision pb_mv_precision = mbmi->pb_mv_precision;
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
   const int is_ibc_cost = 0;
 #endif
 #endif
@@ -1686,7 +1723,7 @@
   av1_make_default_fullpel_ms_params(&full_ms_params, cpi, x, bsize, &ref_mv,
 #if CONFIG_FLEX_MVRES
                                      pb_mv_precision,
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
                                      is_ibc_cost,
 #endif
 #endif
diff --git a/av1/encoder/motion_search_facade.h b/av1/encoder/motion_search_facade.h
index e116997..066c438 100644
--- a/av1/encoder/motion_search_facade.h
+++ b/av1/encoder/motion_search_facade.h
@@ -35,7 +35,13 @@
 void av1_single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
                               BLOCK_SIZE bsize, int ref_idx, int *rate_mv,
                               int search_range, inter_mode_info *mode_info,
-                              int_mv *best_mv);
+                              int_mv *best_mv
+#if CONFIG_WARPMV && CONFIG_CWG_D067_IMPROVED_WARP
+                              ,
+                              const int_mv *warp_ref_mv
+#endif  // CONFIG_WARPMV && CONFIG_CWG_D067_IMPROVED_WARP
+);
+
 #if CONFIG_FLEX_MVRES
 void av1_single_motion_search_high_precision(const AV1_COMP *const cpi,
                                              MACROBLOCK *x, BLOCK_SIZE bsize,
diff --git a/av1/encoder/mv_prec.c b/av1/encoder/mv_prec.c
index 2ed092d..2b0f175 100644
--- a/av1/encoder/mv_prec.c
+++ b/av1/encoder/mv_prec.c
@@ -28,23 +28,42 @@
 static AOM_INLINE int_mv get_ref_mv_for_mv_stats(
     const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT_FRAME *mbmi_ext_frame,
     int ref_idx) {
+#if CONFIG_SEP_COMP_DRL
+  const int ref_mv_idx = get_ref_mv_idx(mbmi, ref_idx);
+#else
   const int ref_mv_idx = mbmi->ref_mv_idx;
+#endif  // CONFIG_SEP_COMP_DRL
   assert(IMPLIES(have_nearmv_newmv_in_inter_mode(mbmi->mode),
                  has_second_ref(mbmi)));
 
   const MV_REFERENCE_FRAME *ref_frames = mbmi->ref_frame;
   const int8_t ref_frame_type = av1_ref_frame_type(ref_frames);
+#if CONFIG_SEP_COMP_DRL
+  const CANDIDATE_MV *curr_ref_mv_stack =
+      has_second_drl(mbmi) ? mbmi_ext_frame->ref_mv_stack[ref_idx]
+                           : mbmi_ext_frame->ref_mv_stack[0];
+#else
   const CANDIDATE_MV *curr_ref_mv_stack = mbmi_ext_frame->ref_mv_stack;
+#endif  // CONFIG_SEP_COMP_DRL
 
   if (is_inter_ref_frame(ref_frames[1])) {
     assert(ref_idx == 0 || ref_idx == 1);
+#if CONFIG_SEP_COMP_DRL
+    return ref_idx && !has_second_drl(mbmi)
+               ? curr_ref_mv_stack[ref_mv_idx].comp_mv
+#else
     return ref_idx ? curr_ref_mv_stack[ref_mv_idx].comp_mv
-                   : curr_ref_mv_stack[ref_mv_idx].this_mv;
+#endif  // CONFIG_SEP_COMP_DRL
+               : curr_ref_mv_stack[ref_mv_idx].this_mv;
   }
 
   assert(ref_idx == 0);
 #if CONFIG_TIP
+#if CONFIG_SEP_COMP_DRL
+  if (ref_mv_idx < mbmi_ext_frame->ref_mv_count[0]) {
+#else
   if (ref_mv_idx < mbmi_ext_frame->ref_mv_count) {
+#endif  // CONFIG_SEP_COMP_DRL
     return curr_ref_mv_stack[ref_mv_idx].this_mv;
   } else if (is_tip_ref_frame(ref_frame_type)) {
     int_mv zero_mv;
@@ -629,10 +648,14 @@
 
   const int hbs_w = mi_size_wide[bsize] / 2;
   const int hbs_h = mi_size_high[bsize] / 2;
-#if !CONFIG_H_PARTITION
+#if CONFIG_UNEVEN_4WAY
+  const int ebs_w = mi_size_wide[bsize] / 8;
+  const int ebs_h = mi_size_high[bsize] / 8;
+#endif  // CONFIG_UNEVEN_4WAY
+#if !CONFIG_EXT_RECUR_PARTITIONS
   const int qbs_w = mi_size_wide[bsize] / 4;
   const int qbs_h = mi_size_high[bsize] / 4;
-#endif  // !CONFIG_H_PARTITION
+#endif  // !CONFIG_EXT_RECUR_PARTITIONS
   switch (partition) {
     case PARTITION_NONE:
       collect_mv_stats_b(mv_stats, cpi, mi_row, mi_col);
@@ -670,7 +693,68 @@
                           subsize, ptree->sub_tree[3]);
       break;
 #if CONFIG_EXT_RECUR_PARTITIONS
-#if CONFIG_H_PARTITION
+#if CONFIG_UNEVEN_4WAY
+    case PARTITION_HORZ_4A: {
+      const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_HORZ);
+      const BLOCK_SIZE bsize_med =
+          get_partition_subsize(bsize_big, PARTITION_HORZ);
+      assert(subsize == get_partition_subsize(bsize_med, PARTITION_HORZ));
+      collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col, subsize,
+                          ptree->sub_tree[0]);
+      collect_mv_stats_sb(mv_stats, cpi, mi_row + ebs_h, mi_col, bsize_med,
+                          ptree->sub_tree[1]);
+      collect_mv_stats_sb(mv_stats, cpi, mi_row + 3 * ebs_h, mi_col, bsize_big,
+                          ptree->sub_tree[2]);
+      collect_mv_stats_sb(mv_stats, cpi, mi_row + 7 * ebs_h, mi_col, subsize,
+                          ptree->sub_tree[3]);
+      break;
+    }
+    case PARTITION_HORZ_4B: {
+      const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_HORZ);
+      const BLOCK_SIZE bsize_med =
+          get_partition_subsize(bsize_big, PARTITION_HORZ);
+      assert(subsize == get_partition_subsize(bsize_med, PARTITION_HORZ));
+      collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col, subsize,
+                          ptree->sub_tree[0]);
+      collect_mv_stats_sb(mv_stats, cpi, mi_row + ebs_h, mi_col, bsize_big,
+                          ptree->sub_tree[1]);
+      collect_mv_stats_sb(mv_stats, cpi, mi_row + 5 * ebs_h, mi_col, bsize_med,
+                          ptree->sub_tree[2]);
+      collect_mv_stats_sb(mv_stats, cpi, mi_row + 7 * ebs_h, mi_col, subsize,
+                          ptree->sub_tree[3]);
+      break;
+    }
+    case PARTITION_VERT_4A: {
+      const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_VERT);
+      const BLOCK_SIZE bsize_med =
+          get_partition_subsize(bsize_big, PARTITION_VERT);
+      assert(subsize == get_partition_subsize(bsize_med, PARTITION_VERT));
+      collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col, subsize,
+                          ptree->sub_tree[0]);
+      collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col + ebs_w, bsize_med,
+                          ptree->sub_tree[1]);
+      collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col + 3 * ebs_w, bsize_big,
+                          ptree->sub_tree[2]);
+      collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col + 7 * ebs_w, subsize,
+                          ptree->sub_tree[3]);
+      break;
+    }
+    case PARTITION_VERT_4B: {
+      const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_VERT);
+      const BLOCK_SIZE bsize_med =
+          get_partition_subsize(bsize_big, PARTITION_VERT);
+      assert(subsize == get_partition_subsize(bsize_med, PARTITION_VERT));
+      collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col, subsize,
+                          ptree->sub_tree[0]);
+      collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col + ebs_w, bsize_big,
+                          ptree->sub_tree[1]);
+      collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col + 5 * ebs_w, bsize_med,
+                          ptree->sub_tree[2]);
+      collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col + 7 * ebs_w, subsize,
+                          ptree->sub_tree[3]);
+      break;
+    }
+#endif  // CONFIG_UNEVEN_4WAY
     case PARTITION_HORZ_3:
     case PARTITION_VERT_3: {
       for (int i = 0; i < 4; ++i) {
@@ -686,28 +770,6 @@
       }
       break;
     }
-#else
-    case PARTITION_HORZ_3: {
-      collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col, subsize,
-                          ptree->sub_tree[0]);
-      collect_mv_stats_sb(mv_stats, cpi, mi_row + qbs_h, mi_col,
-                          get_partition_subsize(bsize, PARTITION_HORZ),
-                          ptree->sub_tree[1]);
-      collect_mv_stats_sb(mv_stats, cpi, mi_row + 3 * qbs_h, mi_col, subsize,
-                          ptree->sub_tree[2]);
-      break;
-    }
-    case PARTITION_VERT_3: {
-      collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col, subsize,
-                          ptree->sub_tree[0]);
-      collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col + qbs_w,
-                          get_partition_subsize(bsize, PARTITION_VERT),
-                          ptree->sub_tree[1]);
-      collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col + 3 * qbs_w, subsize,
-                          ptree->sub_tree[2]);
-      break;
-    }
-#endif  // CONFIG_H_PARTITION
 #else   // CONFIG_EXT_RECUR_PARTITIONS
     case PARTITION_HORZ_A:
       collect_mv_stats_b(mv_stats, cpi, mi_row, mi_col);
@@ -788,14 +850,22 @@
   }
 
   mv_stats->q = current_q;
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+  mv_stats->order = cpi->common.current_frame.display_order_hint;
+#else
   mv_stats->order = cpi->common.current_frame.order_hint;
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
   mv_stats->valid = 1;
 }
 
 static AOM_INLINE int get_smart_mv_prec(AV1_COMP *cpi, const MV_STATS *mv_stats,
                                         int current_q) {
   const AV1_COMMON *cm = &cpi->common;
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+  const int order_hint = cpi->common.current_frame.display_order_hint;
+#else
   const int order_hint = cpi->common.current_frame.order_hint;
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
   const int order_diff = order_hint - mv_stats->order;
   aom_clear_system_state();
   const float area = (float)(cm->width * cm->height);
diff --git a/av1/encoder/palette.c b/av1/encoder/palette.c
index efeda05..0d963ab 100644
--- a/av1/encoder/palette.c
+++ b/av1/encoder/palette.c
@@ -87,7 +87,7 @@
   int n_in_cache = 0;
   int in_cache_flags[PALETTE_MAX_SIZE];
   memset(in_cache_flags, 0, sizeof(in_cache_flags));
-#if CONFIG_INDEP_PALETTE_PARSING
+#if CONFIG_PALETTE_IMPROVEMENTS
   for (int i = 0; i < n_cache; ++i) {
     int duplicate = 0;
     for (int j = 0; j < i; j++) {
@@ -96,7 +96,7 @@
     if (duplicate) continue;
 #else
   for (int i = 0; i < n_cache && n_in_cache < n_colors; ++i) {
-#endif  // CONFIG_INDEP_PALETTE_PARSING
+#endif  // CONFIG_PALETTE_IMPROVEMENTS
     for (int j = 0; j < n_colors; ++j) {
       if (colors[j] == color_cache[i]) {
         in_cache_flags[j] = 1;
diff --git a/av1/encoder/partition_search.c b/av1/encoder/partition_search.c
index f62672c..ca6a45a 100644
--- a/av1/encoder/partition_search.c
+++ b/av1/encoder/partition_search.c
@@ -417,8 +417,13 @@
                           pd->subsampling_x, pd->subsampling_y);
         }
         mismatch_record_block_pre(pd->dst.buf, pd->dst.stride,
-                                  cm->current_frame.order_hint, plane, pixel_c,
-                                  pixel_r, pd->width, pd->height);
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+                                  cm->current_frame.display_order_hint,
+#else
+                                  cm->current_frame.order_hint,
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+                                  plane, pixel_c, pixel_r, pd->width,
+                                  pd->height);
       }
     }
 #else
@@ -491,15 +496,15 @@
       if (intra_tx_size != max_txsize_rect_lookup[bsize])
         ++x->txfm_search_info.txb_split_count;
     }
-#if CONFIG_REF_MV_BANK && !CONFIG_C043_MVP_IMPROVEMENTS
-#if CONFIG_IBC_SR_EXT && !CONFIG_BVP_IMPROVEMENT
+#if CONFIG_REF_MV_BANK && !CONFIG_MVP_IMPROVEMENT
+#if CONFIG_IBC_SR_EXT && !CONFIG_IBC_BV_IMPROVEMENT
     if (cm->seq_params.enable_refmvbank && is_inter &&
         !is_intrabc_block(mbmi, xd->tree_type))
 #else
     if (cm->seq_params.enable_refmvbank && is_inter)
-#endif  // CONFIG_IBC_SR_EXT && !CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_SR_EXT && !CONFIG_IBC_BV_IMPROVEMENT
       av1_update_ref_mv_bank(cm, xd, mbmi);
-#endif  // CONFIG_REF_MV_BANK && !CONFIG_C043_MVP_IMPROVEMENTS
+#endif  // CONFIG_REF_MV_BANK && !CONFIG_MVP_IMPROVEMENT
 
 #if CONFIG_WARP_REF_LIST && !WARP_CU_BANK
     if (is_inter) av1_update_warp_param_bank(cm, xd, mbmi);
@@ -534,12 +539,7 @@
       is_cfl_allowed(xd)) {
 #if CONFIG_ADAPTIVE_DS_FILTER
     cfl_store_block(xd, mbmi->sb_type[xd->tree_type == CHROMA_PART],
-                    mbmi->tx_size,
-#if DS_FRAME_LEVEL
-                    cm->features.ds_filter_type);
-#else
-                    cm->seq_params.enable_cfl_ds_filter);
-#endif  // DS_FRAME_LEVEL
+                    mbmi->tx_size, cm->seq_params.enable_cfl_ds_filter);
 #else
     cfl_store_block(xd, mbmi->sb_type[xd->tree_type == CHROMA_PART],
                     mbmi->tx_size);
@@ -742,16 +742,16 @@
     rd_cost->rate = ctx->rd_stats.rate;
     rd_cost->dist = ctx->rd_stats.dist;
     rd_cost->rdcost = ctx->rd_stats.rdcost;
-#if CONFIG_C043_MVP_IMPROVEMENTS
+#if CONFIG_MVP_IMPROVEMENT
     const int is_inter = is_inter_block(&ctx->mic, xd->tree_type);
-#if CONFIG_IBC_SR_EXT && !CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_SR_EXT && !CONFIG_IBC_BV_IMPROVEMENT
     if (cm->seq_params.enable_refmvbank && is_inter &&
         !is_intrabc_block(&ctx->mic, xd->tree_type))
 #else
     if (cm->seq_params.enable_refmvbank && is_inter)
-#endif  // CONFIG_IBC_SR_EXT && !CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_SR_EXT && !CONFIG_IBC_BV_IMPROVEMENT
       av1_update_ref_mv_bank(cm, xd, &ctx->mic);
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
+#endif  // CONFIG_MVP_IMPROVEMENT
 #if WARP_CU_BANK
     if (is_inter) av1_update_warp_param_bank(cm, xd, &ctx->mic);
 #endif  // WARP_CU_BANK
@@ -796,6 +796,9 @@
     p[i].qcoeff = ctx->qcoeff[i];
     p[i].dqcoeff = ctx->dqcoeff[i];
     p[i].eobs = ctx->eobs[i];
+#if CONFIG_ATC_DCTX_ALIGNED
+    p[i].bobs = ctx->bobs[i];
+#endif  // CONFIG_ATC_DCTX_ALIGNED
     p[i].txb_entropy_ctx = ctx->txb_entropy_ctx[i];
   }
 
@@ -827,7 +830,6 @@
     start_timing(cpi, av1_rd_pick_intra_mode_sb_time);
 #endif
     av1_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, best_rd.rdcost);
-
 #if CONFIG_COLLECT_COMPONENT_TIMING
     end_timing(cpi, av1_rd_pick_intra_mode_sb_time);
 #endif
@@ -847,16 +849,16 @@
 #endif
   }
 
-#if CONFIG_C043_MVP_IMPROVEMENTS
+#if CONFIG_MVP_IMPROVEMENT
   const int is_inter = is_inter_block(mbmi, xd->tree_type);
-#if CONFIG_IBC_SR_EXT && !CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_SR_EXT && !CONFIG_IBC_BV_IMPROVEMENT
   if (cm->seq_params.enable_refmvbank && is_inter &&
       !is_intrabc_block(mbmi, xd->tree_type))
 #else
   if (cm->seq_params.enable_refmvbank && is_inter)
-#endif  // CONFIG_IBC_SR_EXT && !CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_SR_EXT && !CONFIG_IBC_BV_IMPROVEMENT
     av1_update_ref_mv_bank(cm, xd, mbmi);
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
+#endif  // CONFIG_MVP_IMPROVEMENT
 
 #if WARP_CU_BANK
   if (is_inter) av1_update_warp_param_bank(cm, xd, mbmi);
@@ -898,6 +900,34 @@
   if (mbmi->mode == AMVDNEWMV) max_drl_bits = AOMMIN(max_drl_bits, 1);
 #endif  // IMPROVED_AMVD
   uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
+#if CONFIG_SEP_COMP_DRL
+  assert(mbmi->ref_mv_idx[0] < max_drl_bits + 1);
+  assert(mbmi->ref_mv_idx[1] < max_drl_bits + 1);
+  for (int ref = 0; ref < 1 + has_second_drl(mbmi); ++ref) {
+    for (int idx = 0; idx < max_drl_bits; ++idx) {
+      const uint16_t *weight = has_second_drl(mbmi)
+                                   ? mbmi_ext->weight[mbmi->ref_frame[ref]]
+                                   : mbmi_ext->weight[ref_frame_type];
+      aom_cdf_prob *drl_cdf = av1_get_drl_cdf(fc, weight, mode_ctx, idx);
+#if CONFIG_ENTROPY_STATS
+      int drl_ctx = av1_drl_ctx(mode_ctx);
+      switch (idx) {
+        case 0:
+          counts->drl_mode[0][drl_ctx][mbmi->ref_mv_idx[ref] != idx]++;
+          break;
+        case 1:
+          counts->drl_mode[1][drl_ctx][mbmi->ref_mv_idx[ref] != idx]++;
+          break;
+        default:
+          counts->drl_mode[2][drl_ctx][mbmi->ref_mv_idx[ref] != idx]++;
+          break;
+      }
+#endif  // CONFIG_ENTROPY_STATS
+      update_cdf(drl_cdf, mbmi->ref_mv_idx[ref] != idx, 2);
+      if (mbmi->ref_mv_idx[ref] == idx) break;
+    }
+  }
+#else
   assert(mbmi->ref_mv_idx < max_drl_bits + 1);
   for (int idx = 0; idx < max_drl_bits; ++idx) {
     aom_cdf_prob *drl_cdf =
@@ -913,9 +943,10 @@
     update_cdf(drl_cdf, mbmi->ref_mv_idx != idx, 2);
     if (mbmi->ref_mv_idx == idx) break;
   }
+#endif  // CONFIG_SEP_COMP_DRL
 }
 
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
 static void update_intrabc_drl_idx_stats(int max_ref_bv_num, FRAME_CONTEXT *fc,
                                          FRAME_COUNTS *counts,
                                          const MB_MODE_INFO *mbmi) {
@@ -934,7 +965,32 @@
     ++bit_cnt;
   }
 }
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
+
+#if CONFIG_CWP
+// Updates the cwp_idx CDFs, and counts when stats are enabled, for xd->mi[0].
+static void update_cwp_idx_stats(FRAME_CONTEXT *fc, FRAME_COUNTS *counts,
+                                 const AV1_COMMON *const cm, MACROBLOCKD *xd) {
+#if !CONFIG_ENTROPY_STATS
+  (void)counts;  // counts is only used when CONFIG_ENTROPY_STATS is set
+#endif  // !CONFIG_ENTROPY_STATS
+  const MB_MODE_INFO *mbmi = xd->mi[0];
+
+  assert(mbmi->cwp_idx >= CWP_MIN && mbmi->cwp_idx <= CWP_MAX);
+  int bit_cnt = 0;    // advances in lockstep with idx in the loop below
+  const int ctx = 0;  // the CDF context is fixed at 0 here
+
+  int8_t final_idx = get_cwp_coding_idx(mbmi->cwp_idx, 1, cm, mbmi);
+  for (int idx = 0; idx < MAX_CWP_NUM - 1; ++idx) {  // one binary symbol each
+#if CONFIG_ENTROPY_STATS
+    counts->cwp_idx[bit_cnt][final_idx != idx]++;
+#endif  // CONFIG_ENTROPY_STATS
+    update_cdf(fc->cwp_idx_cdf[ctx][bit_cnt], final_idx != idx, 2);
+    if (final_idx == idx) break;  // the "equal" symbol ends the sequence
+    ++bit_cnt;
+  }
+}
+#endif  // CONFIG_CWP
 
 #if CONFIG_EXTENDED_WARP_PREDICTION
 static void update_warp_delta_param_stats(int index, int value,
@@ -975,7 +1031,11 @@
       if (mbmi->warp_ref_idx == bit_idx) break;
     }
   }
-  if (allow_warp_parameter_signaling(mbmi)) {
+  if (allow_warp_parameter_signaling(
+#if CONFIG_CWG_D067_IMPROVED_WARP
+          cm,
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+          mbmi)) {
 #endif  // CONFIG_WARP_REF_LIST
     const WarpedMotionParams *params = &mbmi->wm_params[0];
     WarpedMotionParams base_params;
@@ -1017,7 +1077,7 @@
 #endif  // CONFIG_WARP_REF_LIST
 }
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
 static void update_skip_drl_index_stats(int max_drl_bits, FRAME_CONTEXT *fc,
                                         FRAME_COUNTS *counts,
                                         const MB_MODE_INFO *mbmi) {
@@ -1025,9 +1085,26 @@
   (void)counts;
 #endif  // !CONFIG_ENTROPY_STATS
   assert(have_drl_index(mbmi->mode));
+#if CONFIG_SEP_COMP_DRL
+  assert(get_ref_mv_idx(mbmi, 0) < max_drl_bits + 1);
+  assert(get_ref_mv_idx(mbmi, 1) < max_drl_bits + 1);
+#else
   assert(mbmi->ref_mv_idx < max_drl_bits + 1);
+#endif  // CONFIG_SEP_COMP_DRL
   for (int idx = 0; idx < max_drl_bits; ++idx) {
     aom_cdf_prob *drl_cdf = fc->skip_drl_cdf[AOMMIN(idx, 2)];
+#if CONFIG_SEP_COMP_DRL
+    update_cdf(drl_cdf, mbmi->ref_mv_idx[0] != idx, 2);
+#if CONFIG_ENTROPY_STATS
+    switch (idx) {
+      case 0: counts->skip_drl_mode[idx][mbmi->ref_mv_idx[0] != idx]++; break;
+      case 1: counts->skip_drl_mode[idx][mbmi->ref_mv_idx[0] != idx]++; break;
+      default: counts->skip_drl_mode[2][mbmi->ref_mv_idx[0] != idx]++; break;
+    }
+#endif  // CONFIG_ENTROPY_STATS
+    if (mbmi->ref_mv_idx[0] == idx) break;
+#else
+    update_cdf(drl_cdf, mbmi->ref_mv_idx != idx, 2);
 #if CONFIG_ENTROPY_STATS
     switch (idx) {
       case 0: counts->skip_drl_mode[idx][mbmi->ref_mv_idx != idx]++; break;
@@ -1035,11 +1112,11 @@
       default: counts->skip_drl_mode[2][mbmi->ref_mv_idx != idx]++; break;
     }
 #endif  // CONFIG_ENTROPY_STATS
-    update_cdf(drl_cdf, mbmi->ref_mv_idx != idx, 2);
     if (mbmi->ref_mv_idx == idx) break;
+#endif  // CONFIG_SEP_COMP_DRL
   }
 }
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
 
 static void update_stats(const AV1_COMMON *const cm, ThreadData *td) {
   MACROBLOCK *x = &td->mb;
@@ -1049,6 +1126,7 @@
   const CurrentFrame *const current_frame = &cm->current_frame;
   const BLOCK_SIZE bsize = mbmi->sb_type[xd->tree_type == CHROMA_PART];
   FRAME_CONTEXT *fc = xd->tile_ctx;
+  const int inter_block = mbmi->ref_frame[0] != INTRA_FRAME;
   const int seg_ref_active = 0;
 
   if (current_frame->skip_mode_info.skip_mode_flag && !seg_ref_active &&
@@ -1059,6 +1137,50 @@
 #endif
     update_cdf(fc->skip_mode_cdfs[skip_mode_ctx], mbmi->skip_mode, 2);
   }
+
+#if CONFIG_SKIP_TXFM_OPT
+  const int use_intrabc = is_intrabc_block(mbmi, xd->tree_type);
+  if (!seg_ref_active) {
+    if (!mbmi->skip_mode && !frame_is_intra_only(cm)) {
+      const int intra_inter_ctx = av1_get_intra_inter_context(xd);
+#if CONFIG_ENTROPY_STATS
+      td->counts->intra_inter[intra_inter_ctx][inter_block]++;
+#endif  // CONFIG_ENTROPY_STATS
+      update_cdf(fc->intra_inter_cdf[intra_inter_ctx], inter_block, 2);
+    }
+
+    if (!inter_block && av1_allow_intrabc(cm) && xd->tree_type != CHROMA_PART) {
+#if CONFIG_NEW_CONTEXT_MODELING
+      const int intrabc_ctx = get_intrabc_ctx(xd);
+      update_cdf(fc->intrabc_cdf[intrabc_ctx], use_intrabc, 2);
+#if CONFIG_ENTROPY_STATS
+      ++td->counts->intrabc[intrabc_ctx][use_intrabc];
+#endif  // CONFIG_ENTROPY_STATS
+#else
+      update_cdf(fc->intrabc_cdf, use_intrabc, 2);
+#if CONFIG_ENTROPY_STATS
+      ++td->counts->intrabc[use_intrabc];
+#endif  // CONFIG_ENTROPY_STATS
+#endif  // CONFIG_NEW_CONTEXT_MODELING
+    }
+
+    if (inter_block || (!inter_block && use_intrabc)) {
+#if !CONFIG_SKIP_MODE_ENHANCEMENT
+      if (!mbmi->skip_mode) {
+#endif  // !CONFIG_SKIP_MODE_ENHANCEMENT
+        const int skip_ctx = av1_get_skip_txfm_context(xd);
+#if CONFIG_ENTROPY_STATS
+        td->counts->skip_txfm[skip_ctx]
+                             [mbmi->skip_txfm[xd->tree_type == CHROMA_PART]]++;
+#endif
+        update_cdf(fc->skip_txfm_cdfs[skip_ctx],
+                   mbmi->skip_txfm[xd->tree_type == CHROMA_PART], 2);
+#if !CONFIG_SKIP_MODE_ENHANCEMENT
+      }
+#endif  // !CONFIG_SKIP_MODE_ENHANCEMENT
+    }
+  }
+#else
 #if CONFIG_SKIP_MODE_ENHANCEMENT
   if (!seg_ref_active) {
 #else
@@ -1072,6 +1194,7 @@
     update_cdf(fc->skip_txfm_cdfs[skip_ctx],
                mbmi->skip_txfm[xd->tree_type == CHROMA_PART], 2);
   }
+#endif  // CONFIG_SKIP_TXFM_OPT
 
 #if CONFIG_ENTROPY_STATS
   // delta quant applies to both intra and inter
@@ -1121,6 +1244,7 @@
     av1_sum_intra_stats(cm, td->counts, xd, mbmi);
   }
   if (av1_allow_intrabc(cm) && xd->tree_type != CHROMA_PART) {
+#if !CONFIG_SKIP_TXFM_OPT
     const int use_intrabc = is_intrabc_block(mbmi, xd->tree_type);
 #if CONFIG_NEW_CONTEXT_MODELING
     const int intrabc_ctx = get_intrabc_ctx(xd);
@@ -1134,7 +1258,8 @@
     ++td->counts->intrabc[use_intrabc];
 #endif  // CONFIG_ENTROPY_STATS
 #endif  // CONFIG_NEW_CONTEXT_MODELING
-#if CONFIG_BVCOST_UPDATE
+#endif  // !CONFIG_SKIP_TXFM_OPT
+#if CONFIG_IBC_BV_IMPROVEMENT
     if (use_intrabc) {
       const int_mv ref_mv = mbmi_ext->ref_mv_stack[INTRA_FRAME][0].this_mv;
 #if CONFIG_FLEX_MVRES
@@ -1153,8 +1278,8 @@
     }
 #endif
 
-#endif  // CONFIG_BVCOST_UPDATE
-#if CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
     if (use_intrabc) {
       update_cdf(fc->intrabc_mode_cdf, mbmi->intrabc_mode, 2);
 #if CONFIG_ENTROPY_STATS
@@ -1162,29 +1287,37 @@
 #endif  // CONFIG_ENTROPY_STATS
       update_intrabc_drl_idx_stats(MAX_REF_BV_STACK_SIZE, fc, td->counts, mbmi);
     }
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
   }
 
 #if CONFIG_SKIP_MODE_ENHANCEMENT
   if (mbmi->skip_mode && have_drl_index(mbmi->mode)) {
     FRAME_COUNTS *const counts = td->counts;
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
     update_skip_drl_index_stats(cm->features.max_drl_bits, fc, counts, mbmi);
 #else
     const int16_t mode_ctx_pristine =
         av1_mode_context_pristine(mbmi_ext->mode_context, mbmi->ref_frame);
     update_drl_index_stats(cm->features.max_drl_bits, mode_ctx_pristine, fc,
                            counts, mbmi, mbmi_ext);
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
   }
 #endif  // CONFIG_SKIP_MODE_ENHANCEMENT
 
+#if CONFIG_REFINEMV
+  if (mbmi->skip_mode && switchable_refinemv_flag(cm, mbmi)) {
+    const int refinemv_ctx = av1_get_refinemv_context(cm, xd, bsize);
+    update_cdf(fc->refinemv_flag_cdf[refinemv_ctx], mbmi->refinemv_flag,
+               REFINEMV_NUM_MODES);
+  }
+#endif  // CONFIG_REFINEMV
+
   if (frame_is_intra_only(cm) || mbmi->skip_mode) return;
 
   FRAME_COUNTS *const counts = td->counts;
-  const int inter_block = mbmi->ref_frame[0] != INTRA_FRAME;
 
   if (!seg_ref_active) {
+#if !CONFIG_SKIP_TXFM_OPT
 #if CONFIG_ENTROPY_STATS && !CONFIG_CONTEXT_DERIVATION
     counts->intra_inter[av1_get_intra_inter_context(xd)][inter_block]++;
 #endif  // CONFIG_ENTROPY_STATS && !CONFIG_CONTEXT_DERIVATION
@@ -1200,6 +1333,7 @@
     update_cdf(fc->intra_inter_cdf[av1_get_intra_inter_context(xd)],
                inter_block, 2);
 #endif  // CONFIG_CONTEXT_DERIVATION
+#endif  // !CONFIG_SKIP_TXFM_OPT
     // If the segment reference feature is enabled we have only a single
     // reference frame allowed for the segment so exclude it from
     // the reference frame counts used to work out probabilities.
@@ -1492,10 +1626,33 @@
       }
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
 
+#if CONFIG_CWG_D067_IMPROVED_WARP
+      if (allow_warpmv_with_mvd_coding(cm, mbmi)) {
+        update_cdf(fc->warpmv_with_mvd_flag_cdf[mbmi->sb_type[PLANE_TYPE_Y]],
+                   mbmi->warpmv_with_mvd_flag, 2);
+      } else {
+        assert(mbmi->warpmv_with_mvd_flag == 0);
+      }
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+
+#if CONFIG_REFINEMV
+      int is_refinemv_signaled = switchable_refinemv_flag(cm, mbmi);
+      if (!mbmi->skip_mode && is_refinemv_signaled) {
+        const int refinemv_ctx = av1_get_refinemv_context(cm, xd, bsize);
+        update_cdf(fc->refinemv_flag_cdf[refinemv_ctx], mbmi->refinemv_flag,
+                   REFINEMV_NUM_MODES);
+      }
+      assert(IMPLIES(mbmi->refinemv_flag && is_refinemv_signaled,
+                     mbmi->comp_group_idx == 0 &&
+                         mbmi->interinter_comp.type == COMPOUND_AVERAGE));
+#endif  // CONFIG_REFINEMV
       if (has_second_ref(mbmi)
 #if CONFIG_OPTFLOW_REFINEMENT
           && mbmi->mode < NEAR_NEARMV_OPTFLOW
 #endif  // CONFIG_OPTFLOW_REFINEMENT
+#if CONFIG_REFINEMV
+          && (!mbmi->refinemv_flag || !is_refinemv_signaled)
+#endif  // CONFIG_REFINEMV
 #if IMPROVED_AMVD && CONFIG_JOINT_MVD
           && !is_joint_amvd_coding_mode(mbmi->mode)
 #endif  // IMPROVED_AMVD && CONFIG_JOINT_MVD
@@ -1546,12 +1703,21 @@
 #endif  // CONFIG_WEDGE_MOD_EXT
         }
       }
+
+#if CONFIG_CWP
+      if (cm->features.enable_cwp && is_cwp_allowed(mbmi) && !mbmi->skip_mode) {
+        update_cwp_idx_stats(fc, td->counts, cm, xd);
+      }
+#endif  // CONFIG_CWP
     }
   }
 
   if (inter_block && cm->features.interp_filter == SWITCHABLE &&
-      !is_warp_mode(mbmi->motion_mode) &&
-      !is_nontrans_global_motion(xd, mbmi)) {
+      !is_warp_mode(mbmi->motion_mode) && !is_nontrans_global_motion(xd, mbmi)
+#if CONFIG_REFINEMV
+      && !(mbmi->refinemv_flag || mbmi->mode >= NEAR_NEARMV_OPTFLOW)
+#endif  // CONFIG_REFINEMV
+  ) {
     update_filter_type_cdf(xd, mbmi);
   }
   if (inter_block &&
@@ -1625,11 +1791,44 @@
       update_drl_index_stats(cm->features.max_drl_bits, mode_ctx_pristine, fc,
                              counts, mbmi, mbmi_ext);
     }
-    if (have_newmv_in_inter_mode(mbmi->mode) && xd->tree_type != CHROMA_PART) {
+
+#if CONFIG_CWG_D067_IMPROVED_WARP
+    if (xd->tree_type != CHROMA_PART && mbmi->mode == WARPMV) {
+      if (mbmi->warpmv_with_mvd_flag) {
+        WarpedMotionParams ref_warp_model =
+            mbmi_ext
+                ->warp_param_stack[av1_ref_frame_type(mbmi->ref_frame)]
+                                  [mbmi->warp_ref_idx]
+                .wm_params;
+        const int_mv ref_mv =
+            get_mv_from_wrl(xd, &ref_warp_model, mbmi->pb_mv_precision, bsize,
+                            xd->mi_col, xd->mi_row);
+        assert(is_adaptive_mvd == 0);
+
 #if CONFIG_FLEX_MVRES
-      const int pb_mv_precision = mbmi->pb_mv_precision;
-      assert(IMPLIES(cm->features.cur_frame_force_integer_mv,
-                     pb_mv_precision == MV_PRECISION_ONE_PEL));
+        av1_update_mv_stats(mbmi->mv[0].as_mv, ref_mv.as_mv, &fc->nmvc,
+#if CONFIG_ADAPTIVE_MVD
+                            is_adaptive_mvd,
+#endif  // CONFIG_ADAPTIVE_MVD
+                            mbmi->pb_mv_precision);
+#else
+        av1_update_mv_stats(&mbmi->mv[0].as_mv, &ref_mv.as_mv, &fc->nmvc,
+#if CONFIG_ADAPTIVE_MVD
+                            is_adaptive_mvd,
+#endif  // CONFIG_ADAPTIVE_MVD
+                            allow_hp);
+#endif
+      }
+
+    } else {
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+
+      if (have_newmv_in_inter_mode(mbmi->mode) &&
+          xd->tree_type != CHROMA_PART) {
+#if CONFIG_FLEX_MVRES
+        const int pb_mv_precision = mbmi->pb_mv_precision;
+        assert(IMPLIES(cm->features.cur_frame_force_integer_mv,
+                       pb_mv_precision == MV_PRECISION_ONE_PEL));
 #else
       const int allow_hp = cm->features.cur_frame_force_integer_mv
                                ? MV_SUBPEL_NONE
@@ -1637,41 +1836,42 @@
 #endif
 
 #if CONFIG_FLEX_MVRES
-      if (is_pb_mv_precision_active(cm, mbmi, bsize)) {
+        if (is_pb_mv_precision_active(cm, mbmi, bsize)) {
 #if CONFIG_ADAPTIVE_MVD
-        assert(!is_adaptive_mvd);
+          assert(!is_adaptive_mvd);
 #endif
-        assert(mbmi->most_probable_pb_mv_precision <= mbmi->max_mv_precision);
-        const int mpp_flag_context = av1_get_mpp_flag_context(cm, xd);
-        const int mpp_flag =
-            (mbmi->pb_mv_precision == mbmi->most_probable_pb_mv_precision);
-        update_cdf(fc->pb_mv_mpp_flag_cdf[mpp_flag_context], mpp_flag, 2);
+          assert(mbmi->most_probable_pb_mv_precision <= mbmi->max_mv_precision);
+          const int mpp_flag_context = av1_get_mpp_flag_context(cm, xd);
+          const int mpp_flag =
+              (mbmi->pb_mv_precision == mbmi->most_probable_pb_mv_precision);
+          update_cdf(fc->pb_mv_mpp_flag_cdf[mpp_flag_context], mpp_flag, 2);
 
-        if (!mpp_flag) {
-          const PRECISION_SET *precision_def =
-              &av1_mv_precision_sets[mbmi->mb_precision_set];
-          int down = av1_get_pb_mv_precision_index(mbmi);
-          int nsymbs = precision_def->num_precisions - 1;
+          if (!mpp_flag) {
+            const PRECISION_SET *precision_def =
+                &av1_mv_precision_sets[mbmi->mb_precision_set];
+            int down = av1_get_pb_mv_precision_index(mbmi);
+            int nsymbs = precision_def->num_precisions - 1;
 
-          const int down_ctx = av1_get_pb_mv_precision_down_context(cm, xd);
+            const int down_ctx = av1_get_pb_mv_precision_down_context(cm, xd);
 
-          update_cdf(fc->pb_mv_precision_cdf[down_ctx][mbmi->max_mv_precision -
-                                                       MV_PRECISION_HALF_PEL],
-                     down, nsymbs);
+            update_cdf(
+                fc->pb_mv_precision_cdf[down_ctx][mbmi->max_mv_precision -
+                                                  MV_PRECISION_HALF_PEL],
+                down, nsymbs);
+          }
         }
-      }
 #endif  // CONFIG_FLEX_MVRES
 
-      if (new_mv) {
-        for (int ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
-          const int_mv ref_mv = av1_get_ref_mv(x, ref);
+        if (new_mv) {
+          for (int ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) {
+            const int_mv ref_mv = av1_get_ref_mv(x, ref);
 
 #if CONFIG_FLEX_MVRES
-          av1_update_mv_stats(mbmi->mv[ref].as_mv, ref_mv.as_mv, &fc->nmvc,
+            av1_update_mv_stats(mbmi->mv[ref].as_mv, ref_mv.as_mv, &fc->nmvc,
 #if CONFIG_ADAPTIVE_MVD
-                              is_adaptive_mvd,
+                                is_adaptive_mvd,
 #endif  // CONFIG_ADAPTIVE_MVD
-                              pb_mv_precision);
+                                pb_mv_precision);
 #else
           av1_update_mv_stats(&mbmi->mv[ref].as_mv, &ref_mv.as_mv, &fc->nmvc,
 #if CONFIG_ADAPTIVE_MVD
@@ -1679,23 +1879,23 @@
 #endif  // CONFIG_ADAPTIVE_MVD
                               allow_hp);
 #endif
-        }
-      } else if (have_nearmv_newmv_in_inter_mode(mbmi->mode)) {
-        const int ref =
+          }
+        } else if (have_nearmv_newmv_in_inter_mode(mbmi->mode)) {
+          const int ref =
 #if CONFIG_OPTFLOW_REFINEMENT
-            mbmi->mode == NEAR_NEWMV_OPTFLOW ||
+              mbmi->mode == NEAR_NEWMV_OPTFLOW ||
 #endif  // CONFIG_OPTFLOW_REFINEMENT
 #if CONFIG_JOINT_MVD
-            jmvd_base_ref_list ||
+              jmvd_base_ref_list ||
 #endif  // CONFIG_JOINT_MVD
-            mbmi->mode == NEAR_NEWMV;
-        const int_mv ref_mv = av1_get_ref_mv(x, ref);
+              mbmi->mode == NEAR_NEWMV;
+          const int_mv ref_mv = av1_get_ref_mv(x, ref);
 #if CONFIG_FLEX_MVRES
-        av1_update_mv_stats(mbmi->mv[ref].as_mv, ref_mv.as_mv, &fc->nmvc,
+          av1_update_mv_stats(mbmi->mv[ref].as_mv, ref_mv.as_mv, &fc->nmvc,
 #if CONFIG_ADAPTIVE_MVD
-                            is_adaptive_mvd,
+                              is_adaptive_mvd,
 #endif  // CONFIG_ADAPTIVE_MVD
-                            pb_mv_precision);
+                              pb_mv_precision);
 #else
         av1_update_mv_stats(&mbmi->mv[ref].as_mv, &ref_mv.as_mv, &fc->nmvc,
 #if CONFIG_ADAPTIVE_MVD
@@ -1703,8 +1903,12 @@
 #endif  // CONFIG_ADAPTIVE_MVD
                             allow_hp);
 #endif
+        }
       }
+
+#if CONFIG_CWG_D067_IMPROVED_WARP
     }
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
   }
 }
 
@@ -1828,9 +2032,9 @@
       assert(!frame_is_intra_only(cm));
       rdc->skip_mode_used_flag = 1;
       if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT) {
-#if !CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if !CONFIG_SKIP_MODE_ENHANCEMENT
         assert(has_second_ref(mbmi));
-#endif  // !CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // !CONFIG_SKIP_MODE_ENHANCEMENT
         rdc->compound_ref_used_flag = 1;
       }
       set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
@@ -1859,7 +2063,11 @@
     if ((!cpi->sf.inter_sf.disable_obmc &&
          cpi->sf.inter_sf.prune_obmc_prob_thresh > 0) ||
 #if CONFIG_EXTENDED_WARP_PREDICTION
-        cpi->sf.inter_sf.prune_warped_prob_thresh > 0) {
+        cpi->sf.inter_sf.prune_warped_prob_thresh > 0
+#if CONFIG_CWG_D067_IMPROVED_WARP
+        || cpi->sf.inter_sf.prune_warpmv_prob_thresh > 0
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+    ) {
 #else
         (cm->features.allow_warped_motion &&
          cpi->sf.inter_sf.prune_warped_prob_thresh > 0)) {
@@ -1874,11 +2082,20 @@
           if (allowed_motion_modes & (1 << OBMC_CAUSAL)) {
             td->rd_counts.obmc_used[bsize][mbmi->motion_mode == OBMC_CAUSAL]++;
           }
+#if CONFIG_CWG_D067_IMPROVED_WARP
+          int is_warp_allowed = (allowed_motion_modes & (1 << WARPED_CAUSAL)) ||
+                                (allowed_motion_modes & (1 << WARP_DELTA)) ||
+                                (allowed_motion_modes & (1 << WARP_EXTEND));
+          if (is_warp_allowed) {
+            td->rd_counts.warped_used[mbmi->motion_mode >= WARPED_CAUSAL]++;
+          }
+#else
           if (allowed_motion_modes & (1 << WARPED_CAUSAL)) {
             td->rd_counts.warped_used[mbmi->motion_mode == WARPED_CAUSAL]++;
           }
-          // TODO(rachelbarker): Add counts and pruning for WARP_DELTA and
-          // WARP_EXTEND
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+        // TODO(rachelbarker): Add counts and pruning for WARP_DELTA and
+        // WARP_EXTEND
         }
 #else
         const MOTION_MODE motion_allowed = motion_mode_allowed(cm, xd, mbmi);
@@ -1901,7 +2118,7 @@
   // frame level buffer (cpi->mbmi_ext_info.frame_base) will be used during
   // bitstream preparation.
   if (xd->tree_type != CHROMA_PART)
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
   {
     if (mbmi->skip_mode) {
       const SkipModeInfo *const skip_mode_info =
@@ -1928,16 +2145,20 @@
       // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
       av1_copy_usable_ref_mv_stack_and_weight(xd, x->mbmi_ext, ref_frame_type);
     }
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
+
     av1_copy_mbmi_ext_to_mbmi_ext_frame(
         x->mbmi_ext_frame, x->mbmi_ext,
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SEP_COMP_DRL
+        mbmi,
+#endif  // CONFIG_SEP_COMP_DRL
+#if CONFIG_SKIP_MODE_ENHANCEMENT
         mbmi->skip_mode,
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
         av1_ref_frame_type(xd->mi[0]->ref_frame));
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
   }
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
   x->rdmult = origin_mult;
 }
 
@@ -1949,44 +2170,27 @@
                                    const CommonModeInfoParams *const mi_params,
 #if CONFIG_EXT_RECUR_PARTITIONS
                                    int disable_ext_part,
-#if !CONFIG_H_PARTITION
-                                   PARTITION_TREE const *ptree,
-#endif  // !CONFIG_H_PARTITION
                                    PARTITION_TREE const *ptree_luma,
                                    const CHROMA_REF_INFO *chroma_ref_info,
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
                                    PARTITION_TYPE partition, const int mi_row,
                                    const int mi_col, BLOCK_SIZE bsize,
                                    const int ctx, BLOCK_SIZE sb_size) {
-  const int plane_index = xd->tree_type == CHROMA_PART;
+  const TREE_TYPE tree_type = xd->tree_type;
+  const int plane_index = tree_type == CHROMA_PART;
   FRAME_CONTEXT *fc = xd->tile_ctx;
 
 #if CONFIG_EXT_RECUR_PARTITIONS
-  if (!is_partition_point(bsize)) {
-    return;
-  }
-  if (xd->tree_type == CHROMA_PART && bsize == BLOCK_8X8) {
-    return;
-  }
+  const bool ss_x = xd->plane[1].subsampling_x;
+  const bool ss_y = xd->plane[1].subsampling_y;
 
-  const int ss_x = xd->plane[1].subsampling_x;
-  const int ss_y = xd->plane[1].subsampling_y;
-  if (is_luma_chroma_share_same_partition(xd->tree_type, ptree_luma, bsize)) {
-    PARTITION_TYPE derived_partition_mode =
-        sdp_chroma_part_from_luma(bsize, ptree_luma->partition, ss_x, ss_y);
-    assert(partition == derived_partition_mode &&
-           "Chroma partition does not match the derived mode.");
-    (void)derived_partition_mode;
-    return;
-  }
-
-  PARTITION_TYPE implied_partition;
-  const bool is_part_implied = is_partition_implied_at_boundary(
-      mi_params, xd->tree_type, ss_x, ss_y, mi_row, mi_col, bsize,
-      chroma_ref_info, &implied_partition);
-  if (is_part_implied) {
-    assert(partition == implied_partition &&
-           "Partition doesn't match the implied partition at boundary.");
+  const PARTITION_TYPE derived_partition =
+      av1_get_normative_forced_partition_type(mi_params, tree_type, ss_x, ss_y,
+                                              mi_row, mi_col, bsize, ptree_luma,
+                                              chroma_ref_info);
+  if (derived_partition != PARTITION_INVALID) {
+    assert(partition == derived_partition &&
+           "Partition does not match normatively derived partition.");
     return;
   }
 
@@ -2016,7 +2220,7 @@
   }
 
   RECT_PART_TYPE rect_type = get_rect_part_type(partition);
-  if (rect_type_implied_by_bsize(bsize, xd->tree_type) == RECT_INVALID) {
+  if (rect_type_implied_by_bsize(bsize, tree_type) == RECT_INVALID) {
 #if CONFIG_ENTROPY_STATS
     counts->rect_type[plane_index][ctx][rect_type]++;
 #endif  // CONFIG_ENTROPY_STATS
@@ -2025,7 +2229,7 @@
 
   const bool ext_partition_allowed =
       !disable_ext_part &&
-      is_ext_partition_allowed(bsize, rect_type, xd->tree_type);
+      is_ext_partition_allowed(bsize, rect_type, tree_type);
   if (ext_partition_allowed) {
     const bool do_ext_partition = (partition >= PARTITION_HORZ_3);
 #if CONFIG_ENTROPY_STATS
@@ -2033,6 +2237,35 @@
 #endif  // CONFIG_ENTROPY_STATS
     update_cdf(fc->do_ext_partition_cdf[plane_index][rect_type][ctx],
                do_ext_partition, 2);
+#if CONFIG_UNEVEN_4WAY
+    if (do_ext_partition) {
+      const bool uneven_4way_partition_allowed =
+          is_uneven_4way_partition_allowed(bsize, rect_type, tree_type);
+      if (uneven_4way_partition_allowed) {
+        const bool do_uneven_4way_partition = (partition >= PARTITION_HORZ_4A);
+#if CONFIG_ENTROPY_STATS
+        counts->do_uneven_4way_partition[plane_index][rect_type][ctx]
+                                        [do_uneven_4way_partition]++;
+#endif  // CONFIG_ENTROPY_STATS
+        update_cdf(
+            fc->do_uneven_4way_partition_cdf[plane_index][rect_type][ctx],
+            do_uneven_4way_partition, 2);
+        if (do_uneven_4way_partition) {
+          const UNEVEN_4WAY_PART_TYPE uneven_4way_type =
+              (partition == PARTITION_HORZ_4A || partition == PARTITION_VERT_4A)
+                  ? UNEVEN_4A
+                  : UNEVEN_4B;
+#if CONFIG_ENTROPY_STATS
+          counts->uneven_4way_partition_type[plane_index][rect_type][ctx]
+                                            [uneven_4way_type]++;
+#endif  // CONFIG_ENTROPY_STATS
+          update_cdf(
+              fc->uneven_4way_partition_type_cdf[plane_index][rect_type][ctx],
+              uneven_4way_type, NUM_UNEVEN_4WAY_PARTS);
+        }
+      }
+    }
+#endif  // CONFIG_UNEVEN_4WAY
   }
 #else  // CONFIG_EXT_RECUR_PARTITIONS
   const int hbs_w = mi_size_wide[bsize] / 2;
@@ -2042,8 +2275,7 @@
   if (has_rows && has_cols) {
     int luma_split_flag = 0;
     int parent_block_width = block_size_wide[bsize];
-    if (xd->tree_type == CHROMA_PART &&
-        parent_block_width >= SHARED_PART_SIZE) {
+    if (tree_type == CHROMA_PART && parent_block_width >= SHARED_PART_SIZE) {
       luma_split_flag = get_luma_split_flag(bsize, mi_params, mi_row, mi_col);
     }
     if (luma_split_flag <= 3) {
@@ -2147,10 +2379,14 @@
   assert(bsize < BLOCK_SIZES_ALL);
   const int hbs_w = mi_size_wide[bsize] / 2;
   const int hbs_h = mi_size_high[bsize] / 2;
-#if !CONFIG_H_PARTITION
+#if CONFIG_UNEVEN_4WAY
+  const int ebs_w = mi_size_wide[bsize] / 8;
+  const int ebs_h = mi_size_high[bsize] / 8;
+#endif  // CONFIG_UNEVEN_4WAY
+#if !CONFIG_EXT_RECUR_PARTITIONS
   const int qbs_w = mi_size_wide[bsize] / 4;
   const int qbs_h = mi_size_high[bsize] / 4;
-#endif  // !CONFIG_H_PARTITION
+#endif  // !CONFIG_EXT_RECUR_PARTITIONS
   const int is_partition_root = is_partition_point(bsize);
   const int ctx = is_partition_root
                       ? partition_plane_context(xd, mi_row, mi_col, bsize)
@@ -2159,9 +2395,10 @@
   const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
 #if CONFIG_EXT_RECUR_PARTITIONS
   const bool disable_ext_part = !cm->seq_params.enable_ext_partitions;
-#else
-  const BLOCK_SIZE bsize2 = get_partition_subsize(bsize, PARTITION_SPLIT);
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
+#if !CONFIG_EXT_RECUR_PARTITIONS
+  const BLOCK_SIZE bsize2 = get_partition_subsize(bsize, PARTITION_SPLIT);
+#endif  // !CONFIG_EXT_RECUR_PARTITIONS
 
   if (subsize == BLOCK_INVALID) return;
 
@@ -2172,11 +2409,8 @@
 #endif  // CONFIG_ENTROPY_STATS
                            tile_data->allow_update_cdf, mi_params,
 #if CONFIG_EXT_RECUR_PARTITIONS
-                           disable_ext_part,
-#if !CONFIG_H_PARTITION
-                           ptree,
-#endif  // !CONFIG_H_PARTITION
-                           ptree_luma, &pc_tree->chroma_ref_info,
+                           disable_ext_part, ptree_luma,
+                           &pc_tree->chroma_ref_info,
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
                            partition, mi_row, mi_col, bsize, ctx, cm->sb_size);
 
@@ -2198,12 +2432,18 @@
     const int ss_x = xd->plane[1].subsampling_x;
     const int ss_y = xd->plane[1].subsampling_y;
     set_chroma_ref_info(
-        mi_row, mi_col, ptree->index, bsize, &ptree->chroma_ref_info,
-        parent ? &parent->chroma_ref_info : NULL,
+        xd->tree_type, mi_row, mi_col, ptree->index, bsize,
+        &ptree->chroma_ref_info, parent ? &parent->chroma_ref_info : NULL,
         parent ? parent->bsize : BLOCK_INVALID,
         parent ? parent->partition : PARTITION_NONE, ss_x, ss_y);
 
     switch (partition) {
+#if CONFIG_UNEVEN_4WAY
+      case PARTITION_HORZ_4A:
+      case PARTITION_HORZ_4B:
+      case PARTITION_VERT_4A:
+      case PARTITION_VERT_4B:
+#endif  // CONFIG_UNEVEN_4WAY
       case PARTITION_SPLIT:
         ptree->sub_tree[0] = av1_alloc_ptree_node(ptree, 0);
         ptree->sub_tree[1] = av1_alloc_ptree_node(ptree, 1);
@@ -2221,9 +2461,7 @@
         ptree->sub_tree[0] = av1_alloc_ptree_node(ptree, 0);
         ptree->sub_tree[1] = av1_alloc_ptree_node(ptree, 1);
         ptree->sub_tree[2] = av1_alloc_ptree_node(ptree, 2);
-#if CONFIG_H_PARTITION
         ptree->sub_tree[3] = av1_alloc_ptree_node(ptree, 3);
-#endif  // CONFIG_H_PARTITION
         break;
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
       default: break;
@@ -2290,7 +2528,96 @@
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
       break;
 #if CONFIG_EXT_RECUR_PARTITIONS
-#if CONFIG_H_PARTITION
+#if CONFIG_UNEVEN_4WAY
+    case PARTITION_HORZ_4A: {
+      const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_HORZ);
+      const BLOCK_SIZE bsize_med =
+          get_partition_subsize(bsize_big, PARTITION_HORZ);
+      assert(subsize == get_partition_subsize(bsize_med, PARTITION_HORZ));
+      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, dry_run, subsize,
+                pc_tree->horizontal4a[0], sub_tree[0],
+                track_ptree_luma ? ptree_luma->sub_tree[0] : NULL, rate);
+      if (mi_row + ebs_h >= cm->mi_params.mi_rows) break;
+      encode_sb(cpi, td, tile_data, tp, mi_row + ebs_h, mi_col, dry_run,
+                bsize_med, pc_tree->horizontal4a[1], sub_tree[1],
+                track_ptree_luma ? ptree_luma->sub_tree[1] : NULL, rate);
+      if (mi_row + 3 * ebs_h >= cm->mi_params.mi_rows) break;
+      encode_sb(cpi, td, tile_data, tp, mi_row + 3 * ebs_h, mi_col, dry_run,
+                bsize_big, pc_tree->horizontal4a[2], sub_tree[2],
+                track_ptree_luma ? ptree_luma->sub_tree[2] : NULL, rate);
+      if (mi_row + 7 * ebs_h >= cm->mi_params.mi_rows) break;
+      encode_sb(cpi, td, tile_data, tp, mi_row + 7 * ebs_h, mi_col, dry_run,
+                subsize, pc_tree->horizontal4a[3], sub_tree[3],
+                track_ptree_luma ? ptree_luma->sub_tree[3] : NULL, rate);
+      break;
+    }
+    case PARTITION_HORZ_4B: {
+      const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_HORZ);
+      const BLOCK_SIZE bsize_med =
+          get_partition_subsize(bsize_big, PARTITION_HORZ);
+      assert(subsize == get_partition_subsize(bsize_med, PARTITION_HORZ));
+      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, dry_run, subsize,
+                pc_tree->horizontal4b[0], sub_tree[0],
+                track_ptree_luma ? ptree_luma->sub_tree[0] : NULL, rate);
+      if (mi_row + ebs_h >= cm->mi_params.mi_rows) break;
+      encode_sb(cpi, td, tile_data, tp, mi_row + ebs_h, mi_col, dry_run,
+                bsize_big, pc_tree->horizontal4b[1], sub_tree[1],
+                track_ptree_luma ? ptree_luma->sub_tree[1] : NULL, rate);
+      if (mi_row + 5 * ebs_h >= cm->mi_params.mi_rows) break;
+      encode_sb(cpi, td, tile_data, tp, mi_row + 5 * ebs_h, mi_col, dry_run,
+                bsize_med, pc_tree->horizontal4b[2], sub_tree[2],
+                track_ptree_luma ? ptree_luma->sub_tree[2] : NULL, rate);
+      if (mi_row + 7 * ebs_h >= cm->mi_params.mi_rows) break;
+      encode_sb(cpi, td, tile_data, tp, mi_row + 7 * ebs_h, mi_col, dry_run,
+                subsize, pc_tree->horizontal4b[3], sub_tree[3],
+                track_ptree_luma ? ptree_luma->sub_tree[3] : NULL, rate);
+      break;
+    }
+    case PARTITION_VERT_4A: {
+      const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_VERT);
+      const BLOCK_SIZE bsize_med =
+          get_partition_subsize(bsize_big, PARTITION_VERT);
+      assert(subsize == get_partition_subsize(bsize_med, PARTITION_VERT));
+      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, dry_run, subsize,
+                pc_tree->vertical4a[0], sub_tree[0],
+                track_ptree_luma ? ptree_luma->sub_tree[0] : NULL, rate);
+      if (mi_col + ebs_w >= cm->mi_params.mi_cols) break;
+      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col + ebs_w, dry_run,
+                bsize_med, pc_tree->vertical4a[1], sub_tree[1],
+                track_ptree_luma ? ptree_luma->sub_tree[1] : NULL, rate);
+      if (mi_col + 3 * ebs_w >= cm->mi_params.mi_cols) break;
+      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col + 3 * ebs_w, dry_run,
+                bsize_big, pc_tree->vertical4a[2], sub_tree[2],
+                track_ptree_luma ? ptree_luma->sub_tree[2] : NULL, rate);
+      if (mi_col + 7 * ebs_w >= cm->mi_params.mi_cols) break;
+      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col + 7 * ebs_w, dry_run,
+                subsize, pc_tree->vertical4a[3], sub_tree[3],
+                track_ptree_luma ? ptree_luma->sub_tree[3] : NULL, rate);
+      break;
+    }
+    case PARTITION_VERT_4B: {
+      const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_VERT);
+      const BLOCK_SIZE bsize_med =
+          get_partition_subsize(bsize_big, PARTITION_VERT);
+      assert(subsize == get_partition_subsize(bsize_med, PARTITION_VERT));
+      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, dry_run, subsize,
+                pc_tree->vertical4b[0], sub_tree[0],
+                track_ptree_luma ? ptree_luma->sub_tree[0] : NULL, rate);
+      if (mi_col + ebs_w >= cm->mi_params.mi_cols) break;
+      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col + ebs_w, dry_run,
+                bsize_big, pc_tree->vertical4b[1], sub_tree[1],
+                track_ptree_luma ? ptree_luma->sub_tree[1] : NULL, rate);
+      if (mi_col + 5 * ebs_w >= cm->mi_params.mi_cols) break;
+      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col + 5 * ebs_w, dry_run,
+                bsize_med, pc_tree->vertical4b[2], sub_tree[2],
+                track_ptree_luma ? ptree_luma->sub_tree[2] : NULL, rate);
+      if (mi_col + 7 * ebs_w >= cm->mi_params.mi_cols) break;
+      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col + 7 * ebs_w, dry_run,
+                subsize, pc_tree->vertical4b[3], sub_tree[3],
+                track_ptree_luma ? ptree_luma->sub_tree[3] : NULL, rate);
+      break;
+    }
+#endif  // CONFIG_UNEVEN_4WAY
     case PARTITION_HORZ_3:
     case PARTITION_VERT_3: {
       for (int i = 0; i < 4; ++i) {
@@ -2315,38 +2642,6 @@
       }
       break;
     }
-#else
-    case PARTITION_HORZ_3: {
-      const BLOCK_SIZE bsize3 = get_partition_subsize(bsize, PARTITION_HORZ);
-      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, dry_run, subsize,
-                pc_tree->horizontal3[0], sub_tree[0],
-                track_ptree_luma ? ptree_luma->sub_tree[0] : NULL, rate);
-      if (mi_row + qbs_h >= cm->mi_params.mi_rows) break;
-      encode_sb(cpi, td, tile_data, tp, mi_row + qbs_h, mi_col, dry_run, bsize3,
-                pc_tree->horizontal3[1], sub_tree[1],
-                track_ptree_luma ? ptree_luma->sub_tree[1] : NULL, rate);
-      if (mi_row + 3 * qbs_h >= cm->mi_params.mi_rows) break;
-      encode_sb(cpi, td, tile_data, tp, mi_row + 3 * qbs_h, mi_col, dry_run,
-                subsize, pc_tree->horizontal3[2], sub_tree[2],
-                track_ptree_luma ? ptree_luma->sub_tree[2] : NULL, rate);
-      break;
-    }
-    case PARTITION_VERT_3: {
-      const BLOCK_SIZE bsize3 = get_partition_subsize(bsize, PARTITION_VERT);
-      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, dry_run, subsize,
-                pc_tree->vertical3[0], sub_tree[0],
-                track_ptree_luma ? ptree_luma->sub_tree[0] : NULL, rate);
-      if (mi_col + qbs_w >= cm->mi_params.mi_cols) break;
-      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col + qbs_w, dry_run, bsize3,
-                pc_tree->vertical3[1], sub_tree[1],
-                track_ptree_luma ? ptree_luma->sub_tree[1] : NULL, rate);
-      if (mi_col + 3 * qbs_w >= cm->mi_params.mi_cols) break;
-      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col + 3 * qbs_w, dry_run,
-                subsize, pc_tree->vertical3[2], sub_tree[2],
-                track_ptree_luma ? ptree_luma->sub_tree[2] : NULL, rate);
-      break;
-    }
-#endif  // CONFIG_H_PARTITION
     case PARTITION_SPLIT:
       encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, dry_run, subsize,
                 pc_tree->split[0], sub_tree[0],
@@ -2443,7 +2738,7 @@
   const int ss_y = cm->seq_params.subsampling_y;
 
   PARTITION_TREE *parent = ptree->parent;
-  set_chroma_ref_info(mi_row, mi_col, ptree->index, bsize,
+  set_chroma_ref_info(tree_type, mi_row, mi_col, ptree->index, bsize,
                       &ptree->chroma_ref_info,
                       parent ? &parent->chroma_ref_info : NULL,
                       parent ? parent->bsize : BLOCK_INVALID,
@@ -2592,13 +2887,14 @@
   if (ptree) {
 #ifndef NDEBUG
 #if CONFIG_EXT_RECUR_PARTITIONS
-    const bool ssx = cm->cur_frame->buf.subsampling_x;
-    const bool ssy = cm->cur_frame->buf.subsampling_y;
-    PARTITION_TYPE implied_partition;
-    const bool is_part_implied = is_partition_implied_at_boundary(
-        &cm->mi_params, tree_type, ssx, ssy, mi_row, mi_col, bsize,
-        &ptree->chroma_ref_info, &implied_partition);
-    assert(IMPLIES(is_part_implied, ptree->partition == implied_partition));
+    const bool ss_x = cm->cur_frame->buf.subsampling_x;
+    const bool ss_y = cm->cur_frame->buf.subsampling_y;
+    const PARTITION_TYPE derived_partition =
+        av1_get_normative_forced_partition_type(
+            &cm->mi_params, tree_type, ss_x, ss_y, mi_row, mi_col, bsize,
+            /* ptree_luma= */ NULL, &ptree->chroma_ref_info);
+    assert(IMPLIES(derived_partition != PARTITION_INVALID,
+                   ptree->partition == derived_partition));
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
 #endif  // NDEBUG
     return ptree->partition;
@@ -2681,8 +2977,8 @@
 
   if (pc_tree->none == NULL) {
     pc_tree->none =
-        av1_alloc_pmc(cm, mi_row, mi_col, bsize, pc_tree, PARTITION_NONE, 0,
-                      ss_x, ss_y, &td->shared_coeff_buf);
+        av1_alloc_pmc(cm, xd->tree_type, mi_row, mi_col, bsize, pc_tree,
+                      PARTITION_NONE, 0, ss_x, ss_y, &td->shared_coeff_buf);
   }
   PICK_MODE_CONTEXT *ctx_none = pc_tree->none;
 
@@ -2719,9 +3015,9 @@
   for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) {
     int x_idx = (i & 1) * hbs;
     int y_idx = (i >> 1) * hbs;
-    pc_tree->split[i] =
-        av1_alloc_pc_tree_node(mi_row + y_idx, mi_col + x_idx, split_subsize,
-                               pc_tree, PARTITION_SPLIT, i, i == 3, ss_x, ss_y);
+    pc_tree->split[i] = av1_alloc_pc_tree_node(
+        xd->tree_type, mi_row + y_idx, mi_col + x_idx, split_subsize, pc_tree,
+        PARTITION_SPLIT, i, i == 3, ss_x, ss_y);
   }
 #endif  // !CONFIG_EXT_RECUR_PARTITIONS
   switch (partition) {
@@ -2731,11 +3027,12 @@
       break;
     case PARTITION_HORZ:
 #if CONFIG_EXT_RECUR_PARTITIONS
-      pc_tree->horizontal[0] = av1_alloc_pc_tree_node(
-          mi_row, mi_col, subsize, pc_tree, PARTITION_HORZ, 0, 0, ss_x, ss_y);
+      pc_tree->horizontal[0] =
+          av1_alloc_pc_tree_node(xd->tree_type, mi_row, mi_col, subsize,
+                                 pc_tree, PARTITION_HORZ, 0, 0, ss_x, ss_y);
       pc_tree->horizontal[1] =
-          av1_alloc_pc_tree_node(mi_row + hbh, mi_col, subsize, pc_tree,
-                                 PARTITION_HORZ, 1, 1, ss_x, ss_y);
+          av1_alloc_pc_tree_node(xd->tree_type, mi_row + hbh, mi_col, subsize,
+                                 pc_tree, PARTITION_HORZ, 1, 1, ss_x, ss_y);
       av1_rd_use_partition(cpi, td, tile_data, mib, tp, mi_row, mi_col, subsize,
                            &last_part_rdc.rate, &last_part_rdc.dist, 1,
                            ptree ? ptree->sub_tree[0] : NULL,
@@ -2744,8 +3041,8 @@
       for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) {
         if (pc_tree->horizontal[i] == NULL) {
           pc_tree->horizontal[i] = av1_alloc_pmc(
-              cm, mi_row + hbs * i, mi_col, subsize, pc_tree, PARTITION_HORZ, i,
-              ss_x, ss_y, &td->shared_coeff_buf);
+              cm, xd->tree_type, mi_row + hbs * i, mi_col, subsize, pc_tree,
+              PARTITION_HORZ, i, ss_x, ss_y, &td->shared_coeff_buf);
         }
       }
       pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
@@ -2781,11 +3078,12 @@
       break;
     case PARTITION_VERT:
 #if CONFIG_EXT_RECUR_PARTITIONS
-      pc_tree->vertical[0] = av1_alloc_pc_tree_node(
-          mi_row, mi_col, subsize, pc_tree, PARTITION_VERT, 0, 0, ss_x, ss_y);
+      pc_tree->vertical[0] =
+          av1_alloc_pc_tree_node(xd->tree_type, mi_row, mi_col, subsize,
+                                 pc_tree, PARTITION_VERT, 0, 0, ss_x, ss_y);
       pc_tree->vertical[1] =
-          av1_alloc_pc_tree_node(mi_row, mi_col + hbw, subsize, pc_tree,
-                                 PARTITION_VERT, 1, 1, ss_x, ss_y);
+          av1_alloc_pc_tree_node(xd->tree_type, mi_row, mi_col + hbw, subsize,
+                                 pc_tree, PARTITION_VERT, 1, 1, ss_x, ss_y);
       av1_rd_use_partition(cpi, td, tile_data, mib, tp, mi_row, mi_col, subsize,
                            &last_part_rdc.rate, &last_part_rdc.dist, 1,
                            ptree ? ptree->sub_tree[0] : NULL,
@@ -2794,8 +3092,8 @@
       for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) {
         if (pc_tree->vertical[i] == NULL) {
           pc_tree->vertical[i] = av1_alloc_pmc(
-              cm, mi_row, mi_col + hbs * i, subsize, pc_tree, PARTITION_VERT, i,
-              ss_x, ss_y, &td->shared_coeff_buf);
+              cm, xd->tree_type, mi_row, mi_col + hbs * i, subsize, pc_tree,
+              PARTITION_VERT, i, ss_x, ss_y, &td->shared_coeff_buf);
         }
       }
       pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
@@ -2842,8 +3140,8 @@
             (mi_col + x_idx >= mi_params->mi_cols))
           continue;
         pc_tree->split[i] = av1_alloc_pc_tree_node(
-            mi_row + y_idx, mi_col + x_idx, subsize, pc_tree, PARTITION_SPLIT,
-            i, i == 3, ss_x, ss_y);
+            xd->tree_type, mi_row + y_idx, mi_col + x_idx, subsize, pc_tree,
+            PARTITION_SPLIT, i, i == 3, ss_x, ss_y);
 
         av1_init_rd_stats(&tmp_rdc);
         av1_rd_use_partition(
@@ -2860,6 +3158,12 @@
         last_part_rdc.dist += tmp_rdc.dist;
       }
       break;
+#if CONFIG_UNEVEN_4WAY
+    case PARTITION_HORZ_4A:
+    case PARTITION_HORZ_4B:
+    case PARTITION_VERT_4A:
+    case PARTITION_VERT_4B:
+#endif  // CONFIG_UNEVEN_4WAY
     case PARTITION_HORZ_3:
     case PARTITION_VERT_3:
 #else   // CONFIG_EXT_RECUR_PARTITIONS
@@ -2950,6 +3254,44 @@
   *dist = last_part_rdc.dist;
   x->rdmult = orig_rdmult;
 }
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+/*! \brief Holds the current and best level banks used for rdopt. */
+typedef struct LevelBanksRDO {
+#if CONFIG_MVP_IMPROVEMENT
+  //! The current level bank, used to restore the level bank in MACROBLOCKD.
+  REF_MV_BANK curr_level_bank;
+  //! The best level bank from the rdopt process.
+  REF_MV_BANK best_level_bank;
+#endif  // CONFIG_MVP_IMPROVEMENT
+#if WARP_CU_BANK
+  //! The current warp level bank, used to restore the warp level bank in
+  //! MACROBLOCKD.
+  WARP_PARAM_BANK curr_level_warp_bank;
+  //! The best warp level bank from the rdopt process.
+  WARP_PARAM_BANK best_level_warp_bank;
+#endif  // WARP_CU_BANK
+} LevelBanksRDO;
+
+static AOM_INLINE void update_best_level_banks(LevelBanksRDO *level_banks,
+                                               const MACROBLOCKD *xd) {
+#if CONFIG_MVP_IMPROVEMENT
+  level_banks->best_level_bank = xd->ref_mv_bank;
+#endif  // CONFIG_MVP_IMPROVEMENT
+#if WARP_CU_BANK
+  level_banks->best_level_warp_bank = xd->warp_param_bank;
+#endif  // WARP_CU_BANK
+}
+
+static AOM_INLINE void restore_level_banks(MACROBLOCKD *xd,
+                                           const LevelBanksRDO *level_banks) {
+#if CONFIG_MVP_IMPROVEMENT
+  xd->ref_mv_bank = level_banks->curr_level_bank;
+#endif  // CONFIG_MVP_IMPROVEMENT
+#if WARP_CU_BANK
+  xd->warp_param_bank = level_banks->curr_level_warp_bank;
+#endif  // WARP_CU_BANK
+}
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
 
 #if !CONFIG_EXT_RECUR_PARTITIONS
 // Try searching for an encoding for the given subblock. Returns zero if the
@@ -3012,15 +3354,10 @@
                                PARTITION_TYPE partition,
                                const BLOCK_SIZE ab_subsize[SUB_PARTITIONS_AB],
                                const int ab_mi_pos[SUB_PARTITIONS_AB][2]
-#if CONFIG_C043_MVP_IMPROVEMENTS
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
                                ,
-                               REF_MV_BANK *best_level_bank
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
-#if WARP_CU_BANK
-                               ,
-                               WARP_PARAM_BANK *best_level_warp_bank
-#endif  // WARP_CU_BANK
-
+                               LevelBanksRDO *level_banks
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
 ) {
   const MACROBLOCK *const x = &td->mb;
   const MACROBLOCKD *const xd = &x->e_mbd;
@@ -3045,22 +3382,46 @@
   if (sum_rdc.rdcost >= best_rdc->rdcost) return false;
 
   *best_rdc = sum_rdc;
-#if CONFIG_C043_MVP_IMPROVEMENTS
-  *best_level_bank = x->e_mbd.ref_mv_bank;
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
-#if WARP_CU_BANK
-  *best_level_warp_bank = x->e_mbd.warp_param_bank;
-#endif  // WARP_CU_BANK
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+  update_best_level_banks(level_banks, &x->e_mbd);
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
   pc_tree->partitioning = partition;
   return true;
 }
 #endif  // !CONFIG_EXT_RECUR_PARTITIONS
 
 #if CONFIG_EXT_RECUR_PARTITIONS
+static AOM_INLINE PARTITION_TYPE get_forced_partition_type(
+    const AV1_COMMON *const cm, MACROBLOCK *x, int mi_row, int mi_col,
+    BLOCK_SIZE bsize, const PARTITION_TREE *ptree_luma,
+    const PARTITION_TREE *template_tree,
+    const CHROMA_REF_INFO *chroma_ref_info) {
+  // Partition types forced by bitstream syntax take precedence.
+  const MACROBLOCKD *xd = &x->e_mbd;
+  const bool ss_x = cm->seq_params.subsampling_x;
+  const bool ss_y = cm->seq_params.subsampling_y;
+  const PARTITION_TYPE derived_partition =
+      av1_get_normative_forced_partition_type(&cm->mi_params, xd->tree_type,
+                                              ss_x, ss_y, mi_row, mi_col, bsize,
+                                              ptree_luma, chroma_ref_info);
+  if (derived_partition != PARTITION_INVALID) {
+    return derived_partition;
+  }
+
+  // Otherwise, fall back to partition types forced by speed features.
+  if (template_tree) {
+    return template_tree->partition;
+  }
+
+  if (should_reuse_mode(x, REUSE_PARTITION_MODE_FLAG)) {
+    return av1_get_prev_partition(x, mi_row, mi_col, bsize, cm->sb_size);
+  }
+  return PARTITION_INVALID;
+}
+
 static AOM_INLINE void init_allowed_partitions(
     PartitionSearchState *part_search_state, const PartitionCfg *part_cfg,
-    const CHROMA_REF_INFO *chroma_ref_info,
-    const CommonModeInfoParams *mi_params, TREE_TYPE tree_type) {
+    const CHROMA_REF_INFO *chroma_ref_info, TREE_TYPE tree_type) {
   const PartitionBlkParams *blk_params = &part_search_state->part_blk_params;
   const int mi_row = blk_params->mi_row;
   const int mi_col = blk_params->mi_col;
@@ -3099,38 +3460,89 @@
       is_bsize_geq(vert_subsize, blk_params->min_partition_size) &&
       is_vert_size_valid;
 
-  // Boundary Handling
-  PARTITION_TYPE implied_partition;
-  const bool is_part_implied = is_partition_implied_at_boundary(
-      mi_params, tree_type, ss_x, ss_y, mi_row, mi_col, bsize, chroma_ref_info,
-      &implied_partition);
-  if (is_part_implied) {
-    part_search_state->partition_none_allowed = false;
-    if (implied_partition == PARTITION_HORZ) {
-      part_search_state->partition_rect_allowed[VERT] = false;
-    } else {
-      assert(implied_partition == PARTITION_VERT);
-      part_search_state->partition_rect_allowed[HORZ] = false;
-    }
-  }
+  const int ext_partition_allowed = part_search_state->ext_partition_allowed =
+      part_cfg->enable_ext_partitions &&
+      is_ext_partition_allowed_at_bsize(bsize, tree_type);
+
+  part_search_state->partition_3_allowed[HORZ] =
+      ext_partition_allowed &&
+      get_partition_subsize(bsize, PARTITION_HORZ_3) != BLOCK_INVALID &&
+      check_is_chroma_size_valid(tree_type, PARTITION_HORZ_3, bsize, mi_row,
+                                 mi_col, ss_x, ss_y, chroma_ref_info) &&
+      is_bsize_geq(get_partition_subsize(bsize, PARTITION_HORZ_3),
+                   blk_params->min_partition_size);
+
+  part_search_state->partition_3_allowed[VERT] =
+      ext_partition_allowed &&
+      get_partition_subsize(bsize, PARTITION_VERT_3) != BLOCK_INVALID &&
+      check_is_chroma_size_valid(tree_type, PARTITION_VERT_3, bsize, mi_row,
+                                 mi_col, ss_x, ss_y, chroma_ref_info) &&
+      is_bsize_geq(get_partition_subsize(bsize, PARTITION_VERT_3),
+                   blk_params->min_partition_size);
+
+#if CONFIG_UNEVEN_4WAY
+  part_search_state->partition_4a_allowed[HORZ] =
+      ext_partition_allowed &&
+      get_partition_subsize(bsize, PARTITION_HORZ_4A) != BLOCK_INVALID &&
+      check_is_chroma_size_valid(tree_type, PARTITION_HORZ_4A, bsize, mi_row,
+                                 mi_col, ss_x, ss_y, chroma_ref_info) &&
+      is_bsize_geq(get_partition_subsize(bsize, PARTITION_HORZ_4A),
+                   blk_params->min_partition_size) &&
+      IMPLIES(have_nz_chroma_ref_offset(bsize, PARTITION_HORZ_4A, ss_x, ss_y),
+              blk_params->has_7_8th_rows);
+
+  part_search_state->partition_4b_allowed[HORZ] =
+      ext_partition_allowed &&
+      get_partition_subsize(bsize, PARTITION_HORZ_4B) != BLOCK_INVALID &&
+      check_is_chroma_size_valid(tree_type, PARTITION_HORZ_4B, bsize, mi_row,
+                                 mi_col, ss_x, ss_y, chroma_ref_info) &&
+      is_bsize_geq(get_partition_subsize(bsize, PARTITION_HORZ_4B),
+                   blk_params->min_partition_size) &&
+      IMPLIES(have_nz_chroma_ref_offset(bsize, PARTITION_HORZ_4B, ss_x, ss_y),
+              blk_params->has_7_8th_rows);
+
+  part_search_state->partition_4a_allowed[VERT] =
+      ext_partition_allowed &&
+      get_partition_subsize(bsize, PARTITION_VERT_4A) != BLOCK_INVALID &&
+      check_is_chroma_size_valid(tree_type, PARTITION_VERT_4A, bsize, mi_row,
+                                 mi_col, ss_x, ss_y, chroma_ref_info) &&
+      is_bsize_geq(get_partition_subsize(bsize, PARTITION_VERT_4A),
+                   blk_params->min_partition_size) &&
+      IMPLIES(have_nz_chroma_ref_offset(bsize, PARTITION_VERT_4A, ss_x, ss_y),
+              blk_params->has_7_8th_cols);
+
+  part_search_state->partition_4b_allowed[VERT] =
+      ext_partition_allowed &&
+      get_partition_subsize(bsize, PARTITION_VERT_4B) != BLOCK_INVALID &&
+      check_is_chroma_size_valid(tree_type, PARTITION_VERT_4B, bsize, mi_row,
+                                 mi_col, ss_x, ss_y, chroma_ref_info) &&
+      is_bsize_geq(get_partition_subsize(bsize, PARTITION_VERT_4B),
+                   blk_params->min_partition_size) &&
+      IMPLIES(have_nz_chroma_ref_offset(bsize, PARTITION_VERT_4B, ss_x, ss_y),
+              blk_params->has_7_8th_cols);
+#endif  // CONFIG_UNEVEN_4WAY
 
   // Reset the flag indicating whether a partition leading to a rdcost lower
   // than the bound best_rdc has been found.
   part_search_state->found_best_partition = false;
 }
+
+static const int kZeroPartitionCosts[ALL_PARTITION_TYPES];
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
 // Initialize state variables of partition search used in
 // av1_rd_pick_partition().
 static void init_partition_search_state_params(
     MACROBLOCK *x, AV1_COMP *const cpi, PartitionSearchState *part_search_state,
 #if CONFIG_EXT_RECUR_PARTITIONS
-    PC_TREE *pc_tree,
+    PC_TREE *pc_tree, const PARTITION_TREE *ptree_luma,
+    const PARTITION_TREE *template_tree, int max_recursion_depth,
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
     int mi_row, int mi_col, BLOCK_SIZE bsize) {
   MACROBLOCKD *const xd = &x->e_mbd;
   const AV1_COMMON *const cm = &cpi->common;
   PartitionBlkParams *blk_params = &part_search_state->part_blk_params;
   const CommonModeInfoParams *const mi_params = &cpi->common.mi_params;
+  const TREE_TYPE tree_type = xd->tree_type;
 
   assert(bsize < BLOCK_SIZES_ALL);
 
@@ -3163,10 +3575,19 @@
 #endif  // !CONFIG_EXT_RECUR_PARTITIONS
   blk_params->bsize = bsize;
 
+  // Chroma subsampling.
+  part_search_state->ss_x = x->e_mbd.plane[1].subsampling_x;
+  part_search_state->ss_y = x->e_mbd.plane[1].subsampling_y;
+
   // Check if the partition corresponds to edge block.
   blk_params->has_rows = (blk_params->mi_row_edge < mi_params->mi_rows);
   blk_params->has_cols = (blk_params->mi_col_edge < mi_params->mi_cols);
 
+  const int ebw = mi_size_wide[bsize] / 8;
+  const int ebh = mi_size_high[bsize] / 8;
+  blk_params->has_7_8th_rows = (mi_row + 7 * ebh < mi_params->mi_rows);
+  blk_params->has_7_8th_cols = (mi_col + 7 * ebw < mi_params->mi_cols);
+
   // Update intra partitioning related info.
   part_search_state->intra_part_info = &x->part_search_info;
   // Prepare for segmentation CNN-based partitioning for intra-frame.
@@ -3188,8 +3609,16 @@
   // Partition cost buffer update
   ModeCosts *mode_costs = &x->mode_costs;
   part_search_state->partition_cost =
-      mode_costs->partition_cost[xd->tree_type == CHROMA_PART]
+      mode_costs->partition_cost[tree_type == CHROMA_PART]
                                 [part_search_state->pl_ctx_idx];
+#if CONFIG_EXT_RECUR_PARTITIONS
+  if (av1_get_normative_forced_partition_type(
+          mi_params, tree_type, part_search_state->ss_x,
+          part_search_state->ss_y, mi_row, mi_col, bsize, ptree_luma,
+          &pc_tree->chroma_ref_info) != PARTITION_INVALID) {
+    part_search_state->partition_cost = kZeroPartitionCosts;
+  }
+#endif  // CONFIG_EXT_RECUR_PARTITIONS
 
   // Initialize HORZ and VERT win flags as true for all split partitions.
   for (int i = 0; i < SUB_PARTITIONS_SPLIT; i++) {
@@ -3210,26 +3639,46 @@
   // Initialize HORZ and VERT partitions to be not ready.
   av1_zero(part_search_state->is_rect_ctx_is_ready);
 
-  // Chroma subsampling.
-  part_search_state->ss_x = x->e_mbd.plane[1].subsampling_x;
-  part_search_state->ss_y = x->e_mbd.plane[1].subsampling_y;
-
   // Initialize partition search flags to defaults.
   part_search_state->terminate_partition_search = 0;
 
   av1_zero(part_search_state->prune_rect_part);
 
 #if CONFIG_EXT_RECUR_PARTITIONS
+  part_search_state->partition_boundaries = NULL;
+  part_search_state->prune_partition_none = false;
+  av1_zero(part_search_state->prune_partition_3);
+#if CONFIG_UNEVEN_4WAY
+  av1_zero(part_search_state->prune_partition_4a);
+  av1_zero(part_search_state->prune_partition_4b);
+#endif  // CONFIG_UNEVEN_4WAY
+
+  part_search_state->forced_partition =
+      get_forced_partition_type(cm, x, mi_row, mi_col, bsize, ptree_luma,
+                                template_tree, &pc_tree->chroma_ref_info);
+
   init_allowed_partitions(part_search_state, &cpi->oxcf.part_cfg,
-                          &pc_tree->chroma_ref_info, &cm->mi_params,
-                          xd->tree_type);
+                          &pc_tree->chroma_ref_info, tree_type);
+
+  if (max_recursion_depth == 0) {
+    part_search_state->prune_rect_part[HORZ] =
+        part_search_state->prune_rect_part[VERT] = true;
+    part_search_state->prune_partition_3[HORZ] =
+        part_search_state->prune_partition_3[VERT] = true;
+#if CONFIG_UNEVEN_4WAY
+    part_search_state->prune_partition_4a[HORZ] =
+        part_search_state->prune_partition_4a[VERT] = true;
+    part_search_state->prune_partition_4b[HORZ] =
+        part_search_state->prune_partition_4b[VERT] = true;
+#endif  // CONFIG_UNEVEN_4WAY
+  }
 #else
   part_search_state->do_square_split =
       blk_params->bsize_at_least_8x8 &&
-      (xd->tree_type != CHROMA_PART || bsize > BLOCK_8X8);
+      (tree_type != CHROMA_PART || bsize > BLOCK_8X8);
   part_search_state->do_rectangular_split =
       cpi->oxcf.part_cfg.enable_rect_partitions &&
-      (xd->tree_type != CHROMA_PART || bsize > BLOCK_8X8);
+      (tree_type != CHROMA_PART || bsize > BLOCK_8X8);
 
   const BLOCK_SIZE horz_subsize = get_partition_subsize(bsize, PARTITION_HORZ);
   const BLOCK_SIZE vert_subsize = get_partition_subsize(bsize, PARTITION_VERT);
@@ -3242,7 +3691,7 @@
       get_plane_block_size(vert_subsize, part_search_state->ss_x,
                            part_search_state->ss_y) != BLOCK_INVALID;
   const bool no_sub_16_chroma_part =
-      xd->tree_type != CHROMA_PART ||
+      tree_type != CHROMA_PART ||
       (block_size_wide[bsize] > 8 && block_size_high[bsize] > 8);
 
   // Initialize allowed partition types for the partition block.
@@ -3264,27 +3713,11 @@
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
 }
 
+#if !CONFIG_EXT_RECUR_PARTITIONS
 // Override partition cost buffer for the edge blocks.
 static void set_partition_cost_for_edge_blk(
     AV1_COMMON const *cm, MACROBLOCKD *const xd,
-#if CONFIG_EXT_RECUR_PARTITIONS
-    const CHROMA_REF_INFO *chroma_ref_info,
-#endif  // CONFIG_EXT_RECUR_PARTITIONS
     PartitionSearchState *part_search_state) {
-#if CONFIG_EXT_RECUR_PARTITIONS
-  const PartitionBlkParams *blk_params = &part_search_state->part_blk_params;
-  const bool is_part_implied = is_partition_implied_at_boundary(
-      &cm->mi_params, xd->tree_type, part_search_state->ss_x,
-      part_search_state->ss_y, blk_params->mi_row, blk_params->mi_col,
-      blk_params->bsize, chroma_ref_info, NULL);
-  if (is_part_implied) {
-    for (int i = 0; i < PARTITION_TYPES; ++i) {
-      part_search_state->tmp_partition_cost[i] = 0;
-    }
-    part_search_state->partition_cost = part_search_state->tmp_partition_cost;
-  }
-  (void)xd;
-#else   // CONFIG_EXT_RECUR_PARTITIONS
   PartitionBlkParams blk_params = part_search_state->part_blk_params;
   assert(blk_params.bsize_at_least_8x8 && part_search_state->pl_ctx_idx >= 0);
   const int plane = xd->tree_type == CHROMA_PART;
@@ -3313,10 +3746,8 @@
   }
   // Override the partition cost buffer.
   part_search_state->partition_cost = part_search_state->tmp_partition_cost;
-#endif  // CONFIG_EXT_RECUR_PARTITIONS
 }
 
-#if !CONFIG_EXT_RECUR_PARTITIONS
 // Reset the partition search state flags when
 // must_find_valid_partition is equal to 1.
 static AOM_INLINE void reset_part_limitations(
@@ -3448,13 +3879,16 @@
     }
   }
 }
+
+static AOM_INLINE bool is_part_pruned_by_forced_partition(
+    const PartitionSearchState *part_state, PARTITION_TYPE partition) {
+  const PARTITION_TYPE forced_partition = part_state->forced_partition;
+  return forced_partition != PARTITION_INVALID && forced_partition != partition;
+}
 #endif
 
 typedef int (*active_edge_info)(const AV1_COMP *cpi, int mi_col, int mi_step);
 
-#define IS_FORCED_PARTITION_TYPE(cur_partition) \
-  (forced_partition == PARTITION_INVALID || forced_partition == (cur_partition))
-
 // Checks if HORZ / VERT partition search is allowed.
 static AOM_INLINE int is_rect_part_allowed(
     const AV1_COMP *cpi, PartitionSearchState *part_search_state,
@@ -3479,35 +3913,30 @@
 }
 
 #if CONFIG_EXT_RECUR_PARTITIONS
-static AOM_INLINE PARTITION_TYPE get_forced_partition_type(
-    const AV1_COMMON *const cm, MACROBLOCK *x, int mi_row, int mi_col,
-    BLOCK_SIZE bsize, const PARTITION_TREE *template_tree,
-    const PARTITION_TREE *ptree_luma, const CHROMA_REF_INFO *chroma_ref_info) {
-  if (template_tree) {
-    return template_tree->partition;
+static AOM_INLINE void prune_rect_with_none_rd(
+    PartitionSearchState *part_search_state, BLOCK_SIZE bsize, int q_index,
+    int rdmult, int64_t part_none_rd, const int *is_not_edge_block) {
+  for (RECT_PART_TYPE rect = 0; rect < NUM_RECT_PARTS; rect++) {
+    // Disable pruning on the boundary
+    if (!is_not_edge_block[rect]) {
+      continue;
+    }
+    const PARTITION_TYPE partition_type = rect_partition_type[rect];
+    float discount_factor = 1.1f;
+    const int q_thresh = 180;
+    if (q_index < q_thresh) {
+      discount_factor -= 0.025f;
+    }
+    if (AOMMAX(block_size_wide[bsize], block_size_high[bsize]) < 16) {
+      discount_factor -= 0.02f;
+    }
+    const int part_rate = part_search_state->partition_cost[partition_type];
+    const int64_t est_rd = (int64_t)(part_none_rd / discount_factor) +
+                           RDCOST(rdmult, part_rate, 0);
+    if (est_rd > part_none_rd) {
+      part_search_state->prune_rect_part[rect] = true;
+    }
   }
-
-  const MACROBLOCKD *xd = &x->e_mbd;
-  const int ss_x = cm->seq_params.subsampling_x;
-  const int ss_y = cm->seq_params.subsampling_y;
-  if (is_luma_chroma_share_same_partition(xd->tree_type, ptree_luma, bsize)) {
-    const PARTITION_TYPE derived_partition_mode =
-        sdp_chroma_part_from_luma(bsize, ptree_luma->partition, ss_x, ss_y);
-    return derived_partition_mode;
-  }
-
-  PARTITION_TYPE implied_partition;
-  const bool is_part_implied = is_partition_implied_at_boundary(
-      &cm->mi_params, xd->tree_type, ss_x, ss_y, mi_row, mi_col, bsize,
-      chroma_ref_info, &implied_partition);
-  if (is_part_implied) {
-    return implied_partition;
-  }
-
-  if (should_reuse_mode(x, REUSE_PARTITION_MODE_FLAG)) {
-    return av1_get_prev_partition(x, mi_row, mi_col, bsize, cm->sb_size);
-  }
-  return PARTITION_INVALID;
 }
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
 
@@ -3522,13 +3951,9 @@
     const PARTITION_TREE *template_tree, int max_recursion_depth,
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
     RD_RECT_PART_WIN_INFO *rect_part_win_info,
-#if CONFIG_C043_MVP_IMPROVEMENTS
-    REF_MV_BANK *best_level_bank, REF_MV_BANK *curr_level_bank,
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
-#if WARP_CU_BANK
-    WARP_PARAM_BANK *best_level_warp_bank,
-    WARP_PARAM_BANK *curr_level_warp_bank,
-#endif  // WARP_CU_BANK
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+    LevelBanksRDO *level_banks,
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
     int64_t part_none_rd) {
   const AV1_COMMON *const cm = &cpi->common;
   PartitionBlkParams blk_params = part_search_state->part_blk_params;
@@ -3543,9 +3968,6 @@
 #if CONFIG_EXT_RECUR_PARTITIONS
   const int ss_x = xd->plane[1].subsampling_x;
   const int ss_y = xd->plane[1].subsampling_y;
-  PARTITION_TYPE forced_partition = get_forced_partition_type(
-      cm, x, blk_params.mi_row, blk_params.mi_col, blk_params.bsize,
-      template_tree, ptree_luma, &pc_tree->chroma_ref_info);
 #else   // !CONFIG_EXT_RECUR_PARTITIONS
   (void)part_none_rd;
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
@@ -3585,14 +4007,13 @@
       (blk_params.mi_col + mi_size_wide[bsize] < mi_params->mi_cols);
   const bool try_prune_with_ml =
       cpi->sf.part_sf.prune_rect_with_ml && !frame_is_intra_only(cm) &&
-      forced_partition == PARTITION_INVALID && is_whole_block_inside &&
-      part_none_rd < INT64_MAX &&
+      part_search_state->forced_partition == PARTITION_INVALID &&
+      is_whole_block_inside && part_none_rd < INT64_MAX &&
       (is_rect_part_allowed(cpi, part_search_state, active_edge_type, HORZ,
                             mi_pos_rect[HORZ][0][HORZ]) ||
        is_rect_part_allowed(cpi, part_search_state, active_edge_type, VERT,
                             mi_pos_rect[VERT][0][VERT]));
 
-  bool prune_horz = false, prune_vert = false;
   if (try_prune_with_ml && bsize != BLOCK_4X8 && bsize != BLOCK_8X4 &&
       is_partition_point(bsize)) {
     float ml_features[19];
@@ -3601,7 +4022,15 @@
                                  mi_pos_rect);
     const bool is_hd = AOMMIN(cm->width, cm->height) >= 1080;
 
-    av1_erp_prune_rect(bsize, is_hd, ml_features, &prune_horz, &prune_vert);
+    av1_erp_prune_rect(bsize, is_hd, ml_features,
+                       &part_search_state->prune_rect_part[HORZ],
+                       &part_search_state->prune_rect_part[VERT]);
+  }
+  if (cpi->sf.part_sf.prune_rect_with_none_rd &&
+      part_search_state->forced_partition == PARTITION_INVALID &&
+      !frame_is_intra_only(cm) && part_none_rd < INT64_MAX) {
+    prune_rect_with_none_rd(part_search_state, bsize, x->qindex, x->rdmult,
+                            part_none_rd, is_not_edge_block);
   }
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
 
@@ -3615,16 +4044,6 @@
                               mi_pos_rect[i][0][i]))
       continue;
 
-#if CONFIG_EXT_RECUR_PARTITIONS
-    if (pc_tree->parent) {
-      if ((pc_tree->parent->horizontal3[1] == pc_tree && i == HORZ) ||
-          (pc_tree->parent->vertical3[1] == pc_tree && i == VERT)) {
-        continue;
-      }
-    }
-
-#endif  // CONFIG_EXT_RECUR_PARTITIONS
-
     // Sub-partition idx.
     const PARTITION_TYPE partition_type = rect_partition_type[i];
     blk_params.subsize =
@@ -3639,38 +4058,12 @@
 #endif  // !CONFIG_EXT_RECUR_PARTITIONS
     av1_init_rd_stats(sum_rdc);
 #if CONFIG_EXT_RECUR_PARTITIONS
-    if (!IS_FORCED_PARTITION_TYPE(partition_type)) {
+    if (is_part_pruned_by_forced_partition(part_search_state, partition_type)) {
       continue;
     }
 
-    if (partition_type == PARTITION_HORZ && prune_horz) {
-      continue;
-    } else if (partition_type == PARTITION_VERT && prune_vert) {
-      continue;
-    }
-
-    if (cpi->sf.part_sf.prune_rect_with_none_rd &&
-        forced_partition == PARTITION_INVALID && !frame_is_intra_only(cm) &&
-        part_none_rd < INT64_MAX && sum_rdc->rate < INT_MAX &&
-        is_not_edge_block[i]) {
-      float discount_factor = 1.1f;
-      const int q_thresh = 180;
-      const int q = x->qindex;
-      if (q < q_thresh) {
-        discount_factor -= 0.025f;
-      }
-      if (AOMMAX(block_size_wide[blk_params.bsize],
-                 block_size_high[blk_params.bsize]) < 16) {
-        discount_factor -= 0.02f;
-      }
-      const int64_t est_rd = (int64_t)(part_none_rd / discount_factor) +
-                             RDCOST(x->rdmult, part_hv_rate, 0);
-      if (est_rd > part_none_rd) {
-        continue;
-      }
-    }
-
     PC_TREE **sub_tree = (i == HORZ) ? pc_tree->horizontal : pc_tree->vertical;
+    assert(sub_tree);
 
     const int num_planes = av1_num_planes(cm);
     for (int idx = 0; idx < SUB_PARTITIONS_RECT; idx++) {
@@ -3680,11 +4073,11 @@
       }
     }
     sub_tree[0] = av1_alloc_pc_tree_node(
-        mi_pos_rect[i][0][0], mi_pos_rect[i][0][1], blk_params.subsize, pc_tree,
-        partition_type, 0, 0, ss_x, ss_y);
+        xd->tree_type, mi_pos_rect[i][0][0], mi_pos_rect[i][0][1],
+        blk_params.subsize, pc_tree, partition_type, 0, 0, ss_x, ss_y);
     sub_tree[1] = av1_alloc_pc_tree_node(
-        mi_pos_rect[i][1][0], mi_pos_rect[i][1][1], blk_params.subsize, pc_tree,
-        partition_type, 1, 1, ss_x, ss_y);
+        xd->tree_type, mi_pos_rect[i][1][0], mi_pos_rect[i][1][1],
+        blk_params.subsize, pc_tree, partition_type, 1, 1, ss_x, ss_y);
 
     bool both_blocks_skippable = true;
 
@@ -3703,10 +4096,11 @@
     for (int j = 0; j < SUB_PARTITIONS_RECT; j++) {
       assert(cur_ctx[i][j] != NULL);
       if (cur_ctx[i][j][0] == NULL) {
-        cur_ctx[i][j][0] = av1_alloc_pmc(
-            cm, mi_pos_rect[i][j][0], mi_pos_rect[i][j][1], blk_params.subsize,
-            pc_tree, partition_type, j, part_search_state->ss_x,
-            part_search_state->ss_y, &td->shared_coeff_buf);
+        cur_ctx[i][j][0] =
+            av1_alloc_pmc(cm, xd->tree_type, mi_pos_rect[i][j][0],
+                          mi_pos_rect[i][j][1], blk_params.subsize, pc_tree,
+                          partition_type, j, part_search_state->ss_x,
+                          part_search_state->ss_y, &td->shared_coeff_buf);
       }
     }
     sum_rdc->rate = part_search_state->partition_cost[partition_type];
@@ -3766,12 +4160,10 @@
         pc_tree->skippable = both_blocks_skippable;
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
         *best_rdc = *sum_rdc;
-#if CONFIG_C043_MVP_IMPROVEMENTS
-        *best_level_bank = x->e_mbd.ref_mv_bank;
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
-#if WARP_CU_BANK
-        *best_level_warp_bank = x->e_mbd.warp_param_bank;
-#endif  // WARP_CU_BANK
+
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+        update_best_level_banks(level_banks, &x->e_mbd);
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
         part_search_state->found_best_partition = true;
         pc_tree->partitioning = partition_type;
       }
@@ -3780,12 +4172,9 @@
       if (rect_part_win_info != NULL)
         rect_part_win_info->rect_part_win[i] = false;
     }
-#if CONFIG_C043_MVP_IMPROVEMENTS
-    x->e_mbd.ref_mv_bank = *curr_level_bank;
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
-#if WARP_CU_BANK
-    x->e_mbd.warp_param_bank = *curr_level_warp_bank;
-#endif  // WARP_CU_BANK
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+    restore_level_banks(&x->e_mbd, level_banks);
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
     av1_restore_context(cm, x, x_ctx, blk_params.mi_row, blk_params.mi_col,
                         blk_params.bsize, av1_num_planes(cm));
 #if CONFIG_EXT_RECUR_PARTITIONS
@@ -3822,14 +4211,10 @@
     PartitionSearchState *part_search_state, RD_STATS *best_rdc,
     const BLOCK_SIZE ab_subsize[SUB_PARTITIONS_AB],
     const int ab_mi_pos[SUB_PARTITIONS_AB][2], const PARTITION_TYPE part_type
-#if CONFIG_C043_MVP_IMPROVEMENTS
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
     ,
-    REF_MV_BANK *best_level_bank
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
-#if WARP_CU_BANK
-    ,
-    WARP_PARAM_BANK *best_level_warp_bank
-#endif  // WARP_CU_BANK
+    LevelBanksRDO *level_banks
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
 ) {
   const AV1_COMMON *const cm = &cpi->common;
   PartitionBlkParams blk_params = part_search_state->part_blk_params;
@@ -3856,14 +4241,10 @@
   part_search_state->found_best_partition |=
       rd_test_partition3(cpi, td, tile_data, tp, pc_tree, best_rdc, dst_ctxs,
                          mi_row, mi_col, bsize, part_type, ab_subsize, ab_mi_pos
-#if CONFIG_C043_MVP_IMPROVEMENTS
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
                          ,
-                         best_level_bank
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
-#if WARP_CU_BANK
-                         ,
-                         best_level_warp_bank
-#endif  // WARP_CU_BANK
+                         level_banks
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
       );
 
 #if CONFIG_COLLECT_PARTITION_STATS
@@ -3913,14 +4294,10 @@
     PC_TREE *pc_tree, PartitionSearchState *part_search_state,
     RD_STATS *best_rdc, RD_RECT_PART_WIN_INFO *rect_part_win_info,
     int pb_source_variance, int ext_partition_allowed
-#if CONFIG_C043_MVP_IMPROVEMENTS
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
     ,
-    REF_MV_BANK *best_level_bank, REF_MV_BANK *curr_level_bank
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
-#if WARP_CU_BANK
-    ,
-    WARP_PARAM_BANK *best_level_warp_bank, WARP_PARAM_BANK *curr_level_warp_bank
-#endif  // WARP_CU_BANK
+    LevelBanksRDO *level_banks
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
 ) {
   const AV1_COMMON *const cm = &cpi->common;
   PartitionBlkParams blk_params = part_search_state->part_blk_params;
@@ -4004,9 +4381,9 @@
       // Set AB partition context.
       if (cur_part_ctxs[ab_part_type][i] == NULL)
         cur_part_ctxs[ab_part_type][i] = av1_alloc_pmc(
-            cm, ab_mi_pos[ab_part_type][i][0], ab_mi_pos[ab_part_type][i][1],
-            ab_subsize[ab_part_type][i], pc_tree, part_type, i,
-            part_search_state->ss_x, part_search_state->ss_y,
+            cm, x->e_mbd.tree_type, ab_mi_pos[ab_part_type][i][0],
+            ab_mi_pos[ab_part_type][i][1], ab_subsize[ab_part_type][i], pc_tree,
+            part_type, i, part_search_state->ss_x, part_search_state->ss_y,
             &td->shared_coeff_buf);
       // Set mode as not ready.
       cur_part_ctxs[ab_part_type][i]->rd_mode_is_ready = 0;
@@ -4030,21 +4407,14 @@
     rd_pick_ab_part(cpi, td, tile_data, tp, x, x_ctx, pc_tree,
                     cur_part_ctxs[ab_part_type], part_search_state, best_rdc,
                     ab_subsize[ab_part_type], ab_mi_pos[ab_part_type], part_type
-#if CONFIG_C043_MVP_IMPROVEMENTS
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
                     ,
-                    best_level_bank
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
-#if WARP_CU_BANK
-                    ,
-                    best_level_warp_bank
-#endif  // WARP_CU_BANK
+                    level_banks
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
     );
-#if CONFIG_C043_MVP_IMPROVEMENTS
-    x->e_mbd.ref_mv_bank = *curr_level_bank;
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
-#if WARP_CU_BANK
-    x->e_mbd.warp_param_bank = *curr_level_warp_bank;
-#endif  // WARP_CU_BANK
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+    restore_level_banks(&x->e_mbd, level_banks);
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
   }
 }
 
@@ -4073,10 +4443,10 @@
       RDCOST(x->rdmult, part_search_state->sum_rdc.rate, 0);
   for (PART4_TYPES i = 0; i < SUB_PARTITIONS_PART4; ++i) {
     if (cur_part_ctx[i] == NULL)
-      cur_part_ctx[i] =
-          av1_alloc_pmc(cm, mi_pos[i][0], mi_pos[i][1], subsize, pc_tree,
-                        partition_type, i, part_search_state->ss_x,
-                        part_search_state->ss_y, &td->shared_coeff_buf);
+      cur_part_ctx[i] = av1_alloc_pmc(
+          cm, x->e_mbd.tree_type, mi_pos[i][0], mi_pos[i][1], subsize, pc_tree,
+          partition_type, i, part_search_state->ss_x, part_search_state->ss_y,
+          &td->shared_coeff_buf);
   }
 }
 
@@ -4087,14 +4457,10 @@
     PC_TREE *pc_tree, PICK_MODE_CONTEXT *cur_part_ctx[SUB_PARTITIONS_PART4],
     PartitionSearchState *part_search_state, RD_STATS *best_rdc,
     const int inc_step[NUM_PART4_TYPES], PARTITION_TYPE partition_type
-#if CONFIG_C043_MVP_IMPROVEMENTS
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
     ,
-    REF_MV_BANK *best_level_bank, REF_MV_BANK *curr_level_bank
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
-#if WARP_CU_BANK
-    ,
-    WARP_PARAM_BANK *best_level_warp_bank, WARP_PARAM_BANK *curr_level_warp_bank
-#endif  // WARP_CU_BANK
+    LevelBanksRDO *level_banks
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
 ) {
   const AV1_COMMON *const cm = &cpi->common;
   PartitionBlkParams blk_params = part_search_state->part_blk_params;
@@ -4136,12 +4502,9 @@
   av1_rd_cost_update(x->rdmult, &part_search_state->sum_rdc);
   if (part_search_state->sum_rdc.rdcost < best_rdc->rdcost) {
     *best_rdc = part_search_state->sum_rdc;
-#if CONFIG_C043_MVP_IMPROVEMENTS
-    *best_level_bank = x->e_mbd.ref_mv_bank;
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
-#if WARP_CU_BANK
-    *best_level_warp_bank = x->e_mbd.warp_param_bank;
-#endif  // WARP_CU_BANK
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+    update_best_level_banks(level_banks, &x->e_mbd);
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
     part_search_state->found_best_partition = true;
     pc_tree->partitioning = partition_type;
   }
@@ -4153,14 +4516,11 @@
     partition_timer_on = 0;
   }
 #endif
-#if CONFIG_C043_MVP_IMPROVEMENTS
-  x->e_mbd.ref_mv_bank = *curr_level_bank;
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
-#if WARP_CU_BANK
-  x->e_mbd.warp_param_bank = *curr_level_warp_bank;
-#endif  // WARP_CU_BANK
   av1_restore_context(cm, x, x_ctx, blk_params.mi_row, blk_params.mi_col,
                       blk_params.bsize, av1_num_planes(cm));
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+  restore_level_banks(&x->e_mbd, level_banks);
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
 }
 
 // Prune 4-way partitions based on the number of horz/vert wins
@@ -4276,12 +4636,13 @@
       blk_params.has_rows && blk_params.has_cols)
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
     part_search_state->partition_none_allowed = 1;
-  assert(part_search_state->terminate_partition_search == 0);
 
+#if !CONFIG_EXT_RECUR_PARTITIONS
   if (part_search_state->partition_none_allowed == BLOCK_INVALID) {
     part_search_state->partition_none_allowed = 0;
     return;
   }
+#endif  // !CONFIG_EXT_RECUR_PARTITIONS
 
   // Set PARTITION_NONE for screen content.
   if (cpi->is_screen_content_type)
@@ -4301,9 +4662,9 @@
   // Set PARTITION_NONE context.
   if (pc_tree->none == NULL)
     pc_tree->none = av1_alloc_pmc(
-        cm, blk_params.mi_row, blk_params.mi_col, blk_params.bsize, pc_tree,
-        PARTITION_NONE, 0, part_search_state->ss_x, part_search_state->ss_y,
-        &td->shared_coeff_buf);
+        cm, x->e_mbd.tree_type, blk_params.mi_row, blk_params.mi_col,
+        blk_params.bsize, pc_tree, PARTITION_NONE, 0, part_search_state->ss_x,
+        part_search_state->ss_y, &td->shared_coeff_buf);
 
   // Set PARTITION_NONE type cost.
   if (part_search_state->partition_none_allowed) {
@@ -4478,14 +4839,10 @@
     RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx,
     PartitionSearchState *part_search_state, RD_STATS *best_rdc,
     unsigned int *pb_source_variance, int64_t *none_rd, int64_t *part_none_rd
-#if CONFIG_C043_MVP_IMPROVEMENTS
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
     ,
-    REF_MV_BANK *best_level_bank
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
-#if WARP_CU_BANK
-    ,
-    WARP_PARAM_BANK *best_level_warp_bank
-#endif  // WARP_CU_BANK
+    LevelBanksRDO *level_banks
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
 ) {
   const AV1_COMMON *const cm = &cpi->common;
   PartitionBlkParams blk_params = part_search_state->part_blk_params;
@@ -4495,13 +4852,25 @@
   const BLOCK_SIZE bsize = blk_params.bsize;
   assert(bsize < BLOCK_SIZES_ALL);
 
+#if CONFIG_EXT_RECUR_PARTITIONS
+  if (is_part_pruned_by_forced_partition(part_search_state, PARTITION_NONE)) {
+    return;
+  }
+#endif  // CONFIG_EXT_RECUR_PARTITIONS
   // Set PARTITION_NONE allowed flag.
   set_part_none_allowed_flag(cpi,
 #if CONFIG_EXT_RECUR_PARTITIONS
                              x->e_mbd.tree_type,
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
                              part_search_state);
-  if (!part_search_state->partition_none_allowed) return;
+  if (!part_search_state->partition_none_allowed) {
+    return;
+  }
+#if CONFIG_EXT_RECUR_PARTITIONS
+  if (part_search_state->prune_partition_none) {
+    return;
+  }
+#endif  // CONFIG_EXT_RECUR_PARTITIONS
 
   int pt_cost = 0;
   RD_STATS best_remain_rdcost;
@@ -4569,12 +4938,9 @@
     *part_none_rd = this_rdc->rdcost;
     if (this_rdc->rdcost < best_rdc->rdcost) {
       *best_rdc = *this_rdc;
-#if CONFIG_C043_MVP_IMPROVEMENTS
-      *best_level_bank = x->e_mbd.ref_mv_bank;
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
-#if WARP_CU_BANK
-      *best_level_warp_bank = x->e_mbd.warp_param_bank;
-#endif  // WARP_CU_BANK
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+      update_best_level_banks(level_banks, &x->e_mbd);
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
       part_search_state->found_best_partition = true;
 #if !CONFIG_EXT_RECUR_PARTITIONS
       if (blk_params.bsize_at_least_8x8) {
@@ -4592,6 +4958,9 @@
     }
   }
   av1_restore_context(cm, x, x_ctx, mi_row, mi_col, bsize, av1_num_planes(cm));
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+  restore_level_banks(&x->e_mbd, level_banks);
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
 }
 
 // PARTITION_SPLIT search.
@@ -4601,14 +4970,10 @@
     SIMPLE_MOTION_DATA_TREE *sms_tree, RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx,
     PartitionSearchState *part_search_state, RD_STATS *best_rdc,
     SB_MULTI_PASS_MODE multi_pass_mode, int64_t *part_split_rd
-#if CONFIG_C043_MVP_IMPROVEMENTS
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
     ,
-    REF_MV_BANK *best_level_bank
+    LevelBanksRDO *level_banks
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
-#if WARP_CU_BANK
-    ,
-    WARP_PARAM_BANK *best_level_warp_bank
-#endif  // WARP_CU_BANK
 #if CONFIG_EXT_RECUR_PARTITIONS
     ,
     const PARTITION_TREE *ptree_luma, const PARTITION_TREE *template_tree,
@@ -4632,6 +4997,13 @@
       !is_square_split_eligible(bsize, cm->sb_size)) {
     return;
   }
+  if (part_search_state->forced_partition != PARTITION_INVALID &&
+      part_search_state->forced_partition != PARTITION_SPLIT) {
+    return;
+  }
+  if (max_recursion_depth < 0) {
+    return;
+  }
 
   const int num_planes = av1_num_planes(cm);
   PC_TREE **sub_tree = pc_tree->split;
@@ -4677,8 +5049,9 @@
 
     if (pc_tree->split[idx] == NULL) {
       pc_tree->split[idx] = av1_alloc_pc_tree_node(
-          mi_row + y_idx, mi_col + x_idx, subsize, pc_tree, PARTITION_SPLIT,
-          idx, idx == 3, part_search_state->ss_x, part_search_state->ss_y);
+          x->e_mbd.tree_type, mi_row + y_idx, mi_col + x_idx, subsize, pc_tree,
+          PARTITION_SPLIT, idx, idx == 3, part_search_state->ss_x,
+          part_search_state->ss_y);
     }
 #if !CONFIG_EXT_RECUR_PARTITIONS
     int64_t *p_split_rd = &part_search_state->split_rd[idx];
@@ -4751,12 +5124,9 @@
     sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
     if (sum_rdc.rdcost < best_rdc->rdcost) {
       *best_rdc = sum_rdc;
-#if CONFIG_C043_MVP_IMPROVEMENTS
-      *best_level_bank = x->e_mbd.ref_mv_bank;
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
-#if WARP_CU_BANK
-      *best_level_warp_bank = x->e_mbd.warp_param_bank;
-#endif  // WARP_CU_BANK
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+      update_best_level_banks(level_banks, &x->e_mbd);
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
       part_search_state->found_best_partition = true;
       pc_tree->partitioning = PARTITION_SPLIT;
     }
@@ -4774,6 +5144,9 @@
 #endif  // !CONFIG_EXT_RECUR_PARTITIONS
   }
   av1_restore_context(cm, x, x_ctx, mi_row, mi_col, bsize, av1_num_planes(cm));
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+  restore_level_banks(&x->e_mbd, level_banks);
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
 }
 
 #if CONFIG_EXT_RECUR_PARTITIONS
@@ -4799,15 +5172,23 @@
 /*!\brief Whether the current partition node uses horizontal type partitions. */
 static AOM_INLINE bool node_uses_horz(const PC_TREE *pc_tree) {
   assert(pc_tree);
-  return pc_tree->partitioning == PARTITION_HORZ ||
-         pc_tree->partitioning == PARTITION_HORZ_3;
+  return pc_tree->partitioning == PARTITION_HORZ
+#if CONFIG_UNEVEN_4WAY
+         || pc_tree->partitioning == PARTITION_HORZ_4A ||
+         pc_tree->partitioning == PARTITION_HORZ_4B
+#endif  // CONFIG_UNEVEN_4WAY
+         || pc_tree->partitioning == PARTITION_HORZ_3;
 }
 
 /*!\brief Whether the current partition node uses vertical type partitions. */
 static AOM_INLINE bool node_uses_vert(const PC_TREE *pc_tree) {
   assert(pc_tree);
-  return pc_tree->partitioning == PARTITION_VERT ||
-         pc_tree->partitioning == PARTITION_VERT_3;
+  return pc_tree->partitioning == PARTITION_VERT
+#if CONFIG_UNEVEN_4WAY
+         || pc_tree->partitioning == PARTITION_VERT_4A ||
+         pc_tree->partitioning == PARTITION_VERT_4B
+#endif  // CONFIG_UNEVEN_4WAY
+         || pc_tree->partitioning == PARTITION_VERT_3;
 }
 
 /*!\brief Try searching for an encoding for the given subblock.
@@ -4863,43 +5244,1054 @@
   return 1;
 }
 
+/*!\brief Trace out the partition boundaries using the structure in pc_tree.
+ *
+ * Results go to partition_boundaries, indexed in mi units with a stride of
+ * MAX_MIB_SIZE. Each entry is a bitmask: 1 << HORZ marks a horizontal boundary
+ * and 1 << VERT a vertical one. NOTE(review): entries are bool; with C99 _Bool
+ * any nonzero mask is stored as 1 -- confirm the element type is intended.
+ */
+static AOM_INLINE void trace_partition_boundary(bool *partition_boundaries,
+                                                const PC_TREE *pc_tree,
+                                                int mi_row, int mi_col,
+                                                BLOCK_SIZE bsize) {
+  mi_row &= MAX_MIB_MASK;
+  mi_col &= MAX_MIB_MASK;
+  const PARTITION_TYPE partition = pc_tree->partitioning;
+  assert(bsize < BLOCK_SIZES_ALL);
+  const int mi_width = mi_size_wide[bsize];
+  const int mi_height = mi_size_high[bsize];
+#if CONFIG_UNEVEN_4WAY
+  const int ebs_w = mi_size_wide[bsize] / 8;  // 1/8 of the block width.
+  const int ebs_h = mi_size_high[bsize] / 8;  // 1/8 of the block height.
+  const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
+#endif  // CONFIG_UNEVEN_4WAY
+  switch (partition) {
+    case PARTITION_NONE:  // Leaf: mark its bottom row and right column.
+      for (int col = 0; col < mi_width; col++) {
+        partition_boundaries[(mi_row + mi_height - 1) * MAX_MIB_SIZE +
+                             (mi_col + col)] |= (1 << HORZ);
+      }
+      for (int row = 0; row < mi_height; row++) {
+        partition_boundaries[(mi_row + row) * MAX_MIB_SIZE + mi_col + mi_width -
+                             1] |= (1 << VERT);
+      }
+      break;
+    case PARTITION_HORZ:
+      trace_partition_boundary(partition_boundaries, pc_tree->horizontal[0],
+                               mi_row, mi_col,
+                               get_partition_subsize(bsize, PARTITION_HORZ));
+      trace_partition_boundary(partition_boundaries, pc_tree->horizontal[1],
+                               mi_row + mi_height / 2, mi_col,
+                               get_partition_subsize(bsize, PARTITION_HORZ));
+      break;
+    case PARTITION_VERT:
+      trace_partition_boundary(partition_boundaries, pc_tree->vertical[0],
+                               mi_row, mi_col,
+                               get_partition_subsize(bsize, PARTITION_VERT));
+      trace_partition_boundary(partition_boundaries, pc_tree->vertical[1],
+                               mi_row, mi_col + mi_width / 2,
+                               get_partition_subsize(bsize, PARTITION_VERT));
+      break;
+    case PARTITION_HORZ_3:
+      trace_partition_boundary(
+          partition_boundaries, pc_tree->horizontal3[0], mi_row, mi_col,
+          get_h_partition_subsize(bsize, 0, PARTITION_HORZ_3));
+      trace_partition_boundary(
+          partition_boundaries, pc_tree->horizontal3[1], mi_row + mi_height / 4,
+          mi_col, get_h_partition_subsize(bsize, 1, PARTITION_HORZ_3));
+      trace_partition_boundary(
+          partition_boundaries, pc_tree->horizontal3[2], mi_row + mi_height / 4,
+          mi_col + mi_width / 2,
+          get_h_partition_subsize(bsize, 1, PARTITION_HORZ_3));
+      trace_partition_boundary(
+          partition_boundaries, pc_tree->horizontal3[3],
+          mi_row + 3 * mi_height / 4, mi_col,
+          get_h_partition_subsize(bsize, 0, PARTITION_HORZ_3));
+      break;
+    case PARTITION_VERT_3:
+      trace_partition_boundary(
+          partition_boundaries, pc_tree->vertical3[0], mi_row, mi_col,
+          get_h_partition_subsize(bsize, 0, PARTITION_VERT_3));
+      trace_partition_boundary(
+          partition_boundaries, pc_tree->vertical3[1], mi_row,
+          mi_col + mi_width / 4,
+          get_h_partition_subsize(bsize, 1, PARTITION_VERT_3));
+      trace_partition_boundary(
+          partition_boundaries, pc_tree->vertical3[2], mi_row + mi_height / 2,
+          mi_col + mi_width / 4,
+          get_h_partition_subsize(bsize, 1, PARTITION_VERT_3));
+      trace_partition_boundary(
+          partition_boundaries, pc_tree->vertical3[3], mi_row,
+          mi_col + 3 * mi_width / 4,
+          get_h_partition_subsize(bsize, 0, PARTITION_VERT_3));
+      break;
+#if CONFIG_UNEVEN_4WAY
+    case PARTITION_HORZ_4A: {
+      const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_HORZ);
+      const BLOCK_SIZE bsize_med =
+          get_partition_subsize(bsize_big, PARTITION_HORZ);
+      assert(subsize == get_partition_subsize(bsize_med, PARTITION_HORZ));
+      trace_partition_boundary(partition_boundaries, pc_tree->horizontal4a[0],
+                               mi_row, mi_col, subsize);
+      trace_partition_boundary(partition_boundaries, pc_tree->horizontal4a[1],
+                               mi_row + ebs_h, mi_col, bsize_med);
+      trace_partition_boundary(partition_boundaries, pc_tree->horizontal4a[2],
+                               mi_row + 3 * ebs_h, mi_col, bsize_big);
+      trace_partition_boundary(partition_boundaries, pc_tree->horizontal4a[3],
+                               mi_row + 7 * ebs_h, mi_col, subsize);
+      break;
+    }
+    case PARTITION_HORZ_4B: {
+      const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_HORZ);
+      const BLOCK_SIZE bsize_med =
+          get_partition_subsize(bsize_big, PARTITION_HORZ);
+      assert(subsize == get_partition_subsize(bsize_med, PARTITION_HORZ));
+      trace_partition_boundary(partition_boundaries, pc_tree->horizontal4b[0],
+                               mi_row, mi_col, subsize);
+      trace_partition_boundary(partition_boundaries, pc_tree->horizontal4b[1],
+                               mi_row + ebs_h, mi_col, bsize_big);
+      trace_partition_boundary(partition_boundaries, pc_tree->horizontal4b[2],
+                               mi_row + 5 * ebs_h, mi_col, bsize_med);
+      trace_partition_boundary(partition_boundaries, pc_tree->horizontal4b[3],
+                               mi_row + 7 * ebs_h, mi_col, subsize);
+      break;
+    }
+    case PARTITION_VERT_4A: {
+      const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_VERT);
+      const BLOCK_SIZE bsize_med =
+          get_partition_subsize(bsize_big, PARTITION_VERT);
+      assert(subsize == get_partition_subsize(bsize_med, PARTITION_VERT));
+      trace_partition_boundary(partition_boundaries, pc_tree->vertical4a[0],
+                               mi_row, mi_col, subsize);
+      trace_partition_boundary(partition_boundaries, pc_tree->vertical4a[1],
+                               mi_row, mi_col + ebs_w, bsize_med);
+      trace_partition_boundary(partition_boundaries, pc_tree->vertical4a[2],
+                               mi_row, mi_col + 3 * ebs_w, bsize_big);
+      trace_partition_boundary(partition_boundaries, pc_tree->vertical4a[3],
+                               mi_row, mi_col + 7 * ebs_w, subsize);
+      break;
+    }
+    case PARTITION_VERT_4B: {
+      const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_VERT);
+      const BLOCK_SIZE bsize_med =
+          get_partition_subsize(bsize_big, PARTITION_VERT);
+      assert(subsize == get_partition_subsize(bsize_med, PARTITION_VERT));
+      trace_partition_boundary(partition_boundaries, pc_tree->vertical4b[0],
+                               mi_row, mi_col, subsize);
+      trace_partition_boundary(partition_boundaries, pc_tree->vertical4b[1],
+                               mi_row, mi_col + ebs_w, bsize_big);
+      trace_partition_boundary(partition_boundaries, pc_tree->vertical4b[2],
+                               mi_row, mi_col + 5 * ebs_w, bsize_med);
+      trace_partition_boundary(partition_boundaries, pc_tree->vertical4b[3],
+                               mi_row, mi_col + 7 * ebs_w, subsize);
+      break;
+    }
+#endif  // CONFIG_UNEVEN_4WAY
+    default: assert(0 && "Invalid partition type in trace_partition_boundary!");
+  }
+}
+
+/*!\brief Prunes H-shaped (3-way) partitions using the best partition so far.
+ *
+ * A 3-way type is pruned unless one of its internal edges is marked in
+ * part_search_state->partition_boundaries; the result is OR-ed into
+ * prune_partition_3, so an earlier prune decision is never undone. */
+static AOM_INLINE void prune_part_3_with_partition_boundary(
+    PartitionSearchState *part_search_state, BLOCK_SIZE bsize, int mi_row,
+    int mi_col, bool can_search_horz, bool can_search_vert) {
+  const int mi_width = mi_size_wide[bsize];
+  const int mi_height = mi_size_high[bsize];
+  const int masked_mi_row = mi_row & MAX_MIB_MASK;
+  const int masked_mi_col = mi_col & MAX_MIB_MASK;
+  const bool *partition_boundaries = part_search_state->partition_boundaries;
+  if (can_search_horz) {
+    bool keep_horz_3 = false;  // Becomes true at the first matching edge.
+    for (int col = 0; col < mi_width; col++) {
+      if (partition_boundaries[(masked_mi_row + mi_height / 4 - 1) *
+                                   MAX_MIB_SIZE +
+                               masked_mi_col + col] &
+          (1 << HORZ)) {
+        keep_horz_3 = true;
+        break;
+      }
+    }
+    if (!keep_horz_3) {  // Next: the horizontal edge at 3/4 height.
+      for (int col = 0; col < mi_width; col++) {
+        if (partition_boundaries[(masked_mi_row + 3 * mi_height / 4 - 1) *
+                                     MAX_MIB_SIZE +
+                                 masked_mi_col + col] &
+            (1 << HORZ)) {
+          keep_horz_3 = true;
+          break;
+        }
+      }
+    }
+    if (!keep_horz_3) {  // Last: the vertical mid-line of the middle half.
+      for (int row = 0; row < mi_height / 2; row++) {
+        if (partition_boundaries[(masked_mi_row + mi_height / 4 + row) *
+                                     MAX_MIB_SIZE +
+                                 masked_mi_col + mi_width / 2 - 1] &
+            (1 << VERT)) {
+          keep_horz_3 = true;
+          break;
+        }
+      }
+    }
+    part_search_state->prune_partition_3[HORZ] |= !keep_horz_3;
+  }
+  if (can_search_vert) {
+    bool keep_vert_3 = false;  // Becomes true at the first matching edge.
+    for (int row = 0; row < mi_height; row++) {
+      if (partition_boundaries[(masked_mi_row + row) * MAX_MIB_SIZE +
+                               masked_mi_col + mi_width / 4 - 1] &
+          (1 << VERT)) {
+        keep_vert_3 = true;
+        break;
+      }
+    }
+    if (!keep_vert_3) {  // Next: the vertical edge at 3/4 width.
+      for (int row = 0; row < mi_height; row++) {
+        if (partition_boundaries[(masked_mi_row + row) * MAX_MIB_SIZE +
+                                 masked_mi_col + 3 * mi_width / 4 - 1] &
+            (1 << VERT)) {
+          keep_vert_3 = true;
+          break;
+        }
+      }
+    }
+    if (!keep_vert_3) {  // Last: the horizontal mid-line of the middle half.
+      for (int col = 0; col < mi_width / 2; col++) {
+        if (partition_boundaries[(masked_mi_row + mi_height / 2 - 1) *
+                                     MAX_MIB_SIZE +
+                                 masked_mi_col + mi_width / 4 + col] &
+            (1 << HORZ)) {
+          keep_vert_3 = true;
+          break;
+        }
+      }
+    }
+    part_search_state->prune_partition_3[VERT] |= !keep_vert_3;
+  }
+}
+
+#if CONFIG_UNEVEN_4WAY
+/*!\brief Prunes 4-way partitions using the current best partition boundaries.
+ *
+ * A marked boundary at 1/8 or 7/8 keeps both 4A and 4B; failing that, 4A is
+ * kept by one at 3/8 and 4B by one at 5/8. Unmatched types are pruned.
+ */
+static AOM_INLINE void prune_part_4_with_partition_boundary(
+    PartitionSearchState *part_search_state, const bool *partition_boundaries,
+    BLOCK_SIZE bsize, int mi_row, int mi_col, bool can_search_horz_4a,
+    bool can_search_horz_4b, bool can_search_vert_4a, bool can_search_vert_4b) {
+  const int mi_width = mi_size_wide[bsize];
+  const int mi_height = mi_size_high[bsize];
+  const int masked_mi_row = mi_row & MAX_MIB_MASK;
+  const int masked_mi_col = mi_col & MAX_MIB_MASK;
+  bool keep_horz_4a = false, keep_horz_4b = false;
+  bool keep_vert_4a = false, keep_vert_4b = false;
+  if (can_search_horz_4a || can_search_horz_4b) {
+    for (int col = 0; col < mi_width; col++) {
+      if (partition_boundaries[(masked_mi_row + mi_height / 8 - 1) *
+                                   MAX_MIB_SIZE +
+                               masked_mi_col + col] &
+          (1 << HORZ)) {
+        keep_horz_4a = true;
+        keep_horz_4b = true;
+        break;
+      }
+      if (partition_boundaries[(masked_mi_row + 7 * mi_height / 8 - 1) *
+                                   MAX_MIB_SIZE +
+                               masked_mi_col + col] &
+          (1 << HORZ)) {
+        keep_horz_4a = true;
+        keep_horz_4b = true;
+        break;
+      }
+    }
+    if (can_search_horz_4a && !keep_horz_4a) {  // 4A-only edge at 3/8.
+      for (int col = 0; col < mi_width; col++) {
+        if (partition_boundaries[(masked_mi_row + 3 * mi_height / 8 - 1) *
+                                     MAX_MIB_SIZE +
+                                 masked_mi_col + col] &
+            (1 << HORZ)) {
+          keep_horz_4a = true;
+          break;
+        }
+      }
+    }
+    if (can_search_horz_4b && !keep_horz_4b) {  // 4B-only edge at 5/8.
+      for (int col = 0; col < mi_width; col++) {
+        if (partition_boundaries[(masked_mi_row + 5 * mi_height / 8 - 1) *
+                                     MAX_MIB_SIZE +
+                                 masked_mi_col + col] &
+            (1 << HORZ)) {
+          keep_horz_4b = true;
+          break;
+        }
+      }
+    }
+    part_search_state->prune_partition_4a[HORZ] |= !keep_horz_4a;
+    part_search_state->prune_partition_4b[HORZ] |= !keep_horz_4b;
+  }
+  if (can_search_vert_4a || can_search_vert_4b) {
+    for (int row = 0; row < mi_height; row++) {
+      if (partition_boundaries[(masked_mi_row + row) * MAX_MIB_SIZE +
+                               masked_mi_col + mi_width / 8 - 1] &
+          (1 << VERT)) {
+        keep_vert_4a = true;
+        keep_vert_4b = true;
+        break;
+      }
+      if (partition_boundaries[(masked_mi_row + row) * MAX_MIB_SIZE +
+                               masked_mi_col + 7 * mi_width / 8 - 1] &
+          (1 << VERT)) {
+        keep_vert_4a = true;
+        keep_vert_4b = true;
+        break;
+      }
+    }
+    if (can_search_vert_4a && !keep_vert_4a) {  // 4A-only edge at 3/8.
+      for (int row = 0; row < mi_height; row++) {
+        if (partition_boundaries[(masked_mi_row + row) * MAX_MIB_SIZE +
+                                 masked_mi_col + 3 * mi_width / 8 - 1] &
+            (1 << VERT)) {
+          keep_vert_4a = true;
+          break;
+        }
+      }
+    }
+    if (can_search_vert_4b && !keep_vert_4b) {  // 4B-only edge at 5/8.
+      for (int row = 0; row < mi_height; row++) {
+        if (partition_boundaries[(masked_mi_row + row) * MAX_MIB_SIZE +
+                                 masked_mi_col + 5 * mi_width / 8 - 1] &
+            (1 << VERT)) {
+          keep_vert_4b = true;
+          break;
+        }
+      }
+    }
+    part_search_state->prune_partition_4a[VERT] |= !keep_vert_4a;
+    part_search_state->prune_partition_4b[VERT] |= !keep_vert_4b;
+  }
+}
+
+#endif  // CONFIG_UNEVEN_4WAY
+
+// Sets prune_partition_3[HORZ/VERT] from speed features and boundary tracing.
+static AOM_INLINE void prune_ext_partitions_3way(
+    AV1_COMP *const cpi, PC_TREE *pc_tree,
+    PartitionSearchState *part_search_state, bool *partition_boundaries) {
+  const AV1_COMMON *const cm = &cpi->common;
+  const PARTITION_SPEED_FEATURES *part_sf = &cpi->sf.part_sf;
+  const PARTITION_TYPE forced_partition = part_search_state->forced_partition;
+  if (part_search_state->forced_partition != PARTITION_INVALID) {
+    return;  // A forced partition bypasses every pruning rule below.
+  }
+
+  // Prune horz 3 with speed features
+  if (part_search_state->partition_3_allowed[HORZ] &&
+      !frame_is_intra_only(cm) && forced_partition != PARTITION_HORZ_3) {
+    if (part_sf->prune_ext_part_with_part_none &&
+        pc_tree->partitioning == PARTITION_NONE) {
+      // Prune if the best partition does not split
+      part_search_state->prune_partition_3[HORZ] = 1;
+    }
+    if (part_sf->prune_ext_part_with_part_rect) {
+      // Prune if the best partition is rect but the subtrees did not further
+      // split in horz
+      if (pc_tree->partitioning == PARTITION_HORZ &&
+          !node_uses_horz(pc_tree->horizontal[0]) &&
+          !node_uses_horz(pc_tree->horizontal[1])) {
+        part_search_state->prune_partition_3[HORZ] = 1;
+      }
+      if (pc_tree->partitioning == PARTITION_VERT &&
+          !node_uses_horz(pc_tree->vertical[0]) &&
+          !node_uses_horz(pc_tree->vertical[1])) {
+        part_search_state->prune_partition_3[HORZ] = 1;
+      }
+    }
+  }
+
+  if (part_search_state->partition_3_allowed[VERT] &&
+      !frame_is_intra_only(cm) && forced_partition != PARTITION_VERT_3) {
+    if (part_sf->prune_ext_part_with_part_none &&
+        pc_tree->partitioning == PARTITION_NONE) {
+      // Prune if the best partition does not split
+      part_search_state->prune_partition_3[VERT] = 1;
+    }
+    if (part_sf->prune_ext_part_with_part_rect) {
+      // Prune if the best partition is rect but the subtrees did not further
+      // split in vert
+      if (pc_tree->partitioning == PARTITION_VERT &&
+          !node_uses_vert(pc_tree->vertical[0]) &&
+          !node_uses_vert(pc_tree->vertical[1])) {
+        part_search_state->prune_partition_3[VERT] = 1;
+      }
+      if (pc_tree->partitioning == PARTITION_HORZ &&
+          !node_uses_vert(pc_tree->horizontal[0]) &&
+          !node_uses_vert(pc_tree->horizontal[1])) {
+        part_search_state->prune_partition_3[VERT] = 1;
+      }
+    }
+  }
+
+  const bool can_search_horz = part_search_state->partition_3_allowed[HORZ] &&
+                               !part_search_state->prune_partition_3[HORZ];
+  const bool can_search_vert = part_search_state->partition_3_allowed[VERT] &&
+                               !part_search_state->prune_partition_3[VERT];
+  const PartitionBlkParams *blk_params = &part_search_state->part_blk_params;
+  const int mi_row = blk_params->mi_row, mi_col = blk_params->mi_col,
+            bsize = blk_params->bsize;
+  if (part_sf->prune_part_h_with_partition_boundary &&
+      (can_search_horz || can_search_vert) &&
+      part_search_state->found_best_partition) {
+    if (!part_search_state->partition_boundaries) {  // Trace only once.
+      part_search_state->partition_boundaries = partition_boundaries;
+      trace_partition_boundary(partition_boundaries, pc_tree, mi_row, mi_col,
+                               bsize);
+    }
+    prune_part_3_with_partition_boundary(part_search_state, bsize, mi_row,
+                                         mi_col, can_search_horz,
+                                         can_search_vert);
+  }
+}
+
+#if CONFIG_UNEVEN_4WAY
+// Pruning logic for PARTITION_HORZ_4A/B and PARTITION_VERT_4A/B.
+static AOM_INLINE void prune_ext_partitions_4way(
+    AV1_COMP *const cpi, PC_TREE *pc_tree,
+    PartitionSearchState *part_search_state, bool *partition_boundaries) {
+  const AV1_COMMON *const cm = &cpi->common;
+  const PARTITION_SPEED_FEATURES *part_sf = &cpi->sf.part_sf;
+  const PARTITION_TYPE forced_partition = part_search_state->forced_partition;
+  if (part_search_state->partition_4a_allowed[HORZ] &&
+      forced_partition != PARTITION_HORZ_4A) {
+    if (part_sf->prune_ext_part_with_part_none &&
+        pc_tree->partitioning == PARTITION_NONE) {
+      // Prune if the best partition does not split
+      part_search_state->prune_partition_4a[HORZ] = 1;
+    }
+    if (part_sf->prune_ext_part_with_part_rect &&
+        pc_tree->partitioning == PARTITION_HORZ &&
+        !node_uses_horz(pc_tree->horizontal[0]) &&
+        !node_uses_horz(pc_tree->horizontal[1])) {
+      // Prune if the best partition is horz but horz did not further split in
+      // horz
+      part_search_state->prune_partition_4a[HORZ] = 1;
+    }
+    if (part_sf->prune_part_4_with_part_3 && !frame_is_intra_only(cm) &&
+        pc_tree->partitioning == PARTITION_HORZ_3 &&
+        !node_uses_horz(pc_tree->horizontal3[0]) &&
+        !node_uses_horz(pc_tree->horizontal3[3])) {
+      // Prune if best partition is horizontal H, but first and last
+      // subpartitions did not further split in horizontal direction.
+      part_search_state->prune_partition_4a[HORZ] = 1;
+    }
+    if (part_sf->prune_part_4_horz_or_vert && !frame_is_intra_only(cm) &&
+        pc_tree->partitioning == PARTITION_VERT &&
+        part_search_state->partition_rect_allowed[HORZ]) {
+      part_search_state->prune_partition_4a[HORZ] = 1;  // best part is VERT
+    }
+  }
+
+  // Prune HORZ 4B with speed features
+  if (part_search_state->partition_4b_allowed[HORZ] &&
+      forced_partition != PARTITION_HORZ_4B) {
+    if (part_sf->prune_ext_part_with_part_none &&
+        pc_tree->partitioning == PARTITION_NONE) {
+      // Prune if the best partition does not split
+      part_search_state->prune_partition_4b[HORZ] = 1;
+    }
+    if (part_sf->prune_ext_part_with_part_rect &&
+        pc_tree->partitioning == PARTITION_HORZ &&
+        !node_uses_horz(pc_tree->horizontal[0]) &&
+        !node_uses_horz(pc_tree->horizontal[1])) {
+      // Prune if the best partition is horz but horz did not further split in
+      // horz
+      part_search_state->prune_partition_4b[HORZ] = 1;
+    }
+    if (part_sf->prune_part_4_with_part_3 && !frame_is_intra_only(cm) &&
+        pc_tree->partitioning == PARTITION_HORZ_3 &&
+        !node_uses_horz(pc_tree->horizontal3[0]) &&
+        !node_uses_horz(pc_tree->horizontal3[3])) {
+      // Prune if best partition is horizontal H, but first and last
+      // subpartitions did not further split in horizontal direction.
+      part_search_state->prune_partition_4b[HORZ] = 1;
+    }
+    if (part_sf->prune_part_4_horz_or_vert && !frame_is_intra_only(cm) &&
+        pc_tree->partitioning == PARTITION_VERT &&
+        part_search_state->partition_rect_allowed[HORZ]) {
+      part_search_state->prune_partition_4b[HORZ] = 1;  // best part is VERT
+    }
+  }
+
+  // Prune VERT_4A with speed features
+  if (part_search_state->partition_4a_allowed[VERT] &&
+      forced_partition != PARTITION_VERT_4A) {
+    if (part_sf->prune_ext_part_with_part_none &&
+        pc_tree->partitioning == PARTITION_NONE) {
+      // Prune if the best partition does not split
+      part_search_state->prune_partition_4a[VERT] = 1;
+    }
+    if (part_sf->prune_ext_part_with_part_rect &&
+        pc_tree->partitioning == PARTITION_VERT &&
+        !node_uses_vert(pc_tree->vertical[0]) &&
+        !node_uses_vert(pc_tree->vertical[1])) {
+      // Prune if the best partition is vert but vert did not further split in
+      // vert
+      part_search_state->prune_partition_4a[VERT] = 1;
+    }
+    if (part_sf->prune_part_4_with_part_3 && !frame_is_intra_only(cm) &&
+        pc_tree->partitioning == PARTITION_VERT_3 &&
+        !node_uses_vert(pc_tree->vertical3[0]) &&
+        !node_uses_vert(pc_tree->vertical3[3])) {
+      // Prune if best partition is vertical H, but first and last
+      // subpartitions did not further split in vertical direction.
+      part_search_state->prune_partition_4a[VERT] = 1;
+    }
+    if (part_sf->prune_part_4_horz_or_vert && !frame_is_intra_only(cm) &&
+        pc_tree->partitioning == PARTITION_HORZ &&
+        part_search_state->partition_rect_allowed[VERT]) {
+      part_search_state->prune_partition_4a[VERT] = 1;  // best part is HORZ
+    }
+  }
+
+  // Prune VERT_4B with speed features
+  if (part_search_state->partition_4b_allowed[VERT] &&
+      forced_partition != PARTITION_VERT_4B) {
+    if (part_sf->prune_ext_part_with_part_none &&
+        pc_tree->partitioning == PARTITION_NONE) {
+      // Prune if the best partition does not split
+      part_search_state->prune_partition_4b[VERT] = 1;
+    }
+    if (part_sf->prune_ext_part_with_part_rect &&
+        pc_tree->partitioning == PARTITION_VERT &&
+        !node_uses_vert(pc_tree->vertical[0]) &&
+        !node_uses_vert(pc_tree->vertical[1])) {
+      // Prune if the best partition is vert but vert did not further split in
+      // vert
+      part_search_state->prune_partition_4b[VERT] = 1;
+    }
+    if (part_sf->prune_part_4_with_part_3 && !frame_is_intra_only(cm) &&
+        pc_tree->partitioning == PARTITION_VERT_3 &&
+        !node_uses_vert(pc_tree->vertical3[0]) &&
+        !node_uses_vert(pc_tree->vertical3[3])) {
+      // Prune if best partition is vertical H, but first and last
+      // subpartitions did not further split in vertical direction.
+      part_search_state->prune_partition_4b[VERT] = 1;
+    }
+    if (part_sf->prune_part_4_horz_or_vert && !frame_is_intra_only(cm) &&
+        pc_tree->partitioning == PARTITION_HORZ &&
+        part_search_state->partition_rect_allowed[VERT]) {
+      part_search_state->prune_partition_4b[VERT] = 1;  // best part is HORZ
+    }
+  }
+
+  const bool can_search_horz_4a =
+      part_search_state->partition_4a_allowed[HORZ] &&
+      !part_search_state->prune_partition_4a[HORZ];
+  const bool can_search_horz_4b =
+      part_search_state->partition_4b_allowed[HORZ] &&
+      !part_search_state->prune_partition_4b[HORZ];
+  const bool can_search_vert_4a =
+      part_search_state->partition_4a_allowed[VERT] &&
+      !part_search_state->prune_partition_4a[VERT];
+  const bool can_search_vert_4b =
+      part_search_state->partition_4b_allowed[VERT] &&
+      !part_search_state->prune_partition_4b[VERT];
+  const PartitionBlkParams *blk_params = &part_search_state->part_blk_params;
+  const int mi_row = blk_params->mi_row, mi_col = blk_params->mi_col,
+            bsize = blk_params->bsize;
+  if (part_sf->prune_part_4_with_partition_boundary &&
+      (can_search_horz_4a || can_search_vert_4a || can_search_horz_4b ||
+       can_search_vert_4b) &&
+      part_search_state->found_best_partition) {
+    if (!part_search_state->partition_boundaries ||
+        pc_tree->partitioning == PARTITION_HORZ_3 ||
+        pc_tree->partitioning == PARTITION_VERT_3) {
+      part_search_state->partition_boundaries = partition_boundaries;
+      trace_partition_boundary(partition_boundaries, pc_tree, mi_row, mi_col,
+                               bsize);
+    }
+    prune_part_4_with_partition_boundary(
+        part_search_state, partition_boundaries, bsize, mi_row, mi_col,
+        can_search_horz_4a, can_search_horz_4b, can_search_vert_4a,
+        can_search_vert_4b);
+  }
+}
+
+static INLINE void search_partition_horz_4a(  // rdopt on PARTITION_HORZ_4A
+    PartitionSearchState *search_state, AV1_COMP *const cpi, ThreadData *td,
+    TileDataEnc *tile_data, TokenExtra **tp, RD_STATS *best_rdc,
+    PC_TREE *pc_tree, const PARTITION_TREE *ptree_luma,
+    const PARTITION_TREE *template_tree, RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx,
+    const PartitionSearchState *part_search_state,
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+    LevelBanksRDO *level_banks,
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+    SB_MULTI_PASS_MODE multi_pass_mode, int max_recursion_depth) {
+  const AV1_COMMON *const cm = &cpi->common;
+  MACROBLOCK *const x = &td->mb;
+  const int num_planes = av1_num_planes(cm);
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const int ss_x = xd->plane[1].subsampling_x;
+  const int ss_y = xd->plane[1].subsampling_y;
+
+  const PartitionBlkParams *blk_params = &search_state->part_blk_params;
+  const int mi_row = blk_params->mi_row, mi_col = blk_params->mi_col;
+  const BLOCK_SIZE bsize = blk_params->bsize;
+
+  if (is_part_pruned_by_forced_partition(part_search_state,
+                                         PARTITION_HORZ_4A) ||
+      !part_search_state->partition_4a_allowed[HORZ] ||
+      part_search_state->prune_partition_4a[HORZ]) {
+    return;  // disallowed or pruned
+  }
+
+  if (search_state->terminate_partition_search || !blk_params->has_rows ||
+      !is_partition_valid(bsize, PARTITION_HORZ_4A) ||
+      !(search_state->do_rectangular_split ||
+        av1_active_h_edge(cpi, mi_row, blk_params->mi_step_h))) {
+    return;
+  }
+
+  const int part_h4a_rate = search_state->partition_cost[PARTITION_HORZ_4A];
+  if (part_h4a_rate == INT_MAX ||
+      RDCOST(x->rdmult, part_h4a_rate, 0) >= best_rdc->rdcost) {
+    return;  // rate alone can't beat best_rdc
+  }
+  RD_STATS sum_rdc;
+  av1_init_rd_stats(&sum_rdc);
+  const int eighth_step = mi_size_high[bsize] / 8;  // 1/8 of block height
+
+  sum_rdc.rate = search_state->partition_cost[PARTITION_HORZ_4A];
+  sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
+
+  const BLOCK_SIZE sml_subsize =
+      get_partition_subsize(bsize, PARTITION_HORZ_4A);
+  const BLOCK_SIZE big_subsize = get_partition_subsize(bsize, PARTITION_HORZ);
+  const BLOCK_SIZE med_subsize =
+      get_partition_subsize(big_subsize, PARTITION_HORZ);
+  assert(sml_subsize == get_partition_subsize(med_subsize, PARTITION_HORZ));
+
+  const int cum_step_multipliers[4] = { 0, 1, 3, 7 };  // offsets in eighths
+  const BLOCK_SIZE subblock_sizes[4] = { sml_subsize, med_subsize, big_subsize,
+                                         sml_subsize };
+
+  for (int idx = 0; idx < 4; idx++) {
+    if (pc_tree->horizontal4a[idx]) {
+      av1_free_pc_tree_recursive(pc_tree->horizontal4a[idx], num_planes, 0, 0);
+      pc_tree->horizontal4a[idx] = NULL;
+    }
+    const int this_mi_row = mi_row + eighth_step * cum_step_multipliers[idx];
+    pc_tree->horizontal4a[idx] = av1_alloc_pc_tree_node(
+        xd->tree_type, this_mi_row, mi_col, subblock_sizes[idx], pc_tree,
+        PARTITION_HORZ_4A, idx, idx == 3, ss_x, ss_y);
+  }
+
+  bool skippable = true;
+  for (int i = 0; i < 4; ++i) {
+    const int this_mi_row = mi_row + eighth_step * cum_step_multipliers[i];
+
+    if (i > 0 && this_mi_row >= cm->mi_params.mi_rows) break;  // off frame
+
+    SUBBLOCK_RDO_DATA rdo_data = { pc_tree->horizontal4a[i],
+                                   get_partition_subtree_const(ptree_luma, i),
+                                   get_partition_subtree_const(template_tree,
+                                                               i),
+                                   this_mi_row,
+                                   mi_col,
+                                   subblock_sizes[i],
+                                   PARTITION_HORZ_4A };
+    if (!rd_try_subblock_new(cpi, td, tile_data, tp, &rdo_data, *best_rdc,
+                             &sum_rdc, multi_pass_mode, &skippable,
+                             max_recursion_depth)) {
+      av1_invalid_rd_stats(&sum_rdc);  // abandon this partition
+      break;
+    }
+  }
+
+  av1_rd_cost_update(x->rdmult, &sum_rdc);
+  if (sum_rdc.rdcost < best_rdc->rdcost) {
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+    update_best_level_banks(level_banks, &x->e_mbd);
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+    *best_rdc = sum_rdc;
+    search_state->found_best_partition = true;
+    pc_tree->partitioning = PARTITION_HORZ_4A;
+    pc_tree->skippable = skippable;
+  }
+
+  av1_restore_context(cm, x, x_ctx, mi_row, mi_col, bsize, num_planes);
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+  restore_level_banks(&x->e_mbd, level_banks);
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+}
+
+static INLINE void search_partition_horz_4b(  // rdopt on PARTITION_HORZ_4B
+    PartitionSearchState *search_state, AV1_COMP *const cpi, ThreadData *td,
+    TileDataEnc *tile_data, TokenExtra **tp, RD_STATS *best_rdc,
+    PC_TREE *pc_tree, const PARTITION_TREE *ptree_luma,
+    const PARTITION_TREE *template_tree, RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx,
+    const PartitionSearchState *part_search_state,
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+    LevelBanksRDO *level_banks,
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+    SB_MULTI_PASS_MODE multi_pass_mode, int max_recursion_depth) {
+  const AV1_COMMON *const cm = &cpi->common;
+  MACROBLOCK *const x = &td->mb;
+  const int num_planes = av1_num_planes(cm);
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const int ss_x = xd->plane[1].subsampling_x;
+  const int ss_y = xd->plane[1].subsampling_y;
+
+  const PartitionBlkParams *blk_params = &search_state->part_blk_params;
+  const int mi_row = blk_params->mi_row, mi_col = blk_params->mi_col;
+  const BLOCK_SIZE bsize = blk_params->bsize;
+
+  if (is_part_pruned_by_forced_partition(part_search_state,
+                                         PARTITION_HORZ_4B) ||
+      !part_search_state->partition_4b_allowed[HORZ] ||
+      part_search_state->prune_partition_4b[HORZ]) {
+    return;  // disallowed or pruned
+  }
+
+  if (search_state->terminate_partition_search || !blk_params->has_rows ||
+      !is_partition_valid(bsize, PARTITION_HORZ_4B) ||
+      !(search_state->do_rectangular_split ||
+        av1_active_h_edge(cpi, mi_row, blk_params->mi_step_h))) {
+    return;
+  }
+
+  const int part_h4b_rate = search_state->partition_cost[PARTITION_HORZ_4B];
+  if (part_h4b_rate == INT_MAX ||
+      RDCOST(x->rdmult, part_h4b_rate, 0) >= best_rdc->rdcost) {
+    return;  // rate alone can't beat best_rdc
+  }
+  RD_STATS sum_rdc;
+  av1_init_rd_stats(&sum_rdc);
+  const int eighth_step = mi_size_high[bsize] / 8;  // 1/8 of block height
+
+  sum_rdc.rate = search_state->partition_cost[PARTITION_HORZ_4B];
+  sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
+
+  const BLOCK_SIZE sml_subsize =
+      get_partition_subsize(bsize, PARTITION_HORZ_4B);
+  const BLOCK_SIZE big_subsize = get_partition_subsize(bsize, PARTITION_HORZ);
+  const BLOCK_SIZE med_subsize =
+      get_partition_subsize(big_subsize, PARTITION_HORZ);
+  assert(sml_subsize == get_partition_subsize(med_subsize, PARTITION_HORZ));
+
+  const int cum_step_multipliers[4] = { 0, 1, 5, 7 };  // offsets in eighths
+  const BLOCK_SIZE subblock_sizes[4] = { sml_subsize, big_subsize, med_subsize,
+                                         sml_subsize };
+
+  for (int idx = 0; idx < 4; idx++) {
+    if (pc_tree->horizontal4b[idx]) {
+      av1_free_pc_tree_recursive(pc_tree->horizontal4b[idx], num_planes, 0, 0);
+      pc_tree->horizontal4b[idx] = NULL;
+    }
+    const int this_mi_row = mi_row + eighth_step * cum_step_multipliers[idx];
+    pc_tree->horizontal4b[idx] = av1_alloc_pc_tree_node(
+        xd->tree_type, this_mi_row, mi_col, subblock_sizes[idx], pc_tree,
+        PARTITION_HORZ_4B, idx, idx == 3, ss_x, ss_y);
+  }
+
+  bool skippable = true;
+  for (int i = 0; i < 4; ++i) {
+    const int this_mi_row = mi_row + eighth_step * cum_step_multipliers[i];
+
+    if (i > 0 && this_mi_row >= cm->mi_params.mi_rows) break;  // off frame
+
+    SUBBLOCK_RDO_DATA rdo_data = { pc_tree->horizontal4b[i],
+                                   get_partition_subtree_const(ptree_luma, i),
+                                   get_partition_subtree_const(template_tree,
+                                                               i),
+                                   this_mi_row,
+                                   mi_col,
+                                   subblock_sizes[i],
+                                   PARTITION_HORZ_4B };
+    if (!rd_try_subblock_new(cpi, td, tile_data, tp, &rdo_data, *best_rdc,
+                             &sum_rdc, multi_pass_mode, &skippable,
+                             max_recursion_depth)) {
+      av1_invalid_rd_stats(&sum_rdc);  // abandon this partition
+      break;
+    }
+  }
+
+  av1_rd_cost_update(x->rdmult, &sum_rdc);
+  if (sum_rdc.rdcost < best_rdc->rdcost) {
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+    update_best_level_banks(level_banks, &x->e_mbd);
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+    *best_rdc = sum_rdc;
+    search_state->found_best_partition = true;
+    pc_tree->partitioning = PARTITION_HORZ_4B;
+    pc_tree->skippable = skippable;
+  }
+
+  av1_restore_context(cm, x, x_ctx, mi_row, mi_col, bsize, num_planes);
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+  restore_level_banks(&x->e_mbd, level_banks);
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+}
+
+static INLINE void search_partition_vert_4a(  // rdopt on PARTITION_VERT_4A
+    PartitionSearchState *search_state, AV1_COMP *const cpi, ThreadData *td,
+    TileDataEnc *tile_data, TokenExtra **tp, RD_STATS *best_rdc,
+    PC_TREE *pc_tree, const PARTITION_TREE *ptree_luma,
+    const PARTITION_TREE *template_tree, RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx,
+    const PartitionSearchState *part_search_state,
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+    LevelBanksRDO *level_banks,
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+    SB_MULTI_PASS_MODE multi_pass_mode, int max_recursion_depth) {
+  const AV1_COMMON *const cm = &cpi->common;
+  MACROBLOCK *const x = &td->mb;
+  const int num_planes = av1_num_planes(cm);
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const int ss_x = xd->plane[1].subsampling_x;
+  const int ss_y = xd->plane[1].subsampling_y;
+
+  const PartitionBlkParams *blk_params = &search_state->part_blk_params;
+  const int mi_row = blk_params->mi_row, mi_col = blk_params->mi_col;
+  const BLOCK_SIZE bsize = blk_params->bsize;
+
+  if (is_part_pruned_by_forced_partition(part_search_state,
+                                         PARTITION_VERT_4A) ||
+      !part_search_state->partition_4a_allowed[VERT] ||
+      part_search_state->prune_partition_4a[VERT]) {
+    return;  // disallowed or pruned
+  }
+
+  if (search_state->terminate_partition_search || !blk_params->has_cols ||
+      !is_partition_valid(bsize, PARTITION_VERT_4A) ||
+      !(search_state->do_rectangular_split ||
+        av1_active_v_edge(cpi, mi_col, blk_params->mi_step_w))) {
+    return;
+  }
+
+  const int part_v4a_rate = search_state->partition_cost[PARTITION_VERT_4A];
+  if (part_v4a_rate == INT_MAX ||
+      RDCOST(x->rdmult, part_v4a_rate, 0) >= best_rdc->rdcost) {
+    return;  // rate alone can't beat best_rdc
+  }
+  RD_STATS sum_rdc;
+  av1_init_rd_stats(&sum_rdc);
+  const int eighth_step = mi_size_wide[bsize] / 8;  // 1/8 of block width
+
+  sum_rdc.rate = search_state->partition_cost[PARTITION_VERT_4A];
+  sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
+
+  const BLOCK_SIZE sml_subsize =
+      get_partition_subsize(bsize, PARTITION_VERT_4A);
+  const BLOCK_SIZE big_subsize = get_partition_subsize(bsize, PARTITION_VERT);
+  const BLOCK_SIZE med_subsize =
+      get_partition_subsize(big_subsize, PARTITION_VERT);
+  assert(sml_subsize == get_partition_subsize(med_subsize, PARTITION_VERT));
+
+  const int cum_step_multipliers[4] = { 0, 1, 3, 7 };  // offsets in eighths
+  const BLOCK_SIZE subblock_sizes[4] = { sml_subsize, med_subsize, big_subsize,
+                                         sml_subsize };
+
+  for (int idx = 0; idx < 4; idx++) {
+    if (pc_tree->vertical4a[idx]) {
+      av1_free_pc_tree_recursive(pc_tree->vertical4a[idx], num_planes, 0, 0);
+      pc_tree->vertical4a[idx] = NULL;
+    }
+    const int this_mi_col = mi_col + eighth_step * cum_step_multipliers[idx];
+    pc_tree->vertical4a[idx] = av1_alloc_pc_tree_node(
+        xd->tree_type, mi_row, this_mi_col, subblock_sizes[idx], pc_tree,
+        PARTITION_VERT_4A, idx, idx == 3, ss_x, ss_y);
+  }
+
+  bool skippable = true;
+  for (int i = 0; i < 4; ++i) {
+    const int this_mi_col = mi_col + eighth_step * cum_step_multipliers[i];
+
+    if (i > 0 && this_mi_col >= cm->mi_params.mi_cols) break;  // off frame
+
+    SUBBLOCK_RDO_DATA rdo_data = { pc_tree->vertical4a[i],
+                                   get_partition_subtree_const(ptree_luma, i),
+                                   get_partition_subtree_const(template_tree,
+                                                               i),
+                                   mi_row,
+                                   this_mi_col,
+                                   subblock_sizes[i],
+                                   PARTITION_VERT_4A };
+    if (!rd_try_subblock_new(cpi, td, tile_data, tp, &rdo_data, *best_rdc,
+                             &sum_rdc, multi_pass_mode, &skippable,
+                             max_recursion_depth)) {
+      av1_invalid_rd_stats(&sum_rdc);  // abandon this partition
+      break;
+    }
+  }
+
+  av1_rd_cost_update(x->rdmult, &sum_rdc);
+  if (sum_rdc.rdcost < best_rdc->rdcost) {
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+    update_best_level_banks(level_banks, &x->e_mbd);
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+    *best_rdc = sum_rdc;
+    search_state->found_best_partition = true;
+    pc_tree->partitioning = PARTITION_VERT_4A;
+    pc_tree->skippable = skippable;
+  }
+
+  av1_restore_context(cm, x, x_ctx, mi_row, mi_col, bsize, num_planes);
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+  restore_level_banks(&x->e_mbd, level_banks);
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+}
+
+static INLINE void search_partition_vert_4b(  // rdopt on PARTITION_VERT_4B
+    PartitionSearchState *search_state, AV1_COMP *const cpi, ThreadData *td,
+    TileDataEnc *tile_data, TokenExtra **tp, RD_STATS *best_rdc,
+    PC_TREE *pc_tree, const PARTITION_TREE *ptree_luma,
+    const PARTITION_TREE *template_tree, RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx,
+    const PartitionSearchState *part_search_state,
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+    LevelBanksRDO *level_banks,
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+    SB_MULTI_PASS_MODE multi_pass_mode, int max_recursion_depth) {
+  const AV1_COMMON *const cm = &cpi->common;
+  MACROBLOCK *const x = &td->mb;
+  const int num_planes = av1_num_planes(cm);
+  MACROBLOCKD *const xd = &x->e_mbd;
+  const int ss_x = xd->plane[1].subsampling_x;
+  const int ss_y = xd->plane[1].subsampling_y;
+
+  const PartitionBlkParams *blk_params = &search_state->part_blk_params;
+  const int mi_row = blk_params->mi_row, mi_col = blk_params->mi_col;
+  const BLOCK_SIZE bsize = blk_params->bsize;
+
+  if (is_part_pruned_by_forced_partition(part_search_state,
+                                         PARTITION_VERT_4B) ||
+      !part_search_state->partition_4b_allowed[VERT] ||
+      part_search_state->prune_partition_4b[VERT]) {
+    return;  // disallowed or pruned
+  }
+
+  if (search_state->terminate_partition_search || !blk_params->has_cols ||
+      !is_partition_valid(bsize, PARTITION_VERT_4B) ||
+      !(search_state->do_rectangular_split ||
+        av1_active_v_edge(cpi, mi_col, blk_params->mi_step_w))) {
+    return;
+  }
+
+  const int part_v4b_rate = search_state->partition_cost[PARTITION_VERT_4B];
+  if (part_v4b_rate == INT_MAX ||
+      RDCOST(x->rdmult, part_v4b_rate, 0) >= best_rdc->rdcost) {
+    return;  // rate alone can't beat best_rdc
+  }
+  RD_STATS sum_rdc;
+  av1_init_rd_stats(&sum_rdc);
+  const int eighth_step = mi_size_wide[bsize] / 8;  // 1/8 of block width
+
+  sum_rdc.rate = search_state->partition_cost[PARTITION_VERT_4B];
+  sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
+
+  const BLOCK_SIZE sml_subsize =
+      get_partition_subsize(bsize, PARTITION_VERT_4B);
+  const BLOCK_SIZE big_subsize = get_partition_subsize(bsize, PARTITION_VERT);
+  const BLOCK_SIZE med_subsize =
+      get_partition_subsize(big_subsize, PARTITION_VERT);
+  assert(sml_subsize == get_partition_subsize(med_subsize, PARTITION_VERT));
+
+  const int cum_step_multipliers[4] = { 0, 1, 5, 7 };  // offsets in eighths
+  const BLOCK_SIZE subblock_sizes[4] = { sml_subsize, big_subsize, med_subsize,
+                                         sml_subsize };
+
+  for (int idx = 0; idx < 4; idx++) {
+    if (pc_tree->vertical4b[idx]) {
+      av1_free_pc_tree_recursive(pc_tree->vertical4b[idx], num_planes, 0, 0);
+      pc_tree->vertical4b[idx] = NULL;
+    }
+    const int this_mi_col = mi_col + eighth_step * cum_step_multipliers[idx];
+    pc_tree->vertical4b[idx] = av1_alloc_pc_tree_node(
+        xd->tree_type, mi_row, this_mi_col, subblock_sizes[idx], pc_tree,
+        PARTITION_VERT_4B, idx, idx == 3, ss_x, ss_y);
+  }
+
+  bool skippable = true;
+  for (int i = 0; i < 4; ++i) {
+    const int this_mi_col = mi_col + eighth_step * cum_step_multipliers[i];
+
+    if (i > 0 && this_mi_col >= cm->mi_params.mi_cols) break;  // off frame
+
+    SUBBLOCK_RDO_DATA rdo_data = { pc_tree->vertical4b[i],
+                                   get_partition_subtree_const(ptree_luma, i),
+                                   get_partition_subtree_const(template_tree,
+                                                               i),
+                                   mi_row,
+                                   this_mi_col,
+                                   subblock_sizes[i],
+                                   PARTITION_VERT_4B };
+    if (!rd_try_subblock_new(cpi, td, tile_data, tp, &rdo_data, *best_rdc,
+                             &sum_rdc, multi_pass_mode, &skippable,
+                             max_recursion_depth)) {
+      av1_invalid_rd_stats(&sum_rdc);  // abandon this partition
+      break;
+    }
+  }
+
+  av1_rd_cost_update(x->rdmult, &sum_rdc);
+  if (sum_rdc.rdcost < best_rdc->rdcost) {
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+    update_best_level_banks(level_banks, &x->e_mbd);
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+    *best_rdc = sum_rdc;
+    search_state->found_best_partition = true;
+    pc_tree->partitioning = PARTITION_VERT_4B;
+    pc_tree->skippable = skippable;
+  }
+
+  av1_restore_context(cm, x, x_ctx, mi_row, mi_col, bsize, num_planes);
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+  restore_level_banks(&x->e_mbd, level_banks);
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+}
+#endif  // CONFIG_UNEVEN_4WAY
+
 /*!\brief Performs rdopt on PARTITION_HORZ_3. */
 static INLINE void search_partition_horz_3(
     PartitionSearchState *search_state, AV1_COMP *const cpi, ThreadData *td,
     TileDataEnc *tile_data, TokenExtra **tp, RD_STATS *best_rdc,
     PC_TREE *pc_tree, const PARTITION_TREE *ptree_luma,
     const PARTITION_TREE *template_tree, RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx,
-#if CONFIG_C043_MVP_IMPROVEMENTS
-    REF_MV_BANK *best_level_bank,
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
-#if WARP_CU_BANK
-    WARP_PARAM_BANK *best_level_warp_bank,
-#endif  // WARP_CU_BANK
+    const PartitionSearchState *part_search_state,
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+    LevelBanksRDO *level_banks,
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
     SB_MULTI_PASS_MODE multi_pass_mode, int max_recursion_depth) {
   const AV1_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &td->mb;
   const int num_planes = av1_num_planes(cm);
-#if CONFIG_EXT_RECUR_PARTITIONS
   MACROBLOCKD *const xd = &x->e_mbd;
   const int ss_x = xd->plane[1].subsampling_x;
   const int ss_y = xd->plane[1].subsampling_y;
-#endif  // CONFIG_EXT_RECUR_PARTITIONS
 
   const PartitionBlkParams *blk_params = &search_state->part_blk_params;
   const int mi_row = blk_params->mi_row, mi_col = blk_params->mi_col;
   const BLOCK_SIZE bsize = blk_params->bsize;
 
+  if (is_part_pruned_by_forced_partition(part_search_state, PARTITION_HORZ_3) ||
+      !part_search_state->partition_3_allowed[HORZ] ||
+      part_search_state->prune_partition_3[HORZ]) {
+    return;
+  }
+
   if (search_state->terminate_partition_search || !blk_params->has_rows ||
       !is_partition_valid(bsize, PARTITION_HORZ_3) ||
       !(search_state->do_rectangular_split ||
         av1_active_h_edge(cpi, mi_row, blk_params->mi_step_h))) {
     return;
   }
-#if CONFIG_H_PARTITION
   // TODO(yuec): set default partition modes for the edge directly by ruling out
   // h partitions from the syntax if the 2nd middle block is not in the frame.
   if (mi_col + (mi_size_wide[bsize] >> 1) >= cm->mi_params.mi_cols) return;
-#endif  // CONFIG_H_PARTITION
 
   const int part_h3_rate = search_state->partition_cost[PARTITION_HORZ_3];
   if (part_h3_rate == INT_MAX ||
@@ -4913,7 +6305,6 @@
   sum_rdc.rate = search_state->partition_cost[PARTITION_HORZ_3];
   sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
 
-#if CONFIG_H_PARTITION
   const BLOCK_SIZE sml_subsize =
       get_h_partition_subsize(bsize, 0, PARTITION_HORZ_3);
   const BLOCK_SIZE big_subsize =
@@ -4930,59 +6321,26 @@
     }
 
     pc_tree->horizontal3[idx] = av1_alloc_pc_tree_node(
-        mi_row + offset_mr[idx], mi_col + offset_mc[idx], subblock_sizes[idx],
-        pc_tree, PARTITION_HORZ_3, idx, idx == 3, ss_x, ss_y);
+        xd->tree_type, mi_row + offset_mr[idx], mi_col + offset_mc[idx],
+        subblock_sizes[idx], pc_tree, PARTITION_HORZ_3, idx, idx == 3, ss_x,
+        ss_y);
   }
-#else   // CONFIG_H_PARTITION
-  const BLOCK_SIZE sml_subsize = get_partition_subsize(bsize, PARTITION_HORZ_3);
-  const BLOCK_SIZE big_subsize = get_partition_subsize(bsize, PARTITION_HORZ);
-  const int step_multipliers[3] = { 0, 1, 2 };
-  const BLOCK_SIZE subblock_sizes[3] = { sml_subsize, big_subsize,
-                                         sml_subsize };
-
-  for (int idx = 0; idx < 3; idx++) {
-    if (pc_tree->horizontal3[idx]) {
-      av1_free_pc_tree_recursive(pc_tree->horizontal3[idx], num_planes, 0, 0);
-      pc_tree->horizontal3[idx] = NULL;
-    }
-  }
-  pc_tree->horizontal3[0] =
-      av1_alloc_pc_tree_node(mi_row, mi_col, subblock_sizes[0], pc_tree,
-                             PARTITION_HORZ_3, 0, 0, ss_x, ss_y);
-  pc_tree->horizontal3[1] =
-      av1_alloc_pc_tree_node(mi_row + quarter_step, mi_col, subblock_sizes[1],
-                             pc_tree, PARTITION_HORZ_3, 1, 0, ss_x, ss_y);
-  pc_tree->horizontal3[2] = av1_alloc_pc_tree_node(
-      mi_row + quarter_step * 3, mi_col, subblock_sizes[2], pc_tree,
-      PARTITION_HORZ_3, 2, 1, ss_x, ss_y);
-#endif  // CONFIG_H_PARTITION
 
   bool skippable = true;
-#if CONFIG_H_PARTITION
   for (int i = 0; i < 4; ++i) {
     const int this_mi_row = mi_row + offset_mr[i];
     const int this_mi_col = mi_col + offset_mc[i];
-#else   //  CONFIG_H_PARTITION
-  int this_mi_row = mi_row;
-  for (int i = 0; i < 3; ++i) {
-    this_mi_row += quarter_step * step_multipliers[i];
-#endif  // CONFIG_H_PARTITION
 
     if (i > 0 && this_mi_row >= cm->mi_params.mi_rows) break;
 
-    SUBBLOCK_RDO_DATA rdo_data = {
-      pc_tree->horizontal3[i],
-      get_partition_subtree_const(ptree_luma, i),
-      get_partition_subtree_const(template_tree, i),
-      this_mi_row,
-#if CONFIG_H_PARTITION
-      this_mi_col,
-#else
-      mi_col,
-#endif  // CONFIG_H_PARTITION
-      subblock_sizes[i],
-      PARTITION_HORZ_3
-    };
+    SUBBLOCK_RDO_DATA rdo_data = { pc_tree->horizontal3[i],
+                                   get_partition_subtree_const(ptree_luma, i),
+                                   get_partition_subtree_const(template_tree,
+                                                               i),
+                                   this_mi_row,
+                                   this_mi_col,
+                                   subblock_sizes[i],
+                                   PARTITION_HORZ_3 };
     if (!rd_try_subblock_new(cpi, td, tile_data, tp, &rdo_data, *best_rdc,
                              &sum_rdc, multi_pass_mode, &skippable,
                              max_recursion_depth)) {
@@ -4993,12 +6351,9 @@
 
   av1_rd_cost_update(x->rdmult, &sum_rdc);
   if (sum_rdc.rdcost < best_rdc->rdcost) {
-#if CONFIG_C043_MVP_IMPROVEMENTS
-    *best_level_bank = x->e_mbd.ref_mv_bank;
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
-#if WARP_CU_BANK
-    *best_level_warp_bank = x->e_mbd.warp_param_bank;
-#endif  // WARP_CU_BANK
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+    update_best_level_banks(level_banks, &x->e_mbd);
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
     *best_rdc = sum_rdc;
     search_state->found_best_partition = true;
     pc_tree->partitioning = PARTITION_HORZ_3;
@@ -5006,6 +6361,9 @@
   }
 
   av1_restore_context(cm, x, x_ctx, mi_row, mi_col, bsize, num_planes);
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+  restore_level_banks(&x->e_mbd, level_banks);
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
 }
 
 /*!\brief Performs rdopt on PARTITION_VERT_3. */
@@ -5014,37 +6372,37 @@
     TileDataEnc *tile_data, TokenExtra **tp, RD_STATS *best_rdc,
     PC_TREE *pc_tree, const PARTITION_TREE *ptree_luma,
     const PARTITION_TREE *template_tree, RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx,
-#if CONFIG_C043_MVP_IMPROVEMENTS
-    REF_MV_BANK *best_level_bank,
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
-#if WARP_CU_BANK
-    WARP_PARAM_BANK *best_level_warp_bank,
-#endif  // WARP_CU_BANK
+    const PartitionSearchState *part_search_state,
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+    LevelBanksRDO *level_banks,
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
     SB_MULTI_PASS_MODE multi_pass_mode, int max_recursion_depth) {
   const AV1_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &td->mb;
   const int num_planes = av1_num_planes(cm);
-#if CONFIG_EXT_RECUR_PARTITIONS
   MACROBLOCKD *const xd = &x->e_mbd;
   const int ss_x = xd->plane[1].subsampling_x;
   const int ss_y = xd->plane[1].subsampling_y;
-#endif  // CONFIG_EXT_RECUR_PARTITIONS
 
   const PartitionBlkParams *blk_params = &search_state->part_blk_params;
   const int mi_row = blk_params->mi_row, mi_col = blk_params->mi_col;
   const BLOCK_SIZE bsize = blk_params->bsize;
 
+  if (is_part_pruned_by_forced_partition(part_search_state, PARTITION_VERT_3) ||
+      !part_search_state->partition_3_allowed[VERT] ||
+      part_search_state->prune_partition_3[VERT]) {
+    return;
+  }
+
   if (search_state->terminate_partition_search || !blk_params->has_cols ||
       !is_partition_valid(bsize, PARTITION_VERT_3) ||
       !(search_state->do_rectangular_split ||
         av1_active_v_edge(cpi, mi_col, blk_params->mi_step_w))) {
     return;
   }
-#if CONFIG_H_PARTITION
   // TODO(yuec): set default partition modes for the edge directly by ruling out
   // h partitions from the syntax if the 2nd middle block is not in the frame.
   if (mi_row + (mi_size_high[bsize] >> 1) >= cm->mi_params.mi_rows) return;
-#endif  // CONFIG_H_PARTITION
 
   const int part_v3_rate = search_state->partition_cost[PARTITION_VERT_3];
   if (part_v3_rate == INT_MAX ||
@@ -5059,7 +6417,6 @@
   sum_rdc.rate = search_state->partition_cost[PARTITION_VERT_3];
   sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
 
-#if CONFIG_H_PARTITION
   const BLOCK_SIZE sml_subsize =
       get_h_partition_subsize(bsize, 0, PARTITION_VERT_3);
   const BLOCK_SIZE big_subsize =
@@ -5076,59 +6433,26 @@
     }
 
     pc_tree->vertical3[idx] = av1_alloc_pc_tree_node(
-        mi_row + offset_mr[idx], mi_col + offset_mc[idx], subblock_sizes[idx],
-        pc_tree, PARTITION_VERT_3, idx, idx == 3, ss_x, ss_y);
+        xd->tree_type, mi_row + offset_mr[idx], mi_col + offset_mc[idx],
+        subblock_sizes[idx], pc_tree, PARTITION_VERT_3, idx, idx == 3, ss_x,
+        ss_y);
   }
-#else
-  const BLOCK_SIZE sml_subsize = get_partition_subsize(bsize, PARTITION_VERT_3);
-  const BLOCK_SIZE big_subsize = get_partition_subsize(bsize, PARTITION_VERT);
-  const int step_multipliers[3] = { 0, 1, 2 };
-  const BLOCK_SIZE subblock_sizes[3] = { sml_subsize, big_subsize,
-                                         sml_subsize };
-
-  for (int idx = 0; idx < 3; idx++) {
-    if (pc_tree->vertical3[idx]) {
-      av1_free_pc_tree_recursive(pc_tree->vertical3[idx], num_planes, 0, 0);
-      pc_tree->vertical3[idx] = NULL;
-    }
-  }
-  pc_tree->vertical3[0] =
-      av1_alloc_pc_tree_node(mi_row, mi_col, subblock_sizes[0], pc_tree,
-                             PARTITION_VERT_3, 0, 0, ss_x, ss_y);
-  pc_tree->vertical3[1] =
-      av1_alloc_pc_tree_node(mi_row, mi_col + quarter_step, subblock_sizes[1],
-                             pc_tree, PARTITION_VERT_3, 1, 0, ss_x, ss_y);
-  pc_tree->vertical3[2] = av1_alloc_pc_tree_node(
-      mi_row, mi_col + quarter_step * 3, subblock_sizes[2], pc_tree,
-      PARTITION_VERT_3, 2, 1, ss_x, ss_y);
-#endif  // CONFIG_H_PARTITION
 
   bool skippable = true;
-#if CONFIG_H_PARTITION
   for (int i = 0; i < 4; ++i) {
     const int this_mi_row = mi_row + offset_mr[i];
     const int this_mi_col = mi_col + offset_mc[i];
-#else
-  int this_mi_col = mi_col;
-  for (int i = 0; i < 3; ++i) {
-    this_mi_col += quarter_step * step_multipliers[i];
-#endif  // CONFIG_H_PARTITION
 
     if (i > 0 && this_mi_col >= cm->mi_params.mi_cols) break;
 
-    SUBBLOCK_RDO_DATA rdo_data = {
-      pc_tree->vertical3[i],
-      get_partition_subtree_const(ptree_luma, i),
-      get_partition_subtree_const(template_tree, i),
-#if CONFIG_H_PARTITION
-      this_mi_row,
-#else   // CONFIG_H_PARTITION
-      mi_row,
-#endif  // CONFIG_H_PARTITION
-      this_mi_col,
-      subblock_sizes[i],
-      PARTITION_VERT_3
-    };
+    SUBBLOCK_RDO_DATA rdo_data = { pc_tree->vertical3[i],
+                                   get_partition_subtree_const(ptree_luma, i),
+                                   get_partition_subtree_const(template_tree,
+                                                               i),
+                                   this_mi_row,
+                                   this_mi_col,
+                                   subblock_sizes[i],
+                                   PARTITION_VERT_3 };
     if (!rd_try_subblock_new(cpi, td, tile_data, tp, &rdo_data, *best_rdc,
                              &sum_rdc, multi_pass_mode, &skippable,
                              max_recursion_depth)) {
@@ -5139,19 +6463,20 @@
 
   av1_rd_cost_update(x->rdmult, &sum_rdc);
   if (sum_rdc.rdcost < best_rdc->rdcost) {
-#if CONFIG_C043_MVP_IMPROVEMENTS
-    *best_level_bank = x->e_mbd.ref_mv_bank;
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
-#if WARP_CU_BANK
-    *best_level_warp_bank = x->e_mbd.warp_param_bank;
-#endif  // WARP_CU_BANK
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+    update_best_level_banks(level_banks, &x->e_mbd);
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
     *best_rdc = sum_rdc;
     search_state->found_best_partition = true;
     pc_tree->partitioning = PARTITION_VERT_3;
     pc_tree->skippable = skippable;
   }
   av1_restore_context(cm, x, x_ctx, mi_row, mi_col, bsize, num_planes);
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+  restore_level_banks(&x->e_mbd, level_banks);
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
 }
+
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
 
 static AOM_INLINE int get_partition_depth(const PC_TREE *pc_tree,
@@ -5194,12 +6519,144 @@
             get_partition_depth(pc_tree->vertical3[idx], curr_depth + 1));
       }
       break;
+#if CONFIG_UNEVEN_4WAY
+    case PARTITION_HORZ_4A:
+      for (int idx = 0; idx < 4; idx++) {
+        max_depth = AOMMAX(
+            max_depth,
+            get_partition_depth(pc_tree->horizontal4a[idx], curr_depth + 1));
+      }
+      break;
+    case PARTITION_HORZ_4B:
+      for (int idx = 0; idx < 4; idx++) {
+        max_depth = AOMMAX(
+            max_depth,
+            get_partition_depth(pc_tree->horizontal4b[idx], curr_depth + 1));
+      }
+      break;
+    case PARTITION_VERT_4A:
+      for (int idx = 0; idx < 4; idx++) {
+        max_depth = AOMMAX(
+            max_depth,
+            get_partition_depth(pc_tree->vertical4a[idx], curr_depth + 1));
+      }
+      break;
+    case PARTITION_VERT_4B:
+      for (int idx = 0; idx < 4; idx++) {
+        max_depth = AOMMAX(
+            max_depth,
+            get_partition_depth(pc_tree->vertical4b[idx], curr_depth + 1));
+      }
+      break;
+#endif  // CONFIG_UNEVEN_4WAY
     default: assert(0); break;
   }
   return max_depth;
 }
 
 #if CONFIG_EXT_RECUR_PARTITIONS
+static AOM_INLINE bool try_none_after_rect(
+    const MACROBLOCKD *xd, const CommonModeInfoParams *mi_params,
+    BLOCK_SIZE bsize, int mi_row, int mi_col) {
+  if (!is_partition_point(bsize)) {
+    return false;
+  }
+  const int tree_idx = av1_get_sdp_idx(xd->tree_type);
+  // This speed feature is not applicable if either the above or left block is
+  // unavailable.
+  if (tree_idx == 0 && !(xd->up_available && xd->left_available)) {
+    return false;
+  }
+  if (tree_idx == 1 &&
+      !(xd->chroma_up_available && xd->chroma_left_available)) {
+    return false;
+  }
+  // Scan the min/max log2 height of left blocks and log2 width of above ones.
+  const int mi_stride = xd->mi_stride;
+  int min_left_dim_log2 = INT_MAX, min_above_dim_log2 = INT_MAX;
+  int max_left_dim_log2 = 0, max_above_dim_log2 = 0;
+  const int mi_height =
+      AOMMIN(mi_size_high[bsize], mi_params->mi_rows - mi_row);
+  const int mi_width = AOMMIN(mi_size_wide[bsize], mi_params->mi_cols - mi_col);
+  for (int row = 0; row < mi_height;) {
+    const MB_MODE_INFO *mi = xd->mi[row * mi_stride - 1];
+    const BLOCK_SIZE left_bsize = mi->sb_type[tree_idx];
+
+    min_left_dim_log2 =
+        AOMMIN(min_left_dim_log2, mi_size_high_log2[left_bsize]);
+    max_left_dim_log2 =
+        AOMMAX(max_left_dim_log2, mi_size_high_log2[left_bsize]);
+    const int row_step =
+        tree_idx == 0
+            ? mi_size_high[left_bsize] - AOMMAX(mi_row - mi->mi_row_start, 0)
+            : mi_size_high[left_bsize] -
+                  AOMMAX(mi_row - mi->chroma_mi_row_start, 0);
+    row += row_step;
+    assert(row_step > 0);
+  }
+  for (int col = 0; col < mi_width;) {
+    const MB_MODE_INFO *mi = xd->mi[-1 * mi_stride + col];
+    const BLOCK_SIZE above_bsize = mi->sb_type[tree_idx];
+
+    min_above_dim_log2 =
+        AOMMIN(min_above_dim_log2, mi_size_wide_log2[above_bsize]);
+    max_above_dim_log2 =
+        AOMMAX(max_above_dim_log2, mi_size_wide_log2[above_bsize]);
+    const int col_step =
+        tree_idx == 0
+            ? mi_size_wide[above_bsize] - AOMMAX(mi_col - mi->mi_col_start, 0)
+            : mi_size_wide[above_bsize] -
+                  AOMMAX(mi_col - mi->chroma_mi_col_start, 0);
+    col += col_step;
+    assert(col_step > 0);
+  }
+  // Delay the search for partition none if the above width and left height
+  // are not bigger than the current block dimension AND at least one of the
+  // dimensions is smaller than the current block by a factor of 4.
+  if ((mi_size_high_log2[bsize] > max_left_dim_log2 + 1 &&
+       mi_size_wide_log2[bsize] >= min_above_dim_log2) ||
+      (mi_size_wide_log2[bsize] > max_above_dim_log2 + 1 &&
+       mi_size_high_log2[bsize] >= min_left_dim_log2)) {
+    return true;
+  }
+  return false;
+}
+
+/*!\brief Sets prune_partition_none if any subtree of the current best
+ * SPLIT/HORZ/VERT partition is itself partitioned further. */
+static AOM_INLINE void prune_none_with_rect_results(
+    PartitionSearchState *part_search_state, const PC_TREE *pc_tree) {
+  if (!part_search_state->found_best_partition) {
+    return;
+  }
+
+  const PARTITION_TYPE cur_best_partition = pc_tree->partitioning;
+  PC_TREE *const *tree = NULL;
+  int num_sub_parts = 0;
+  if (cur_best_partition == PARTITION_SPLIT) {
+    tree = pc_tree->split;
+    num_sub_parts = SUB_PARTITIONS_SPLIT;
+  } else if (cur_best_partition == PARTITION_HORZ) {
+    tree = pc_tree->horizontal;
+    num_sub_parts = NUM_RECT_PARTS;
+  } else if (cur_best_partition == PARTITION_VERT) {
+    tree = pc_tree->vertical;
+    num_sub_parts = NUM_RECT_PARTS;
+  } else {
+    assert(0 &&
+           "Unexpected best partition type in prune_none_with_rect_results.");
+  }
+  // Give up on PARTITION_NONE if any of the subtrees decided to split
+  // further. Scanning stops at the first unallocated (NULL) subtree.
+  for (int idx = 0; idx < num_sub_parts; idx++) {
+    if (!tree[idx]) {
+      break;
+    }
+    part_search_state->prune_partition_none |=
+        tree[idx]->partitioning != PARTITION_NONE;
+  }
+}
+
 /*!\brief AV1 block partition search (full search).
 *
 * \ingroup partition_search
@@ -5309,14 +6766,12 @@
   // Initialization of state variables used in partition search.
   init_partition_search_state_params(x, cpi, &part_search_state,
 #if CONFIG_EXT_RECUR_PARTITIONS
-                                     pc_tree,
+                                     pc_tree, ptree_luma, template_tree,
+                                     max_recursion_depth,
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
                                      mi_row, mi_col, bsize);
   PartitionBlkParams blk_params = part_search_state.part_blk_params;
 #if CONFIG_EXT_RECUR_PARTITIONS
-  PARTITION_TYPE forced_partition =
-      get_forced_partition_type(cm, x, mi_row, mi_col, bsize, template_tree,
-                                ptree_luma, &pc_tree->chroma_ref_info);
   if (sms_tree != NULL)
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
     sms_tree->partitioning = PARTITION_NONE;
@@ -5334,11 +6789,12 @@
     if (counterpart_block->rd_cost.rate != INT_MAX) {
       av1_copy_pc_tree_recursive(cm, pc_tree, counterpart_block,
                                  part_search_state.ss_x, part_search_state.ss_y,
-                                 &td->shared_coeff_buf, num_planes);
+                                 &td->shared_coeff_buf, xd->tree_type,
+                                 num_planes);
       *rd_cost = pc_tree->rd_cost;
-#if CONFIG_C043_MVP_IMPROVEMENTS
+#if CONFIG_MVP_IMPROVEMENT
       x->e_mbd.ref_mv_bank = counterpart_block->ref_mv_bank;
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
+#endif  // CONFIG_MVP_IMPROVEMENT
 #if WARP_CU_BANK
       x->e_mbd.warp_param_bank = counterpart_block->warp_param_bank;
 #endif  // WARP_CU_BANK
@@ -5377,14 +6833,12 @@
 #endif
 #endif
 
+#if !CONFIG_EXT_RECUR_PARTITIONS
   // Override partition costs at the edges of the frame in the same
   // way as in read_partition (see decodeframe.c).
   if (!(blk_params.has_rows && blk_params.has_cols))
-    set_partition_cost_for_edge_blk(cm, xd,
-#if CONFIG_EXT_RECUR_PARTITIONS
-                                    &pc_tree->chroma_ref_info,
-#endif  // CONFIG_EXT_RECUR_PARTITIONS
-                                    &part_search_state);
+    set_partition_cost_for_edge_blk(cm, xd, &part_search_state);
+#endif  // !CONFIG_EXT_RECUR_PARTITIONS
 
   // Disable rectangular partitions for inner blocks when the current block is
   // forced to only use square partitions.
@@ -5412,6 +6866,17 @@
   av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize,
                   &pc_tree->chroma_ref_info);
 
+  bool search_none_after_rect = false;
+#if CONFIG_EXT_RECUR_PARTITIONS
+  if (part_search_state.forced_partition == PARTITION_INVALID) {
+    if (cpi->sf.part_sf.adaptive_partition_search_order) {
+      search_none_after_rect =
+          try_none_after_rect(xd, &cm->mi_params, bsize, mi_row, mi_col);
+    }
+    search_none_after_rect |= bsize == BLOCK_256X256;
+  }
+#endif  // CONFIG_EXT_RECUR_PARTITIONS
+
   // Save rdmult before it might be changed, so it can be restored later.
   const int orig_rdmult = x->rdmult;
   setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, NO_AQ, NULL);
@@ -5428,6 +6893,16 @@
   xd->left_txfm_context =
       xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
   av1_save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
+#if CONFIG_MVP_IMPROVEMENT
+  LevelBanksRDO level_banks = {
+    x->e_mbd.ref_mv_bank, /* curr_level_bank*/
+    x->e_mbd.ref_mv_bank, /* best_level_bank*/
+#if WARP_CU_BANK
+    x->e_mbd.warp_param_bank, /* curr_level_warp_bank*/
+    x->e_mbd.warp_param_bank, /* best_level_warp_bank*/
+#endif                        // WARP_CU_BANK
+  };
+#endif  // CONFIG_MVP_IMPROVEMENT
 #if CONFIG_EXT_RECUR_PARTITIONS
   {
     SimpleMotionData *sms_data =
@@ -5439,11 +6914,11 @@
   int *partition_horz_allowed = &part_search_state.partition_rect_allowed[HORZ];
   int *partition_vert_allowed = &part_search_state.partition_rect_allowed[VERT];
 #if CONFIG_EXT_RECUR_PARTITIONS
-  if (forced_partition == PARTITION_INVALID &&
+  if (part_search_state.forced_partition == PARTITION_INVALID &&
       is_bsize_gt(bsize, x->sb_enc.min_partition_size)) {
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
-    int *prune_horz = &part_search_state.prune_rect_part[HORZ];
-    int *prune_vert = &part_search_state.prune_rect_part[VERT];
+    bool *prune_horz = &part_search_state.prune_rect_part[HORZ];
+    bool *prune_vert = &part_search_state.prune_rect_part[VERT];
 #if CONFIG_EXT_RECUR_PARTITIONS
     int do_square_split = true;
     int *sqr_split_ptr = &do_square_split;
@@ -5458,9 +6933,9 @@
         partition_vert_allowed, &part_search_state.do_rectangular_split,
         sqr_split_ptr, prune_horz, prune_vert, pc_tree);
 #if CONFIG_EXT_RECUR_PARTITIONS
-    forced_partition = get_forced_partition_type(
+    part_search_state.forced_partition = get_forced_partition_type(
         cm, x, blk_params.mi_row, blk_params.mi_col, blk_params.bsize,
-        template_tree, ptree_luma, &pc_tree->chroma_ref_info);
+        ptree_luma, template_tree, &pc_tree->chroma_ref_info);
   }
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
 
@@ -5476,34 +6951,7 @@
 #endif
 
   int luma_split_flag = 0;
-#if CONFIG_EXT_RECUR_PARTITIONS
-  int horz_3_allowed_sdp = 1;
-  int vert_3_allowed_sdp = 1;
-  if (is_luma_chroma_share_same_partition(xd->tree_type, ptree_luma, bsize)) {
-    PARTITION_TYPE derived_partition_mode = sdp_chroma_part_from_luma(
-        bsize, ptree_luma->partition, part_search_state.ss_x,
-        part_search_state.ss_y);
-
-    if (derived_partition_mode != PARTITION_NONE)
-      part_search_state.partition_none_allowed = BLOCK_INVALID;
-    if (derived_partition_mode != PARTITION_HORZ)
-      part_search_state.partition_rect_allowed[HORZ] = 0;
-    if (derived_partition_mode != PARTITION_VERT)
-      part_search_state.partition_rect_allowed[VERT] = 0;
-    if (derived_partition_mode != PARTITION_HORZ_3) horz_3_allowed_sdp = 0;
-    if (derived_partition_mode != PARTITION_VERT_3) vert_3_allowed_sdp = 0;
-
-    // TODO(yuec): Need to make sure there is at least one valid partition
-    // mode
-    assert(IMPLIES(
-        is_luma_chroma_share_same_partition(xd->tree_type, ptree_luma, bsize),
-        forced_partition == PARTITION_INVALID ||
-            forced_partition == sdp_chroma_part_from_luma(
-                                    bsize, ptree_luma->partition,
-                                    cpi->common.seq_params.subsampling_x,
-                                    cpi->common.seq_params.subsampling_x)));
-  }
-#else   // CONFIG_EXT_RECUR_PARTITIONS
+#if !CONFIG_EXT_RECUR_PARTITIONS
   const CommonModeInfoParams *const mi_params = &cm->mi_params;
   const int parent_block_width = block_size_wide[bsize];
   if (xd->tree_type == CHROMA_PART && parent_block_width >= SHARED_PART_SIZE) {
@@ -5515,7 +6963,7 @@
     part_search_state.partition_rect_allowed[HORZ] = 0;
     part_search_state.partition_rect_allowed[VERT] = 0;
   }
-#endif  // CONFIG_EXT_RECUR_PARTITIONS
+#endif  // !CONFIG_EXT_RECUR_PARTITIONS
 
   // Partition search
 BEGIN_PARTITION_SEARCH:
@@ -5525,8 +6973,7 @@
   if (x->must_find_valid_partition) {
 #if CONFIG_EXT_RECUR_PARTITIONS
     init_allowed_partitions(&part_search_state, &cpi->oxcf.part_cfg,
-                            &pc_tree->chroma_ref_info, &cm->mi_params,
-                            xd->tree_type);
+                            &pc_tree->chroma_ref_info, xd->tree_type);
 #else
     reset_part_limitations(cpi, &part_search_state);
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
@@ -5537,41 +6984,21 @@
 
   // PARTITION_NONE search stage.
   int64_t part_none_rd = INT64_MAX;
-#if CONFIG_C043_MVP_IMPROVEMENTS
-  REF_MV_BANK curr_level_bank = x->e_mbd.ref_mv_bank;
-  REF_MV_BANK best_level_bank = x->e_mbd.ref_mv_bank;
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
-#if WARP_CU_BANK
-  WARP_PARAM_BANK curr_level_warp_bank = x->e_mbd.warp_param_bank;
-  WARP_PARAM_BANK best_level_warp_bank = x->e_mbd.warp_param_bank;
-#endif  // WARP_CU_BANK
-#if CONFIG_EXT_RECUR_PARTITIONS
-  if (IS_FORCED_PARTITION_TYPE(PARTITION_NONE) &&
-      (forced_partition == PARTITION_NONE || bsize != BLOCK_256X256)) {
-#endif  // CONFIG_EXT_RECUR_PARTITIONS
+  if (!search_none_after_rect) {
     none_partition_search(cpi, td, tile_data, x, pc_tree, sms_tree, &x_ctx,
                           &part_search_state, &best_rdc, &pb_source_variance,
                           none_rd, &part_none_rd
-#if CONFIG_C043_MVP_IMPROVEMENTS
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
                           ,
-                          &best_level_bank
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
-#if WARP_CU_BANK
-                          ,
-                          &best_level_warp_bank
-#endif  // WARP_CU_BANK
+                          &level_banks
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
     );
-#if CONFIG_C043_MVP_IMPROVEMENTS
-    x->e_mbd.ref_mv_bank = curr_level_bank;
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
-#if WARP_CU_BANK
-    x->e_mbd.warp_param_bank = curr_level_warp_bank;
-#endif  // WARP_CU_BANK
-#if CONFIG_EXT_RECUR_PARTITIONS
   }
 
+#if CONFIG_EXT_RECUR_PARTITIONS
   if (cpi->sf.part_sf.end_part_search_after_consec_failures && x->is_whole_sb &&
-      !frame_is_intra_only(cm) && forced_partition == PARTITION_INVALID &&
+      !frame_is_intra_only(cm) &&
+      part_search_state.forced_partition == PARTITION_INVALID &&
       pc_tree->parent && pc_tree->parent->parent) {
     if (pc_tree->none_rd.rate == INT_MAX &&
         pc_tree->parent->none_rd.rate == INT_MAX &&
@@ -5585,33 +7012,20 @@
 
   // PARTITION_SPLIT search stage.
   int64_t part_split_rd = INT64_MAX;
-  if (IS_FORCED_PARTITION_TYPE(PARTITION_SPLIT) && max_recursion_depth > 0 &&
-      !frame_is_intra_only(cm)) {
-    split_partition_search(cpi, td, tile_data, tp, x, pc_tree, sms_tree, &x_ctx,
-                           &part_search_state, &best_rdc, multi_pass_mode,
-                           &part_split_rd
-#if CONFIG_C043_MVP_IMPROVEMENTS
-                           ,
-                           &best_level_bank
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
-#if WARP_CU_BANK
-                           ,
-                           &best_level_warp_bank
-#endif  // WARP_CU_BANK
+  split_partition_search(cpi, td, tile_data, tp, x, pc_tree, sms_tree, &x_ctx,
+                         &part_search_state, &best_rdc, multi_pass_mode,
+                         &part_split_rd
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+                         ,
+                         &level_banks
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
 #if CONFIG_EXT_RECUR_PARTITIONS
-                           ,
-                           ptree_luma, template_tree, max_recursion_depth - 1
+                         ,
+                         ptree_luma, template_tree, max_recursion_depth - 1
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
-    );
-  }
-#if CONFIG_C043_MVP_IMPROVEMENTS
-  x->e_mbd.ref_mv_bank = curr_level_bank;
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
-#if WARP_CU_BANK
-  x->e_mbd.warp_param_bank = curr_level_warp_bank;
-#endif  // WARP_CU_BANK
-#if !CONFIG_EXT_RECUR_PARTITIONS
+  );
 
+#if !CONFIG_EXT_RECUR_PARTITIONS
   // Terminate partition search for child partition,
   // when NONE and SPLIT partition rd_costs are INT64_MAX.
   if (cpi->sf.part_sf.early_term_after_none_split &&
@@ -5626,15 +7040,18 @@
 #endif  // !CONFIG_EXT_RECUR_PARTITIONS
 #if CONFIG_EXT_RECUR_PARTITIONS
   bool prune_none = false;
-  if (forced_partition == PARTITION_INVALID && bsize == BLOCK_256X256) {
+  if (part_search_state.forced_partition == PARTITION_INVALID &&
+      bsize == BLOCK_256X256) {
+    assert(pc_tree->partitioning == PARTITION_SPLIT);
     for (int idx = 0; idx < 4; idx++) {
       const int depth = get_partition_depth(pc_tree->split[idx], 0);
       prune_none |= depth > 0;
     }
   }
   if (cpi->sf.part_sf.prune_rect_with_split_depth && !frame_is_intra_only(cm) &&
-      forced_partition == PARTITION_INVALID && pc_tree->split[0] &&
-      pc_tree->split[1] && pc_tree->split[2] && pc_tree->split[3]) {
+      part_search_state.forced_partition == PARTITION_INVALID &&
+      pc_tree->split[0] && pc_tree->split[1] && pc_tree->split[2] &&
+      pc_tree->split[3]) {
     int min_depth = INT_MAX, max_depth = 0;
     for (int idx = 0; idx < 4; idx++) {
       const int depth = get_partition_depth(pc_tree->split[idx], 0);
@@ -5649,52 +7066,35 @@
   }
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
 #if CONFIG_EXT_RECUR_PARTITIONS
-  if (forced_partition == PARTITION_INVALID && bsize == BLOCK_256X256 &&
-      !prune_none) {
+  bool none_searched = false;
+  if (part_search_state.forced_partition == PARTITION_INVALID &&
+      bsize == BLOCK_256X256 && !prune_none) {
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
     none_partition_search(cpi, td, tile_data, x, pc_tree, sms_tree, &x_ctx,
                           &part_search_state, &best_rdc, &pb_source_variance,
                           none_rd, &part_none_rd
-#if CONFIG_C043_MVP_IMPROVEMENTS
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
                           ,
-                          &best_level_bank
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
-#if WARP_CU_BANK
-                          ,
-                          &best_level_warp_bank
-#endif  // WARP_CU_BANK
+                          &level_banks
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
     );
-#if CONFIG_C043_MVP_IMPROVEMENTS
-    x->e_mbd.ref_mv_bank = curr_level_bank;
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
-#if WARP_CU_BANK
-    x->e_mbd.warp_param_bank = curr_level_warp_bank;
-#endif  // WARP_CU_BANK
 #if CONFIG_EXT_RECUR_PARTITIONS
+    none_searched = true;
   }
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
 
   // Rectangular partitions search stage.
+  rectangular_partition_search(
+      cpi, td, tile_data, tp, x, pc_tree, &x_ctx, &part_search_state, &best_rdc,
 #if CONFIG_EXT_RECUR_PARTITIONS
-  if (max_recursion_depth > 0) {
+      multi_pass_mode, ptree_luma, template_tree, max_recursion_depth - 1,
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
-    rectangular_partition_search(cpi, td, tile_data, tp, x, pc_tree, &x_ctx,
-                                 &part_search_state, &best_rdc,
-#if CONFIG_EXT_RECUR_PARTITIONS
-                                 multi_pass_mode, ptree_luma, template_tree,
-                                 max_recursion_depth - 1,
-#endif  // CONFIG_EXT_RECUR_PARTITIONS
-                                 rect_part_win_info,
-#if CONFIG_C043_MVP_IMPROVEMENTS
-                                 &best_level_bank, &curr_level_bank,
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
-#if WARP_CU_BANK
-                                 &best_level_warp_bank, &curr_level_warp_bank,
-#endif  // WARP_CU_BANK
-                                 part_none_rd);
-#if CONFIG_EXT_RECUR_PARTITIONS
-  }
-#endif  // CONFIG_EXT_RECUR_PARTITIONS
+      rect_part_win_info,
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+      &level_banks,
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+      part_none_rd);
+
   if (pb_source_variance == UINT_MAX) {
     av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, NULL);
     pb_source_variance = av1_high_get_sby_perpixel_variance(
@@ -5703,6 +7103,19 @@
 
   assert(IMPLIES(!cpi->oxcf.part_cfg.enable_rect_partitions,
                  !part_search_state.do_rectangular_split));
+#if CONFIG_EXT_RECUR_PARTITIONS
+  if (search_none_after_rect && !none_searched) {
+    prune_none_with_rect_results(&part_search_state, pc_tree);
+    none_partition_search(cpi, td, tile_data, x, pc_tree, sms_tree, &x_ctx,
+                          &part_search_state, &best_rdc, &pb_source_variance,
+                          none_rd, &part_none_rd
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+                          ,
+                          &level_banks
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+    );
+  }
+#endif  // CONFIG_EXT_RECUR_PARTITIONS
 
 #if !CONFIG_EXT_RECUR_PARTITIONS
   const int ext_partition_allowed =
@@ -5714,14 +7127,10 @@
   ab_partitions_search(cpi, td, tile_data, tp, x, &x_ctx, pc_tree,
                        &part_search_state, &best_rdc, rect_part_win_info,
                        pb_source_variance, ext_partition_allowed
-#if CONFIG_C043_MVP_IMPROVEMENTS
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
                        ,
-                       &best_level_bank, &curr_level_bank
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
-#if WARP_CU_BANK
-                       ,
-                       &best_level_warp_bank, &curr_level_warp_bank
-#endif  // WARP_CU_BANK
+                       &level_banks
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
   );
 
   // 4-way partitions search stage.
@@ -5754,14 +7163,10 @@
     rd_pick_4partition(cpi, td, tile_data, tp, x, &x_ctx, pc_tree,
                        pc_tree->horizontal4, &part_search_state, &best_rdc,
                        inc_step, PARTITION_HORZ_4
-#if CONFIG_C043_MVP_IMPROVEMENTS
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
                        ,
-                       &best_level_bank, &curr_level_bank
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
-#if WARP_CU_BANK
-                       ,
-                       &best_level_warp_bank, &curr_level_warp_bank
-#endif  // WARP_CU_BANK
+                       &level_banks
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
     );
   }
 
@@ -5778,132 +7183,88 @@
     rd_pick_4partition(cpi, td, tile_data, tp, x, &x_ctx, pc_tree,
                        pc_tree->vertical4, &part_search_state, &best_rdc,
                        inc_step, PARTITION_VERT_4
-#if CONFIG_C043_MVP_IMPROVEMENTS
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
                        ,
-                       &best_level_bank, &curr_level_bank
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
-#if WARP_CU_BANK
-                       ,
-                       &best_level_warp_bank, &curr_level_warp_bank
-#endif  // WARP_CU_BANK
+                       &level_banks
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
     );
   }
 #endif  // !CONFIG_EXT_RECUR_PARTITIONS
 
 #if CONFIG_EXT_RECUR_PARTITIONS
-  const int ext_partition_allowed = !is_partition_implied_at_boundary(
-      &cm->mi_params, xd->tree_type, part_search_state.ss_x,
-      part_search_state.ss_y, mi_row, mi_col, bsize, &pc_tree->chroma_ref_info,
-      NULL);
-  const int partition_3_allowed = ext_partition_allowed &&
-                                  max_recursion_depth > 0 &&
-                                  cpi->oxcf.part_cfg.enable_ext_partitions;
-  const int is_wide_block = block_size_wide[bsize] > block_size_high[bsize];
-  const int is_tall_block = block_size_wide[bsize] < block_size_high[bsize];
-  const PARTITION_SPEED_FEATURES *part_sf = &cpi->sf.part_sf;
-
-  int horz_3_allowed =
-      partition_3_allowed && !is_wide_block && horz_3_allowed_sdp &&
-      check_is_chroma_size_valid(xd->tree_type, PARTITION_HORZ_3, bsize, mi_row,
-                                 mi_col, part_search_state.ss_x,
-                                 part_search_state.ss_y,
-                                 &pc_tree->chroma_ref_info) &&
-      is_bsize_geq(get_partition_subsize(bsize, PARTITION_HORZ_3),
-                   blk_params.min_partition_size);
-  // Prune horz 3 with speed features
-  if (horz_3_allowed && !frame_is_intra_only(cm) &&
-      forced_partition != PARTITION_HORZ_3) {
-    if (part_sf->prune_part_3_with_part_none &&
-        pc_tree->partitioning == PARTITION_NONE) {
-      // Prune if the best partition does not split
-      horz_3_allowed = 0;
-    }
-    if (part_sf->prune_part_3_with_part_rect &&
-        pc_tree->partitioning == PARTITION_HORZ &&
-        !node_uses_horz(pc_tree->horizontal[0]) &&
-        !node_uses_horz(pc_tree->horizontal[1])) {
-      // Prune if the best partition is horz but horz did not further split in
-      // horz
-      horz_3_allowed = 0;
-    }
-  }
-
-  int vert_3_allowed =
-      partition_3_allowed && !is_tall_block && vert_3_allowed_sdp &&
-      check_is_chroma_size_valid(xd->tree_type, PARTITION_VERT_3, bsize, mi_row,
-                                 mi_col, part_search_state.ss_x,
-                                 part_search_state.ss_y,
-                                 &pc_tree->chroma_ref_info) &&
-      is_bsize_geq(get_partition_subsize(bsize, PARTITION_VERT_3),
-                   blk_params.min_partition_size);
-
-  if (vert_3_allowed && !frame_is_intra_only(cm) &&
-      forced_partition != PARTITION_VERT_3) {
-    if (part_sf->prune_part_3_with_part_none &&
-        pc_tree->partitioning == PARTITION_NONE) {
-      // Prune if the best partition does not split
-      vert_3_allowed = 0;
-    }
-    if (part_sf->prune_part_3_with_part_rect &&
-        pc_tree->partitioning == PARTITION_VERT &&
-        !node_uses_vert(pc_tree->vertical[0]) &&
-        !node_uses_vert(pc_tree->vertical[1])) {
-      // Prune if the best partition is vert but vert did not further split in
-      // vert
-      vert_3_allowed = 0;
-    }
-  }
+  bool partition_boundaries[MAX_MIB_SQUARE] = { 0 };
+  prune_ext_partitions_3way(cpi, pc_tree, &part_search_state,
+                            partition_boundaries);
 
   const int ext_recur_depth =
       AOMMIN(max_recursion_depth - 1, cpi->sf.part_sf.ext_recur_depth);
+  const bool track_ptree_luma =
+      is_luma_chroma_share_same_partition(xd->tree_type, ptree_luma, bsize);
 
   // PARTITION_HORZ_3
-  if (IS_FORCED_PARTITION_TYPE(PARTITION_HORZ_3) && horz_3_allowed) {
-    search_partition_horz_3(
-        &part_search_state, cpi, td, tile_data, tp, &best_rdc, pc_tree,
-        (ptree_luma && ptree_luma->partition == PARTITION_HORZ_3) ? ptree_luma
-                                                                  : NULL,
-
-        template_tree, &x_ctx,
-#if CONFIG_C043_MVP_IMPROVEMENTS
-        &best_level_bank,
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
-#if WARP_CU_BANK
-        &best_level_warp_bank,
-#endif  // WARP_CU_BANK
-        multi_pass_mode, ext_recur_depth);
-#if CONFIG_C043_MVP_IMPROVEMENTS
-    x->e_mbd.ref_mv_bank = curr_level_bank;
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
-#if WARP_CU_BANK
-    x->e_mbd.warp_param_bank = curr_level_warp_bank;
-#endif  // WARP_CU_BANK
-  }
+  search_partition_horz_3(&part_search_state, cpi, td, tile_data, tp, &best_rdc,
+                          pc_tree, track_ptree_luma ? ptree_luma : NULL,
+                          template_tree, &x_ctx, &part_search_state,
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+                          &level_banks,
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+                          multi_pass_mode, ext_recur_depth);
 
   // PARTITION_VERT_3
-  if (IS_FORCED_PARTITION_TYPE(PARTITION_VERT_3) && vert_3_allowed) {
-    search_partition_vert_3(
-        &part_search_state, cpi, td, tile_data, tp, &best_rdc, pc_tree,
-        (ptree_luma && ptree_luma->partition == PARTITION_VERT_3) ? ptree_luma
-                                                                  : NULL,
-
-        template_tree, &x_ctx,
-#if CONFIG_C043_MVP_IMPROVEMENTS
-        &best_level_bank,
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
-#if WARP_CU_BANK
-        &best_level_warp_bank,
-#endif  // WARP_CU_BANK
-        multi_pass_mode, ext_recur_depth);
-#if CONFIG_C043_MVP_IMPROVEMENTS
-    x->e_mbd.ref_mv_bank = curr_level_bank;
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
-#if WARP_CU_BANK
-    x->e_mbd.warp_param_bank = curr_level_warp_bank;
-#endif  // WARP_CU_BANK
-  }
+  search_partition_vert_3(&part_search_state, cpi, td, tile_data, tp, &best_rdc,
+                          pc_tree, track_ptree_luma ? ptree_luma : NULL,
+                          template_tree, &x_ctx, &part_search_state,
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+                          &level_banks,
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+                          multi_pass_mode, ext_recur_depth);
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
 
+#if CONFIG_UNEVEN_4WAY
+  prune_ext_partitions_4way(cpi, pc_tree, &part_search_state,
+                            partition_boundaries);
+
+  // PARTITION_HORZ_4A
+  search_partition_horz_4a(&part_search_state, cpi, td, tile_data, tp,
+                           &best_rdc, pc_tree,
+                           track_ptree_luma ? ptree_luma : NULL, template_tree,
+                           &x_ctx, &part_search_state,
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+                           &level_banks,
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+                           multi_pass_mode, ext_recur_depth);
+
+  // PARTITION_HORZ_4B
+  search_partition_horz_4b(&part_search_state, cpi, td, tile_data, tp,
+                           &best_rdc, pc_tree,
+                           track_ptree_luma ? ptree_luma : NULL, template_tree,
+                           &x_ctx, &part_search_state,
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+                           &level_banks,
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+                           multi_pass_mode, ext_recur_depth);
+
+  // PARTITION_VERT_4A
+  search_partition_vert_4a(&part_search_state, cpi, td, tile_data, tp,
+                           &best_rdc, pc_tree,
+                           track_ptree_luma ? ptree_luma : NULL, template_tree,
+                           &x_ctx, &part_search_state,
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+                           &level_banks,
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+                           multi_pass_mode, ext_recur_depth);
+
+  // PARTITION_VERT_4B
+  search_partition_vert_4b(&part_search_state, cpi, td, tile_data, tp,
+                           &best_rdc, pc_tree,
+                           track_ptree_luma ? ptree_luma : NULL, template_tree,
+                           &x_ctx, &part_search_state,
+#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+                           &level_banks,
+#endif  // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK
+                           multi_pass_mode, ext_recur_depth);
+#endif  // CONFIG_UNEVEN_4WAY
+
   if (bsize == cm->sb_size && !part_search_state.found_best_partition) {
     if (x->must_find_valid_partition) {
       aom_internal_error(
@@ -5924,23 +7285,27 @@
       pc_tree->partitioning != template_tree->partition) {
     assert(0);
     printf("Mismatch with template at fr: %d, mi: (%d, %d), BLOCK_%dX%d\n",
-           cm->current_frame.order_hint, mi_row, mi_col, block_size_wide[bsize],
-           block_size_high[bsize]);
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+           cm->current_frame.display_order_hint,
+#else
+           cm->current_frame.order_hint,
+#endif  // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC
+           mi_row, mi_col, block_size_wide[bsize], block_size_high[bsize]);
   }
 #endif  // CONFIG_EXT_RECUR_PARTITIONS && !defined(NDEBUG)
 
   // Store the final rd cost
   *rd_cost = best_rdc;
-#if CONFIG_C043_MVP_IMPROVEMENTS
-  x->e_mbd.ref_mv_bank = best_level_bank;
+#if CONFIG_MVP_IMPROVEMENT
+  x->e_mbd.ref_mv_bank = level_banks.best_level_bank;
 #if CONFIG_EXT_RECUR_PARTITIONS
-  pc_tree->ref_mv_bank = best_level_bank;
+  pc_tree->ref_mv_bank = level_banks.best_level_bank;
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
-#endif  // CONFIG_C043_MVP_IMPROVEMENTS
+#endif  // CONFIG_MVP_IMPROVEMENT
 #if WARP_CU_BANK
-  x->e_mbd.warp_param_bank = best_level_warp_bank;
+  x->e_mbd.warp_param_bank = level_banks.best_level_warp_bank;
 #if CONFIG_EXT_RECUR_PARTITIONS
-  pc_tree->warp_param_bank = best_level_warp_bank;
+  pc_tree->warp_param_bank = level_banks.best_level_warp_bank;
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
 #endif  // WARP_CU_BANK
   pc_tree->rd_cost = best_rdc;
diff --git a/av1/encoder/partition_strategy.c b/av1/encoder/partition_strategy.c
index 89fb2d7..cc8ea99 100644
--- a/av1/encoder/partition_strategy.c
+++ b/av1/encoder/partition_strategy.c
@@ -253,6 +253,9 @@
     int *partition_horz_allowed, int *partition_vert_allowed,
     int *do_rectangular_split, int *do_square_split) {
   aom_clear_system_state();
+  (void)partition_horz_allowed;
+  (void)partition_vert_allowed;
+  (void)do_rectangular_split;
 
   const AV1_COMMON *const cm = &cpi->common;
   const int bsize_idx = convert_bsize_to_idx(bsize);
@@ -511,7 +514,11 @@
 void av1_simple_motion_search_prune_rect(
     AV1_COMP *const cpi, MACROBLOCK *x, SIMPLE_MOTION_DATA_TREE *sms_tree,
     int mi_row, int mi_col, BLOCK_SIZE bsize, int partition_horz_allowed,
-    int partition_vert_allowed, int *prune_horz, int *prune_vert) {
+    int partition_vert_allowed, bool *prune_horz, bool *prune_vert) {
+  // TODO(urvang): Need to change for CONFIG_UNEVEN_4WAY.
+#if CONFIG_UNEVEN_4WAY
+  assert(0 && "Not implemented");
+#endif  // CONFIG_UNEVEN_4WAY
   aom_clear_system_state();
   const AV1_COMMON *const cm = &cpi->common;
   const int bsize_idx = convert_bsize_to_idx(bsize);
@@ -924,8 +931,8 @@
 void av1_ml_prune_rect_partition(const AV1_COMP *const cpi,
                                  const MACROBLOCK *const x, BLOCK_SIZE bsize,
                                  int64_t best_rd, int64_t none_rd,
-                                 int64_t *split_rd, int *const dst_prune_horz,
-                                 int *const dst_prune_vert) {
+                                 int64_t *split_rd, bool *const dst_prune_horz,
+                                 bool *const dst_prune_vert) {
   if (bsize < BLOCK_8X8 || best_rd >= 1000000000) return;
   best_rd = AOMMAX(best_rd, 1);
   const NN_CONFIG *nn_config = NULL;
@@ -1091,6 +1098,7 @@
   }
 }
 
+#if !CONFIG_EXT_RECUR_PARTITIONS
 #define FEATURES 18
 #define LABELS 4
 // Use a ML model to predict if horz4 and vert4 should be considered.
@@ -1229,6 +1237,8 @@
 #undef FEATURES
 #undef LABELS
 
+#endif  // !CONFIG_EXT_RECUR_PARTITIONS
+
 #define FEATURES 4
 int av1_ml_predict_breakout(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
                             const MACROBLOCK *const x,
@@ -1300,7 +1310,7 @@
     BLOCK_SIZE bsize, SIMPLE_MOTION_DATA_TREE *const sms_tree,
     int *partition_none_allowed, int *partition_horz_allowed,
     int *partition_vert_allowed, int *do_rectangular_split,
-    int *do_square_split, int *prune_horz, int *prune_vert,
+    int *do_square_split, bool *prune_horz, bool *prune_vert,
     const PC_TREE *pc_tree) {
   const AV1_COMMON *const cm = &cpi->common;
   const CommonModeInfoParams *const mi_params = &cm->mi_params;
@@ -1342,27 +1352,15 @@
         do_square_split);
 #if CONFIG_EXT_RECUR_PARTITIONS
     if (!*partition_none_allowed) {
-      if (!pc_tree->parent || pc_tree != pc_tree->parent->horizontal3[1]) {
-        av1_cache_best_partition(x->sms_bufs, mi_row, mi_col, bsize,
-                                 cm->sb_size, PARTITION_HORZ);
-        const int mi_step = block_size_high[bsize] / 2;
-        BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_HORZ);
-        av1_cache_best_partition(x->sms_bufs, mi_row, mi_col, subsize,
-                                 cm->sb_size, PARTITION_VERT);
-        av1_cache_best_partition(x->sms_bufs, mi_row + mi_step, mi_col, subsize,
-                                 cm->sb_size, PARTITION_VERT);
-      } else if (pc_tree != pc_tree->parent->vertical[1]) {
-        av1_cache_best_partition(x->sms_bufs, mi_row, mi_col, bsize,
-                                 cm->sb_size, PARTITION_VERT);
-        const int mi_step = block_size_wide[bsize] / 2;
-        BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_VERT);
-        av1_cache_best_partition(x->sms_bufs, mi_row, mi_col, subsize,
-                                 cm->sb_size, PARTITION_HORZ);
-        av1_cache_best_partition(x->sms_bufs, mi_row, mi_col + mi_step, subsize,
-                                 cm->sb_size, PARTITION_HORZ);
-      }
+      av1_cache_best_partition(x->sms_bufs, mi_row, mi_col, bsize, cm->sb_size,
+                               PARTITION_HORZ);
+      const int mi_step = block_size_high[bsize] / 2;
+      BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_HORZ);
+      av1_cache_best_partition(x->sms_bufs, mi_row, mi_col, subsize,
+                               cm->sb_size, PARTITION_VERT);
+      av1_cache_best_partition(x->sms_bufs, mi_row + mi_step, mi_col, subsize,
+                               cm->sb_size, PARTITION_VERT);
     }
-#else
     (void)pc_tree;
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
   }
@@ -1631,6 +1629,24 @@
 }
 
 // Gets the linear index corresponds to the current block.
+
+#if CONFIG_UNEVEN_4WAY
+static INLINE int get_sms_arr_1d_idx(int mi_bsize, int mi_in_sb) {
+  int idx = -1;  // Sentinel; every branch below overwrites it.
+  if (mi_bsize <= 2) {  // Lengths 1 and 2: the offset is the index.
+    idx = mi_in_sb;
+  } else if (mi_bsize <= 8) {  // Offsets step by mi_bsize / 4.
+    assert(mi_in_sb % (mi_bsize / 4) == 0);
+    idx = mi_in_sb / (mi_bsize / 4);
+  } else {  // Longer lengths: offsets step by mi_bsize / 2.
+    assert(mi_in_sb % (mi_bsize / 2) == 0);
+    idx = mi_in_sb / (mi_bsize / 2);
+  }
+  assert(idx >= 0 && idx < get_sms_count_from_length(mi_bsize));
+
+  return idx;
+}
+#else
 static INLINE int get_sms_arr_1d_idx(int mi_bsize, int mi_in_sb) {
   int idx = -1;
   if (mi_bsize == 1) {
@@ -1643,6 +1659,7 @@
 
   return idx;
 }
+#endif  // CONFIG_UNEVEN_4WAY
 
 #define MAKE_SMS_ARR_SWITCH_CASE(width, height) \
   case BLOCK_##width##X##height: {              \
@@ -1817,42 +1834,59 @@
     SimpleMotionDataBufs *sms_bufs, int mi_row, int mi_col, BLOCK_SIZE bsize,
     BLOCK_SIZE sb_size, PARTITION_TYPE partition, MV start_mv) {
   assert(bsize < BLOCK_SIZES_ALL);
-  const int quarter_step_h = block_size_high[bsize] / 4;
-  const int quarter_step_w = block_size_wide[bsize] / 4;
+  const int eighth_step_h = block_size_high[bsize] / 8;
+  const int eighth_step_w = block_size_wide[bsize] / 8;
   static const int subblock_count[ALL_PARTITION_TYPES] = {
     1,  // PARTITION_NONE
     2,  // PARTITION_HORZ
     2,  // PARTITION_VERT
-#if CONFIG_H_PARTITION
     4,  // PARTITION_HORZ_3
     4,  // PARTITION_VERT_3
-#else
-    3,                                           // PARTITION_HORZ_3
-    3,                                           // PARTITION_VERT_3
-#endif  // CONFIG_H_PARTITION
+#if CONFIG_UNEVEN_4WAY
+    4,  // PARTITION_HORZ_4A
+    4,  // PARTITION_HORZ_4B
+    4,  // PARTITION_VERT_4A
+    4,  // PARTITION_VERT_4B
+#endif  // CONFIG_UNEVEN_4WAY
     4,  // PARTITION_SPLIT
   };
   // PARTITION x NUM_SUBBLOCKS x (ROW and COL)
   static const int step_multiplier[ALL_PARTITION_TYPES][4][2] = {
     { { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } },  // PARTITION_NONE
-    { { 0, 0 }, { 2, 0 }, { 0, 0 }, { 0, 0 } },  // PARTITION_HORZ
-    { { 0, 0 }, { 0, 2 }, { 0, 0 }, { 0, 0 } },  // PARTITION_VERT
-#if CONFIG_H_PARTITION
-    { { 0, 0 }, { 1, 0 }, { 1, 2 }, { 3, 0 } },  // PARTITION_HORZ_3
-    { { 0, 0 }, { 0, 1 }, { 2, 1 }, { 0, 3 } },  // PARTITION_VERT_3
-#else
-    { { 0, 0 }, { 1, 0 }, { 3, 0 }, { 0, 0 } },  // PARTITION_HORZ_3
-    { { 0, 0 }, { 0, 1 }, { 0, 3 }, { 0, 0 } },  // PARTITION_VERT_3
-#endif                                           // CONFIG_H_PARTITION
-    { { 0, 0 }, { 0, 2 }, { 2, 0 }, { 2, 2 } },  // PARTITION_SPLIT
+    { { 0, 0 }, { 4, 0 }, { 0, 0 }, { 0, 0 } },  // PARTITION_HORZ
+    { { 0, 0 }, { 0, 4 }, { 0, 0 }, { 0, 0 } },  // PARTITION_VERT
+    { { 0, 0 }, { 2, 0 }, { 2, 4 }, { 6, 0 } },  // PARTITION_HORZ_3
+    { { 0, 0 }, { 0, 2 }, { 4, 2 }, { 0, 6 } },  // PARTITION_VERT_3
+#if CONFIG_UNEVEN_4WAY
+    { { 0, 0 }, { 1, 0 }, { 3, 0 }, { 7, 0 } },  // PARTITION_HORZ_4A
+    { { 0, 0 }, { 1, 0 }, { 5, 0 }, { 7, 0 } },  // PARTITION_HORZ_4B
+    { { 0, 0 }, { 0, 1 }, { 0, 3 }, { 0, 7 } },  // PARTITION_VERT_4A
+    { { 0, 0 }, { 0, 1 }, { 0, 5 }, { 0, 7 } },  // PARTITION_VERT_4B
+#endif                                           // CONFIG_UNEVEN_4WAY
+    { { 0, 0 }, { 0, 4 }, { 4, 0 }, { 4, 4 } },  // PARTITION_SPLIT
   };
 
+  // Sizes of subblocks.
   const BLOCK_SIZE part_subsize = get_partition_subsize(bsize, partition);
   if (part_subsize == BLOCK_INVALID) return;
 
   BLOCK_SIZE subsizes[4] = { part_subsize, part_subsize, part_subsize,
                              part_subsize };
-#if CONFIG_H_PARTITION
+#if CONFIG_UNEVEN_4WAY
+  if (partition == PARTITION_HORZ_4A) {
+    subsizes[2] = get_partition_subsize(bsize, PARTITION_HORZ);
+    subsizes[1] = get_partition_subsize(subsizes[2], PARTITION_HORZ);
+  } else if (partition == PARTITION_HORZ_4B) {
+    subsizes[1] = get_partition_subsize(bsize, PARTITION_HORZ);
+    subsizes[2] = get_partition_subsize(subsizes[1], PARTITION_HORZ);
+  } else if (partition == PARTITION_VERT_4A) {
+    subsizes[2] = get_partition_subsize(bsize, PARTITION_VERT);
+    subsizes[1] = get_partition_subsize(subsizes[2], PARTITION_VERT);
+  } else if (partition == PARTITION_VERT_4B) {
+    subsizes[1] = get_partition_subsize(bsize, PARTITION_VERT);
+    subsizes[2] = get_partition_subsize(subsizes[1], PARTITION_VERT);
+  }
+#endif  // CONFIG_UNEVEN_4WAY
   if (partition == PARTITION_HORZ_3) {
     subsizes[1] = get_h_partition_subsize(sb_size, 1, PARTITION_HORZ_3);
     subsizes[2] = get_h_partition_subsize(sb_size, 2, PARTITION_HORZ_3);
@@ -1860,12 +1894,12 @@
     subsizes[1] = get_h_partition_subsize(sb_size, 1, PARTITION_VERT_3);
     subsizes[2] = get_h_partition_subsize(sb_size, 2, PARTITION_VERT_3);
   }
-#endif  // CONFIG_H_PARTITION
+
   for (int idx = 0; idx < subblock_count[partition]; idx++) {
     const int sub_row =
-        mi_row + step_multiplier[partition][idx][0] * quarter_step_h / 4;
+        mi_row + step_multiplier[partition][idx][0] * eighth_step_h / 4;
     const int sub_col =
-        mi_col + step_multiplier[partition][idx][1] * quarter_step_w / 4;
+        mi_col + step_multiplier[partition][idx][1] * eighth_step_w / 4;
     SimpleMotionData *subblock = av1_get_sms_data_entry(
         sms_bufs, sub_row, sub_col, subsizes[idx], sb_size);
     add_start_mv_to_block(subblock, start_mv);
@@ -1989,15 +2023,10 @@
 
   // Whether we are in the middle of a PARTITION_3 subblock
   const PC_TREE *parent = pc_tree->parent;
-#if CONFIG_H_PARTITION
   ml_features[num_features++] = parent && (parent->horizontal3[1] == pc_tree ||
                                            parent->horizontal3[2] == pc_tree);
   ml_features[num_features++] = parent && (parent->vertical3[1] == pc_tree ||
                                            parent->vertical3[2] == pc_tree);
-#else
-  ml_features[num_features++] = parent && parent->horizontal3[1] == pc_tree;
-  ml_features[num_features++] = parent && parent->vertical3[1] == pc_tree;
-#endif  // CONFIG_H_PARTITION
   assert(num_features == 19);
 }
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
diff --git a/av1/encoder/partition_strategy.h b/av1/encoder/partition_strategy.h
index 4c59fb9..63fc974 100644
--- a/av1/encoder/partition_strategy.h
+++ b/av1/encoder/partition_strategy.h
@@ -87,7 +87,7 @@
 void av1_simple_motion_search_prune_rect(
     AV1_COMP *const cpi, MACROBLOCK *x, SIMPLE_MOTION_DATA_TREE *sms_tree,
     int mi_row, int mi_col, BLOCK_SIZE bsize, int partition_horz_allowed,
-    int partition_vert_allowed, int *prune_horz, int *prune_vert);
+    int partition_vert_allowed, bool *prune_horz, bool *prune_vert);
 
 // Early terminates PARTITION_NONE using simple_motion_search features and the
 // rate, distortion, and rdcost of PARTITION_NONE. This is only called when:
@@ -130,8 +130,8 @@
 void av1_ml_prune_rect_partition(const AV1_COMP *const cpi,
                                  const MACROBLOCK *const x, BLOCK_SIZE bsize,
                                  int64_t best_rd, int64_t none_rd,
-                                 int64_t *split_rd, int *const dst_prune_horz,
-                                 int *const dst_prune_vert);
+                                 int64_t *split_rd, bool *const dst_prune_horz,
+                                 bool *const dst_prune_vert);
 
 // Use a ML model to predict if horz_a, horz_b, vert_a, and vert_b should be
 // considered.
@@ -165,7 +165,7 @@
     BLOCK_SIZE bsize, SIMPLE_MOTION_DATA_TREE *const sms_tree,
     int *partition_none_allowed, int *partition_horz_allowed,
     int *partition_vert_allowed, int *do_rectangular_split,
-    int *do_square_split, int *prune_horz, int *prune_vert,
+    int *do_square_split, bool *prune_horz, bool *prune_vert,
     const PC_TREE *pc_tree);
 
 // Prune out partitions that lead to coding block sizes outside the min and max
diff --git a/av1/encoder/pickccso.c b/av1/encoder/pickccso.c
index 2653a09..1dd7be9 100644
--- a/av1/encoder/pickccso.c
+++ b/av1/encoder/pickccso.c
@@ -281,12 +281,13 @@
   return ssd;
 }
 /* Compute SSE */
-void compute_distortion(const uint16_t *org, const int org_stride,
-                        const uint16_t *rec16, const int rec_stride,
-                        const int log2_filter_unit_size, const int height,
-                        const int width, uint64_t *distortion_buf,
-                        const int distortion_buf_stride,
-                        uint64_t *total_distortion) {
+static void compute_distortion(const uint16_t *org, const int org_stride,
+                               const uint16_t *rec16, const int rec_stride,
+                               const int log2_filter_unit_size,
+                               const int height, const int width,
+                               uint64_t *distortion_buf,
+                               const int distortion_buf_stride,
+                               uint64_t *total_distortion) {
   for (int y = 0; y < height; y += (1 << log2_filter_unit_size)) {
     for (int x = 0; x < width; x += (1 << log2_filter_unit_size)) {
       const uint64_t ssd =
diff --git a/av1/encoder/pickccso.h b/av1/encoder/pickccso.h
index 725311d..d455980 100644
--- a/av1/encoder/pickccso.h
+++ b/av1/encoder/pickccso.h
@@ -46,13 +46,6 @@
                             const uint8_t shift_bits);
 #endif
 
-void compute_distortion(const uint16_t *org, const int org_stride,
-                        const uint16_t *rec16, const int rec_stride,
-                        const int log2_filter_unit_size, const int height,
-                        const int width, uint64_t *distortion_buf,
-                        const int distortion_buf_stride,
-                        uint64_t *total_distortion);
-
 void derive_ccso_filter(AV1_COMMON *cm, const int plane, MACROBLOCKD *xd,
                         const uint16_t *org_uv, const uint16_t *ext_rec_y,
                         const uint16_t *rec_uv, int rdmult);
diff --git a/av1/encoder/picklpf.c b/av1/encoder/picklpf.c
index 113bf17..452ab8d 100644
--- a/av1/encoder/picklpf.c
+++ b/av1/encoder/picklpf.c
@@ -321,6 +321,17 @@
 
   cpi->td.mb.rdmult = cpi->rd.RDMULT;
 
+  double no_deblocking_cost[MAX_MB_PLANE] = { DBL_MAX, DBL_MAX, DBL_MAX };
+
+  for (int i = 0; i < num_planes; i++) {
+    const int chroma_lambda_mult = i ? CHROMA_LAMBDA_MULT : 1;
+    const int64_t no_deblocking_sse =
+        aom_get_sse_plane(cpi->source, &cm->cur_frame->buf, i);
+    no_deblocking_cost[i] = RDCOST_DBL_WITH_NATIVE_BD_DIST(
+        cpi->td.mb.rdmult * chroma_lambda_mult, 0, no_deblocking_sse,
+        cm->seq_params.bit_depth);
+  }
+
   if (method == LPF_PICK_MINIMAL_LPF) {
     lf->filter_level[0] = 0;
     lf->filter_level[1] = 0;
@@ -416,7 +427,12 @@
     last_frame_offsets[2] = lf->delta_q_luma[1] = lf->delta_side_luma[1];
 #endif  // DF_TWO_PARAM
 
-    if (best_single_cost < best_dual_cost) {
+    if (no_deblocking_cost[0] < AOMMIN(best_single_cost, best_dual_cost)) {
+      lf->filter_level[0] = 0;
+      lf->filter_level[1] = 0;
+      lf->delta_q_luma[0] = lf->delta_side_luma[0] = lf->delta_q_luma[1] =
+          lf->delta_side_luma[1] = 0;
+    } else if (best_single_cost < best_dual_cost) {
       lf->delta_q_luma[0] = last_frame_offsets[0] = best_single_offsets[0];
       lf->delta_side_luma[0] = last_frame_offsets[1] = best_single_offsets[1];
       lf->delta_q_luma[1] = last_frame_offsets[2] = best_single_offsets[2];
@@ -424,10 +440,12 @@
     }
 
     if (num_planes > 1) {
+      double best_cost_u = DBL_MAX;
+      double best_cost_v = DBL_MAX;
       // Cb
       last_frame_offsets[5] = lf->delta_side_u =
           search_filter_offsets(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE,
-                                last_frame_offsets, NULL, 1, 1, dir);
+                                last_frame_offsets, &best_cost_u, 1, 1, dir);
 #if DF_TWO_PARAM
       last_frame_offsets[4] = lf->delta_q_u =
           search_filter_offsets(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE,
@@ -438,7 +456,7 @@
 
       last_frame_offsets[5] = lf->delta_side_u =
           search_filter_offsets(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE,
-                                last_frame_offsets, NULL, 1, 1, dir);
+                                last_frame_offsets, &best_cost_u, 1, 1, dir);
 #if DF_TWO_PARAM
       last_frame_offsets[4] = lf->delta_q_u =
           search_filter_offsets(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE,
@@ -446,10 +464,16 @@
 #else
       last_frame_offsets[4] = lf->delta_q_u = lf->delta_side_u;
 #endif  // DF_TWO_PARAM
+
+      if (no_deblocking_cost[1] < best_cost_u) {
+        lf->filter_level_u = 0;
+        lf->delta_q_u = lf->delta_side_u = 0;
+      }
+
       // Cr
       last_frame_offsets[7] = lf->delta_side_v =
           search_filter_offsets(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE,
-                                last_frame_offsets, NULL, 2, 1, dir);
+                                last_frame_offsets, &best_cost_v, 2, 1, dir);
 #if DF_TWO_PARAM
       last_frame_offsets[6] = lf->delta_q_v =
           search_filter_offsets(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE,
@@ -459,7 +483,7 @@
 #endif  // DF_TWO_PARAM
       last_frame_offsets[7] = lf->delta_side_v =
           search_filter_offsets(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE,
-                                last_frame_offsets, NULL, 2, 1, dir);
+                                last_frame_offsets, &best_cost_v, 2, 1, dir);
 #if DF_TWO_PARAM
       last_frame_offsets[6] = lf->delta_q_v =
           search_filter_offsets(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE,
@@ -468,6 +492,11 @@
       last_frame_offsets[6] = lf->delta_q_v = lf->delta_side_v;
 #endif  // DF_TWO_PARAM
 
+      if (no_deblocking_cost[2] < best_cost_v) {
+        lf->filter_level_v = 0;
+        lf->delta_q_v = lf->delta_side_v = 0;
+      }
+
       // to switch off filters if offsets are zero
       if (!df_quant_from_qindex(cm->quant_params.base_qindex +
                                     cm->lf.delta_q_luma[0] * DF_DELTA_SCALE,
diff --git a/av1/encoder/pickrst.c b/av1/encoder/pickrst.c
index 2e7c780..5c49b56 100644
--- a/av1/encoder/pickrst.c
+++ b/av1/encoder/pickrst.c
@@ -180,6 +180,10 @@
   Vector *unit_indices;
 #endif  // CONFIG_LR_MERGE_COEFFS
 
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+  // True when the encoder is in the cross-component Wiener filter round.
+  bool is_cross_filter_round;
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
   AV1PixelRect tile_rect;
 } RestSearchCtxt;
 
@@ -224,12 +228,17 @@
 #endif  // CONFIG_LR_MERGE_COEFFS
 
 static AOM_INLINE void reset_all_banks(RestSearchCtxt *rsc) {
-  av1_reset_wiener_bank(&rsc->wiener_bank);
+  av1_reset_wiener_bank(&rsc->wiener_bank, rsc->plane != AOM_PLANE_Y);
   av1_reset_sgrproj_bank(&rsc->sgrproj_bank);
 #if CONFIG_WIENER_NONSEP
   av1_reset_wienerns_bank(&rsc->wienerns_bank,
                           rsc->cm->quant_params.base_qindex,
-                          rsc->num_filter_classes, rsc->plane != AOM_PLANE_Y);
+                          rsc->num_filter_classes, rsc->plane != AOM_PLANE_Y
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+                          ,
+                          rsc->is_cross_filter_round
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+  );
 #endif  // CONFIG_WIENER_NONSEP
 }
 
@@ -302,13 +311,34 @@
   // TODO(yunqing): For now, only use optimized LR filter in decoder. Can be
   // also used in encoder.
   const int optimized_lr = 0;
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+  if (rsc->is_cross_filter_round) {
+    // Copy the pre-filtered data to the dst buffer. This implementation could
+    // be improved.
+    int unit_h = limits->v_end - limits->v_start;
+    int unit_w = limits->h_end - limits->h_start;
+    uint16_t *data_tl = fts->buffers[plane] +
+                        limits->v_start * fts->strides[is_uv] + limits->h_start;
+    uint16_t *dst_tl = rsc->dst->buffers[plane] +
+                       limits->v_start * rsc->dst->strides[is_uv] +
+                       limits->h_start;
+    copy_tile(unit_w, unit_h, data_tl, fts->strides[is_uv], dst_tl,
+              rsc->dst->strides[is_uv]);
 
-  av1_loop_restoration_filter_unit(
-      limits, rui, &rsi->boundaries, &rlbs, tile_rect, rsc->tile_stripe0,
-      is_uv && cm->seq_params.subsampling_x,
-      is_uv && cm->seq_params.subsampling_y, bit_depth, fts->buffers[plane],
-      fts->strides[is_uv], rsc->dst->buffers[plane], rsc->dst->strides[is_uv],
-      cm->rst_tmpbuf, optimized_lr);
+    av1_wiener_ns_cross_filter_unit(
+        limits, rui, &rsi->boundaries, &rlbs, tile_rect, rsc->tile_stripe0,
+        is_uv && cm->seq_params.subsampling_x,
+        is_uv && cm->seq_params.subsampling_y, bit_depth, fts->buffers[plane],
+        fts->strides[is_uv], rsc->dst->buffers[plane], rsc->dst->strides[is_uv],
+        cm->rst_tmpbuf, optimized_lr);
+  } else
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+    av1_loop_restoration_filter_unit(
+        limits, rui, &rsi->boundaries, &rlbs, tile_rect, rsc->tile_stripe0,
+        is_uv && cm->seq_params.subsampling_x,
+        is_uv && cm->seq_params.subsampling_y, bit_depth, fts->buffers[plane],
+        fts->strides[is_uv], rsc->dst->buffers[plane], rsc->dst->strides[is_uv],
+        cm->rst_tmpbuf, optimized_lr);
 
   return sse_restoration_unit(limits, rsc->src, rsc->dst, plane);
 }
@@ -637,7 +667,7 @@
     // Iterate over the stripe in blocks of width pu_width
     for (int j = 0; j < width; j += pu_width) {
       const int w = AOMMIN(pu_width, width - j);
-      const int ret = av1_selfguided_restoration(
+      const int ret = av1_selfguided_restoration_c(
           dat_row + j, w, h, dat_stride, flt0_row + j, flt1_row + j, flt_stride,
           sgr_params_idx, bit_depth);
       (void)ret;
@@ -1685,6 +1715,9 @@
                                             const AV1PixelRect *tile,
                                             RestorationUnitInfo *rui) {
   int64_t err = 0;
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+  if (rsc->is_cross_filter_round) rui->wienerns_cross_info = rui->wienerns_info;
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
 #if CONFIG_LR_MERGE_COEFFS
   if (limits != NULL) {
     err = try_restoration_unit(rsc, limits, tile, rui);
@@ -1749,7 +1782,12 @@
 #ifndef NDEBUG
     {
       const WienernsFilterParameters *nsfilter_params = get_wienerns_parameters(
-          rsc->cm->quant_params.base_qindex, rsc->plane != AOM_PLANE_Y);
+          rsc->cm->quant_params.base_qindex, rsc->plane != AOM_PLANE_Y
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+          ,
+          rsc->is_cross_filter_round
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+      );
       assert(check_wienerns_eq(&rui->wienerns_info, &last_unit_filters,
                                nsfilter_params->ncoeffs, ALL_WIENERNS_CLASSES));
     }
@@ -2981,7 +3019,11 @@
   }
 
   copy_nsfilter_taps(&rui->wienerns_info, &best);
-
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+  if (rsc->is_cross_filter_round) {
+    rui->wienerns_cross_info = rui->wienerns_info;
+  }
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
 #if CONFIG_LR_MERGE_COEFFS
   (void)count_wienerns_bits_set(rsc->plane, &x->mode_costs, &rui->wienerns_info,
                                 ref_wienerns_bank, nsfilter_params,
@@ -3114,6 +3156,9 @@
 }
 
 static int64_t compute_stats_for_wienerns_filter(
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+    RestSearchCtxt *rsc,
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
     const uint16_t *dgd_hbd, const uint16_t *src_hbd,
     const RestorationTileLimits *limits, int dgd_stride, int src_stride,
     const RestorationUnitInfo *rui, int bit_depth, double *A, double *b,
@@ -3137,9 +3182,14 @@
   int is_uv = (rui->plane != AOM_PLANE_Y);
   const int(*wienerns_config2)[3] =
       is_uv ? nsfilter_params->nsfilter_config.config2 : NULL;
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+  const int end_pixel = is_uv && !rsc->is_cross_filter_round
+                            ? nsfilter_params->nsfilter_config.num_pixels +
+#else
   const int end_pixel = is_uv ? nsfilter_params->nsfilter_config.num_pixels +
-                                    nsfilter_params->nsfilter_config.num_pixels2
-                              : nsfilter_params->nsfilter_config.num_pixels;
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+                                  nsfilter_params->nsfilter_config.num_pixels2
+                            : nsfilter_params->nsfilter_config.num_pixels;
 #else
   const int end_pixel = nsfilter_params->nsfilter_config.num_pixels;
 #endif  // CONFIG_WIENER_NONSEP_CROSS_FILT
@@ -3157,8 +3207,14 @@
         memset(buf, 0, sizeof(buf));
         for (int k = 0; k < end_pixel; ++k) {
 #if CONFIG_WIENER_NONSEP_CROSS_FILT
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+          int cross =
+              rsc->is_cross_filter_round ||
+              (is_uv && k >= nsfilter_params->nsfilter_config.num_pixels);
+#else
           const int cross =
               (is_uv && k >= nsfilter_params->nsfilter_config.num_pixels);
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
 #else
           const int cross = 0;
 #endif  // CONFIG_WIENER_NONSEP_CROSS_FILT
@@ -3176,14 +3232,32 @@
                           bit_depth);
           } else {
 #if CONFIG_WIENER_NONSEP_CROSS_FILT
-            const int k2 = k - nsfilter_params->nsfilter_config.num_pixels;
-            const int pos = wienerns_config2[k2][WIENERNS_BUF_POS];
-            const int r = wienerns_config2[k2][WIENERNS_ROW_ID];
-            const int c = wienerns_config2[k2][WIENERNS_COL_ID];
-            buf[pos] += clip_base(
-                (int16_t)luma_hbd[(i + r) * rui->luma_stride + (j + c)] -
-                    (int16_t)luma_hbd[luma_id],
-                bit_depth);
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+            if (rsc->is_cross_filter_round) {
+              const int pos = wienerns_config[k][WIENERNS_BUF_POS];
+              const int r = wienerns_config[k][WIENERNS_ROW_ID];
+              const int c = wienerns_config[k][WIENERNS_COL_ID];
+              int sign = k % 2 ? -1 : 1;
+              buf[pos] +=
+                  clip_base(
+                      (int16_t)luma_hbd[(i + r) * rui->luma_stride + (j + c)] -
+                          (int16_t)luma_hbd[luma_id],
+                      bit_depth) *
+                  sign;
+            } else {
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+              const int k2 = k - nsfilter_params->nsfilter_config.num_pixels;
+              const int pos = wienerns_config2[k2][WIENERNS_BUF_POS];
+              const int r = wienerns_config2[k2][WIENERNS_ROW_ID];
+              const int c = wienerns_config2[k2][WIENERNS_COL_ID];
+
+              buf[pos] += clip_base(
+                  (int16_t)luma_hbd[(i + r) * rui->luma_stride + (j + c)] -
+                      (int16_t)luma_hbd[luma_id],
+                  bit_depth);
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+            }
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
 #else
             assert(0 && "Incorrect CONFIG_WIENER_NONSEP configuration");
 #endif  // CONFIG_WIENER_NONSEP_CROSS_FILT
@@ -3507,12 +3581,20 @@
   initialize_rui_for_nonsep_search(rsc, &rui);
   rui.restoration_type = RESTORE_WIENER_NONSEP;
   const WienernsFilterParameters *nsfilter_params = get_wienerns_parameters(
-      rsc->cm->quant_params.base_qindex, rsc->plane != AOM_PLANE_Y);
+      rsc->cm->quant_params.base_qindex, rsc->plane != AOM_PLANE_Y
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+      ,
+      rsc->is_cross_filter_round
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+  );
   assert(rsc->num_filter_classes == rsc->wienerns_bank.filter[0].num_classes);
 
   // Calculate and save this RU's stats.
   RstUnitStats unit_stats;
   unit_stats.real_sse = compute_stats_for_wienerns_filter(
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+      rsc,
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
       rsc->dgd_buffer, rsc->src_buffer, limits, rsc->dgd_stride,
       rsc->src_stride, &rui, rsc->cm->seq_params.bit_depth, unit_stats.A,
       unit_stats.b, nsfilter_params, rsc->num_stat_classes);
@@ -3547,8 +3629,16 @@
   RestorationUnitInfo rui;
   initialize_rui_for_nonsep_search(rsc, &rui);
   rui.restoration_type = RESTORE_WIENER_NONSEP;
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+  rui.cross_restoration_type = RESTORE_WIENER_NONSEP;
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
   const WienernsFilterParameters *nsfilter_params = get_wienerns_parameters(
-      rsc->cm->quant_params.base_qindex, rsc->plane != AOM_PLANE_Y);
+      rsc->cm->quant_params.base_qindex, rsc->plane != AOM_PLANE_Y
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+      ,
+      rsc->is_cross_filter_round
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+  );
 
   const RstUnitStats *unit_stats = (const RstUnitStats *)aom_vector_const_get(
       rsc->wienerns_stats, rest_unit_idx_in_rutile);
@@ -3578,6 +3668,11 @@
   const int num_classes = rsc->num_filter_classes;
   assert(num_classes == rsc->wienerns_bank.filter[0].num_classes);
   if (num_classes > 1) {
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+    if (rsc->is_cross_filter_round) {
+      rui.wienerns_cross_info = rui.wienerns_info;
+    }
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
     rui.wiener_class_id_restrict = -1;
     calc_finer_tile_search_error(rsc, limits, &rsc->tile_rect, &rui);
   }
@@ -3949,7 +4044,12 @@
   const MACROBLOCK *const x = rsc->x;
 #if CONFIG_WIENER_NONSEP
   const WienernsFilterParameters *nsfilter_params = get_wienerns_parameters(
-      rsc->cm->quant_params.base_qindex, rsc->plane != AOM_PLANE_Y);
+      rsc->cm->quant_params.base_qindex, rsc->plane != AOM_PLANE_Y
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+      ,
+      rsc->is_cross_filter_round
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+  );
 #endif  // CONFIG_WIENER_NONSEP
   const int wiener_win =
       (rsc->plane == AOM_PLANE_Y) ? WIENER_WIN : WIENER_WIN_CHROMA;
@@ -4083,7 +4183,12 @@
   } else if (best_rtype == RESTORE_WIENER_NONSEP) {
 #if CONFIG_LR_MERGE_COEFFS
     const WienernsFilterParameters *nsfilter_params = get_wienerns_parameters(
-        rsc->cm->quant_params.base_qindex, rsc->plane != AOM_PLANE_Y);
+        rsc->cm->quant_params.base_qindex, rsc->plane != AOM_PLANE_Y
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+        ,
+        rsc->is_cross_filter_round
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+    );
     int equal_ref_for_class[WIENERNS_MAX_CLASSES] = { 0 };
     for (int c_id = 0; c_id < rusi->wienerns_info.num_classes; ++c_id) {
       const int is_equal = check_wienerns_bank_eq(
@@ -4207,7 +4312,12 @@
     rui->wienerns_info = rusi->wienerns_info;
 #if CONFIG_LR_MERGE_COEFFS
     const WienernsFilterParameters *nsfilter_params = get_wienerns_parameters(
-        rsc->cm->quant_params.base_qindex, rsc->plane != AOM_PLANE_Y);
+        rsc->cm->quant_params.base_qindex, rsc->plane != AOM_PLANE_Y
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+        ,
+        rsc->is_cross_filter_round
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+    );
     int equal_ref_for_class[WIENERNS_MAX_CLASSES] = { 0 };
     count_wienerns_bits_set(rsc->plane, mode_costs, &rui->wienerns_info,
                             &rsc->wienerns_bank, nsfilter_params,
@@ -4352,15 +4462,24 @@
   RestSearchCtxt *rsc = (RestSearchCtxt *)priv;
   const RestUnitSearchInfo *rusi = &rsc->rusi[rest_unit_idx];
   const RestorationInfo *rsi = &rsc->cm->rst_info[rsc->plane];
-  copy_unit_info(rsi->frame_restoration_type, rusi,
-                 &rsi->unit_info[rest_unit_idx], rsc);
+
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+  rsi->unit_info[rest_unit_idx].restoration_type = RESTORE_NONE;
+  rsi->unit_info[rest_unit_idx].cross_restoration_type = RESTORE_NONE;
+  if (rsi->frame_restoration_type != RESTORE_NONE)
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+    copy_unit_info(rsi->frame_restoration_type, rusi,
+                   &rsi->unit_info[rest_unit_idx], rsc);
 }
 
 static void finalize_frame_and_unit_info(RestorationType frame_rtype,
                                          RestorationInfo *rsi,
                                          RestSearchCtxt *rsc) {
   rsi->frame_restoration_type = frame_rtype;
-  if (frame_rtype != RESTORE_NONE) {
+#if !CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+  if (frame_rtype != RESTORE_NONE)
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+  {
     process_by_rutile(rsc, copy_unit_info_visitor);
   }
 }
@@ -4370,6 +4489,31 @@
   return rsi->units_per_tile;
 }
 
+#if CONFIG_FLEXIBLE_RU_SIZE
+// Refreshes rsi's per-tile unit counts from rsi->restoration_unit_size.
+void av1_reset_restoration_struct(AV1_COMMON *cm, RestorationInfo *rsi,
+                                  int is_uv) {
+  const AV1PixelRect tile_rect = av1_whole_frame_rect(cm, is_uv);
+  const int max_tile_w = tile_rect.right - tile_rect.left;
+  const int max_tile_h = tile_rect.bottom - tile_rect.top;
+
+  // The "tile" measured here is the rectangle from av1_whole_frame_rect().
+  // hpertile / vpertile are the horizontal / vertical unit counts for that
+  // rectangle at the current unit size; the division is delegated to
+  // av1_lr_count_units_in_tile(). That helper is described as rounding to
+  // nearest, modelling how a right or bottom unit can stretch to 150% of its
+  // nominal size, with a minimum of 1 for tiles smaller than half a unit
+  // (helper defined elsewhere -- confirm there).
+  const int unit_size = rsi->restoration_unit_size;
+  const int hpertile = av1_lr_count_units_in_tile(unit_size, max_tile_w);
+  const int vpertile = av1_lr_count_units_in_tile(unit_size, max_tile_h);
+
+  rsi->units_per_tile = hpertile * vpertile;
+  rsi->horz_units_per_tile = hpertile;
+  rsi->vert_units_per_tile = vpertile;
+}
+#endif  // CONFIG_FLEXIBLE_RU_SIZE
+
 void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi) {
   AV1_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &cpi->td.mb;
@@ -4425,11 +4569,20 @@
       dgd->buffers[AOM_PLANE_Y], dgd->crop_heights[AOM_PLANE_Y],
       dgd->crop_widths[AOM_PLANE_Y], dgd->strides[AOM_PLANE_Y], &luma,
       dgd->crop_heights[1], dgd->crop_widths[1], WIENERNS_UV_BRD,
-      rsc.luma_stride, cm->seq_params.bit_depth);
+      rsc.luma_stride, cm->seq_params.bit_depth
+#if WIENERNS_CROSS_FILT_LUMA_TYPE == 2
+      ,
+      cm->seq_params.enable_cfl_ds_filter == 1
+#endif
+  );
   assert(luma_buf != NULL);
   rsc.luma = luma;
 #endif  // CONFIG_WIENER_NONSEP_CROSS_FILT
 #endif  // CONFIG_WIENER_NONSEP
+
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+  rsc.is_cross_filter_round = 0;
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
   for (int plane = plane_start; plane <= plane_end; ++plane) {
     init_rsc(src, &cpi->common, x, &cpi->sf.lpf_sf, plane, rusi,
 #if CONFIG_LR_MERGE_COEFFS
@@ -4437,21 +4590,49 @@
 #endif  // CONFIG_LR_MERGE_COEFFS
              &cpi->trial_frame_rst, &rsc);
 
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+    cm->rst_info[plane].frame_restoration_type = RESTORE_NONE;
+    cm->rst_info[plane].frame_cross_restoration_type = RESTORE_NONE;
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+
     const int plane_ntiles = ntiles[plane > 0];
     const RestorationType num_rtypes =
         (plane_ntiles > 1) ? RESTORE_TYPES : RESTORE_SWITCHABLE_TYPES;
 
+#if CONFIG_FLEXIBLE_RU_SIZE
+    double best_cost = DBL_MAX;
+#else
     double best_cost = 0;
+#endif  // CONFIG_FLEXIBLE_RU_SIZE
     RestorationType best_rtype = RESTORE_NONE;
 
-    if (!cpi->sf.lpf_sf.disable_loop_restoration_chroma || !plane) {
-      av1_extend_frame(rsc.dgd_buffer, rsc.plane_width, rsc.plane_height,
-                       rsc.dgd_stride, RESTORATION_BORDER, RESTORATION_BORDER);
+#if CONFIG_FLEXIBLE_RU_SIZE
+    RestorationInfo *rsi = &cm->rst_info[plane];
+    const int max_unit_size = rsi->max_restoration_unit_size;
+    const int min_unit_size = rsi->min_restoration_unit_size;
 
-      for (RestorationType r = 0; r < num_rtypes; ++r) {
+    int best_unit_size = min_unit_size;
+
+    for (int unit_size = min_unit_size; unit_size <= max_unit_size;
+         unit_size <<= 1) {
+      if (plane == 2 && unit_size != cm->rst_info[1].restoration_unit_size) {
+        continue;
+      }
+      aom_vector_clear(&wienerns_stats);
+
+      rsi->restoration_unit_size = unit_size;
+
+      av1_reset_restoration_struct(cm, rsi, plane > 0);
+#endif  // CONFIG_FLEXIBLE_RU_SIZE
+      if (!cpi->sf.lpf_sf.disable_loop_restoration_chroma || !plane) {
+        av1_extend_frame(rsc.dgd_buffer, rsc.plane_width, rsc.plane_height,
+                         rsc.dgd_stride, RESTORATION_BORDER,
+                         RESTORATION_BORDER);
+
+        for (RestorationType r = 0; r < num_rtypes; ++r) {
 #if CONFIG_LR_FLEX_SYNTAX
-        if (cpi->common.features.lr_tools_disable_mask[plane > 0] & (1 << r))
-          continue;
+          if (cpi->common.features.lr_tools_disable_mask[plane > 0] & (1 << r))
+            continue;
 #else
         const ToolCfg *const tool_cfg = &cpi->oxcf.tool_cfg;
         switch (r) {
@@ -4476,36 +4657,57 @@
 #endif  // CONFIG_LR_FLEX_SYNTAX
 
 #if CONFIG_PC_WIENER
-        if (plane != AOM_PLANE_Y && r == RESTORE_PC_WIENER) continue;
+          if (plane != AOM_PLANE_Y && r == RESTORE_PC_WIENER) continue;
 #endif  // CONFIG_PC_WIENER
 
-        gather_stats_rest_type(&rsc, r);
+          gather_stats_rest_type(&rsc, r);
 #if CONFIG_WIENER_NONSEP
-        if (r == RESTORE_WIENER_NONSEP) {
-          rsc.num_filter_classes = rsc.plane == AOM_PLANE_Y
-                                       ? NUM_WIENERNS_CLASS_INIT_LUMA
-                                       : NUM_WIENERNS_CLASS_INIT_CHROMA;
-        }
+          if (r == RESTORE_WIENER_NONSEP) {
+            rsc.num_filter_classes = rsc.plane == AOM_PLANE_Y
+                                         ? NUM_WIENERNS_CLASS_INIT_LUMA
+                                         : NUM_WIENERNS_CLASS_INIT_CHROMA;
+          }
 #endif  // CONFIG_WIENER_NONSEP
 
-        double cost = search_rest_type(&rsc, r);
+          double cost = search_rest_type(&rsc, r);
 
+#if CONFIG_FLEXIBLE_RU_SIZE
+          if (cost < best_cost) {
+            best_cost = cost;
+            best_rtype = r;
+            best_unit_size = unit_size;
+          }
+#else
         if (r == 0 || cost < best_cost) {
           best_cost = cost;
           best_rtype = r;
         }
+#endif  // CONFIG_FLEXIBLE_RU_SIZE
+        }
+      }
+#if CONFIG_FLEXIBLE_RU_SIZE
+      if (rsi->restoration_unit_size == min_unit_size ||
+          best_unit_size == rsi->restoration_unit_size) {
+#endif  // CONFIG_FLEXIBLE_RU_SIZE
+        finalize_frame_and_unit_info(best_rtype, &cm->rst_info[plane], &rsc);
+#if CONFIG_FLEXIBLE_RU_SIZE
       }
     }
-
-    finalize_frame_and_unit_info(best_rtype, &cm->rst_info[plane], &rsc);
-
+#endif  // CONFIG_FLEXIBLE_RU_SIZE
 #if CONFIG_LR_FLEX_SYNTAX
     assert(IMPLIES(
         cm->features.lr_tools_count[plane] < 2,
         cm->rst_info[plane].frame_restoration_type != RESTORE_SWITCHABLE));
 #endif  // CONFIG_LR_FLEX_SYNTAX
+#if CONFIG_FLEXIBLE_RU_SIZE
+    rsi->restoration_unit_size = best_unit_size;
+    av1_reset_restoration_struct(cm, rsi, plane > 0);
+    int ru_num = rest_tiles_in_plane(cm, plane > 0);
+    adjust_frame_rtype(&cm->rst_info[plane], ru_num, &rsc, &cpi->oxcf.tool_cfg);
+#else
     adjust_frame_rtype(&cm->rst_info[plane], plane_ntiles, &rsc,
                        &cpi->oxcf.tool_cfg);
+#endif  // CONFIG_FLEXIBLE_RU_SIZE
   }
 
 #if CONFIG_WIENER_NONSEP_CROSS_FILT
@@ -4521,3 +4723,230 @@
   aom_vector_destroy(&unit_indices);
 #endif  // CONFIG_LR_MERGE_COEFFS
 }
+
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+// Copies one RU's chosen cross-filter type and filter info from rusi to rui.
+static AOM_INLINE void copy_unit_cross_filter_info(
+    RestorationType frame_cross_rtype, const RestUnitSearchInfo *rusi,
+    RestorationUnitInfo *rui, RestSearchCtxt *rsc) {
+#if CONFIG_LR_MERGE_COEFFS
+  const ModeCosts *mode_costs = &rsc->x->mode_costs;
+#else
+  (void)rsc;
+#endif  // CONFIG_LR_MERGE_COEFFS
+  assert(frame_cross_rtype > 0);  // best_rtype[] is indexed by type - 1
+  rui->cross_restoration_type = frame_cross_rtype == RESTORE_NONE
+                                    ? RESTORE_NONE
+                                    : rusi->best_rtype[frame_cross_rtype - 1];
+  if (rui->cross_restoration_type == RESTORE_WIENER_NONSEP) {
+    rui->wienerns_cross_info = rusi->wienerns_info;
+#if CONFIG_LR_MERGE_COEFFS
+    const WienernsFilterParameters *nsfilter_params = get_wienerns_parameters(
+        rsc->cm->quant_params.base_qindex, rsc->plane != AOM_PLANE_Y,
+        rsc->is_cross_filter_round);
+    int equal_ref_for_class[WIENERNS_MAX_CLASSES] = { 0 };
+    count_wienerns_bits_set(rsc->plane, mode_costs, &rui->wienerns_cross_info,
+                            &rsc->wienerns_bank, nsfilter_params,
+                            ALL_WIENERNS_CLASSES);
+    for (int c_id = 0; c_id < rui->wienerns_cross_info.num_classes; ++c_id) {
+      const int is_equal = check_wienerns_bank_eq(
+          &rsc->wienerns_bank, &rui->wienerns_cross_info,
+          nsfilter_params->ncoeffs, c_id, equal_ref_for_class);
+      if (is_equal == -1) {  // presumably "no equal bank entry" -- confirm
+        av1_add_to_wienerns_bank(&rsc->wienerns_bank, &rui->wienerns_cross_info,
+                                 c_id);
+      }
+    }
+#endif  // CONFIG_LR_MERGE_COEFFS
+  } else if (rui->cross_restoration_type == RESTORE_NONE) {
+    // No cross filter for this RU: nothing to copy.
+  } else {
+    assert(0);  // only NONE / WIENER_NONSEP are handled here
+  }
+}
+
+// RU visitor: writes the cross-filter choice for unit rest_unit_idx into rsi.
+static void copy_unit_cross_filter_info_visitor(
+    const RestorationTileLimits *limits, const AV1PixelRect *tile_rect,
+    int rest_unit_idx, int rest_unit_idx_seq, void *priv, int32_t *tmpbuf,
+    RestorationLineBuffers *rlbs) {
+  (void)limits;
+  (void)tile_rect;
+  (void)rest_unit_idx_seq;
+  (void)tmpbuf;
+  (void)rlbs;
+  RestSearchCtxt *rsc = (RestSearchCtxt *)priv;
+  const RestUnitSearchInfo *rusi = &rsc->rusi[rest_unit_idx];
+  const RestorationInfo *rsi = &rsc->cm->rst_info[rsc->plane];
+  // Start from "no cross filter"; replaced below if the frame enables one.
+  rsi->unit_info[rest_unit_idx].cross_restoration_type = RESTORE_NONE;
+  if (rsi->frame_cross_restoration_type != RESTORE_NONE)
+    copy_unit_cross_filter_info(rsi->frame_cross_restoration_type, rusi,
+                                &rsi->unit_info[rest_unit_idx], rsc);
+  // NOTE(review): is_cross_filter is always cleared here -- confirm intent.
+  rsi->unit_info[rest_unit_idx].wienerns_cross_info.is_cross_filter = 0;
+}
+
+// Sets the frame-level cross-filter type on rsi, then copies per-RU info.
+static void finalize_frame_and_unit_cross_filter_info(
+    RestorationType frame_cross_rtype, RestorationInfo *rsi,
+    RestSearchCtxt *rsc) {
+  rsi->frame_cross_restoration_type = frame_cross_rtype;  // visitor reads it
+  process_by_rutile(rsc, copy_unit_cross_filter_info_visitor);
+}
+
+// RD search for the cross-component Wiener filter: for each chroma plane, picks
+// the frame-level cross-filter type and the per-RU choices, storing them in cm.
+void av1_pick_cross_filter_restoration(const YV12_BUFFER_CONFIG *src,
+                                       AV1_COMP *cpi) {
+  AV1_COMMON *const cm = &cpi->common;
+  MACROBLOCK *const x = &cpi->td.mb;
+  const int num_planes = av1_num_planes(cm);
+  assert(!cm->features.all_lossless);
+  if (num_planes <= 1) return;
+
+  av1_fill_lr_rates(&x->mode_costs, x->e_mbd.tile_ctx);
+
+  int ntiles = rest_tiles_in_plane(cm, 1);
+
+  RestUnitSearchInfo *rusi =
+      (RestUnitSearchInfo *)aom_memalign(16, sizeof(*rusi) * ntiles);
+  // NOTE(review): rusi is used below without a NULL check on the allocation.
+  // If the restoration unit dimensions are not multiples of
+  // rsi->restoration_unit_size then some elements of the rusi array may be
+  // left uninitialised when we reach copy_unit_cross_filter_info(...). This is
+  // not a problem, as these elements are ignored later, but in order to quiet
+  // Valgrind's warnings we initialise the array below.
+  memset(rusi, 0, sizeof(*rusi) * ntiles);
+  x->rdmult = cpi->rd.RDMULT;
+
+#if CONFIG_LR_MERGE_COEFFS
+  Vector unit_stack;
+  aom_vector_setup(&unit_stack,
+                   1,                                // resizable capacity
+                   sizeof(struct RstUnitSnapshot));  // element size
+  Vector unit_indices;
+  aom_vector_setup(&unit_indices,
+                   1,             // resizable capacity
+                   sizeof(int));  // element size
+#endif                            // CONFIG_LR_MERGE_COEFFS
+
+  RestSearchCtxt rsc;
+  const int plane_start = AOM_PLANE_U;
+  const int plane_end = AOM_PLANE_V;
+
+  Vector wienerns_stats;
+  aom_vector_setup(&wienerns_stats,
+                   1,                             // resizable capacity
+                   sizeof(struct RstUnitStats));  // element size
+  rsc.wienerns_stats = &wienerns_stats;
+
+  uint16_t *luma = NULL;
+  uint16_t *luma_buf;
+  const YV12_BUFFER_CONFIG *dgd = &cpi->common.cur_frame->buf;
+  rsc.luma_stride = dgd->crop_widths[1] + 2 * WIENERNS_UV_BRD;
+  luma_buf = wienerns_copy_luma_highbd(
+      dgd->buffers[AOM_PLANE_Y], dgd->crop_heights[AOM_PLANE_Y],
+      dgd->crop_widths[AOM_PLANE_Y], dgd->strides[AOM_PLANE_Y], &luma,
+      dgd->crop_heights[1], dgd->crop_widths[1], WIENERNS_UV_BRD,
+      rsc.luma_stride, cm->seq_params.bit_depth
+#if WIENERNS_CROSS_FILT_LUMA_TYPE == 2
+      ,
+      cm->seq_params.enable_cfl_ds_filter == 1
+#endif
+  );
+  assert(luma_buf != NULL);
+  rsc.luma = luma;
+  // Flag read by the shared search helpers to take the cross-filter paths.
+  rsc.is_cross_filter_round = 1;
+
+  for (int plane = plane_start; plane <= plane_end; ++plane) {
+    init_rsc(src, &cpi->common, x, &cpi->sf.lpf_sf, plane, rusi,
+#if CONFIG_LR_MERGE_COEFFS
+             &unit_stack, &unit_indices,
+#endif  // CONFIG_LR_MERGE_COEFFS
+             &cpi->trial_frame_rst, &rsc);
+
+    const int plane_ntiles = ntiles;
+    const RestorationType num_rtypes =
+        (plane_ntiles > 1) ? RESTORE_TYPES : RESTORE_SWITCHABLE_TYPES;
+
+    double best_cost = DBL_MAX;
+    RestorationType best_cross_rtype = RESTORE_NONE;
+
+#if CONFIG_FLEXIBLE_RU_SIZE
+    RestorationInfo *rsi = &cm->rst_info[plane];
+    int min_unit_size = rsi->restoration_unit_size;
+    int max_unit_size = rsi->restoration_unit_size;
+    int best_unit_size =
+        min_unit_size;  // the best unit_size has been determined at the RD of
+                        // restoring filter, to be optimized.
+    for (int unit_size = min_unit_size; unit_size <= max_unit_size;
+         unit_size <<= 1) {
+      assert(rsi->restoration_unit_size == unit_size);
+      assert(cm->rst_info[1].restoration_unit_size ==
+             cm->rst_info[2].restoration_unit_size);
+
+      aom_vector_clear(&wienerns_stats);
+
+      av1_reset_restoration_struct(cm, rsi, plane > 0);
+#endif  // CONFIG_FLEXIBLE_RU_SIZE
+      if (!cpi->sf.lpf_sf.disable_loop_restoration_chroma || !plane) {
+        av1_extend_frame(rsc.dgd_buffer, rsc.plane_width, rsc.plane_height,
+                         rsc.dgd_stride, RESTORATION_BORDER,
+                         RESTORATION_BORDER);
+
+        for (RestorationType r = 0; r < num_rtypes; ++r) {
+          // Only RESTORE_NONE and RESTORE_WIENER_NONSEP are searched for now.
+          if (r != RESTORE_NONE && r != RESTORE_WIENER_NONSEP) {
+            // TODO: replace with the flexible tool on/off setting.
+            continue;
+          }
+
+          gather_stats_rest_type(&rsc, r);
+
+          // The class count is (re)set for every searched type, not only for
+          // RESTORE_WIENER_NONSEP.
+          rsc.num_filter_classes = rsc.plane == AOM_PLANE_Y
+                                       ? NUM_WIENERNS_CLASS_INIT_LUMA
+                                       : NUM_WIENERNS_CLASS_INIT_CHROMA;
+
+          double cost = search_rest_type(&rsc, r);
+
+#if CONFIG_FLEXIBLE_RU_SIZE
+          if (cost < best_cost) {
+            best_cost = cost;
+            best_cross_rtype = r;
+            best_unit_size = unit_size;
+          }
+#else
+        if (r == 0 || cost < best_cost) {
+          best_cost = cost;
+          best_cross_rtype = r;
+        }
+#endif  // CONFIG_FLEXIBLE_RU_SIZE
+        }
+      }
+#if CONFIG_FLEXIBLE_RU_SIZE
+      if (rsi->restoration_unit_size == min_unit_size ||
+          best_unit_size == rsi->restoration_unit_size) {
+        assert(rsi->restoration_unit_size == min_unit_size);
+#endif  // CONFIG_FLEXIBLE_RU_SIZE
+        cm->rst_info[plane].frame_cross_restoration_type = best_cross_rtype;
+        finalize_frame_and_unit_cross_filter_info(best_cross_rtype,
+                                                  &cm->rst_info[plane], &rsc);
+#if CONFIG_FLEXIBLE_RU_SIZE
+      }
+    }
+#endif  // CONFIG_FLEXIBLE_RU_SIZE
+  }
+  free(luma_buf);
+  aom_free(rusi);
+  aom_vector_destroy(&wienerns_stats);
+
+#if CONFIG_LR_MERGE_COEFFS
+  aom_vector_destroy(&unit_stack);
+  aom_vector_destroy(&unit_indices);
+#endif  // CONFIG_LR_MERGE_COEFFS
+}
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
diff --git a/av1/encoder/pickrst.h b/av1/encoder/pickrst.h
index 8fa2461..d536deb 100644
--- a/av1/encoder/pickrst.h
+++ b/av1/encoder/pickrst.h
@@ -191,7 +191,10 @@
  *
  */
 void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi);
-
+#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
+void av1_pick_cross_filter_restoration(const YV12_BUFFER_CONFIG *sd,
+                                       AV1_COMP *cpi);
+#endif  // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
 #ifdef __cplusplus
 }  // extern "C"
 #endif
diff --git a/av1/encoder/rd.c b/av1/encoder/rd.c
index 4a70347..23eb416 100644
--- a/av1/encoder/rd.c
+++ b/av1/encoder/rd.c
@@ -64,12 +64,12 @@
 static const int use_intra_ext_tx_for_txsize[EXT_TX_SETS_INTRA]
                                             [EXT_TX_SIZES] = {
                                               { 1, 1, 1, 1 },  // unused
-#if CONFIG_ATC_NEWTXSETS
+#if CONFIG_ATC
                                               { 1, 1, 1, 0 },
 #else
                                               { 1, 1, 0, 0 },
                                               { 0, 0, 1, 0 },
-#endif  // CONFIG_ATC_NEWTXSETS
+#endif  // CONFIG_ATC
                                             };
 
 static const int use_inter_ext_tx_for_txsize[EXT_TX_SETS_INTER]
@@ -85,12 +85,12 @@
   {
       // Intra
       EXT_TX_SET_DCTONLY,
-#if CONFIG_ATC_NEWTXSETS
+#if CONFIG_ATC
       EXT_NEW_TX_SET,
 #else
       EXT_TX_SET_DTT4_IDTX_1DDCT,
       EXT_TX_SET_DTT4_IDTX,
-#endif  // CONFIG_ATC_NEWTXSETS
+#endif  // CONFIG_ATC
   },
   {
       // Inter
@@ -133,6 +133,17 @@
         av1_cost_tokens_from_cdf(
             mode_costs->do_ext_partition_cost[plane_index][rect_type][i],
             fc->do_ext_partition_cdf[plane_index][rect_type][i], NULL);
+#if CONFIG_UNEVEN_4WAY
+        av1_cost_tokens_from_cdf(
+            mode_costs
+                ->do_uneven_4way_partition_cost[plane_index][rect_type][i],
+            fc->do_uneven_4way_partition_cdf[plane_index][rect_type][i], NULL);
+        av1_cost_tokens_from_cdf(
+            mode_costs
+                ->uneven_4way_partition_type_cost[plane_index][rect_type][i],
+            fc->uneven_4way_partition_type_cdf[plane_index][rect_type][i],
+            NULL);
+#endif  // CONFIG_UNEVEN_4WAY
       }
     }
   }
@@ -164,15 +175,36 @@
             mode_costs->partition_cost[plane_index][ctx][part] +=
                 mode_costs->rect_type_cost[plane_index][ctx][rect_type];
           }
-          const bool disable_ext_part = !cm->seq_params.enable_ext_partitions;
           const bool ext_partition_allowed =
-              !disable_ext_part &&
+              cm->seq_params.enable_ext_partitions &&
               is_ext_partition_allowed(bsize, rect_type, tree_type);
           if (ext_partition_allowed) {
             const bool do_ext_partition = (part >= PARTITION_HORZ_3);
             mode_costs->partition_cost[plane_index][ctx][part] +=
                 mode_costs->do_ext_partition_cost[plane_index][rect_type][ctx]
                                                  [do_ext_partition];
+#if CONFIG_UNEVEN_4WAY
+            if (do_ext_partition) {
+              const bool uneven_4way_partition_allowed =
+                  is_uneven_4way_partition_allowed(bsize, rect_type, tree_type);
+              if (uneven_4way_partition_allowed) {
+                const bool do_uneven_4way_partition =
+                    (part >= PARTITION_HORZ_4A);
+                mode_costs->partition_cost[plane_index][ctx][part] +=
+                    mode_costs->do_uneven_4way_partition_cost
+                        [plane_index][rect_type][ctx][do_uneven_4way_partition];
+                if (do_uneven_4way_partition) {
+                  const UNEVEN_4WAY_PART_TYPE uneven_4way_type =
+                      (part == PARTITION_HORZ_4A || part == PARTITION_VERT_4A)
+                          ? UNEVEN_4A
+                          : UNEVEN_4B;
+                  mode_costs->partition_cost[plane_index][ctx][part] +=
+                      mode_costs->uneven_4way_partition_type_cost
+                          [plane_index][rect_type][ctx][uneven_4way_type];
+                }
+              }
+            }
+#endif  // CONFIG_UNEVEN_4WAY
           }
         }
       }
@@ -200,7 +232,14 @@
                              fc->skip_txfm_cdfs[i], NULL);
   }
 
+#if CONFIG_EXT_DIR
+  for (i = 0; i < MRL_INDEX_CONTEXTS; ++i) {
+    av1_cost_tokens_from_cdf(mode_costs->mrl_index_cost[i],
+                             fc->mrl_index_cdf[i], NULL);
+  }
+#else
   av1_cost_tokens_from_cdf(mode_costs->mrl_index_cost, fc->mrl_index_cdf, NULL);
+#endif  // CONFIG_EXT_DIR
 
   for (i = 0; i < FSC_MODE_CONTEXTS; ++i) {
     for (j = 0; j < FSC_BSIZE_CONTEXTS; ++j) {
@@ -278,7 +317,7 @@
     }
   }
 
-#if CONFIG_NEW_COLOR_MAP_CODING
+#if CONFIG_PALETTE_IMPROVEMENTS
   for (i = 0; i < PALETTE_ROW_FLAG_CONTEXTS; ++i) {
     av1_cost_tokens_from_cdf(mode_costs->palette_y_row_flag_cost[i],
                              fc->identity_row_cdf_y[i], NULL);
@@ -344,6 +383,24 @@
 
   for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
     int s;
+#if CONFIG_ATC_DCTX_ALIGNED
+    int k;
+    for (k = 0; k < EOB_TX_CTXS; ++k) {
+      for (s = 1; s < EXT_TX_SETS_INTER; ++s) {
+#if CONFIG_ATC_REDUCED_TXSET
+        if (cm->features.reduced_tx_set_used ||
+            use_inter_ext_tx_for_txsize[s][i]) {
+#else
+        if (use_inter_ext_tx_for_txsize[s][i]) {
+#endif  // CONFIG_ATC_REDUCED_TXSET
+          av1_cost_tokens_from_cdf(
+              mode_costs->inter_tx_type_costs[s][k][i],
+              fc->inter_ext_tx_cdf[s][k][i],
+              av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[1][s]]);
+        }
+      }
+    }
+#else
     for (s = 1; s < EXT_TX_SETS_INTER; ++s) {
 #if CONFIG_ATC_REDUCED_TXSET
       if (cm->features.reduced_tx_set_used ||
@@ -356,17 +413,18 @@
             av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[1][s]]);
       }
     }
+#endif  // CONFIG_ATC_DCTX_ALIGNED
     for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {
-#if CONFIG_ATC_NEWTXSETS
+#if CONFIG_ATC
       int tx_set_type = av1_ext_tx_set_idx_to_type[0][s];
 #if CONFIG_ATC_REDUCED_TXSET
       const int cdf_offset = cm->features.reduced_tx_set_used ? 1 : 0;
 #endif  // CONFIG_ATC_REDUCED_TXSET
-#endif  // CONFIG_ATC_NEWTXSETS
+#endif  // CONFIG_ATC
       if (use_intra_ext_tx_for_txsize[s][i]) {
         for (j = 0; j < INTRA_MODES; ++j) {
           av1_cost_tokens_from_cdf(
-#if CONFIG_ATC_NEWTXSETS
+#if CONFIG_ATC
               mode_costs->intra_tx_type_costs[s][i][j],
 #if CONFIG_ATC_REDUCED_TXSET
               fc->intra_ext_tx_cdf[s + cdf_offset][i][j],
@@ -380,7 +438,7 @@
               mode_costs->intra_tx_type_costs[s][i][j],
               fc->intra_ext_tx_cdf[s][i][j],
               av1_ext_tx_inv_intra[av1_ext_tx_set_idx_to_type[0][s]]);
-#endif  // CONFIG_ATC_NEWTXSETS
+#endif  // CONFIG_ATC
         }
       }
     }
@@ -402,14 +460,14 @@
 #else
   av1_cost_tokens_from_cdf(mode_costs->intrabc_cost, fc->intrabc_cdf, NULL);
 #endif  // CONFIG_NEW_CONTEXT_MODELING
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
   av1_cost_tokens_from_cdf(mode_costs->intrabc_mode_cost, fc->intrabc_mode_cdf,
                            NULL);
   for (i = 0; i < MAX_REF_BV_STACK_SIZE - 1; ++i) {
     av1_cost_tokens_from_cdf(mode_costs->intrabc_drl_idx_cost[i],
                              fc->intrabc_drl_idx_cdf[i], NULL);
   }
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
 
   for (i = 0; i < TX_SIZES; ++i) {
     av1_cost_tokens_from_cdf(mode_costs->stx_flag_cost[i], fc->stx_cdf[i],
@@ -467,7 +525,7 @@
       }
     }
 
-#if CONFIG_CONTEXT_DERIVATION
+#if CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT
     for (j = 0; j < INTRA_INTER_SKIP_TXFM_CONTEXTS; ++j) {
       for (i = 0; i < INTRA_INTER_CONTEXTS; ++i) {
         av1_cost_tokens_from_cdf(mode_costs->intra_inter_cost[j][i],
@@ -479,7 +537,7 @@
       av1_cost_tokens_from_cdf(mode_costs->intra_inter_cost[i],
                                fc->intra_inter_cdf[i], NULL);
     }
-#endif  // CONFIG_CONTEXT_DERIVATION
+#endif  // CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT
 
     for (i = 0; i < INTER_SINGLE_MODE_CONTEXTS; ++i) {
       av1_cost_tokens_from_cdf(mode_costs->inter_single_mode_cost[i],
@@ -502,12 +560,12 @@
                                fc->drl_cdf[2][i], NULL);
     }
 
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
     for (i = 0; i < 3; ++i) {
       av1_cost_tokens_from_cdf(mode_costs->skip_drl_mode_cost[i],
                                fc->skip_drl_cdf[i], NULL);
     }
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
 
 #if CONFIG_OPTFLOW_REFINEMENT
     for (i = 0; i < INTER_COMPOUND_MODE_CONTEXTS; ++i)
@@ -570,6 +628,14 @@
       av1_cost_tokens_from_cdf(mode_costs->wedge_interintra_cost[i],
                                fc->wedge_interintra_cdf[i], NULL);
     }
+
+#if CONFIG_REFINEMV
+    for (i = 0; i < NUM_REFINEMV_CTX; ++i) {
+      av1_cost_tokens_from_cdf(mode_costs->refinemv_flag_cost[i],
+                               fc->refinemv_flag_cdf[i], NULL);
+    }
+#endif  // CONFIG_REFINEMV
+
 #if CONFIG_EXTENDED_WARP_PREDICTION
     for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) {
       av1_cost_tokens_from_cdf(mode_costs->obmc_cost[i], fc->obmc_cdf[i], NULL);
@@ -584,6 +650,12 @@
                                fc->warped_causal_warpmv_cdf[i], NULL);
     }
 #endif  // CONFIG_WARPMV
+#if CONFIG_CWG_D067_IMPROVED_WARP
+    for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) {
+      av1_cost_tokens_from_cdf(mode_costs->warpmv_with_mvd_flag_cost[i],
+                               fc->warpmv_with_mvd_flag_cdf[i], NULL);
+    }
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
 
 #if CONFIG_WARP_REF_LIST
     for (i = 0; i < 3; i++) {
@@ -625,6 +697,14 @@
       av1_cost_tokens_from_cdf(mode_costs->comp_group_idx_cost[i],
                                fc->comp_group_idx_cdf[i], NULL);
     }
+#if CONFIG_CWP
+    for (j = 0; j < MAX_CWP_CONTEXTS; j++) {
+      for (i = 0; i < MAX_CWP_NUM - 1; ++i) {
+        av1_cost_tokens_from_cdf(mode_costs->cwp_idx_cost[j][i],
+                                 fc->cwp_idx_cdf[j][i], NULL);
+      }
+    }
+#endif  // CONFIG_CWP
   }
 }
 
@@ -868,6 +948,20 @@
     for (int plane = 0; plane < nplanes; ++plane) {
       LV_MAP_EOB_COST *pcost = &coeff_costs->eob_costs[eob_multi_size][plane];
 
+#if CONFIG_ATC_DCTX_ALIGNED
+      aom_cdf_prob *pcdf;
+      switch (eob_multi_size) {
+        case 0: pcdf = fc->eob_flag_cdf16[plane]; break;
+        case 1: pcdf = fc->eob_flag_cdf32[plane]; break;
+        case 2: pcdf = fc->eob_flag_cdf64[plane]; break;
+        case 3: pcdf = fc->eob_flag_cdf128[plane]; break;
+        case 4: pcdf = fc->eob_flag_cdf256[plane]; break;
+        case 5: pcdf = fc->eob_flag_cdf512[plane]; break;
+        case 6: pcdf = fc->eob_flag_cdf1024[plane]; break;
+        default: assert(0 && "Invalid eob_multi_size");
+      }
+      av1_cost_tokens_from_cdf(pcost->eob_cost, pcdf, NULL);
+#else
       for (int ctx = 0; ctx < 2; ++ctx) {
         aom_cdf_prob *pcdf;
         switch (eob_multi_size) {
@@ -882,6 +976,7 @@
         }
         av1_cost_tokens_from_cdf(pcost->eob_cost[ctx], pcdf, NULL);
       }
+#endif  // CONFIG_ATC_DCTX_ALIGNED
     }
   }
   for (int tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
@@ -900,7 +995,7 @@
         av1_cost_tokens_from_cdf(pcost->base_eob_cost[ctx],
                                  fc->coeff_base_eob_cdf[tx_size][plane][ctx],
                                  NULL);
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
       for (int ctx = 0; ctx < SIG_COEF_CONTEXTS_EOB; ++ctx)
         av1_cost_tokens_from_cdf(pcost->base_lf_eob_cost[ctx],
                                  fc->coeff_base_lf_eob_cdf[tx_size][plane][ctx],
@@ -929,7 +1024,12 @@
         pcost->base_cost[ctx][7] =
             pcost->base_cost[ctx][3] - pcost->base_cost[ctx][2];
       }
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
+#if CONFIG_ATC_DCTX_ALIGNED
+      for (int ctx = 0; ctx < SIG_COEF_CONTEXTS_BOB; ++ctx)
+        av1_cost_tokens_from_cdf(pcost->base_bob_cost[ctx],
+                                 fc->coeff_base_bob_cdf[ctx], NULL);
+#endif  // CONFIG_ATC_DCTX_ALIGNED
       for (int ctx = 0; ctx < EOB_COEF_CONTEXTS; ++ctx)
         av1_cost_tokens_from_cdf(pcost->eob_extra_cost[ctx],
                                  fc->eob_extra_cdf[tx_size][plane][ctx], NULL);
@@ -949,7 +1049,7 @@
       }
 #endif  // CONFIG_CONTEXT_DERIVATION
 
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
       for (int ctx = 0; ctx < LF_LEVEL_CONTEXTS; ++ctx) {
         int br_lf_rate[BR_CDF_SIZE];
         int prev_cost_lf = 0;
@@ -972,17 +1072,17 @@
               pcost->lps_lf_cost[ctx][i] - pcost->lps_lf_cost[ctx][i - 1];
         }
       }
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
       for (int ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
         int br_rate[BR_CDF_SIZE];
         int prev_cost = 0;
         int i, j;
         av1_cost_tokens_from_cdf(
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
             br_rate, fc->coeff_br_cdf[plane][ctx],
 #else
             br_rate, fc->coeff_br_cdf[AOMMIN(tx_size, TX_32X32)][plane][ctx],
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
             NULL);
         // printf("br_rate: ");
         // for(j = 0; j < BR_CDF_SIZE; j++)
@@ -1099,7 +1199,7 @@
 #endif
   );
 
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
   // Copy the pointer of the dv cost to the mvcost
   mv_costs->dv_joint_cost = &dv_costs->joint_mv[0];
   mv_costs->dv_nmv_cost[0] = dv_costs->dv_costs[0];
@@ -1108,7 +1208,7 @@
   (void)mv_costs;
 #endif
 }
-#elif CONFIG_BVCOST_UPDATE
+#elif CONFIG_IBC_BV_IMPROVEMENT
 void av1_fill_dv_costs(const FRAME_CONTEXT *fc, IntraBCMVCosts *dv_costs) {
   int *dvcost[2] = { &dv_costs->mv_component[0][MV_MAX],
                      &dv_costs->mv_component[1][MV_MAX] };
@@ -1223,14 +1323,14 @@
 #endif
 
   if (cm->features.allow_screen_content_tools &&
-#if !CONFIG_BVCOST_UPDATE
+#if !CONFIG_IBC_BV_IMPROVEMENT
       frame_is_intra_only(cm) &&
-#endif  // !CONFIG_BVCOST_UPDATE
+#endif  // !CONFIG_IBC_BV_IMPROVEMENT
       !is_stat_generation_stage(cpi)) {
 #if CONFIG_FLEX_MVRES
     fill_dv_costs(&x->dv_costs, cm->fc, mv_costs);
 #else
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
     IntraBCMVCosts *const dv_costs = &x->dv_costs;
 #else
     IntraBCMVCosts *const dv_costs = &cpi->dv_costs;
@@ -1244,16 +1344,6 @@
                              dvcost, &cm->fc->ndvc, MV_SUBPEL_NONE);
 #endif
   }
-
-  if (!is_stat_generation_stage(cpi)) {
-    for (int i = 0; i < TRANS_TYPES; ++i)
-      // IDENTITY: 1 bit
-      // TRANSLATION: 3 bits
-      // ROTZOOM: 2 bits
-      // AFFINE: 3 bits
-      cpi->gm_info.type_cost[i] = (1 + (i > 0 ? (i == ROTZOOM ? 1 : 2) : 0))
-                                  << AV1_PROB_COST_SHIFT;
-  }
 }
 
 static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
@@ -1635,10 +1725,19 @@
   }
 #endif  // CONFIG_TIP
   const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, NONE_FRAME };
+
+#if CONFIG_SEP_COMP_DRL
+  const MB_MODE_INFO *mbmi = x->e_mbd.mi[0];
+  const int_mv ref_mv =
+      av1_get_ref_mv_from_stack(0, ref_frames, 0, x->mbmi_ext, mbmi);
+  const int_mv ref_mv1 =
+      av1_get_ref_mv_from_stack(0, ref_frames, 1, x->mbmi_ext, mbmi);
+#else
   const int_mv ref_mv =
       av1_get_ref_mv_from_stack(0, ref_frames, 0, x->mbmi_ext);
   const int_mv ref_mv1 =
       av1_get_ref_mv_from_stack(0, ref_frames, 1, x->mbmi_ext);
+#endif  // CONFIG_SEP_COMP_DRL
   MV pred_mv[MAX_MV_REF_CANDIDATES + 1];
   int num_mv_refs = 0;
   pred_mv[num_mv_refs++] = ref_mv.as_mv;
diff --git a/av1/encoder/rd.h b/av1/encoder/rd.h
index 3620853..be21280 100644
--- a/av1/encoder/rd.h
+++ b/av1/encoder/rd.h
@@ -81,7 +81,7 @@
   double r0;
 } RD_OPT;
 
-#if !CONFIG_FLEX_MVRES && !CONFIG_BVCOST_UPDATE
+#if !CONFIG_FLEX_MVRES && !CONFIG_IBC_BV_IMPROVEMENT
 typedef struct {
   // Cost of transmitting the actual motion vector.
   // mv_component[0][i] is the cost of motion vector with horizontal component
@@ -154,6 +154,9 @@
 static INLINE void av1_merge_rd_stats(RD_STATS *rd_stats_dst,
                                       const RD_STATS *rd_stats_src) {
   assert(rd_stats_dst->rate != INT_MAX && rd_stats_src->rate != INT_MAX);
+#if CONFIG_ATC_DCTX_ALIGNED
+  if (rd_stats_src->dist == INT64_MAX || rd_stats_src->rate == INT_MAX) return;
+#endif  // CONFIG_ATC_DCTX_ALIGNED
   rd_stats_dst->rate = (int)AOMMIN(
       ((int64_t)rd_stats_dst->rate + (int64_t)rd_stats_src->rate), INT_MAX);
   if (!rd_stats_dst->zero_rate)
@@ -375,7 +378,7 @@
 #if CONFIG_FLEX_MVRES
 void fill_dv_costs(IntraBCMvCosts *dv_costs, const FRAME_CONTEXT *fc,
                    MvCosts *mv_costs);
-#elif CONFIG_BVCOST_UPDATE
+#elif CONFIG_IBC_BV_IMPROVEMENT
 void av1_fill_dv_costs(const FRAME_CONTEXT *fc, IntraBCMVCosts *dv_costs);
 #endif
 
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index 88c474d..9c33f8b 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -584,7 +584,7 @@
              REF_FRAMES * sizeof((*ref_costs_comp)[0]));
   } else {
     int intra_inter_ctx = av1_get_intra_inter_context(xd);
-#if CONFIG_CONTEXT_DERIVATION
+#if CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT
     const int skip_txfm = xd->mi[0]->skip_txfm[xd->tree_type == CHROMA_PART];
     ref_costs_single[INTRA_FRAME_INDEX] =
         mode_costs->intra_inter_cost[skip_txfm][intra_inter_ctx][0];
@@ -594,7 +594,7 @@
     ref_costs_single[INTRA_FRAME_INDEX] =
         mode_costs->intra_inter_cost[intra_inter_ctx][0];
     unsigned int base_cost = mode_costs->intra_inter_cost[intra_inter_ctx][1];
-#endif  // CONFIG_CONTEXT_DERIVATION
+#endif  // CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT
 
 #if CONFIG_TIP
     if (cm->features.tip_frame_mode) {
@@ -786,9 +786,12 @@
   if (xd->tree_type != CHROMA_PART)
     av1_copy_mbmi_ext_to_mbmi_ext_frame(
         &ctx->mbmi_ext_best, x->mbmi_ext,
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SEP_COMP_DRL
+        xd->mi[0],
+#endif  // CONFIG_SEP_COMP_DRL
+#if CONFIG_SKIP_MODE_ENHANCEMENT
         xd->mi[0]->skip_mode,
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
         av1_ref_frame_type(xd->mi[0]->ref_frame));
   ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
   ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE];
@@ -826,9 +829,9 @@
     av1_setup_pred_block(xd, yv12_mb[ref_frame_idx], yv12, sf, sf, num_planes);
   }
 
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
   if (mbmi->skip_mode) return;
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
 
   // Gets an initial list of candidate vectors from neighbours and orders them
   av1_find_mv_refs(
@@ -1006,7 +1009,10 @@
   const PREDICTION_MODE this_mode = mbmi->mode;
   const MV_REFERENCE_FRAME refs[2] = { COMPACT_INDEX0_NRS(mbmi->ref_frame[0]),
                                        COMPACT_INDEX1_NRS(mbmi->ref_frame[1]) };
+
+#if !CONFIG_SEP_COMP_DRL
   const int ref_mv_idx = mbmi->ref_mv_idx;
+#endif  // !CONFIG_SEP_COMP_DRL
 #if CONFIG_FLEX_MVRES
   const MvSubpelPrecision pb_mv_precision = mbmi->pb_mv_precision;
 #endif
@@ -1017,7 +1023,12 @@
     int valid_precision_mv0 = NUM_MV_PRECISIONS;
     for (int prev_mv_precision = pb_mv_precision;
          prev_mv_precision <= mbmi->max_mv_precision; prev_mv_precision++) {
+#if CONFIG_SEP_COMP_DRL
+      if (args->single_newmv_valid[prev_mv_precision][get_ref_mv_idx(mbmi, 0)]
+                                  [refs[0]]) {
+#else
       if (args->single_newmv_valid[prev_mv_precision][ref_mv_idx][refs[0]]) {
+#endif  // CONFIG_SEP_COMP_DRL
         valid_mv0_found = 1;
         valid_precision_mv0 = prev_mv_precision;
         break;
@@ -1028,7 +1039,12 @@
     int valid_precision_mv1 = NUM_MV_PRECISIONS;
     for (int prev_mv_precision = pb_mv_precision;
          prev_mv_precision <= mbmi->max_mv_precision; prev_mv_precision++) {
+#if CONFIG_SEP_COMP_DRL
+      if (args->single_newmv_valid[prev_mv_precision][get_ref_mv_idx(mbmi, 1)]
+                                  [refs[1]]) {
+#else
       if (args->single_newmv_valid[prev_mv_precision][ref_mv_idx][refs[1]]) {
+#endif  // CONFIG_SEP_COMP_DRL
         valid_mv1_found = 1;
         valid_precision_mv1 = prev_mv_precision;
         break;
@@ -1043,64 +1059,37 @@
 
 #if CONFIG_OPTFLOW_REFINEMENT
     if (this_mode == NEW_NEWMV || this_mode == NEW_NEWMV_OPTFLOW) {
+#if CONFIG_SKIP_ME_FOR_OPFL_MODES
+      if (this_mode == NEW_NEWMV_OPTFLOW &&
+          args->comp_newmv_valid[av1_ref_frame_type(mbmi->ref_frame)]
+#if CONFIG_SEP_COMP_DRL
+                                [av1_ref_mv_idx_type(mbmi, mbmi->ref_mv_idx)]
 #else
-    if (this_mode == NEW_NEWMV) {
-#endif  // CONFIG_OPTFLOW_REFINEMENT
-      if (valid_mv0) {
-#if CONFIG_FLEX_MVRES
+                                [mbmi->ref_mv_idx]
+#endif
+                                [pb_mv_precision]) {
         cur_mv[0].as_int =
-            args->single_newmv[valid_precision_mv0][ref_mv_idx][refs[0]].as_int;
+            args->comp_newmv[av1_ref_frame_type(mbmi->ref_frame)]
+#if CONFIG_SEP_COMP_DRL
+                            [av1_ref_mv_idx_type(mbmi, mbmi->ref_mv_idx)]
 #else
-        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
+                            [mbmi->ref_mv_idx]
 #endif
-
-#if CONFIG_FLEX_MVRES
-        lower_mv_precision(&cur_mv[0].as_mv, pb_mv_precision);
-#endif
-        clamp_mv_in_range(x, &cur_mv[0], 0
-#if CONFIG_FLEX_MVRES
-                          ,
-                          pb_mv_precision
-#endif
-
-        );
-      }
-      if (valid_mv1) {
-#if CONFIG_FLEX_MVRES
+                            [pb_mv_precision][0]
+                                .as_int;
         cur_mv[1].as_int =
-            args->single_newmv[valid_precision_mv1][ref_mv_idx][refs[1]].as_int;
+            args->comp_newmv[av1_ref_frame_type(mbmi->ref_frame)]
+#if CONFIG_SEP_COMP_DRL
+                            [av1_ref_mv_idx_type(mbmi, mbmi->ref_mv_idx)]
 #else
-        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
+                            [mbmi->ref_mv_idx]
 #endif
-#if CONFIG_FLEX_MVRES
-        lower_mv_precision(&cur_mv[1].as_mv, pb_mv_precision);
-#endif
-        clamp_mv_in_range(x, &cur_mv[1], 1
-#if CONFIG_FLEX_MVRES
-                          ,
-                          pb_mv_precision
-#endif
-        );
-      }
+                            [pb_mv_precision][1]
+                                .as_int;
 
-      // aomenc1
-      if (cpi->sf.inter_sf.comp_inter_joint_search_thresh <= bsize ||
-          !valid_mv0 || !valid_mv1) {
-        // uint8_t mask_value = 32;
-        av1_joint_motion_search(cpi, x, bsize, cur_mv, NULL, 0, rate_mv);
-      } else {
         *rate_mv = 0;
         for (int i = 0; i < 2; ++i) {
           const int_mv ref_mv = av1_get_ref_mv(x, i);
-#if CONFIG_C071_SUBBLK_WARPMV
-          update_mv_precision(ref_mv.as_mv,
-#if CONFIG_FLEX_MVRES
-                              pb_mv_precision,
-#else
-                              cm->features.allow_high_precision_mv,
-#endif
-                              &cur_mv[i].as_mv);
-#endif  // CONFIG_C071_SUBBLK_WARPMV
 #if CONFIG_FLEX_MVRES
           *rate_mv +=
               av1_mv_bit_cost(&cur_mv[i].as_mv, &ref_mv.as_mv, pb_mv_precision,
@@ -1116,7 +1105,122 @@
               x->mv_costs.mv_cost_stack, MV_COST_WEIGHT);
 #endif
         }
+      } else {
+#endif  // CONFIG_SKIP_ME_FOR_OPFL_MODES
+#else
+    if (this_mode == NEW_NEWMV) {
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+        if (valid_mv0) {
+#if CONFIG_FLEX_MVRES
+          cur_mv[0].as_int =
+#if CONFIG_SEP_COMP_DRL
+              args->single_newmv[valid_precision_mv0][get_ref_mv_idx(mbmi, 0)]
+                                [refs[0]]
+                                    .as_int;
+#else
+            args->single_newmv[valid_precision_mv0][ref_mv_idx][refs[0]].as_int;
+#endif  // CONFIG_SEP_COMP_DRL
+#else
+        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
+#endif
+
+#if CONFIG_FLEX_MVRES
+          lower_mv_precision(&cur_mv[0].as_mv, pb_mv_precision);
+#endif
+          clamp_mv_in_range(x, &cur_mv[0], 0
+#if CONFIG_FLEX_MVRES
+                            ,
+                            pb_mv_precision
+#endif
+
+          );
+        }
+        if (valid_mv1) {
+#if CONFIG_FLEX_MVRES
+          cur_mv[1].as_int =
+#if CONFIG_SEP_COMP_DRL
+              args->single_newmv[valid_precision_mv1][get_ref_mv_idx(mbmi, 1)]
+                                [refs[1]]
+                                    .as_int;
+#else
+            args->single_newmv[valid_precision_mv1][ref_mv_idx][refs[1]].as_int;
+#endif  // CONFIG_SEP_COMP_DRL
+#else
+        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
+#endif
+#if CONFIG_FLEX_MVRES
+          lower_mv_precision(&cur_mv[1].as_mv, pb_mv_precision);
+#endif
+          clamp_mv_in_range(x, &cur_mv[1], 1
+#if CONFIG_FLEX_MVRES
+                            ,
+                            pb_mv_precision
+#endif
+          );
+        }
+
+        // aomenc1
+        if (cpi->sf.inter_sf.comp_inter_joint_search_thresh <= bsize ||
+            !valid_mv0 || !valid_mv1) {
+          // uint8_t mask_value = 32;
+          av1_joint_motion_search(cpi, x, bsize, cur_mv, NULL, 0, rate_mv);
+        } else {
+          *rate_mv = 0;
+          for (int i = 0; i < 2; ++i) {
+            const int_mv ref_mv = av1_get_ref_mv(x, i);
+#if CONFIG_C071_SUBBLK_WARPMV
+            update_mv_precision(ref_mv.as_mv,
+#if CONFIG_FLEX_MVRES
+                                pb_mv_precision,
+#else
+                              cm->features.allow_high_precision_mv,
+#endif
+                                &cur_mv[i].as_mv);
+#endif  // CONFIG_C071_SUBBLK_WARPMV
+#if CONFIG_FLEX_MVRES
+            *rate_mv +=
+                av1_mv_bit_cost(&cur_mv[i].as_mv, &ref_mv.as_mv,
+                                pb_mv_precision, &x->mv_costs, MV_COST_WEIGHT
+#if CONFIG_ADAPTIVE_MVD
+                                ,
+                                0
+#endif
+                );
+#else
+          *rate_mv += av1_mv_bit_cost(
+              &cur_mv[i].as_mv, &ref_mv.as_mv, x->mv_costs.nmv_joint_cost,
+              x->mv_costs.mv_cost_stack, MV_COST_WEIGHT);
+#endif
+          }
+        }
+#if CONFIG_SKIP_ME_FOR_OPFL_MODES
+        if (this_mode == NEW_NEWMV) {
+          args->comp_newmv_valid[av1_ref_frame_type(mbmi->ref_frame)]
+#if CONFIG_SEP_COMP_DRL
+                                [av1_ref_mv_idx_type(mbmi, mbmi->ref_mv_idx)]
+#else
+                              [mbmi->ref_mv_idx]
+#endif
+                                [pb_mv_precision] = 1;
+          args->comp_newmv[av1_ref_frame_type(mbmi->ref_frame)]
+#if CONFIG_SEP_COMP_DRL
+                          [av1_ref_mv_idx_type(mbmi, mbmi->ref_mv_idx)]
+#else
+                        [mbmi->ref_mv_idx]
+#endif
+                          [pb_mv_precision][0]
+                              .as_int = cur_mv[0].as_int;
+          args->comp_newmv[av1_ref_frame_type(mbmi->ref_frame)]
+#if CONFIG_SEP_COMP_DRL
+                          [av1_ref_mv_idx_type(mbmi, mbmi->ref_mv_idx)]
+#else
+                        [mbmi->ref_mv_idx]
+#endif
+                          [pb_mv_precision][1]
+                              .as_int = cur_mv[1].as_int;
+        }
       }
+#endif  // CONFIG_SKIP_ME_FOR_OPFL_MODES
 #if CONFIG_OPTFLOW_REFINEMENT
     } else if (this_mode == NEAR_NEWMV || this_mode == NEAR_NEWMV_OPTFLOW) {
 #else
@@ -1125,7 +1229,13 @@
       if (valid_mv1) {
 #if CONFIG_FLEX_MVRES
         cur_mv[1].as_int =
-            args->single_newmv[valid_precision_mv1][ref_mv_idx][refs[1]].as_int;
+#if CONFIG_SEP_COMP_DRL
+            args->single_newmv[valid_precision_mv1][get_ref_mv_idx(mbmi, 1)]
+                              [refs[1]]
+                                  .as_int;
+#else
+          args->single_newmv[valid_precision_mv1][ref_mv_idx][refs[1]].as_int;
+#endif  // CONFIG_SEP_COMP_DRL
 #else
         cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
 #endif
@@ -1161,7 +1271,7 @@
 #if CONFIG_FLEX_MVRES
                               pb_mv_precision,
 #else
-                              cm->features.allow_high_precision_mv,
+                            cm->features.allow_high_precision_mv,
 #endif
                               &cur_mv[1].as_mv);
 #endif  // CONFIG_C071_SUBBLK_WARPMV
@@ -1201,8 +1311,8 @@
 #if IMPROVED_AMVD
       if (first_ref_dist != sec_ref_dist) return INT64_MAX;
 #else
-      if (first_ref_dist > 2 * sec_ref_dist) return INT64_MAX;
-      if (sec_ref_dist > 2 * first_ref_dist) return INT64_MAX;
+    if (first_ref_dist > 2 * sec_ref_dist) return INT64_MAX;
+    if (sec_ref_dist > 2 * first_ref_dist) return INT64_MAX;
 #endif  // IMPROVED_AMVD
 
       const int jmvd_base_ref_list = get_joint_mvd_base_ref_list(cm, mbmi);
@@ -1217,10 +1327,15 @@
 #if CONFIG_FLEX_MVRES
             args->single_newmv[jmvd_base_ref_list == 0 ? valid_precision_mv0
                                                        : valid_precision_mv1]
-                              [ref_mv_idx][refs[jmvd_base_ref_list]]
+#if CONFIG_SEP_COMP_DRL
+                              [get_ref_mv_idx(mbmi, 1)]
+#else
+                              [ref_mv_idx]
+#endif  // CONFIG_SEP_COMP_DRL
+                              [refs[jmvd_base_ref_list]]
                                   .as_int;
 #else
-            args->single_newmv[ref_mv_idx][refs[jmvd_base_ref_list]].as_int;
+          args->single_newmv[ref_mv_idx][refs[jmvd_base_ref_list]].as_int;
 #endif
 
 #if CONFIG_FLEX_MVRES
@@ -1247,7 +1362,13 @@
       if (valid_mv0) {
 #if CONFIG_FLEX_MVRES
         cur_mv[0].as_int =
-            args->single_newmv[valid_precision_mv0][ref_mv_idx][refs[0]].as_int;
+#if CONFIG_SEP_COMP_DRL
+            args->single_newmv[valid_precision_mv0][get_ref_mv_idx(mbmi, 0)]
+                              [refs[0]]
+                                  .as_int;
+#else
+          args->single_newmv[valid_precision_mv0][ref_mv_idx][refs[0]].as_int;
+#endif  // CONFIG_SEP_COMP_DRL
 #else
         cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
 #endif
@@ -1285,7 +1406,7 @@
 #if CONFIG_FLEX_MVRES
                               pb_mv_precision,
 #else
-                              cm->features.allow_high_precision_mv,
+                            cm->features.allow_high_precision_mv,
 #endif
                               &cur_mv[0].as_mv);
 #endif  // CONFIG_C071_SUBBLK_WARPMV
@@ -1338,7 +1459,13 @@
       int valid_mv0_found = 0;
       for (int prev_mv_precision = pb_mv_precision;
            prev_mv_precision <= mbmi->max_mv_precision; prev_mv_precision++) {
-        if (args->single_newmv_valid[prev_mv_precision][ref_mv_idx][refs[0]]) {
+#if CONFIG_SEP_COMP_DRL
+        assert(get_ref_mv_idx(mbmi, 1) == get_ref_mv_idx(mbmi, 0));
+        if (args->single_newmv_valid[prev_mv_precision][get_ref_mv_idx(mbmi, 0)]
+                                    [refs[0]]) {
+#else
+      if (args->single_newmv_valid[prev_mv_precision][ref_mv_idx][refs[0]]) {
+#endif  // CONFIG_SEP_COMP_DRL
           valid_mv0_found = 1;
           valid_precision_mv0 = prev_mv_precision;
           break;
@@ -1353,7 +1480,13 @@
       assert(valid_precision_mv0 > pb_mv_precision &&
              valid_precision_mv0 < NUM_MV_PRECISIONS);
       start_mv.as_int =
-          args->single_newmv[valid_precision_mv0][ref_mv_idx][refs[0]].as_int;
+#if CONFIG_SEP_COMP_DRL
+          args->single_newmv[valid_precision_mv0][get_ref_mv_idx(mbmi, 0)]
+                            [refs[0]]
+                                .as_int;
+#else
+        args->single_newmv[valid_precision_mv0][ref_mv_idx][refs[0]].as_int;
+#endif  // CONFIG_SEP_COMP_DRL
       lower_mv_precision(&start_mv.as_mv, pb_mv_precision);
       clamp_mv_in_range(x, &start_mv, 0, pb_mv_precision);
 
@@ -1363,15 +1496,27 @@
     } else {
 #endif
       int search_range = INT_MAX;
-      if (cpi->sf.mv_sf.reduce_search_range && mbmi->ref_mv_idx > 0) {
+#if CONFIG_SEP_COMP_DRL
+      if (cpi->sf.mv_sf.reduce_search_range && mbmi->ref_mv_idx[0] > 0) {
+#else
+    if (cpi->sf.mv_sf.reduce_search_range && mbmi->ref_mv_idx > 0) {
+#endif  // CONFIG_SEP_COMP_DRL
         const MV ref_mv = av1_get_ref_mv(x, ref_idx).as_mv;
         int min_mv_diff = INT_MAX;
         int best_match = -1;
         MV best_mv1 = { 0 };
-        for (int idx = 0; idx < mbmi->ref_mv_idx; ++idx) {
+#if CONFIG_SEP_COMP_DRL
+        assert(ref_idx == 0);
+        for (int idx = 0; idx < mbmi->ref_mv_idx[ref_idx]; ++idx) {
           MV prev_ref_mv = av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame,
-                                                     idx, x->mbmi_ext)
+                                                     idx, x->mbmi_ext, mbmi)
                                .as_mv;
+#else
+      for (int idx = 0; idx < mbmi->ref_mv_idx; ++idx) {
+        MV prev_ref_mv = av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame,
+                                                   idx, x->mbmi_ext)
+                             .as_mv;
+#endif  // CONFIG_SEP_COMP_DRL
           const int ref_mv_diff = AOMMAX(abs(ref_mv.row - prev_ref_mv.row),
                                          abs(ref_mv.col - prev_ref_mv.col));
 
@@ -1411,7 +1556,13 @@
         }
       }
       av1_single_motion_search(cpi, x, bsize, ref_idx, rate_mv, search_range,
-                               mode_info, &best_mv);
+                               mode_info, &best_mv
+#if CONFIG_WARPMV && CONFIG_CWG_D067_IMPROVED_WARP
+                               ,
+                               NULL
+#endif  // CONFIG_WARPMV && CONFIG_CWG_D067_IMPROVED_WARP
+
+      );
 
 #if CONFIG_FLEX_MVRES
     }
@@ -1420,9 +1571,18 @@
     if (best_mv.as_int == INVALID_MV) return INT64_MAX;
 
 #if CONFIG_FLEX_MVRES
-    args->single_newmv[pb_mv_precision][ref_mv_idx][refs[0]] = best_mv;
-    args->single_newmv_rate[pb_mv_precision][ref_mv_idx][refs[0]] = *rate_mv;
-    args->single_newmv_valid[pb_mv_precision][ref_mv_idx][refs[0]] = 1;
+#if CONFIG_SEP_COMP_DRL
+    args->single_newmv[pb_mv_precision][get_ref_mv_idx(mbmi, 0)][refs[0]] =
+        best_mv;
+    args->single_newmv_rate[pb_mv_precision][get_ref_mv_idx(mbmi, 0)][refs[0]] =
+        *rate_mv;
+    args->single_newmv_valid[pb_mv_precision][get_ref_mv_idx(mbmi, 0)]
+                            [refs[0]] = 1;
+#else
+  args->single_newmv[pb_mv_precision][ref_mv_idx][refs[0]] = best_mv;
+  args->single_newmv_rate[pb_mv_precision][ref_mv_idx][refs[0]] = *rate_mv;
+  args->single_newmv_valid[pb_mv_precision][ref_mv_idx][refs[0]] = 1;
+#endif  // CONFIG_SEP_COMP_DRL
 #else
     args->single_newmv[ref_mv_idx][refs[0]] = best_mv;
     args->single_newmv_rate[ref_mv_idx][refs[0]] = *rate_mv;
@@ -1451,7 +1611,11 @@
                         const ModeCosts *mode_costs) {
 #if CONFIG_WARP_REF_LIST
   (void)xd;
-  if (!allow_warp_parameter_signaling(mbmi)) {
+  if (!allow_warp_parameter_signaling(
+#if CONFIG_CWG_D067_IMPROVED_WARP
+          cm,
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+          mbmi)) {
     return 0;
   }
 #endif  // CONFIG_WARP_REF_LIST
@@ -1650,7 +1814,11 @@
 #if CONFIG_CROSS_CHROMA_TX
   CctxType best_cctx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
 #endif  // CONFIG_CROSS_CHROMA_TX
-  const int rate_mv0 = *rate_mv;
+  const int rate_mv0 =
+#if CONFIG_WARPMV
+      this_mode == WARPMV ? 0 :
+#endif
+                          *rate_mv;
 #if !CONFIG_EXTENDED_WARP_PREDICTION
   const int interintra_allowed =
       cm->seq_params.enable_interintra_compound && is_interintra_allowed(mbmi);
@@ -1670,6 +1838,9 @@
 #if CONFIG_WARP_REF_LIST
   mbmi->warp_ref_idx = 0;
   mbmi->max_num_warp_candidates = 0;
+#if CONFIG_CWG_D067_IMPROVED_WARP
+  mbmi->warpmv_with_mvd_flag = 0;
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
 #endif  // CONFIG_WARP_REF_LIST
 #if CONFIG_EXTENDED_WARP_PREDICTION
   int allowed_motion_modes = motion_mode_allowed(
@@ -1700,6 +1871,13 @@
   }
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
 
+#if CONFIG_CWG_D067_IMPROVED_WARP
+  int_mv previous_mvs[MAX_WARP_REF_CANDIDATES];
+  for (int w_ref_idx = 0; w_ref_idx < MAX_WARP_REF_CANDIDATES; w_ref_idx++) {
+    previous_mvs[w_ref_idx].as_int = INVALID_MV;
+  }
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+  int num_rd_check = 0;
   const MB_MODE_INFO base_mbmi = *mbmi;
   MB_MODE_INFO best_mbmi;
 #if CONFIG_C071_SUBBLK_WARPMV
@@ -1763,10 +1941,14 @@
         || is_warpmv_warp_causal
 #endif  // CONFIG_WARPMV
     ) {
-      max_warp_ref_idx =
-          (base_mbmi.mode == GLOBALMV || base_mbmi.mode == NEARMV)
-              ? 1
-              : MAX_WARP_REF_CANDIDATES;
+      max_warp_ref_idx = (base_mbmi.mode == GLOBALMV || base_mbmi.mode == NEARMV
+#if CONFIG_CWG_D067_IMPROVED_WARP
+                          || base_mbmi.mode == AMVDNEWMV
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+
+                          )
+                             ? 1
+                             : MAX_WARP_REF_CANDIDATES;
 #if CONFIG_WARPMV
       if (is_warpmv_warp_causal) {
         max_warp_ref_idx = MAX_WARP_REF_CANDIDATES;
@@ -1797,33 +1979,38 @@
       if (is_warpmv_warp_causal && warp_ref_idx >= valid_num_candidates)
         continue;
 #endif  // CONFIG_WARPMV
+#if CONFIG_CWG_D067_IMPROVED_WARP
+      for (int warpmv_with_mvd_flag = 0;
+           warpmv_with_mvd_flag < (1 + (base_mbmi.mode == WARPMV));
+           warpmv_with_mvd_flag++) {
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
 
 #endif  // CONFIG_WARP_REF_LIST
 
-      int tmp_rate2 = rate2_nocoeff;
-      int tmp_rate_mv = rate_mv0;
+        int tmp_rate2 = rate2_nocoeff;
+        int tmp_rate_mv = rate_mv0;
 
-      *mbmi = base_mbmi;
+        *mbmi = base_mbmi;
 #if CONFIG_C071_SUBBLK_WARPMV
-      update_submi(xd, cm, base_submi, bsize);
+        update_submi(xd, cm, base_submi, bsize);
 #endif  // CONFIG_C071_SUBBLK_WARPMV
 #if CONFIG_WARP_REF_LIST
-      mbmi->warp_ref_idx = warp_ref_idx;
-      mbmi->max_num_warp_candidates = (mode_index == WARP_DELTA
+        mbmi->warp_ref_idx = warp_ref_idx;
+        mbmi->max_num_warp_candidates = (mode_index == WARP_DELTA
 #if CONFIG_WARPMV
-                                       || is_warpmv_warp_causal
+                                         || is_warpmv_warp_causal
 #endif  // CONFIG_WARPMV
-                                       )
-                                          ? max_warp_ref_idx
-                                          : 0;
-      assert(valid_num_candidates <= mbmi->max_num_warp_candidates);
+                                         )
+                                            ? max_warp_ref_idx
+                                            : 0;
+        assert(valid_num_candidates <= mbmi->max_num_warp_candidates);
 
 #endif  // CONFIG_WARP_REF_LIST
 #if CONFIG_EXTENDED_WARP_PREDICTION
-      mbmi->motion_mode = (MOTION_MODE)mode_index;
-      if (mbmi->motion_mode != INTERINTRA) {
-        assert(mbmi->ref_frame[1] != INTRA_FRAME);
-      }
+        mbmi->motion_mode = (MOTION_MODE)mode_index;
+        if (mbmi->motion_mode != INTERINTRA) {
+          assert(mbmi->ref_frame[1] != INTRA_FRAME);
+        }
 #else
     if (is_interintra_mode) {
       // Only use SIMPLE_TRANSLATION for interintra
@@ -1834,312 +2021,279 @@
     }
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
 
+#if CONFIG_CWG_D067_IMPROVED_WARP
+        if (warpmv_with_mvd_flag && !allow_warpmv_with_mvd_coding(cm, mbmi))
+          continue;
+        mbmi->warpmv_with_mvd_flag = warpmv_with_mvd_flag;
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+
 #if CONFIG_WARPMV
-      // Only WARP_DELTA and WARPED_CAUSAL are supported for WARPMV mode
-      assert(IMPLIES(mbmi->mode == WARPMV,
-                     mbmi->motion_mode == WARP_DELTA || is_warpmv_warp_causal));
+        // Only WARP_DELTA and WARPED_CAUSAL are supported for WARPMV mode
+        assert(IMPLIES(mbmi->mode == WARPMV, mbmi->motion_mode == WARP_DELTA ||
+                                                 is_warpmv_warp_causal));
 #endif  // CONFIG_WARPMV
 
-      // Do not search OBMC if the probability of selecting it is below a
-      // predetermined threshold for this update_type and block size.
-      const FRAME_UPDATE_TYPE update_type =
-          get_frame_update_type(&cpi->gf_group);
-      const int prune_obmc = cpi->frame_probs.obmc_probs[update_type][bsize] <
-                             cpi->sf.inter_sf.prune_obmc_prob_thresh;
+        // Do not search OBMC if the probability of selecting it is below a
+        // predetermined threshold for this update_type and block size.
+        const FRAME_UPDATE_TYPE update_type =
+            get_frame_update_type(&cpi->gf_group);
+        const int prune_obmc = cpi->frame_probs.obmc_probs[update_type][bsize] <
+                               cpi->sf.inter_sf.prune_obmc_prob_thresh;
 #if CONFIG_EXTENDED_WARP_PREDICTION
-      bool enable_obmc =
-          (cm->features.enabled_motion_modes & (1 << OBMC_CAUSAL)) != 0;
+        bool enable_obmc =
+            (cm->features.enabled_motion_modes & (1 << OBMC_CAUSAL)) != 0;
 #else
     bool enable_obmc = cpi->oxcf.motion_mode_cfg.enable_obmc;
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
-      if ((!enable_obmc || cpi->sf.inter_sf.disable_obmc || prune_obmc) &&
-          mbmi->motion_mode == OBMC_CAUSAL)
-        continue;
+        if ((!enable_obmc || cpi->sf.inter_sf.disable_obmc || prune_obmc) &&
+            mbmi->motion_mode == OBMC_CAUSAL)
+          continue;
 
-      if (is_warp_mode(mbmi->motion_mode)) {
-        mbmi->interp_fltr = av1_unswitchable_filter(interp_filter);
-      }
+        if (is_warp_mode(mbmi->motion_mode)) {
+          mbmi->interp_fltr = av1_unswitchable_filter(interp_filter);
+        }
 
 #if CONFIG_EXTENDED_WARP_PREDICTION
-      if (mbmi->motion_mode == SIMPLE_TRANSLATION) {
+        if (mbmi->motion_mode == SIMPLE_TRANSLATION) {
 #else
     if (mbmi->motion_mode == SIMPLE_TRANSLATION && !is_interintra_mode) {
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
         // SIMPLE_TRANSLATION mode: no need to recalculate.
         // The prediction is calculated before motion_mode_rd() is called in
         // handle_inter_mode()
-      } else if (mbmi->motion_mode == OBMC_CAUSAL) {
-        // OBMC_CAUSAL not allowed for compound prediction
-        assert(!is_comp_pred);
-        if (this_mode == NEWMV) {
-          av1_single_motion_search(cpi, x, bsize, 0, &tmp_rate_mv, INT_MAX,
-                                   NULL, &mbmi->mv[0]);
-          tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
-        }
-        // Build the inter predictor by blending the predictor corresponding to
-        // this MV, and the neighboring blocks using the OBMC model
-        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
-                                      0, av1_num_planes(cm) - 1);
-        av1_build_obmc_inter_prediction(
-            cm, xd, args->above_pred_buf, args->above_pred_stride,
-            args->left_pred_buf, args->left_pred_stride);
-      } else if (mbmi->motion_mode == WARPED_CAUSAL) {
-        int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
+        } else if (mbmi->motion_mode == OBMC_CAUSAL) {
+          // OBMC_CAUSAL not allowed for compound prediction
+          assert(!is_comp_pred);
+          if (this_mode == NEWMV) {
+            av1_single_motion_search(cpi, x, bsize, 0, &tmp_rate_mv, INT_MAX,
+                                     NULL, &mbmi->mv[0]
+#if CONFIG_CWG_D067_IMPROVED_WARP && CONFIG_WARPMV
+                                     ,
+                                     NULL
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP && CONFIG_WARPMV
+            );
+            tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
+          }
+          // Build the inter predictor by blending the predictor
+          // corresponding to this MV, and the neighboring blocks using the
+          // OBMC model
+          av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
+                                        0, av1_num_planes(cm) - 1);
+          av1_build_obmc_inter_prediction(
+              cm, xd, args->above_pred_buf, args->above_pred_stride,
+              args->left_pred_buf, args->left_pred_stride);
+        } else if (mbmi->motion_mode == WARPED_CAUSAL) {
+          int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
 #if CONFIG_EXTENDED_WARP_PREDICTION
-        mbmi->wm_params[0].wmtype = DEFAULT_WMTYPE;
+          mbmi->wm_params[0].wmtype = DEFAULT_WMTYPE;
 #else
       mbmi->wm_params.wmtype = DEFAULT_WMTYPE;
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
 
 #if CONFIG_WARPMV
+#if CONFIG_CWG_D067_IMPROVED_WARP
+          int_mv warp_ref_mv = mbmi->mv[0];
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
         // Build the motion vector of the WARPMV mode
-        if (mbmi->mode == WARPMV) {
-          WarpedMotionParams ref_model =
-              mbmi_ext
-                  ->warp_param_stack[av1_ref_frame_type(mbmi->ref_frame)]
-                                    [mbmi->warp_ref_idx]
-                  .wm_params;
-          mbmi->mv[0] = get_mv_from_wrl(xd, &ref_model,
+          if (mbmi->mode == WARPMV) {
+            WarpedMotionParams ref_model =
+                mbmi_ext
+                    ->warp_param_stack[av1_ref_frame_type(mbmi->ref_frame)]
+                                      [mbmi->warp_ref_idx]
+                    .wm_params;
+            mbmi->mv[0] = get_mv_from_wrl(
+                xd, &ref_model,
 #if CONFIG_FLEX_MVRES
-                                        MV_PRECISION_ONE_EIGHTH_PEL,
+#if CONFIG_CWG_D067_IMPROVED_WARP
+                mbmi->warpmv_with_mvd_flag ? mbmi->pb_mv_precision :
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+                                           MV_PRECISION_ONE_EIGHTH_PEL,
 #else
-                                        1, 0,
+              1, 0,
 #endif
-                                        bsize, xd->mi_col, xd->mi_row);
-          if (!is_warp_candidate_inside_of_frame(cm, xd, mbmi->mv[0])) continue;
+                bsize, xd->mi_col, xd->mi_row);
+
+            if (!is_warp_candidate_inside_of_frame(cm, xd, mbmi->mv[0]))
+              continue;
 #if CONFIG_FLEX_MVRES
-          assert(mbmi->pb_mv_precision == mbmi->max_mv_precision);
+            assert(mbmi->pb_mv_precision == mbmi->max_mv_precision);
 #endif
-        }
+
+#if CONFIG_CWG_D067_IMPROVED_WARP
+            warp_ref_mv.as_int = mbmi->mv[0].as_int;
+            // search MVD if mbmi->warpmv_with_mvd_flag is used.
+            if (mbmi->warpmv_with_mvd_flag) {
+              if (previous_mvs[mbmi->warp_ref_idx].as_int == INVALID_MV) {
+                int tmp_trans_ratemv = 0;
+                av1_single_motion_search(cpi, x, bsize, 0, &tmp_trans_ratemv,
+                                         16, NULL, &mbmi->mv[0], &warp_ref_mv);
+                previous_mvs[mbmi->warp_ref_idx].as_int = mbmi->mv[0].as_int;
+              } else {
+                mbmi->mv[0].as_int = previous_mvs[mbmi->warp_ref_idx].as_int;
+              }
+            }
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+          }
 #endif  // CONFIG_WARPMV
 
-        memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
-        memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
-        // Select the samples according to motion vector difference
-        if (mbmi->num_proj_ref > 1) {
-          mbmi->num_proj_ref = av1_selectSamples(
-              &mbmi->mv[0].as_mv, pts, pts_inref, mbmi->num_proj_ref, bsize);
-        }
+          memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
+          memcpy(pts_inref, pts_inref0,
+                 total_samples * 2 * sizeof(*pts_inref0));
+          // Select the samples according to motion vector difference
+          if (mbmi->num_proj_ref > 1) {
+            mbmi->num_proj_ref = av1_selectSamples(
+                &mbmi->mv[0].as_mv, pts, pts_inref, mbmi->num_proj_ref, bsize);
+          }
 
-        // Compute the warped motion parameters with a least squares fit
-        //  using the collected samples
+          // Compute the warped motion parameters with a least squares fit
+          //  using the collected samples
 #if CONFIG_EXTENDED_WARP_PREDICTION
-        if (!av1_find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize,
-                                 mbmi->mv[0].as_mv, &mbmi->wm_params[0], mi_row,
-                                 mi_col)) {
+          if (!av1_find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize,
+                                   mbmi->mv[0].as_mv, &mbmi->wm_params[0],
+                                   mi_row, mi_col)) {
 #else
       if (!av1_find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize,
                                mbmi->mv[0].as_mv, &mbmi->wm_params, mi_row,
                                mi_col)) {
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
 
-          assert(!is_comp_pred);
-          if (this_mode == NEWMV
+            assert(!is_comp_pred);
+            if ((this_mode == NEWMV
 #if CONFIG_FLEX_MVRES
-              && (mbmi->pb_mv_precision >= MV_PRECISION_ONE_PEL)
+                 && (mbmi->pb_mv_precision >= MV_PRECISION_ONE_PEL))
 #endif
-          ) {
-            // Refine MV for NEWMV mode
-            const int_mv mv0 = mbmi->mv[0];
+#if CONFIG_CWG_D067_IMPROVED_WARP
+                || mbmi->warpmv_with_mvd_flag
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+            ) {
+              // Refine MV for NEWMV mode
+              const int_mv mv0 =
+#if CONFIG_CWG_D067_IMPROVED_WARP
+                  mbmi->mode == WARPMV ? warp_ref_mv :
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+                                       mbmi->mv[0];
+              const int_mv ref_mv =
+#if CONFIG_CWG_D067_IMPROVED_WARP
+                  mbmi->warpmv_with_mvd_flag ? warp_ref_mv :
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
 
-            const int_mv ref_mv = av1_get_ref_mv(x, 0);
+                                             av1_get_ref_mv(x, 0);
 #if CONFIG_FLEX_MVRES
-            const MvSubpelPrecision pb_mv_precision = mbmi->pb_mv_precision;
+              const MvSubpelPrecision pb_mv_precision = mbmi->pb_mv_precision;
 #endif
-            SUBPEL_MOTION_SEARCH_PARAMS ms_params;
-            av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize,
-                                              &ref_mv.as_mv,
+              SUBPEL_MOTION_SEARCH_PARAMS ms_params;
+              av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize,
+                                                &ref_mv.as_mv,
 #if CONFIG_FLEX_MVRES
-                                              pb_mv_precision,
+                                                pb_mv_precision,
 #endif
-                                              NULL);
-
-            // Refine MV in a small range.
-            av1_refine_warped_mv(xd, cm, &ms_params, bsize, pts0, pts_inref0,
-                                 total_samples,
-                                 cpi->sf.mv_sf.warp_search_method,
-                                 cpi->sf.mv_sf.warp_search_iters);
-
-            if (mv0.as_int != mbmi->mv[0].as_int) {
-              // Keep the refined MV and WM parameters.
+                                                NULL);
+              // Refine MV in a small range.
+              av1_refine_warped_mv(xd, cm, &ms_params, bsize, pts0, pts_inref0,
+                                   total_samples,
+                                   cpi->sf.mv_sf.warp_search_method,
+                                   cpi->sf.mv_sf.warp_search_iters);
+              if (mv0.as_int != mbmi->mv[0].as_int
+#if CONFIG_CWG_D067_IMPROVED_WARP
+                  || mbmi->warpmv_with_mvd_flag
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+              ) {
+                // Keep the refined MV and WM parameters.
 #if CONFIG_FLEX_MVRES
-              tmp_rate_mv =
-                  av1_mv_bit_cost(&mbmi->mv[0].as_mv, &ref_mv.as_mv,
-                                  pb_mv_precision, &x->mv_costs, MV_COST_WEIGHT
+                tmp_rate_mv = av1_mv_bit_cost(
+                    &mbmi->mv[0].as_mv, &ref_mv.as_mv, pb_mv_precision,
+                    &x->mv_costs, MV_COST_WEIGHT
 #if CONFIG_ADAPTIVE_MVD
-                                  ,
-                                  ms_params.mv_cost_params.is_adaptive_mvd
+                    ,
+                    ms_params.mv_cost_params.is_adaptive_mvd
 #endif
-                  );
+                );
 #else
             tmp_rate_mv = av1_mv_bit_cost(
                 &mbmi->mv[0].as_mv, &ref_mv.as_mv, x->mv_costs.nmv_joint_cost,
                 x->mv_costs.mv_cost_stack, MV_COST_WEIGHT);
 #endif
-              tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
+                tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
+#if CONFIG_CWG_D067_IMPROVED_WARP
+                assert(
+                    IMPLIES(mbmi->mode == WARPMV, mbmi->warpmv_with_mvd_flag));
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+              }
             }
-          }
 #if CONFIG_C071_SUBBLK_WARPMV
 #if CONFIG_EXTENDED_WARP_PREDICTION
-          assign_warpmv(cm, xd->submi, bsize, &mbmi->wm_params[0], mi_row,
-                        mi_col);
+            assign_warpmv(cm, xd->submi, bsize, &mbmi->wm_params[0], mi_row,
+                          mi_col);
 #else
           assign_warpmv(cm, xd->submi, bsize, &mbmi->wm_params, mi_row, mi_col);
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
 #endif  // CONFIG_C071_SUBBLK_WARPMV
         // Build the warped predictor
-          av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
-                                        av1_num_planes(cm) - 1);
-        } else {
-          continue;
-        }
+            av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize,
+                                          0, av1_num_planes(cm) - 1);
+          } else {
+            continue;
+          }
+
 #if CONFIG_EXTENDED_WARP_PREDICTION
-      } else if (mbmi->motion_mode == INTERINTRA) {
+        } else if (mbmi->motion_mode == INTERINTRA) {
 #else
     } else if (is_interintra_mode) {
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
 
-        const int ret =
-            av1_handle_inter_intra_mode(cpi, x, bsize, mbmi, args, ref_best_rd,
-                                        &tmp_rate_mv, &tmp_rate2, orig_dst);
-        if (ret < 0) continue;
+          const int ret = av1_handle_inter_intra_mode(cpi, x, bsize, mbmi, args,
+                                                      ref_best_rd, &tmp_rate_mv,
+                                                      &tmp_rate2, orig_dst);
+          if (ret < 0) continue;
 #if CONFIG_EXTENDED_WARP_PREDICTION
-      } else if (mbmi->motion_mode == WARP_DELTA) {
+        } else if (mbmi->motion_mode == WARP_DELTA) {
 #if CONFIG_FLEX_MVRES
-        if (mbmi->mode == NEWMV &&
-            mbmi->pb_mv_precision < MV_PRECISION_ONE_PEL) {
-          // Don't bother with warp modes for MV precisions >1px
-          continue;
-        }
-#endif
-#if CONFIG_WARPMV
-        // Build the motion vector of the WARPMV mode
-        if (mbmi->mode == WARPMV) {
-          WarpedMotionParams ref_model =
-              mbmi_ext
-                  ->warp_param_stack[av1_ref_frame_type(mbmi->ref_frame)]
-                                    [mbmi->warp_ref_idx]
-                  .wm_params;
-          mbmi->mv[0] = get_mv_from_wrl(xd, &ref_model,
-#if CONFIG_FLEX_MVRES
-                                        MV_PRECISION_ONE_EIGHTH_PEL,
-#else
-                                        1, 0,
-#endif
-                                        bsize, xd->mi_col, xd->mi_row);
-#if CONFIG_FLEX_MVRES
-          assert(mbmi->pb_mv_precision == mbmi->max_mv_precision);
-#endif
-          if (!is_warp_candidate_inside_of_frame(cm, xd, mbmi->mv[0])) continue;
-        }
-#endif  // CONFIG_WARPMV
-        int_mv mv0 = mbmi->mv[0];
-        const int_mv ref_mv = av1_get_ref_mv(x, 0);
-        SUBPEL_MOTION_SEARCH_PARAMS ms_params;
-        av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize,
-                                          &ref_mv.as_mv,
-#if CONFIG_FLEX_MVRES
-                                          mbmi->pb_mv_precision,
-#endif
-                                          NULL);
-        int valid = 0;
-#if CONFIG_WARP_REF_LIST
-        if (!allow_warp_parameter_signaling(mbmi)) {
-          // Default parameters are not searched if the delta is not signalled
-          if (mbmi_ext
-                  ->warp_param_stack[av1_ref_frame_type(mbmi->ref_frame)]
-                                    [mbmi->warp_ref_idx]
-                  .proj_type == PROJ_DEFAULT)
-            continue;
-          valid = av1_refine_mv_for_base_param_warp_model(
-              cm, xd, mbmi, mbmi_ext, &ms_params,
-              cpi->sf.mv_sf.warp_search_method,
-              cpi->sf.mv_sf.warp_search_iters);
-        } else {
-#endif  // CONFIG_WARP_REF_LIST
-
-          valid = av1_pick_warp_delta(
-              cm, xd, mbmi, mbmi_ext, &ms_params, &x->mode_costs
-#if CONFIG_WARP_REF_LIST
-              ,
-              mbmi_ext->warp_param_stack[av1_ref_frame_type(mbmi->ref_frame)]
-#endif  // CONFIG_WARP_REF_LIST
-
-          );
-
-#if CONFIG_WARP_REF_LIST
-        }
-#endif  // CONFIG_WARP_REF_LIST
-
-        if (!valid) {
-          continue;
-        }
-
-        // If we changed the MV, update costs
-        if (mv0.as_int != mbmi->mv[0].as_int) {
-          // Keep the refined MV and WM parameters.
-#if CONFIG_FLEX_MVRES
-          tmp_rate_mv = av1_mv_bit_cost(&mbmi->mv[0].as_mv, &ref_mv.as_mv,
-                                        mbmi->pb_mv_precision, &x->mv_costs,
-                                        MV_COST_WEIGHT
-#if CONFIG_ADAPTIVE_MVD
-                                        ,
-                                        ms_params.mv_cost_params.is_adaptive_mvd
-#endif
-          );
-#else
-          tmp_rate_mv = av1_mv_bit_cost(
-              &mbmi->mv[0].as_mv, &ref_mv.as_mv, x->mv_costs.nmv_joint_cost,
-              x->mv_costs.mv_cost_stack, MV_COST_WEIGHT);
-#endif
-
-          tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
-#if CONFIG_WARPMV
-          assert(mbmi->mode == NEWMV);
-#endif  // CONFIG_WARPMV
-        }
-#if CONFIG_C071_SUBBLK_WARPMV
-        assign_warpmv(cm, xd->submi, bsize, &mbmi->wm_params[0], mi_row,
-                      mi_col);
-#endif  // CONFIG_C071_SUBBLK_WARPMV
-        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
-                                      av1_num_planes(cm) - 1);
-      } else if (mbmi->motion_mode == WARP_EXTEND) {
-#if CONFIG_FLEX_MVRES
-        if (mbmi->mode == NEWMV &&
-            mbmi->pb_mv_precision < MV_PRECISION_ONE_PEL) {
-          // Don't bother with warp modes for MV precisions >1px
-          continue;
-        }
-#endif
-
-        CANDIDATE_MV *neighbor =
-            &mbmi_ext->ref_mv_stack[mbmi->ref_frame[0]][mbmi->ref_mv_idx];
-        POSITION base_pos = { 0, 0 };
-        if (!get_extend_base_pos(cm, xd, mbmi, neighbor->row_offset,
-                                 neighbor->col_offset, &base_pos)) {
-          continue;
-        }
-        const MB_MODE_INFO *neighbor_mi =
-            xd->mi[base_pos.row * xd->mi_stride + base_pos.col];
-
-        if (mbmi->mode == NEARMV) {
-          assert(is_warp_mode(neighbor_mi->motion_mode));
-          if (neighbor_mi->wm_params[0].invalid) {
-            // Skip invalid models
+          if (mbmi->mode == NEWMV &&
+              mbmi->pb_mv_precision < MV_PRECISION_ONE_PEL) {
+            // Don't bother with warp modes for MV precisions >1px
             continue;
           }
-          mbmi->wm_params[0] = neighbor_mi->wm_params[0];
-        } else {
-          assert(mbmi->mode == NEWMV);
+#endif
+#if CONFIG_WARPMV
+#if CONFIG_CWG_D067_IMPROVED_WARP
+          int_mv wrl_ref_mv = mbmi->mv[0];
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
 
-          bool neighbor_is_above =
-              xd->up_available && (base_pos.row == -1 && base_pos.col >= 0);
+          // Build the motion vector of the WARPMV mode
+          if (mbmi->mode == WARPMV) {
+            WarpedMotionParams ref_model =
+                mbmi_ext
+                    ->warp_param_stack[av1_ref_frame_type(mbmi->ref_frame)]
+                                      [mbmi->warp_ref_idx]
+                    .wm_params;
+            mbmi->mv[0] = get_mv_from_wrl(
+                xd, &ref_model,
+#if CONFIG_FLEX_MVRES
+#if CONFIG_CWG_D067_IMPROVED_WARP
+                mbmi->warpmv_with_mvd_flag ? mbmi->pb_mv_precision :
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
 
-          WarpedMotionParams neighbor_params;
-          av1_get_neighbor_warp_model(cm, xd, neighbor_mi, &neighbor_params);
-
-          const int_mv ref_mv = av1_get_ref_mv(x, 0);
+                                           MV_PRECISION_ONE_EIGHTH_PEL,
+#else
+                1, 0,
+#endif
+                bsize, xd->mi_col, xd->mi_row);
+#if CONFIG_FLEX_MVRES
+            assert(mbmi->pb_mv_precision == mbmi->max_mv_precision);
+#endif
+            if (!is_warp_candidate_inside_of_frame(cm, xd, mbmi->mv[0]))
+              continue;
+#if CONFIG_CWG_D067_IMPROVED_WARP
+            wrl_ref_mv = mbmi->mv[0];
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+          }
+#endif  // CONFIG_WARPMV
+          int_mv mv0 = mbmi->mv[0];
+          const int_mv ref_mv =
+#if CONFIG_CWG_D067_IMPROVED_WARP
+              (mbmi->mode == WARPMV) ? wrl_ref_mv :
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+                                     av1_get_ref_mv(x, 0);
           SUBPEL_MOTION_SEARCH_PARAMS ms_params;
           av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize,
                                             &ref_mv.as_mv,
@@ -2147,97 +2301,62 @@
                                             mbmi->pb_mv_precision,
 #endif
                                             NULL);
-          const SubpelMvLimits *mv_limits = &ms_params.mv_limits;
-
-          // Note: The warp filter is only able to accept small deviations from
-          // the identity transform, up to 1/4 pel of shift per pixel.
-          // Especially for small blocks, it is likely that the motion vector
-          // estimated by the newmv search will be too distant from the
-          // neighbor's motion vectors for the warp filter to be applied.
-          // However, we don't want to give up the benefits of a good initial
-          // MV in the cases where a suitable one has already been found.
-          //
-          // To get the best of both worlds, we run an initial test to see
-          // if the motion vector found by newmv search gives a valid motion
-          // model. If so, we use that as the starting point for refinement.
-          // Otherwise, we use the MV which is predicted by the neighbor's
-          // warp model
-          // TODO(rachelbarker): Do we need this logic?
-
-          // Backup initial motion vector and resulting warp params
-          int_mv mv0 = mbmi->mv[0];
-          WarpedMotionParams wm_params0;
-          if (!av1_extend_warp_model(neighbor_is_above, bsize,
-                                     &mbmi->mv[0].as_mv, mi_row, mi_col,
-                                     &neighbor_params, &wm_params0)) {
-            // NEWMV search produced a valid model
-            mbmi->wm_params[0] = wm_params0;
+          int valid = 0;
+#if CONFIG_WARP_REF_LIST
+          if (!allow_warp_parameter_signaling(
+#if CONFIG_CWG_D067_IMPROVED_WARP
+                  cm,
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+                  mbmi)) {
+            // Default parameters are not searched if the delta is not
+            // signalled
+            if (mbmi_ext
+                    ->warp_param_stack[av1_ref_frame_type(mbmi->ref_frame)]
+                                      [mbmi->warp_ref_idx]
+                    .proj_type == PROJ_DEFAULT)
+              continue;
+#if CONFIG_CWG_D067_IMPROVED_WARP
+            // search MVD if mbmi->warpmv_with_mvd_flag is used.
+            if (mbmi->mode == WARPMV && mbmi->warpmv_with_mvd_flag) {
+              if (previous_mvs[mbmi->warp_ref_idx].as_int == INVALID_MV) {
+                int tmp_trans_ratemv = 0;
+                av1_single_motion_search(cpi, x, bsize, 0, &tmp_trans_ratemv,
+                                         16, NULL, &mbmi->mv[0], &ref_mv);
+                previous_mvs[mbmi->warp_ref_idx].as_int = mbmi->mv[0].as_int;
+              } else {
+                mbmi->mv[0].as_int = previous_mvs[mbmi->warp_ref_idx].as_int;
+              }
+            }
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+            valid = av1_refine_mv_for_base_param_warp_model(
+                cm, xd, mbmi, mbmi_ext, &ms_params,
+                cpi->sf.mv_sf.warp_search_method,
+                cpi->sf.mv_sf.warp_search_iters);
           } else {
-            // NEWMV search did not produce a valid model, so fall back to
-            // starting with the motion vector predicted by the neighbor's
-            // warp model (if any)
-#if CONFIG_FLEX_MVRES
-            mbmi->mv[0] = get_warp_motion_vector(xd, &neighbor_params,
-                                                 mbmi->pb_mv_precision, bsize,
-                                                 mi_col, mi_row);
-#else
-            mbmi->mv[0] = get_warp_motion_vector(
-                xd, &neighbor_params, features->allow_high_precision_mv, bsize,
-                mi_col, mi_row, features->cur_frame_force_integer_mv);
-#endif
-#if CONFIG_C071_SUBBLK_WARPMV
-            if (
-#if CONFIG_FLEX_MVRES
-                mbmi->pb_mv_precision >= MV_PRECISION_HALF_PEL
-#else
-                !cm->features.allow_high_precision_mv
-#endif
-            ) {
-              FULLPEL_MV tmp_full_mv = get_fullmv_from_mv(&mbmi->mv[0].as_mv);
-              MV tmp_sub_mv = get_mv_from_fullmv(&tmp_full_mv);
-              MV sub_mv_offset = { 0, 0 };
-              get_phase_from_mv(ref_mv.as_mv, &sub_mv_offset,
-#if CONFIG_FLEX_MVRES
-                                mbmi->pb_mv_precision
-#else
-                                cm->features.allow_high_precision_mv
-#endif
-              );
-              mbmi->mv[0].as_mv.col = tmp_sub_mv.col + sub_mv_offset.col;
-              mbmi->mv[0].as_mv.row = tmp_sub_mv.row + sub_mv_offset.row;
-            }
-#endif  // CONFIG_C071_SUBBLK_WARPMV
-        // Check that the prediction is in range
-            if (!av1_is_subpelmv_in_range(mv_limits, mbmi->mv[0].as_mv)) {
-              continue;
-            }
+#endif  // CONFIG_WARP_REF_LIST
 
-            // Regenerate model with this new MV
-            //
-            // Note: This should be very close to the neighbor's warp model,
-            // but may be slightly different due to rounding. So it may be
-            // invalid even if the neighbor's warp model is valid.
-            // Because an exact copy will already have been tried using the
-            // NEARMV mode, we can just detect an invalid model and bail out.
-            //
-            // TODO(rachelbarker): Is it worth trying to search anyway in
-            // this case, in order to try to find a valid model?
-            if (av1_extend_warp_model(neighbor_is_above, bsize,
-                                      &mbmi->mv[0].as_mv, mi_row, mi_col,
-                                      &neighbor_params, &mbmi->wm_params[0])) {
-              continue;
-            }
+            valid = av1_pick_warp_delta(
+                cm, xd, mbmi, mbmi_ext, &ms_params, &x->mode_costs
+#if CONFIG_WARP_REF_LIST
+                ,
+                mbmi_ext->warp_param_stack[av1_ref_frame_type(mbmi->ref_frame)]
+#endif  // CONFIG_WARP_REF_LIST
+            );
+
+#if CONFIG_WARP_REF_LIST
+          }
+#endif  // CONFIG_WARP_REF_LIST
+
+          if (!valid) {
+            continue;
           }
 
-          // Refine motion vector. The final choice of MV and warp model are
-          // stored directly into `mbmi`
-          av1_refine_mv_for_warp_extend(cm, xd, &ms_params, neighbor_is_above,
-                                        bsize, &neighbor_params,
-                                        cpi->sf.mv_sf.warp_search_method,
-                                        cpi->sf.mv_sf.warp_search_iters);
-
           // If we changed the MV, update costs
-          if (mv0.as_int != mbmi->mv[0].as_int) {
+          if (mv0.as_int != mbmi->mv[0].as_int
+#if CONFIG_WARPMV && CONFIG_CWG_D067_IMPROVED_WARP
+              || mbmi->warpmv_with_mvd_flag
+#endif  // CONFIG_WARPMV && CONFIG_CWG_D067_IMPROVED_WARP
+          ) {
             // Keep the refined MV and WM parameters.
 #if CONFIG_FLEX_MVRES
             tmp_rate_mv = av1_mv_bit_cost(
@@ -2249,129 +2368,316 @@
 #endif
             );
 #else
+          tmp_rate_mv = av1_mv_bit_cost(
+              &mbmi->mv[0].as_mv, &ref_mv.as_mv, x->mv_costs.nmv_joint_cost,
+              x->mv_costs.mv_cost_stack, MV_COST_WEIGHT);
+#endif
+
+            tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
+#if CONFIG_WARPMV
+#if CONFIG_CWG_D067_IMPROVED_WARP
+            assert(mbmi->mode == NEWMV || mbmi->warpmv_with_mvd_flag);
+#else
+            assert(mbmi->mode == NEWMV);
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+#if CONFIG_CWG_D067_IMPROVED_WARP
+            assert(IMPLIES(mbmi->mode == WARPMV, rate_mv0 == 0));
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+
+#endif  // CONFIG_WARPMV
+          }
+#if CONFIG_C071_SUBBLK_WARPMV
+          assign_warpmv(cm, xd->submi, bsize, &mbmi->wm_params[0], mi_row,
+                        mi_col);
+#endif  // CONFIG_C071_SUBBLK_WARPMV
+          av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
+                                        av1_num_planes(cm) - 1);
+        } else if (mbmi->motion_mode == WARP_EXTEND) {
+#if CONFIG_FLEX_MVRES
+          if (mbmi->mode == NEWMV &&
+              mbmi->pb_mv_precision < MV_PRECISION_ONE_PEL) {
+            // Don't bother with warp modes for MV precisions coarser than 1px
+            continue;
+          }
+#endif
+
+          CANDIDATE_MV *neighbor =
+#if CONFIG_SEP_COMP_DRL
+              &mbmi_ext
+                   ->ref_mv_stack[mbmi->ref_frame[0]][get_ref_mv_idx(mbmi, 0)];
+#else
+            &mbmi_ext->ref_mv_stack[mbmi->ref_frame[0]][mbmi->ref_mv_idx];
+#endif
+          POSITION base_pos = { 0, 0 };
+          if (!get_extend_base_pos(cm, xd, mbmi, neighbor->row_offset,
+                                   neighbor->col_offset, &base_pos)) {
+            continue;
+          }
+          const MB_MODE_INFO *neighbor_mi =
+              xd->mi[base_pos.row * xd->mi_stride + base_pos.col];
+
+          if (mbmi->mode == NEARMV) {
+            assert(is_warp_mode(neighbor_mi->motion_mode));
+            if (neighbor_mi->wm_params[0].invalid) {
+              // Skip invalid models
+              continue;
+            }
+            mbmi->wm_params[0] = neighbor_mi->wm_params[0];
+          } else {
+            assert(mbmi->mode == NEWMV);
+
+            bool neighbor_is_above =
+                xd->up_available && (base_pos.row == -1 && base_pos.col >= 0);
+
+            WarpedMotionParams neighbor_params;
+            av1_get_neighbor_warp_model(cm, xd, neighbor_mi, &neighbor_params);
+
+            const int_mv ref_mv = av1_get_ref_mv(x, 0);
+            SUBPEL_MOTION_SEARCH_PARAMS ms_params;
+            av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize,
+                                              &ref_mv.as_mv,
+#if CONFIG_FLEX_MVRES
+                                              mbmi->pb_mv_precision,
+#endif
+                                              NULL);
+            const SubpelMvLimits *mv_limits = &ms_params.mv_limits;
+
+            // Note: The warp filter is only able to accept small deviations
+            // from the identity transform, up to 1/4 pel of shift per
+            // pixel. Especially for small blocks, it is likely that the
+            // motion vector estimated by the newmv search will be too
+            // distant from the neighbor's motion vectors for the warp
+            // filter to be applied. However, we don't want to give up the
+            // benefits of a good initial MV in the cases where a suitable
+            // one has already been found.
+            //
+            // To get the best of both worlds, we run an initial test to see
+            // if the motion vector found by newmv search gives a valid
+            // motion model. If so, we use that as the starting point for
+            // refinement. Otherwise, we use the MV which is predicted by
+            // the neighbor's warp model
+            // TODO(rachelbarker): Do we need this logic?
+
+            // Backup initial motion vector and resulting warp params
+            int_mv mv0 = mbmi->mv[0];
+            WarpedMotionParams wm_params0;
+            if (!av1_extend_warp_model(neighbor_is_above, bsize,
+                                       &mbmi->mv[0].as_mv, mi_row, mi_col,
+                                       &neighbor_params, &wm_params0)) {
+              // NEWMV search produced a valid model
+              mbmi->wm_params[0] = wm_params0;
+            } else {
+              // NEWMV search did not produce a valid model, so fall back to
+              // starting with the motion vector predicted by the neighbor's
+              // warp model (if any)
+#if CONFIG_FLEX_MVRES
+              mbmi->mv[0] = get_warp_motion_vector(xd, &neighbor_params,
+                                                   mbmi->pb_mv_precision, bsize,
+                                                   mi_col, mi_row);
+#else
+            mbmi->mv[0] = get_warp_motion_vector(
+                xd, &neighbor_params, features->allow_high_precision_mv, bsize,
+                mi_col, mi_row, features->cur_frame_force_integer_mv);
+#endif
+#if CONFIG_C071_SUBBLK_WARPMV
+              if (
+#if CONFIG_FLEX_MVRES
+                  mbmi->pb_mv_precision >= MV_PRECISION_HALF_PEL
+#else
+                  !cm->features.allow_high_precision_mv
+#endif
+              ) {
+                FULLPEL_MV tmp_full_mv = get_fullmv_from_mv(&mbmi->mv[0].as_mv);
+                MV tmp_sub_mv = get_mv_from_fullmv(&tmp_full_mv);
+                MV sub_mv_offset = { 0, 0 };
+                get_phase_from_mv(ref_mv.as_mv, &sub_mv_offset,
+#if CONFIG_FLEX_MVRES
+                                  mbmi->pb_mv_precision
+#else
+                                  cm->features.allow_high_precision_mv
+#endif
+                );
+                mbmi->mv[0].as_mv.col = tmp_sub_mv.col + sub_mv_offset.col;
+                mbmi->mv[0].as_mv.row = tmp_sub_mv.row + sub_mv_offset.row;
+              }
+#endif  // CONFIG_C071_SUBBLK_WARPMV
+        // Check that the prediction is in range
+              if (!av1_is_subpelmv_in_range(mv_limits, mbmi->mv[0].as_mv)) {
+                continue;
+              }
+
+              // Regenerate model with this new MV
+              //
+              // Note: This should be very close to the neighbor's warp
+              // model, but may be slightly different due to rounding. So it
+              // may be invalid even if the neighbor's warp model is valid.
+              // Because an exact copy will already have been tried using
+              // the NEARMV mode, we can just detect an invalid model and
+              // bail out.
+              //
+              // TODO(rachelbarker): Is it worth trying to search anyway in
+              // this case, in order to try to find a valid model?
+              if (av1_extend_warp_model(
+                      neighbor_is_above, bsize, &mbmi->mv[0].as_mv, mi_row,
+                      mi_col, &neighbor_params, &mbmi->wm_params[0])) {
+                continue;
+              }
+            }
+
+            // Refine motion vector. The final choice of MV and warp model
+            // are stored directly into `mbmi`
+            av1_refine_mv_for_warp_extend(cm, xd, &ms_params, neighbor_is_above,
+                                          bsize, &neighbor_params,
+                                          cpi->sf.mv_sf.warp_search_method,
+                                          cpi->sf.mv_sf.warp_search_iters);
+
+            // If we changed the MV, update costs
+            if (mv0.as_int != mbmi->mv[0].as_int) {
+              // Keep the refined MV and WM parameters.
+#if CONFIG_FLEX_MVRES
+              tmp_rate_mv = av1_mv_bit_cost(
+                  &mbmi->mv[0].as_mv, &ref_mv.as_mv, mbmi->pb_mv_precision,
+                  &x->mv_costs, MV_COST_WEIGHT
+#if CONFIG_ADAPTIVE_MVD
+                  ,
+                  ms_params.mv_cost_params.is_adaptive_mvd
+#endif
+              );
+#else
             tmp_rate_mv = av1_mv_bit_cost(
                 &mbmi->mv[0].as_mv, &ref_mv.as_mv, x->mv_costs.nmv_joint_cost,
                 x->mv_costs.mv_cost_stack, MV_COST_WEIGHT);
 #endif
-            tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
-          } else {
-            // Restore the old MV and WM parameters.
-            mbmi->mv[0] = mv0;
-            mbmi->wm_params[0] = wm_params0;
+              tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
+            } else {
+              // Restore the old MV and WM parameters.
+              mbmi->mv[0] = mv0;
+              mbmi->wm_params[0] = wm_params0;
+            }
           }
-        }
 
 #if CONFIG_C071_SUBBLK_WARPMV
-        assign_warpmv(cm, xd->submi, bsize, &mbmi->wm_params[0], mi_row,
-                      mi_col);
+          assign_warpmv(cm, xd->submi, bsize, &mbmi->wm_params[0], mi_row,
+                        mi_col);
 #endif  // CONFIG_C071_SUBBLK_WARPMV
         // Build the warped predictor
-        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
-                                      av1_num_planes(cm) - 1);
+          av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
+                                        av1_num_planes(cm) - 1);
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
-      }
+        }
 
-      // If we are searching newmv and the mv is the same as refmv, skip the
-      // current mode
-      if (!av1_check_newmv_joint_nonzero(cm, x)) continue;
+        // If we are searching newmv and the mv is the same as refmv, skip
+        // the current mode
+        if (!av1_check_newmv_joint_nonzero(cm, x)) continue;
 
-      // Update rd_stats for the current motion mode
-      txfm_info->skip_txfm = 0;
-      rd_stats->dist = 0;
-      rd_stats->sse = 0;
-      rd_stats->skip_txfm = 1;
-      rd_stats->rate = tmp_rate2;
-      const ModeCosts *mode_costs = &x->mode_costs;
-      if (!is_warp_mode(mbmi->motion_mode)) rd_stats->rate += switchable_rate;
+        // Update rd_stats for the current motion mode
+        txfm_info->skip_txfm = 0;
+        rd_stats->dist = 0;
+        rd_stats->sse = 0;
+        rd_stats->skip_txfm = 1;
+        rd_stats->rate = tmp_rate2;
+        const ModeCosts *mode_costs = &x->mode_costs;
+        if (!is_warp_mode(mbmi->motion_mode)) rd_stats->rate += switchable_rate;
 
 #if CONFIG_BAWP
-      if (cm->features.enable_bawp && av1_allow_bawp(mbmi, mi_row, mi_col))
-        rd_stats->rate += mode_costs->bawp_flg_cost[mbmi->bawp_flag == 1];
+        if (cm->features.enable_bawp && av1_allow_bawp(mbmi, mi_row, mi_col))
+          rd_stats->rate += mode_costs->bawp_flg_cost[mbmi->bawp_flag == 1];
 #endif
 
 #if CONFIG_EXTENDED_WARP_PREDICTION
-      MOTION_MODE motion_mode = mbmi->motion_mode;
+        MOTION_MODE motion_mode = mbmi->motion_mode;
 #if CONFIG_WARPMV
-      bool continue_motion_mode_signaling = (mbmi->mode != WARPMV);
+        bool continue_motion_mode_signaling = (mbmi->mode != WARPMV);
 #else
       bool continue_motion_mode_signaling = true;
 #endif  // CONFIG_WARPMV
 
-      if (continue_motion_mode_signaling &&
-          allowed_motion_modes & (1 << INTERINTRA)) {
-        rd_stats->rate +=
-            mode_costs->interintra_cost[size_group_lookup[bsize]]
-                                       [motion_mode == INTERINTRA];
-        if (motion_mode == INTERINTRA) {
-          // Note(rachelbarker): Costs for other interintra-related signaling
-          // are already accounted for by `av1_handle_inter_intra_mode`
-          continue_motion_mode_signaling = false;
-        }
-      }
-
-      if (continue_motion_mode_signaling &&
-          allowed_motion_modes & (1 << OBMC_CAUSAL)) {
-        rd_stats->rate +=
-            mode_costs->obmc_cost[bsize][motion_mode == OBMC_CAUSAL];
-        if (motion_mode == OBMC_CAUSAL) {
-          continue_motion_mode_signaling = false;
-        }
-      }
-
-      if (continue_motion_mode_signaling &&
-          allowed_motion_modes & (1 << WARP_EXTEND)) {
-        const int ctx1 = av1_get_warp_extend_ctx1(xd, mbmi);
-        const int ctx2 = av1_get_warp_extend_ctx2(xd, mbmi);
-        rd_stats->rate +=
-            mode_costs
-                ->warp_extend_cost[ctx1][ctx2][motion_mode == WARP_EXTEND];
-        if (motion_mode == WARP_EXTEND) {
-          continue_motion_mode_signaling = false;
-        }
-      }
-
-      if (continue_motion_mode_signaling &&
-          allowed_motion_modes & (1 << WARPED_CAUSAL)) {
-        rd_stats->rate +=
-            mode_costs->warped_causal_cost[bsize][motion_mode == WARPED_CAUSAL];
-        if (motion_mode == WARPED_CAUSAL) {
-          continue_motion_mode_signaling = false;
-        }
-      }
-
-      if (continue_motion_mode_signaling &&
-          allowed_motion_modes & (1 << WARP_DELTA)) {
-        rd_stats->rate +=
-            mode_costs->warp_delta_cost[bsize][motion_mode == WARP_DELTA];
-      }
-
-#if CONFIG_WARPMV
-      if (mbmi->mode == WARPMV) {
-        if (allowed_motion_modes & (1 << WARPED_CAUSAL)) {
+        if (continue_motion_mode_signaling &&
+            allowed_motion_modes & (1 << INTERINTRA)) {
           rd_stats->rate +=
-              mode_costs->warped_causal_warpmv_cost[bsize][motion_mode ==
-                                                           WARPED_CAUSAL];
-        } else {
-          assert(motion_mode == WARP_DELTA);
+              mode_costs->interintra_cost[size_group_lookup[bsize]]
+                                         [motion_mode == INTERINTRA];
+          if (motion_mode == INTERINTRA) {
+            // Note(rachelbarker): Costs for other interintra-related
+            // signaling are already accounted for by
+            // `av1_handle_inter_intra_mode`
+            continue_motion_mode_signaling = false;
+          }
         }
-      }
+
+        if (continue_motion_mode_signaling &&
+            allowed_motion_modes & (1 << OBMC_CAUSAL)) {
+          rd_stats->rate +=
+              mode_costs->obmc_cost[bsize][motion_mode == OBMC_CAUSAL];
+          if (motion_mode == OBMC_CAUSAL) {
+            continue_motion_mode_signaling = false;
+          }
+        }
+
+        if (continue_motion_mode_signaling &&
+            allowed_motion_modes & (1 << WARP_EXTEND)) {
+          const int ctx1 = av1_get_warp_extend_ctx1(xd, mbmi);
+          const int ctx2 = av1_get_warp_extend_ctx2(xd, mbmi);
+          rd_stats->rate +=
+              mode_costs
+                  ->warp_extend_cost[ctx1][ctx2][motion_mode == WARP_EXTEND];
+          if (motion_mode == WARP_EXTEND) {
+            continue_motion_mode_signaling = false;
+          }
+        }
+
+        if (continue_motion_mode_signaling &&
+            allowed_motion_modes & (1 << WARPED_CAUSAL)) {
+          rd_stats->rate +=
+              mode_costs
+                  ->warped_causal_cost[bsize][motion_mode == WARPED_CAUSAL];
+          if (motion_mode == WARPED_CAUSAL) {
+            continue_motion_mode_signaling = false;
+          }
+        }
+
+        if (continue_motion_mode_signaling &&
+            allowed_motion_modes & (1 << WARP_DELTA)) {
+          rd_stats->rate +=
+              mode_costs->warp_delta_cost[bsize][motion_mode == WARP_DELTA];
+        }
+
+#if CONFIG_WARPMV
+        if (mbmi->mode == WARPMV) {
+          if (allowed_motion_modes & (1 << WARPED_CAUSAL)) {
+            rd_stats->rate +=
+                mode_costs->warped_causal_warpmv_cost[bsize][motion_mode !=
+                                                             WARP_DELTA];
+
+          } else {
+            assert(motion_mode == WARP_DELTA);
+          }
+#if CONFIG_CWG_D067_IMPROVED_WARP
+          if (allow_warpmv_with_mvd_coding(cm, mbmi)) {
+            rd_stats->rate +=
+                mode_costs
+                    ->warpmv_with_mvd_flag_cost[bsize]
+                                               [mbmi->warpmv_with_mvd_flag];
+          }
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+        }
 #endif  // CONFIG_WARPMV
 
-      if (motion_mode == WARP_DELTA
+        if (motion_mode == WARP_DELTA
 #if CONFIG_WARPMV
-          || (motion_mode == WARPED_CAUSAL && mbmi->mode == WARPMV)
+            || ((motion_mode == WARPED_CAUSAL) && mbmi->mode == WARPMV)
 #endif  // CONFIG_WARPMV
-      ) {
+        ) {
 #if CONFIG_WARP_REF_LIST
-        rd_stats->rate += get_warp_ref_idx_cost(mbmi, x);
+          rd_stats->rate += get_warp_ref_idx_cost(mbmi, x);
 #endif  // CONFIG_WARP_REF_LIST
 
-        rd_stats->rate +=
-            av1_cost_warp_delta(cm, xd, mbmi, mbmi_ext, mode_costs);
-        // The following line is commented out to remove a spurious
-        // static analysis warning. Uncomment when adding a new motion mode
-        // continue_motion_mode_signaling = false;
-      }
+          rd_stats->rate +=
+              av1_cost_warp_delta(cm, xd, mbmi, mbmi_ext, mode_costs);
+          // The following line is commented out to remove a spurious static
+          // analysis warning. Uncomment when adding a new motion mode:
+          // continue_motion_mode_signaling = false;
+        }
 #else
     if (interintra_allowed) {
       rd_stats->rate +=
@@ -2390,45 +2696,57 @@
     }
 #endif  // CONFIG_EXTENDED_WARP_PREDICTION
 
-      if (!do_tx_search) {
-        // Avoid doing a transform search here to speed up the overall mode
-        // search. It will be done later in the mode search if the current
-        // motion mode seems promising.
-        int64_t curr_sse = -1;
-        int64_t sse_y = -1;
-        int est_residue_cost = 0;
-        int64_t est_dist = 0;
-        int64_t est_rd = 0;
-        if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
-          curr_sse = get_sse(cpi, x, &sse_y);
-          const int has_est_rd = get_est_rate_dist(
-              tile_data, bsize, curr_sse, &est_residue_cost, &est_dist);
-          (void)has_est_rd;
-          assert(has_est_rd);
-        } else if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 2) {
-          model_rd_sb_fn[MODELRD_TYPE_MOTION_MODE_RD](
-              cpi, bsize, x, xd, 0, num_planes - 1, &est_residue_cost,
-              &est_dist, NULL, &curr_sse, NULL, NULL, NULL);
-          sse_y = x->pred_sse[COMPACT_INDEX0_NRS(xd->mi[0]->ref_frame[0])];
-        }
-        est_rd = RDCOST(x->rdmult, rd_stats->rate + est_residue_cost, est_dist);
-        if (est_rd * 0.80 > *best_est_rd) {
-          mbmi->ref_frame[1] = ref_frame_1;
-          continue;
-        }
-        const int mode_rate = rd_stats->rate;
-        rd_stats->rate += est_residue_cost;
-        rd_stats->dist = est_dist;
-        rd_stats->rdcost = est_rd;
-        if (rd_stats->rdcost < *best_est_rd) {
-          *best_est_rd = rd_stats->rdcost;
-          assert(sse_y >= 0);
-          ref_skip_rd[1] = cpi->sf.inter_sf.txfm_rd_gate_level
-                               ? RDCOST(x->rdmult, mode_rate, (sse_y << 4))
-                               : INT64_MAX;
-        }
-        if (cm->current_frame.reference_mode == SINGLE_REFERENCE) {
-          if (!is_comp_pred) {
+        if (!do_tx_search) {
+          // Avoid doing a transform search here to speed up the overall
+          // mode search. It will be done later in the mode search if the
+          // current motion mode seems promising.
+          int64_t curr_sse = -1;
+          int64_t sse_y = -1;
+          int est_residue_cost = 0;
+          int64_t est_dist = 0;
+          int64_t est_rd = 0;
+          if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
+            curr_sse = get_sse(cpi, x, &sse_y);
+            const int has_est_rd = get_est_rate_dist(
+                tile_data, bsize, curr_sse, &est_residue_cost, &est_dist);
+            (void)has_est_rd;
+            assert(has_est_rd);
+          } else if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 2) {
+            model_rd_sb_fn[MODELRD_TYPE_MOTION_MODE_RD](
+                cpi, bsize, x, xd, 0, num_planes - 1, &est_residue_cost,
+                &est_dist, NULL, &curr_sse, NULL, NULL, NULL);
+            sse_y = x->pred_sse[COMPACT_INDEX0_NRS(xd->mi[0]->ref_frame[0])];
+          }
+          est_rd =
+              RDCOST(x->rdmult, rd_stats->rate + est_residue_cost, est_dist);
+          if (est_rd * 0.80 > *best_est_rd) {
+            mbmi->ref_frame[1] = ref_frame_1;
+            continue;
+          }
+          const int mode_rate = rd_stats->rate;
+          rd_stats->rate += est_residue_cost;
+          rd_stats->dist = est_dist;
+          rd_stats->rdcost = est_rd;
+          if (rd_stats->rdcost < *best_est_rd) {
+            *best_est_rd = rd_stats->rdcost;
+            assert(sse_y >= 0);
+            ref_skip_rd[1] = cpi->sf.inter_sf.txfm_rd_gate_level
+                                 ? RDCOST(x->rdmult, mode_rate, (sse_y << 4))
+                                 : INT64_MAX;
+          }
+          if (cm->current_frame.reference_mode == SINGLE_REFERENCE) {
+            if (!is_comp_pred) {
+              assert(curr_sse >= 0);
+              inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
+                                    rd_stats->rdcost, rd_stats, rd_stats_y,
+                                    rd_stats_uv, mbmi
+#if CONFIG_C071_SUBBLK_WARPMV
+                                    ,
+                                    xd, cm
+#endif  // CONFIG_C071_SUBBLK_WARPMV
+              );
+            }
+          } else {
             assert(curr_sse >= 0);
             inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
                                   rd_stats->rdcost, rd_stats, rd_stats_y,
@@ -2439,95 +2757,95 @@
 #endif  // CONFIG_C071_SUBBLK_WARPMV
             );
           }
+          mbmi->skip_txfm[xd->tree_type == CHROMA_PART] = 0;
         } else {
-          assert(curr_sse >= 0);
-          inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
-                                rd_stats->rdcost, rd_stats, rd_stats_y,
-                                rd_stats_uv, mbmi
-#if CONFIG_C071_SUBBLK_WARPMV
-                                ,
-                                xd, cm
-#endif  // CONFIG_C071_SUBBLK_WARPMV
-          );
-        }
-        mbmi->skip_txfm[xd->tree_type == CHROMA_PART] = 0;
-      } else {
-        // Perform full transform search
-        int64_t skip_rd = INT64_MAX;
-        int64_t skip_rdy = INT64_MAX;
-        if (cpi->sf.inter_sf.txfm_rd_gate_level) {
-          // Check if the mode is good enough based on skip RD
-          int64_t sse_y = INT64_MAX;
-          int64_t curr_sse = get_sse(cpi, x, &sse_y);
-          skip_rd = RDCOST(x->rdmult, rd_stats->rate, curr_sse);
-          skip_rdy = RDCOST(x->rdmult, rd_stats->rate, (sse_y << 4));
-          int eval_txfm =
-              check_txfm_eval(x, bsize, ref_skip_rd[0], skip_rd,
-                              cpi->sf.inter_sf.txfm_rd_gate_level, 0);
-          if (!eval_txfm) continue;
-        }
-
-        // Do transform search
-        if (!av1_txfm_search(cpi, x, bsize, rd_stats, rd_stats_y, rd_stats_uv,
-                             rd_stats->rate, ref_best_rd)) {
-          if (rd_stats_y->rate == INT_MAX && mode_index == 0) {
-            return INT64_MAX;
+          // Perform full transform search
+          int64_t skip_rd = INT64_MAX;
+          int64_t skip_rdy = INT64_MAX;
+          if (cpi->sf.inter_sf.txfm_rd_gate_level) {
+            // Check if the mode is good enough based on skip RD
+            int64_t sse_y = INT64_MAX;
+            int64_t curr_sse = get_sse(cpi, x, &sse_y);
+            skip_rd = RDCOST(x->rdmult, rd_stats->rate, curr_sse);
+            skip_rdy = RDCOST(x->rdmult, rd_stats->rate, (sse_y << 4));
+            int eval_txfm =
+                check_txfm_eval(x, bsize, ref_skip_rd[0], skip_rd,
+                                cpi->sf.inter_sf.txfm_rd_gate_level, 0);
+            if (!eval_txfm) continue;
           }
-          continue;
+
+          // Do transform search
+          if (!av1_txfm_search(cpi, x, bsize, rd_stats, rd_stats_y, rd_stats_uv,
+                               rd_stats->rate, ref_best_rd)) {
+            if (rd_stats_y->rate == INT_MAX && mode_index == 0) {
+              return INT64_MAX;
+            }
+            continue;
+          }
+          const int64_t curr_rd =
+              RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
+
+          if (curr_rd < ref_best_rd) {
+            ref_best_rd = curr_rd;
+            ref_skip_rd[0] = skip_rd;
+            ref_skip_rd[1] = skip_rdy;
+          }
+          if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
+            const int skip_ctx = av1_get_skip_txfm_context(xd);
+            inter_mode_data_push(
+                tile_data, mbmi->sb_type[PLANE_TYPE_Y], rd_stats->sse,
+                rd_stats->dist,
+                rd_stats_y->rate + rd_stats_uv->rate +
+                    mode_costs->skip_txfm_cost[skip_ctx]
+                                              [mbmi->skip_txfm[xd->tree_type ==
+                                                               CHROMA_PART]]);
+          }
         }
-        const int64_t curr_rd =
+
+        if (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV) {
+          if (is_nontrans_global_motion(xd, xd->mi[0])) {
+            mbmi->interp_fltr = av1_unswitchable_filter(interp_filter);
+          }
+        }
+
+        const int64_t tmp_rd =
             RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
-        if (curr_rd < ref_best_rd) {
-          ref_best_rd = curr_rd;
-          ref_skip_rd[0] = skip_rd;
-          ref_skip_rd[1] = skip_rdy;
+        if (num_rd_check == 0) {
+#if CONFIG_SEP_COMP_DRL
+          args->simple_rd[this_mode][get_ref_mv_idx(mbmi, 0)]
+#else
+      args->simple_rd[this_mode][mbmi->ref_mv_idx]
+#endif
+                         [COMPACT_INDEX0_NRS(mbmi->ref_frame[0])] = tmp_rd;
         }
-        if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
-          const int skip_ctx = av1_get_skip_txfm_context(xd);
-          inter_mode_data_push(
-              tile_data, mbmi->sb_type[PLANE_TYPE_Y], rd_stats->sse,
-              rd_stats->dist,
-              rd_stats_y->rate + rd_stats_uv->rate +
-                  mode_costs->skip_txfm_cost[skip_ctx]
-                                            [mbmi->skip_txfm[xd->tree_type ==
-                                                             CHROMA_PART]]);
-        }
-      }
 
-      if (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV) {
-        if (is_nontrans_global_motion(xd, xd->mi[0])) {
-          mbmi->interp_fltr = av1_unswitchable_filter(interp_filter);
-        }
-      }
-
-      const int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
-      if (mode_index == 0) {
-        args->simple_rd[this_mode][mbmi->ref_mv_idx]
-                       [COMPACT_INDEX0_NRS(mbmi->ref_frame[0])] = tmp_rd;
-      }
-      if (mode_index == 0 || tmp_rd < best_rd) {
-        // Update best_rd data if this is the best motion mode so far
-        best_mbmi = *mbmi;
+        if (num_rd_check == 0 || tmp_rd < best_rd) {
+          // Update best_rd data if this is the best motion mode so far
+          best_mbmi = *mbmi;
 #if CONFIG_C071_SUBBLK_WARPMV
-        if (is_warp_mode(mbmi->motion_mode)) {
-          store_submi(xd, cm, best_submi, bsize);
-        }
+          if (is_warp_mode(mbmi->motion_mode)) {
+            store_submi(xd, cm, best_submi, bsize);
+          }
 #endif  // CONFIG_C071_SUBBLK_WARPMV
-        best_rd = tmp_rd;
-        best_rd_stats = *rd_stats;
-        best_rd_stats_y = *rd_stats_y;
-        best_rate_mv = tmp_rate_mv;
-        if (num_planes > 1) best_rd_stats_uv = *rd_stats_uv;
-        memcpy(best_blk_skip, txfm_info->blk_skip,
-               sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
-        av1_copy_array(best_tx_type_map, xd->tx_type_map,
-                       xd->height * xd->width);
+          best_rd = tmp_rd;
+          best_rd_stats = *rd_stats;
+          best_rd_stats_y = *rd_stats_y;
+          best_rate_mv = tmp_rate_mv;
+          if (num_planes > 1) best_rd_stats_uv = *rd_stats_uv;
+          memcpy(best_blk_skip, txfm_info->blk_skip,
+                 sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
+          av1_copy_array(best_tx_type_map, xd->tx_type_map,
+                         xd->height * xd->width);
 #if CONFIG_CROSS_CHROMA_TX
-        av1_copy_array(best_cctx_type_map, xd->cctx_type_map,
-                       xd->height * xd->width);
+          av1_copy_array(best_cctx_type_map, xd->cctx_type_map,
+                         xd->height * xd->width);
 #endif  // CONFIG_CROSS_CHROMA_TX
-        best_xskip_txfm = mbmi->skip_txfm[xd->tree_type == CHROMA_PART];
+          best_xskip_txfm = mbmi->skip_txfm[xd->tree_type == CHROMA_PART];
+        }
+        num_rd_check++;
+#if CONFIG_CWG_D067_IMPROVED_WARP
       }
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
 #if CONFIG_WARP_REF_LIST
     }
 #endif  // CONFIG_WARP_REF_LIST
@@ -2605,8 +2923,17 @@
 static INLINE int check_repeat_ref_mv(const MB_MODE_INFO_EXT *mbmi_ext,
                                       int ref_idx,
                                       const MV_REFERENCE_FRAME *ref_frame,
+#if CONFIG_SEP_COMP_DRL
+                                      PREDICTION_MODE this_mode,
+#endif
                                       PREDICTION_MODE single_mode) {
+#if CONFIG_SEP_COMP_DRL
+  const int8_t ref_frame_type = has_second_drl_by_mode(this_mode, ref_frame)
+                                    ? ref_frame[ref_idx]
+                                    : av1_ref_frame_type(ref_frame);
+#else
   const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
+#endif
 #if CONFIG_TIP
   if (is_tip_ref_frame(ref_frame_type)) return 0;
 #endif  // CONFIG_TIP
@@ -2632,7 +2959,11 @@
   for (int ref_mv_idx = 0; ref_mv_idx < stack_size; ref_mv_idx++) {
     int_mv this_mv;
 
+#if CONFIG_SEP_COMP_DRL
+    if (ref_idx == 0 || has_second_drl_by_mode(this_mode, ref_frame))
+#else
     if (ref_idx == 0)
+#endif
       this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
     else
       this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
@@ -2654,7 +2985,12 @@
     this_mv->as_int = INVALID_MV;
   } else if (single_mode == GLOBALMV) {
     if (skip_repeated_ref_mv &&
+#if CONFIG_SEP_COMP_DRL
+        check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, this_mode,
+                            single_mode))
+#else
         check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
+#endif
       return 0;
     *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
   }
@@ -2668,7 +3004,13 @@
   else {
     assert(single_mode == NEARMV);
     const int ref_mv_offset = ref_mv_idx;
+#if CONFIG_SEP_COMP_DRL
+    const int8_t ref_frame_type = has_second_drl_by_mode(this_mode, ref_frame)
+                                      ? ref_frame[ref_idx]
+                                      : av1_ref_frame_type(ref_frame);
+#else
     const int8_t ref_frame_type = av1_ref_frame_type(ref_frame);
+#endif
     if (ref_frame_type > NONE_FRAME &&
         ref_mv_offset < mbmi_ext->ref_mv_count[ref_frame_type]) {
       assert(ref_mv_offset >= 0);
@@ -2677,11 +3019,21 @@
             mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].this_mv;
       } else {
         *this_mv =
-            mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].comp_mv;
+#if CONFIG_SEP_COMP_DRL
+            has_second_drl_by_mode(this_mode, ref_frame)
+                ? mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].this_mv
+                :
+#endif
+                mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].comp_mv;
       }
     } else {
       if (skip_repeated_ref_mv &&
+#if CONFIG_SEP_COMP_DRL
+          check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, this_mode,
+                              single_mode))
+#else
           check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
+#endif
         return 0;
 #if CONFIG_TIP
       if (is_tip_ref_frame(ref_frame_type)) {
@@ -2705,14 +3057,24 @@
   const MB_MODE_INFO *mbmi = xd->mi[0];
   const int is_comp_pred = has_second_ref(mbmi);
 
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
   if (mbmi->skip_mode) {
     int ret = 1;
+#if CONFIG_SEP_COMP_DRL
+    assert(get_ref_mv_idx(mbmi, 0) < xd->skip_mvp_candidate_list.ref_mv_count);
+    assert(get_ref_mv_idx(mbmi, 1) == get_ref_mv_idx(mbmi, 0));
+#else
     assert(mbmi->ref_mv_idx < xd->skip_mvp_candidate_list.ref_mv_count);
+#endif
     int_mv this_mv;
     this_mv.as_int = INVALID_MV;
     this_mv =
+#if CONFIG_SEP_COMP_DRL
+        xd->skip_mvp_candidate_list.ref_mv_stack[get_ref_mv_idx(mbmi, 0)]
+            .this_mv;
+#else
         xd->skip_mvp_candidate_list.ref_mv_stack[mbmi->ref_mv_idx].this_mv;
+#endif
 
     cur_mv[0] = this_mv;
 #if !CONFIG_C071_SUBBLK_WARPMV
@@ -2726,7 +3088,12 @@
     ret &= clamp_and_check_mv(cur_mv, this_mv, cm, x);
 
     this_mv =
+#if CONFIG_SEP_COMP_DRL
+        xd->skip_mvp_candidate_list.ref_mv_stack[get_ref_mv_idx(mbmi, 1)]
+            .comp_mv;
+#else
         xd->skip_mvp_candidate_list.ref_mv_stack[mbmi->ref_mv_idx].comp_mv;
+#endif
     cur_mv[1] = this_mv;
 #if !CONFIG_C071_SUBBLK_WARPMV
 #if CONFIG_FLEX_MVRES
@@ -2740,23 +3107,39 @@
 
     return ret;
   }
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
 
   int ret = 1;
   for (int i = 0; i < is_comp_pred + 1; ++i) {
     int_mv this_mv;
     this_mv.as_int = INVALID_MV;
+#if CONFIG_SEP_COMP_DRL
+    int ref_mv_idx = get_ref_mv_idx(mbmi, i);
+    ret = get_this_mv(&this_mv, this_mode, i, ref_mv_idx,
+#else
     ret = get_this_mv(&this_mv, this_mode, i, mbmi->ref_mv_idx,
+#endif
                       skip_repeated_ref_mv, mbmi->ref_frame, x->mbmi_ext);
     if (!ret) return 0;
     const PREDICTION_MODE single_mode = get_single_mode(this_mode, i);
     if (single_mode == NEWMV) {
       const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
+#if CONFIG_SEP_COMP_DRL
+      if (has_second_drl(mbmi))
+        cur_mv[i] =
+            x->mbmi_ext->ref_mv_stack[mbmi->ref_frame[i]][ref_mv_idx].this_mv;
+      else
+        cur_mv[i] =
+            (i == 0)
+                ? x->mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv
+                : x->mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
+#else
       cur_mv[i] =
           (i == 0) ? x->mbmi_ext->ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
                          .this_mv
                    : x->mbmi_ext->ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
                          .comp_mv;
+#endif
     } else {
       ret &= clamp_and_check_mv(cur_mv + i, this_mv, cm, x);
     }
@@ -2774,13 +3157,38 @@
 #if IMPROVED_AMVD
   if (mbmi->mode == AMVDNEWMV) max_drl_bits = AOMMIN(max_drl_bits, 1);
 #endif  // IMPROVED_AMVD
+#if CONFIG_SEP_COMP_DRL
+  assert(get_ref_mv_idx(mbmi, 0) < max_drl_bits + 1);
+  assert(get_ref_mv_idx(mbmi, 1) < max_drl_bits + 1);
+#else
   assert(mbmi->ref_mv_idx < max_drl_bits + 1);
+#endif
   if (!have_drl_index(mbmi->mode)) {
     return 0;
   }
   int16_t mode_ctx_pristine =
       av1_mode_context_pristine(mbmi_ext->mode_context, mbmi->ref_frame);
   int cost = 0;
+#if CONFIG_SEP_COMP_DRL
+  for (int ref_idx = 0; ref_idx < 1 + has_second_drl(mbmi); ref_idx++) {
+    for (int idx = 0; idx < max_drl_bits; ++idx) {
+      int drl_ctx = av1_drl_ctx(mode_ctx_pristine);
+      int ref_mv_idx = get_ref_mv_idx(mbmi, ref_idx);
+      switch (idx) {
+        case 0:
+          cost += x->mode_costs.drl_mode_cost[0][drl_ctx][ref_mv_idx != idx];
+          break;
+        case 1:
+          cost += x->mode_costs.drl_mode_cost[1][drl_ctx][ref_mv_idx != idx];
+          break;
+        default:
+          cost += x->mode_costs.drl_mode_cost[2][drl_ctx][ref_mv_idx != idx];
+          break;
+      }
+      if (ref_mv_idx == idx) break;
+    }
+  }
+#else
   for (int idx = 0; idx < max_drl_bits; ++idx) {
     int drl_ctx = av1_drl_ctx(mode_ctx_pristine);
     switch (idx) {
@@ -2799,18 +3207,40 @@
     }
     if (mbmi->ref_mv_idx == idx) return cost;
   }
+#endif
   return cost;
 }
 
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
 static INLINE int get_skip_drl_cost(int max_drl_bits, const MB_MODE_INFO *mbmi,
                                     const MACROBLOCK *x) {
+#if CONFIG_SEP_COMP_DRL
+  assert(get_ref_mv_idx(mbmi, 0) < max_drl_bits + 1);
+#else
   assert(mbmi->ref_mv_idx < max_drl_bits + 1);
+#endif
   assert(mbmi->skip_mode);
   if (!have_drl_index(mbmi->mode)) {
     return 0;
   }
   int cost = 0;
+#if CONFIG_SEP_COMP_DRL
+  int ref_mv_idx = get_ref_mv_idx(mbmi, 0);
+  for (int idx = 0; idx < max_drl_bits; ++idx) {
+    switch (idx) {
+      case 0:
+        cost += x->mode_costs.skip_drl_mode_cost[0][ref_mv_idx != idx];
+        break;
+      case 1:
+        cost += x->mode_costs.skip_drl_mode_cost[1][ref_mv_idx != idx];
+        break;
+      default:
+        cost += x->mode_costs.skip_drl_mode_cost[2][ref_mv_idx != idx];
+        break;
+    }
+    if (ref_mv_idx == idx) return cost;
+  }
+#else
   for (int idx = 0; idx < max_drl_bits; ++idx) {
     switch (idx) {
       case 0:
@@ -2825,9 +3255,10 @@
     }
     if (mbmi->ref_mv_idx == idx) return cost;
   }
+#endif
   return cost;
 }
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
 
 static INLINE int is_single_newmv_valid(const HandleInterModeArgs *const args,
                                         const MB_MODE_INFO *const mbmi,
@@ -2837,8 +3268,13 @@
     const MV_REFERENCE_FRAME ref = mbmi->ref_frame[ref_idx];
     if (single_mode == NEWMV &&
 #if CONFIG_FLEX_MVRES
+#if CONFIG_SEP_COMP_DRL
+        args->single_newmv_valid[mbmi->pb_mv_precision]
+                                [get_ref_mv_idx(mbmi, ref_idx)][ref] == 0) {
+#else
         args->single_newmv_valid[mbmi->pb_mv_precision][mbmi->ref_mv_idx]
                                 [ref] == 0) {
+#endif
 #else
         args->single_newmv_valid[mbmi->ref_mv_idx][ref] == 0) {
 #endif
@@ -2850,7 +3286,12 @@
 
 static int get_drl_refmv_count(int max_drl_bits, const MACROBLOCK *const x,
                                const MV_REFERENCE_FRAME *ref_frame,
-                               PREDICTION_MODE mode) {
+                               PREDICTION_MODE mode
+#if CONFIG_SEP_COMP_DRL
+                               ,
+                               int ref_idx
+#endif
+) {
   MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
   int has_drl = have_drl_index(mode);
   if (!has_drl) {
@@ -2861,17 +3302,25 @@
 #endif  // CONFIG_WARPMV
     return 1;
   }
+#if CONFIG_SEP_COMP_DRL
+  MB_MODE_INFO *mbmi = x->e_mbd.mi[0];
+  if (has_second_drl(mbmi)) {
+    return AOMMIN(max_drl_bits + 1, mbmi_ext->ref_mv_count[ref_frame[ref_idx]]);
+  }
+#endif
+
   const int8_t ref_frame_type = av1_ref_frame_type(ref_frame);
+
   int ref_mv_count =
       ref_frame_type > NONE_FRAME ? mbmi_ext->ref_mv_count[ref_frame_type] : 0;
 #if IMPROVED_AMVD
   if (mode == AMVDNEWMV) ref_mv_count = AOMMIN(ref_mv_count, 2);
 #endif  // IMPROVED_AMVD
 
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
   if (x->e_mbd.mi[0]->skip_mode)
     ref_mv_count = mbmi_ext->skip_mvp_candidate_list.ref_mv_count;
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
 
   return AOMMIN(max_drl_bits + 1, ref_mv_count);
 }
@@ -2882,7 +3331,11 @@
     const AV1_COMP *const cpi,
     const RefFrameDistanceInfo *const ref_frame_dist_info, MACROBLOCK *x,
     const HandleInterModeArgs *const args, int64_t ref_best_rd,
+#if CONFIG_SEP_COMP_DRL
+    int *ref_mv_idx) {
+#else
     int ref_mv_idx) {
+#endif
   (void)ref_frame_dist_info;
   const AV1_COMMON *const cm = &cpi->common;
   const SPEED_FEATURES *const sf = &cpi->sf;
@@ -2891,7 +3344,12 @@
   const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
   const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
   const int is_comp_pred = has_second_ref(mbmi);
+#if CONFIG_SEP_COMP_DRL
+  if (sf->inter_sf.reduce_inter_modes &&
+      (ref_mv_idx[0] > 0 || ref_mv_idx[1] > 0)) {
+#else
   if (sf->inter_sf.reduce_inter_modes && ref_mv_idx > 0) {
+#endif
     // NOTE: This section changes the stats.
     int ranks[2][2], dir[2] = { -1, -1 };
     if (mbmi->ref_frame[0] != INTRA_FRAME)
@@ -2900,9 +3358,21 @@
       dir[1] = get_dir_rank(cm, mbmi->ref_frame[1], ranks[1]);
     if ((dir[0] != -1 && ranks[0][dir[0]] > 3) ||
         (dir[1] != -1 && ranks[1][dir[1]] > 2)) {
+#if CONFIG_SEP_COMP_DRL  // TODO(any): to be updated
+      if (has_second_drl(mbmi)) {
+        if (mbmi_ext->weight[mbmi->ref_frame[0]][ref_mv_idx[0]] <
+                REF_CAT_LEVEL &&
+            mbmi_ext->weight[mbmi->ref_frame[1]][ref_mv_idx[1]] < REF_CAT_LEVEL)
+          return true;
+      } else {
+        if (mbmi_ext->weight[ref_frame_type][ref_mv_idx[0]] < REF_CAT_LEVEL)
+          return true;
+      }
+#else
       if (mbmi_ext->weight[ref_frame_type][ref_mv_idx] < REF_CAT_LEVEL) {
         return true;
       }
+#endif
     }
     // TODO(any): Experiment with reduce_inter_modes for compound prediction
     if (sf->inter_sf.reduce_inter_modes >= 2 && !is_comp_pred &&
@@ -2911,14 +3381,32 @@
            mbmi->ref_frame[0] != cm->ref_frames_info.future_refs[0]) &&
           (cm->ref_frames_info.num_past_refs == 0 ||
            mbmi->ref_frame[0] != cm->ref_frames_info.past_refs[0])) {
+#if CONFIG_SEP_COMP_DRL  // TODO(any): to be updated
+        if (has_second_drl(mbmi)) {
+          if (mbmi_ext->weight[mbmi->ref_frame[0]][ref_mv_idx[0]] <
+                  REF_CAT_LEVEL &&
+              mbmi_ext->weight[mbmi->ref_frame[1]][ref_mv_idx[1]] <
+                  REF_CAT_LEVEL)
+            return true;
+        } else {
+          if (mbmi_ext->weight[ref_frame_type][ref_mv_idx[0]] < REF_CAT_LEVEL)
+            return true;
+        }
+#else
         if (mbmi_ext->weight[ref_frame_type][ref_mv_idx] < REF_CAT_LEVEL) {
           return true;
         }
+#endif
       }
     }
   }
 
+#if CONFIG_SEP_COMP_DRL
+  mbmi->ref_mv_idx[0] = ref_mv_idx[0];
+  mbmi->ref_mv_idx[1] = ref_mv_idx[1];
+#else
   mbmi->ref_mv_idx = ref_mv_idx;
+#endif
   if (is_comp_pred && (!is_single_newmv_valid(args, mbmi, mbmi->mode))) {
     return true;
   }
@@ -2936,7 +3424,11 @@
 static int64_t simple_translation_pred_rd(AV1_COMP *const cpi, MACROBLOCK *x,
                                           RD_STATS *rd_stats,
                                           HandleInterModeArgs *args,
+#if CONFIG_SEP_COMP_DRL
+                                          int *ref_mv_idx,
+#else
                                           int ref_mv_idx,
+#endif
                                           inter_mode_info *mode_info,
                                           int64_t ref_best_rd, BLOCK_SIZE bsize
 #if CONFIG_FLEX_MVRES
@@ -2968,16 +3460,27 @@
 
   mbmi->num_proj_ref = 0;
   mbmi->motion_mode = SIMPLE_TRANSLATION;
+#if CONFIG_SEP_COMP_DRL
+  mbmi->ref_mv_idx[0] = ref_mv_idx[0];
+  mbmi->ref_mv_idx[1] = ref_mv_idx[1];
+  int ref_mv_idx_type = av1_ref_mv_idx_type(mbmi, ref_mv_idx);
+#else
   mbmi->ref_mv_idx = ref_mv_idx;
+#endif
   rd_stats->rate += args->ref_frame_cost + args->single_comp_cost;
 #if CONFIG_FLEX_MVRES
   rd_stats->rate += flex_mv_cost;
 #endif
+
   const int drl_cost =
       get_drl_cost(cpi->common.features.max_drl_bits, mbmi, mbmi_ext, x);
 
   rd_stats->rate += drl_cost;
+#if CONFIG_SEP_COMP_DRL
+  mode_info[ref_mv_idx_type].drl_cost = drl_cost;
+#else
   mode_info[ref_mv_idx].drl_cost = drl_cost;
+#endif
 
   int_mv cur_mv[2];
   if (!build_cur_mv(cur_mv, mbmi->mode, cm, x, 0)) {
@@ -3018,6 +3521,9 @@
   }
 
   mbmi->motion_mode = SIMPLE_TRANSLATION;
+#if CONFIG_CWP
+  mbmi->cwp_idx = CWP_EQUAL;
+#endif  // CONFIG_CWP
   mbmi->num_proj_ref = 0;
   if (is_comp_pred) {
     // Only compound_average
@@ -3032,6 +3538,7 @@
 
   const int mi_row = xd->mi_row;
   const int mi_col = xd->mi_col;
+
   av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize,
                                 AOM_PLANE_Y, AOM_PLANE_Y);
   int est_rate;
@@ -3060,36 +3567,74 @@
 
   if (is_pb_mv_precision_active(cm, mbmi, bsize) &&
       (mbmi->pb_mv_precision < mbmi->max_mv_precision) &&
+#if CONFIG_SEP_COMP_DRL
+      (mbmi->ref_mv_idx[0] > 0 || mbmi->ref_mv_idx[1] > 0)) {
+#else
       mbmi->ref_mv_idx > 0) {
+#endif
     const int is_comp_pred = has_second_ref(mbmi);
     const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
     int_mv this_refmv[2];
     this_refmv[0].as_int = 0;
     this_refmv[1].as_int = 0;
     for (int i = 0; i < is_comp_pred + 1; ++i) {
+#if CONFIG_SEP_COMP_DRL
+      if (has_second_drl(mbmi))
+        this_refmv[i] =
+            mbmi_ext->ref_mv_stack[mbmi->ref_frame[i]][mbmi->ref_mv_idx[i]]
+                .this_mv;
+      else
+        this_refmv[i] =
+            (i == 0)
+                ? mbmi_ext->ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx[0]]
+                      .this_mv
+                : mbmi_ext->ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx[0]]
+                      .comp_mv;
+#else
       this_refmv[i] =
           (i == 0)
               ? mbmi_ext->ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx].this_mv
               : mbmi_ext->ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
                     .comp_mv;
+#endif
 #if CONFIG_C071_SUBBLK_WARPMV
       if (mbmi->pb_mv_precision < MV_PRECISION_HALF_PEL)
 #endif  // CONFIG_C071_SUBBLK_WARPMV
         lower_mv_precision(&this_refmv[i].as_mv, mbmi->pb_mv_precision);
     }
 
+#if CONFIG_SEP_COMP_DRL
+    const uint8_t ref_mv_idx_type = av1_ref_mv_idx_type(mbmi, mbmi->ref_mv_idx);
+    for (int prev_ref_mv_idx = 0; prev_ref_mv_idx < ref_mv_idx_type;
+         prev_ref_mv_idx++) {
+#else
     for (int prev_ref_mv_idx = 0; prev_ref_mv_idx < mbmi->ref_mv_idx;
          prev_ref_mv_idx++) {
+#endif
       int_mv prev_refmv[2];
       prev_refmv[0].as_int = INVALID_MV;
       prev_refmv[1].as_int = INVALID_MV;
 
       for (int i = 0; i < is_comp_pred + 1; ++i) {
+#if CONFIG_SEP_COMP_DRL
+        if (has_second_drl(mbmi)) {
+          int temp_idx[2];
+          av1_set_ref_mv_idx(temp_idx, prev_ref_mv_idx);
+          prev_refmv[i] =
+              mbmi_ext->ref_mv_stack[mbmi->ref_frame[i]][temp_idx[i]].this_mv;
+        } else
+          prev_refmv[i] =
+              (i == 0) ? mbmi_ext->ref_mv_stack[ref_frame_type][prev_ref_mv_idx]
+                             .this_mv
+                       : mbmi_ext->ref_mv_stack[ref_frame_type][prev_ref_mv_idx]
+                             .comp_mv;
+#else
         prev_refmv[i] =
             (i == 0) ? mbmi_ext->ref_mv_stack[ref_frame_type][prev_ref_mv_idx]
                            .this_mv
                      : mbmi_ext->ref_mv_stack[ref_frame_type][prev_ref_mv_idx]
                            .comp_mv;
+#endif
 #if CONFIG_C071_SUBBLK_WARPMV
         if (mbmi->pb_mv_precision < MV_PRECISION_HALF_PEL)
 #endif  // CONFIG_C071_SUBBLK_WARPMV
@@ -3115,7 +3660,12 @@
                                 RD_STATS *rd_stats,
                                 HandleInterModeArgs *const args,
                                 int64_t ref_best_rd, inter_mode_info *mode_info,
-                                BLOCK_SIZE bsize, const int ref_set
+                                BLOCK_SIZE bsize,
+#if CONFIG_SEP_COMP_DRL
+                                const int *ref_set
+#else
+                                const int ref_set
+#endif
 #if CONFIG_FLEX_MVRES
                                 ,
                                 const int flex_mv_cost
@@ -3133,22 +3683,36 @@
   // Only search indices if they have some chance of being good.
   int good_indices = 0;
 
+#if CONFIG_SEP_COMP_DRL
+  int ref_mv_idx[2];
+  for (ref_mv_idx[1] = 0; ref_mv_idx[1] < ref_set[1]; ++ref_mv_idx[1]) {
+    for (ref_mv_idx[0] = 0; ref_mv_idx[0] < ref_set[0]; ++ref_mv_idx[0]) {
+      int i = av1_ref_mv_idx_type(mbmi, ref_mv_idx);
+      if (ref_mv_idx_early_breakout(cpi, &cpi->ref_frame_dist_info, x, args,
+                                    ref_best_rd, ref_mv_idx)) {
+        continue;
+      }
+#else
   for (int i = 0; i < ref_set; ++i) {
     if (ref_mv_idx_early_breakout(cpi, &cpi->ref_frame_dist_info, x, args,
                                   ref_best_rd, i)) {
       continue;
     }
-    mask_set_bit(&good_indices, i);
+#endif
+      mask_set_bit(&good_indices, i);
+    }
+#if CONFIG_SEP_COMP_DRL
   }
+#endif
 
   // Always have at least one motion vector searched.
   if (!good_indices) {
     good_indices = 0x1;
   }
 
-  // Only prune in NEARMV mode, if the speed feature is set, and the block size
-  // is large enough. If these conditions are not met, return all good indices
-  // found so far.
+  // Only prune in NEARMV mode, if the speed feature is set, and the block
+  // size is large enough. If these conditions are not met, return all good
+  // indices found so far.
   if (!cpi->sf.inter_sf.prune_mode_search_simple_translation)
     return good_indices;
   if (!have_nearmv_in_inter_mode(this_mode)) return good_indices;
@@ -3162,6 +3726,29 @@
   }
 
   // Calculate the RD cost for the motion vectors using simple translation.
+#if CONFIG_SEP_COMP_DRL
+  int64_t idx_rdcost[MAX_REF_MV_SEARCH * MAX_REF_MV_SEARCH];
+  for (int i = 0; i < MAX_REF_MV_SEARCH * MAX_REF_MV_SEARCH; i++)
+    idx_rdcost[i] = INT64_MAX;
+
+  for (ref_mv_idx[1] = 0; ref_mv_idx[1] < ref_set[1]; ++ref_mv_idx[1]) {
+    for (ref_mv_idx[0] = 0; ref_mv_idx[0] < ref_set[0]; ++ref_mv_idx[0]) {
+      int i = av1_ref_mv_idx_type(mbmi, ref_mv_idx);
+
+      // If this index is bad, ignore it.
+      if (!mask_check_bit(good_indices, i)) {
+        continue;
+      }
+      idx_rdcost[i] = simple_translation_pred_rd(
+          cpi, x, rd_stats, args, ref_mv_idx, mode_info, ref_best_rd, bsize
+#if CONFIG_FLEX_MVRES
+          ,
+          flex_mv_cost
+#endif
+      );
+    }
+  }
+#else
   int64_t idx_rdcost[MAX_REF_MV_SEARCH];
   for (int i = 0; i < MAX_REF_MV_SEARCH; i++) idx_rdcost[i] = INT64_MAX;
   for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
@@ -3177,6 +3764,7 @@
 #endif
     );
   }
+#endif
   // Find the index with the best RD cost.
   int best_idx = 0;
   // Find the 2nd best motion vector and search motion vectors within a
@@ -3203,13 +3791,22 @@
   // best RD, skip it. Note that the cutoff is derived experimentally.
   const double ref_dth = 5;
   int result = 0;
+#if CONFIG_SEP_COMP_DRL
+  for (ref_mv_idx[1] = 0; ref_mv_idx[1] < ref_set[1]; ++ref_mv_idx[1]) {
+    for (ref_mv_idx[0] = 0; ref_mv_idx[0] < ref_set[0]; ++ref_mv_idx[0]) {
+      int i = av1_ref_mv_idx_type(mbmi, ref_mv_idx);
+#else
   for (int i = 0; i < ref_set; ++i) {
-    if (mask_check_bit(good_indices, i) &&
-        (1.0 * idx_rdcost[i]) < idx_rdcost[best_idx] * dth &&
-        (1.0 * idx_rdcost[i]) < ref_best_rd * ref_dth) {
-      mask_set_bit(&result, i);
+#endif
+      if (mask_check_bit(good_indices, i) &&
+          (1.0 * idx_rdcost[i]) < idx_rdcost[best_idx] * dth &&
+          (1.0 * idx_rdcost[i]) < ref_best_rd * ref_dth) {
+        mask_set_bit(&result, i);
+      }
     }
+#if CONFIG_SEP_COMP_DRL
   }
+#endif
   return result;
 }
 
@@ -3408,8 +4005,8 @@
   // Thresholds used for pruning:
   // Lower value indicates aggressive pruning and higher value indicates
   // conservative pruning which is set based on ref_mv_idx and speed feature.
-  // 'prune_index' 0, 1, 2 corresponds to ref_mv indices 0, 1 and 2. prune_index
-  // 3 corresponds to GLOBALMV/GLOBAL_GLOBALMV
+  // 'prune_index' 0, 1, 2 corresponds to ref_mv indices 0, 1 and 2.
+  // prune_index 3 corresponds to GLOBALMV/GLOBAL_GLOBALMV
   static const int tpl_inter_mode_prune_mul_factor[2][MAX_REF_MV_SEARCH + 1] = {
     { 3, 3, 3, 2, 2, 2, 2, 2 }, { 3, 2, 2, 2, 2, 2, 2, 2 }
   };
@@ -3458,9 +4055,18 @@
     RD_STATS *best_rd_stats_uv, inter_mode_info *mode_info,
     HandleInterModeArgs *args, int drl_cost, const MV_REFERENCE_FRAME *refs,
     int_mv *cur_mv, int64_t *best_rd, const BUFFER_SET orig_dst,
+#if CONFIG_SEP_COMP_DRL
+    int ref_mv_idx[2]) {
+#else
     int ref_mv_idx) {
+#endif
   // This feature only works for NEWMV when a previous mv has been searched
+#if CONFIG_SEP_COMP_DRL
+  if (this_mode != NEWMV || (ref_mv_idx[0] == 0 && ref_mv_idx[1] == 0))
+    return 0;
+#else
   if (this_mode != NEWMV || ref_mv_idx == 0) return 0;
+#endif
   MACROBLOCKD *xd = &x->e_mbd;
   const AV1_COMMON *cm = &cpi->common;
   const int num_planes = av1_num_planes(cm);
@@ -3478,105 +4084,154 @@
   int skip = 0;
   int this_rate_mv = 0;
   int i;
+#if CONFIG_SEP_COMP_DRL
+  int ref_mv_idx_type = av1_ref_mv_idx_type(mbmi, ref_mv_idx);
+  int temp_mv_idx[2];
+  for (temp_mv_idx[1] = 0; temp_mv_idx[1] <= ref_mv_idx[1]; ++temp_mv_idx[1]) {
+    for (temp_mv_idx[0] = 0; temp_mv_idx[0] <= ref_mv_idx[0];
+         ++temp_mv_idx[0]) {
+      if (temp_mv_idx[0] == ref_mv_idx[0] && temp_mv_idx[1] == ref_mv_idx[1])
+        continue;
+      i = av1_ref_mv_idx_type(mbmi, temp_mv_idx);
+#else
   for (i = 0; i < ref_mv_idx; ++i) {
-    // Check if the motion search result same as previous results
+#endif
+      // Check if the motion search result same as previous results
 #if CONFIG_FLEX_MVRES
-    if (cur_mv[0].as_int ==
-            args->single_newmv[pb_mv_precision][i][refs[0]].as_int &&
-        args->single_newmv_valid[pb_mv_precision][i][refs[0]]) {
+#if CONFIG_SEP_COMP_DRL
+      if (cur_mv[0].as_int ==
+              args->single_newmv[pb_mv_precision][temp_mv_idx[0]][refs[0]]
+                  .as_int &&
+          args->single_newmv_valid[pb_mv_precision][temp_mv_idx[0]][refs[0]]) {
+#else
+      if (cur_mv[0].as_int ==
+              args->single_newmv[pb_mv_precision][i][refs[0]].as_int &&
+          args->single_newmv_valid[pb_mv_precision][i][refs[0]]) {
+#endif
 #else
     if (cur_mv[0].as_int == args->single_newmv[i][refs[0]].as_int &&
         args->single_newmv_valid[i][refs[0]]) {
 #endif
 
-      // If the compared mode has no valid rd, it is unlikely this
-      // mode will be the best mode
-      if (mode_info[i].rd == INT64_MAX) {
-        skip = 1;
-        break;
-      }
-      // Compare the cost difference including drl cost and mv cost
-      if (mode_info[i].mv.as_int != INVALID_MV) {
-        const int compare_cost = mode_info[i].rate_mv + mode_info[i].drl_cost;
-        const int_mv ref_mv = av1_get_ref_mv(x, 0);
+        // If the compared mode has no valid rd, it is unlikely this
+        // mode will be the best mode
+        if (mode_info[i].rd == INT64_MAX) {
+          skip = 1;
+          break;
+        }
+        // Compare the cost difference including drl cost and mv cost
+        if (mode_info[i].mv.as_int != INVALID_MV) {
+          const int compare_cost = mode_info[i].rate_mv + mode_info[i].drl_cost;
+          const int_mv ref_mv = av1_get_ref_mv(x, 0);
 #if CONFIG_FLEX_MVRES
-        // Check if this MV is within mv_limit
-        SubpelMvLimits mv_limits;
-        av1_set_subpel_mv_search_range(&mv_limits, &x->mv_limits, &ref_mv.as_mv,
-                                       pb_mv_precision);
-        if (!av1_is_subpelmv_in_range(&mv_limits, mode_info[i].mv.as_mv))
-          continue;
+          // Check if this MV is within mv_limit
+          SubpelMvLimits mv_limits;
+          av1_set_subpel_mv_search_range(&mv_limits, &x->mv_limits,
+                                         &ref_mv.as_mv, pb_mv_precision);
+          if (!av1_is_subpelmv_in_range(&mv_limits, mode_info[i].mv.as_mv))
+            continue;
 
-        this_rate_mv =
-            av1_mv_bit_cost(&mode_info[i].mv.as_mv, &ref_mv.as_mv,
-                            pb_mv_precision, &x->mv_costs, MV_COST_WEIGHT
+          this_rate_mv =
+              av1_mv_bit_cost(&mode_info[i].mv.as_mv, &ref_mv.as_mv,
+                              pb_mv_precision, &x->mv_costs, MV_COST_WEIGHT
 #if CONFIG_ADAPTIVE_MVD
-                            ,
-                            is_adaptive_mvd
+                              ,
+                              is_adaptive_mvd
 #endif
-            );
+              );
 #else
         this_rate_mv = av1_mv_bit_cost(
             &mode_info[i].mv.as_mv, &ref_mv.as_mv, x->mv_costs.nmv_joint_cost,
             x->mv_costs.mv_cost_stack, MV_COST_WEIGHT);
 #endif
-        const int this_cost = this_rate_mv + drl_cost;
+          const int this_cost = this_rate_mv + drl_cost;
 
-        if (compare_cost <= this_cost) {
-          // Skip this mode if it is more expensive as the previous result
-          // for this MV
-          skip = 1;
-          break;
-        } else {
-          // If the cost is less than current best result, make this
-          // the best and update corresponding variables unless the
-          // best_mv is the same as ref_mv. In this case we skip and
-          // rely on NEAR(EST)MV instead
+          if (compare_cost <= this_cost) {
+            // Skip this mode if it is more expensive as the previous result
+            // for this MV
+            skip = 1;
+            break;
+          } else {
+            // If the cost is less than current best result, make this
+            // the best and update corresponding variables unless the
+            // best_mv is the same as ref_mv. In this case we skip and
+            // rely on NEAR(EST)MV instead
+#if CONFIG_SEP_COMP_DRL
+            if (av1_ref_mv_idx_type(best_mbmi, best_mbmi->ref_mv_idx) == i &&
+#else
           if (best_mbmi->ref_mv_idx == i &&
-              best_mbmi->mv[0].as_int != ref_mv.as_int
+#endif
+                best_mbmi->mv[0].as_int != ref_mv.as_int
 #if CONFIG_FLEX_MVRES
-              && best_mbmi->pb_mv_precision == pb_mv_precision
+                && best_mbmi->pb_mv_precision == pb_mv_precision
 #endif
 #if CONFIG_BAWP
-              && best_mbmi->bawp_flag == bawp_flag
+                && best_mbmi->bawp_flag == bawp_flag
 #endif
-          ) {
-            assert(*best_rd != INT64_MAX);
-            assert(best_mbmi->mv[0].as_int == mode_info[i].mv.as_int);
+            ) {
+              assert(*best_rd != INT64_MAX);
+              assert(best_mbmi->mv[0].as_int == mode_info[i].mv.as_int);
+#if CONFIG_SEP_COMP_DRL
+              best_mbmi->ref_mv_idx[0] = ref_mv_idx[0];
+              best_mbmi->ref_mv_idx[1] = ref_mv_idx[1];
+#else
             best_mbmi->ref_mv_idx = ref_mv_idx;
-            motion_mode_cand->rate_mv = this_rate_mv;
-            best_rd_stats->rate += this_cost - compare_cost;
-            *best_rd =
-                RDCOST(x->rdmult, best_rd_stats->rate, best_rd_stats->dist);
-            // We also need to update mode_info here because we are setting
-            // (ref_)best_rd here. So we will not be able to search the same
-            // mode again with the current configuration.
+#endif
+              motion_mode_cand->rate_mv = this_rate_mv;
+              best_rd_stats->rate += this_cost - compare_cost;
+              *best_rd =
+                  RDCOST(x->rdmult, best_rd_stats->rate, best_rd_stats->dist);
+              // We also need to update mode_info here because we are setting
+              // (ref_)best_rd here. So we will not be able to search the same
+              // mode again with the current configuration.
+#if CONFIG_SEP_COMP_DRL
+              mode_info[ref_mv_idx_type].mv.as_int = best_mbmi->mv[0].as_int;
+              mode_info[ref_mv_idx_type].rate_mv = this_rate_mv;
+              mode_info[ref_mv_idx_type].rd = *best_rd;
+#else
             mode_info[ref_mv_idx].mv.as_int = best_mbmi->mv[0].as_int;
             mode_info[ref_mv_idx].rate_mv = this_rate_mv;
             mode_info[ref_mv_idx].rd = *best_rd;
-            if (*best_rd < *ref_best_rd) *ref_best_rd = *best_rd;
-            break;
+#endif
+              if (*best_rd < *ref_best_rd) *ref_best_rd = *best_rd;
+              break;
+            }
           }
         }
       }
     }
+#if CONFIG_SEP_COMP_DRL
   }
+#endif
   if (skip) {
     // Collect mode stats for multiwinner mode processing
     store_winner_mode_stats(
         &cpi->common, x, best_mbmi, best_rd_stats, best_rd_stats_y,
         best_rd_stats_uv, refs, best_mbmi->mode, NULL, bsize, *best_rd,
         cpi->sf.winner_mode_sf.multi_winner_mode_type, do_tx_search);
+#if CONFIG_SEP_COMP_DRL
+    args->modelled_rd[this_mode][ref_mv_idx[0]][refs[0]] =
+        args->modelled_rd[this_mode][i][refs[0]];
+    args->simple_rd[this_mode][ref_mv_idx[0]][refs[0]] =
+        args->simple_rd[this_mode][i][refs[0]];
+    mode_info[ref_mv_idx_type].rd = mode_info[i].rd;
+    mode_info[ref_mv_idx_type].rate_mv = this_rate_mv;
+#else
     args->modelled_rd[this_mode][ref_mv_idx][refs[0]] =
         args->modelled_rd[this_mode][i][refs[0]];
     args->simple_rd[this_mode][ref_mv_idx][refs[0]] =
         args->simple_rd[this_mode][i][refs[0]];
     mode_info[ref_mv_idx].rd = mode_info[i].rd;
     mode_info[ref_mv_idx].rate_mv = this_rate_mv;
+#endif
 #if CONFIG_FLEX_MVRES
     int_mv temp_mv = mode_info[i].mv;
     clamp_mv_in_range(x, &temp_mv, 0, pb_mv_precision);
+#if CONFIG_SEP_COMP_DRL
+    mode_info[ref_mv_idx_type].mv.as_int = temp_mv.as_int;
+#else
     mode_info[ref_mv_idx].mv.as_int = temp_mv.as_int;
+#endif
 #else
     mode_info[ref_mv_idx].mv.as_int = mode_info[i].mv.as_int;
 #endif
@@ -3589,7 +4244,8 @@
 /*!\brief High level function to select parameters for compound mode.
  *
  * \ingroup inter_mode_search
- * The main search functionality is done in the call to av1_compound_type_rd().
+ * The main search functionality is done in the call to
+ * av1_compound_type_rd().
  *
  * \param[in]     cpi               Top-level encoder structure.
  * \param[in]     x                 Pointer to struct holding all the data for
@@ -3611,27 +4267,38 @@
  *                                  search.
  * \param[in,out] orig_dst          A prediction buffer to hold a computed
  *                                  prediction. This will eventually hold the
- *                                  final prediction, and the tmp_dst info will
+ *                                  final prediction, and the tmp_dst info
+ *                                  will
  *                                  be copied here.
  * \param[in]     tmp_dst           A temporary prediction buffer to hold a
  *                                  computed prediction.
- * \param[in,out] rate_mv           The rate associated with the motion vectors.
- *                                  This will be modified if a motion search is
+ * \param[in,out] rate_mv           The rate associated with the motion
+ *                                  vectors.
+ *                                  This will be modified if a motion search
+ *                                  is
  *                                  done in the motion mode search.
  * \param[in,out] rd_stats          Struct to keep track of the overall RD
  *                                  information.
- * \param[in,out] skip_rd           An array of length 2 where skip_rd[0] is the
+ * \param[in,out] skip_rd           An array of length 2 where skip_rd[0] is
+ *                                  the
  *                                  best total RD for a skip mode so far, and
- *                                  skip_rd[1] is the best RD for a skip mode so
- *                                  far in luma. This is used as a speed feature
- *                                  to skip the transform search if the computed
+ *                                  skip_rd[1] is the best RD for a skip mode
+ *                                  so
+ *                                  far in luma. This is used as a speed
+ *                                  feature
+ *                                  to skip the transform search if the
+ *                                  computed
  *                                  skip RD for the current mode is not better
  *                                  than the best skip_rd so far.
- * \param[in,out] skip_build_pred   Indicates whether or not to build the inter
- *                                  predictor. If this is 0, the inter predictor
- *                                  has already been built and thus we can avoid
+ * \param[in,out] skip_build_pred   Indicates whether or not to build the
+ *                                  inter
+ *                                  predictor. If this is 0, the inter
+ *                                  predictor
+ *                                  has already been built and thus we can
+ *                                  avoid
  *                                  repeating computation.
- * \return Returns 1 if this mode is worse than one already seen and 0 if it is
+ * \return Returns 1 if this mode is worse than one already seen and 0 if it
+ * is
  * a viable candidate.
  */
 static int process_compound_inter_mode(
@@ -3643,11 +4310,22 @@
   MACROBLOCKD *xd = &x->e_mbd;
   MB_MODE_INFO *mbmi = xd->mi[0];
   const AV1_COMMON *cm = &cpi->common;
-  const int masked_compound_used = is_any_masked_compound_used(bsize) &&
-                                   cm->seq_params.enable_masked_compound;
+  const int masked_compound_used =
+      is_any_masked_compound_used(bsize) &&
+      cm->seq_params.enable_masked_compound
+#if CONFIG_REFINEMV
+      && (!mbmi->refinemv_flag || !switchable_refinemv_flag(cm, mbmi))
+#endif  // CONFIG_REFINEMV
+      ;
   int mode_search_mask =
       (1 << COMPOUND_AVERAGE) | (1 << COMPOUND_WEDGE) | (1 << COMPOUND_DIFFWTD);
 
+#if CONFIG_CWP
+  if (get_cwp_idx(mbmi) != CWP_EQUAL) {
+    mode_search_mask = (1 << COMPOUND_AVERAGE);
+  }
+#endif  // CONFIG_CWP
+
   const int num_planes = av1_num_planes(cm);
   const int mi_row = xd->mi_row;
   const int mi_col = xd->mi_col;
@@ -3697,7 +4375,11 @@
 // Speed feature to prune out MVs that are similar to previous MVs if they
 // don't achieve the best RD advantage.
 static int prune_ref_mv_idx_search(const FeatureFlags *const features,
+#if CONFIG_SEP_COMP_DRL
+                                   int ref_mv_idx[2], int best_ref_mv_idx[2],
+#else
                                    int ref_mv_idx, int best_ref_mv_idx,
+#endif
                                    int_mv save_mv[MAX_REF_MV_SEARCH - 1][2],
                                    MB_MODE_INFO *mbmi, int pruning_factor) {
   (void)features;
@@ -3706,6 +4388,40 @@
   const int thr = (1 + is_comp_pred) << (pruning_factor + 1);
 
   // Skip the evaluation if an MV match is found.
+#if CONFIG_SEP_COMP_DRL
+  if (ref_mv_idx[0] > 0 || ref_mv_idx[1] > 0) {
+    int idx[2];
+    for (idx[1] = 0; idx[1] <= ref_mv_idx[1]; ++idx[1]) {
+      for (idx[0] = 0; idx[0] <= ref_mv_idx[0]; ++idx[0]) {
+        if (idx[1] == ref_mv_idx[1] && idx[0] == ref_mv_idx[0]) continue;
+
+        int idx_type = av1_ref_mv_idx_type(mbmi, idx);
+
+        if (save_mv[idx_type][0].as_int == INVALID_MV) continue;
+
+        int mv_diff = 0;
+        for (i = 0; i < 1 + is_comp_pred; ++i) {
+          mv_diff +=
+              abs(save_mv[idx_type][i].as_mv.row - mbmi->mv[i].as_mv.row) +
+              abs(save_mv[idx_type][i].as_mv.col - mbmi->mv[i].as_mv.col);
+        }
+
+        // If this mode is not the best one, and current MV is similar to
+        // previous stored MV, terminate this ref_mv_idx evaluation.
+        if ((best_ref_mv_idx[0] == -1 || best_ref_mv_idx[1] == -1) &&
+            mv_diff <= thr)
+          return 1;
+      }
+    }
+  }
+
+  if (ref_mv_idx[0] < features->max_drl_bits &&
+      ref_mv_idx[1] < features->max_drl_bits) {
+    for (i = 0; i < is_comp_pred + 1; ++i)
+      save_mv[av1_ref_mv_idx_type(mbmi, ref_mv_idx)][i].as_int =
+          mbmi->mv[i].as_int;
+  }
+#else
   if (ref_mv_idx > 0) {
     for (int idx = 0; idx < ref_mv_idx; ++idx) {
       if (save_mv[idx][0].as_int == INVALID_MV) continue;
@@ -3726,16 +4442,120 @@
     for (i = 0; i < is_comp_pred + 1; ++i)
       save_mv[ref_mv_idx][i].as_int = mbmi->mv[i].as_int;
   }
-
+#endif
   return 0;
 }
 
+#if CONFIG_CWP
+// Calculate SSE when using compound weighted prediction. Each blended
+// residual is clamped to [INT16_MIN, INT16_MAX] before it is squared.
+uint64_t av1_cwp_sse_from_residuals_c(const int16_t *r1, const int16_t *d,
+                                      const int8_t *m, int N) {
+  uint64_t sse_acc = 0;
+
+  for (int k = 0; k < N; ++k) {
+    const int32_t blended = (1 << WEDGE_WEIGHT_BITS) * r1[k] + m[k] * d[k];
+    const int32_t sat = clamp(blended, INT16_MIN, INT16_MAX);
+    sse_acc += sat * sat;
+  }
+  return ROUND_POWER_OF_TWO(sse_acc, 2 * WEDGE_WEIGHT_BITS);
+}
+
+// Select a subset of cwp weighting factors for the full RD search.
+//
+// Builds the two single-reference luma predictors into p0 and p1, estimates
+// a model-based RD cost for every weighting index except 0, and sets
+// mask[idx] to 1 for the two lowest-cost indices and to 0 for all others.
+// Index 0 is never modelled, so its cost stays at INT64_MAX.
+//
+//   cpi, x     encoder and macroblock state handed to the RD model
+//   bsize      block size; selects the block width and height
+//   p0, p1     buffers that receive the predictors of reference 0 and 1
+//   residual1  scratch buffer, overwritten here
+//   diff10     scratch buffer, overwritten here
+//   stride     stride passed to the predictor builder
+//   mask       output array of MAX_CWP_NUM entries, each set to 0 or 1
+static void set_cwp_search_mask(const AV1_COMP *const cpi, MACROBLOCK *const x,
+                                const BLOCK_SIZE bsize, uint16_t *const p0,
+                                uint16_t *const p1, int16_t *residual1,
+                                int16_t *diff10, int stride, int *mask) {
+  MACROBLOCKD *xd = &x->e_mbd;
+  const int bw = block_size_wide[bsize];
+  const int bh = block_size_high[bsize];
+  // get inter predictors to use for masked compound modes
+  // NOTE(review): predictors are built with `stride` but read back with bw;
+  // presumably callers pass stride == bw - confirm.
+  av1_build_inter_predictor_single_buf_y(xd, bsize, 0, p0, stride);
+  av1_build_inter_predictor_single_buf_y(xd, bsize, 1, p1, stride);
+  const struct buf_2d *const src = &x->plane[0].src;
+
+  // residual1 and diff10 are both written with a stride of bw
+  aom_highbd_subtract_block(bh, bw, residual1, bw, src->buf, src->stride, p1,
+                            bw, xd->bd);
+  aom_highbd_subtract_block(bh, bw, diff10, bw, p1, bw, p0, bw, xd->bd);
+
+  MB_MODE_INFO *const mbmi = xd->mi[0];
+  const AV1_COMMON *const cm = &cpi->common;
+  const int same_side = is_ref_frame_same_side(cm, mbmi);
+
+  const int N = 1 << num_pels_log2_lookup[bsize];
+  const int bd_round = (xd->bd - 8) * 2;
+  // outputs of the RD model call below
+  int rate;
+  int64_t dist;
+
+  int idx_list[MAX_CWP_NUM];
+  int64_t cost_list[MAX_CWP_NUM];
+  for (int i = 0; i < MAX_CWP_NUM; i++) {
+    idx_list[i] = i;
+    cost_list[i] = INT64_MAX;
+  }
+
+  // Index 0 is not modelled; its cost_list entry stays at INT64_MAX.
+  for (int cwp_index = 1; cwp_index < MAX_CWP_NUM; cwp_index++) {
+    const int8_t *const tmp_mask = av1_get_cwp_mask(same_side, cwp_index);
+
+    // compute rd for mask
+    uint64_t sse = av1_cwp_sse_from_residuals_c(residual1, diff10, tmp_mask, N);
+    sse = ROUND_POWER_OF_TWO(sse, bd_round);
+
+    model_rd_sse_fn[MODELRD_TYPE_MASKED_COMPOUND](cpi, x, bsize, 0, sse, N,
+                                                  &rate, &dist);
+    const int8_t cur_cwp = cwp_weighting_factor[same_side][cwp_index];
+    const int rate_cwp_idx = av1_get_cwp_idx_cost(cur_cwp, cm, x);
+    cost_list[cwp_index] = RDCOST(x->rdmult, rate + rate_cwp_idx, dist);
+  }
+
+  // sort cwp in ascending order of cost; this bubble sort only swaps on a
+  // strict '>', so entries with equal cost keep their original index order
+  for (int i = 0; i < MAX_CWP_NUM - 1; i++) {
+    for (int j = 0; j < (MAX_CWP_NUM - 1) - i; j++) {
+      if (cost_list[j] > cost_list[j + 1]) {
+        int64_t tmp_cost = cost_list[j];
+        cost_list[j] = cost_list[j + 1];
+        cost_list[j + 1] = tmp_cost;
+
+        int tmp_idx = idx_list[j];
+        idx_list[j] = idx_list[j + 1];
+        idx_list[j + 1] = tmp_idx;
+      }
+    }
+  }
+
+  // enable only the th cheapest weighting factors, disable the rest
+  const int th = 2;
+  for (int i = 0; i < MAX_CWP_NUM; i++) {
+    mask[idx_list[i]] = (i < th);
+  }
+}
+#endif  // CONFIG_CWP
+
 /*!\brief AV1 inter mode RD computation
  *
  * \ingroup inter_mode_search
- * Do the RD search for a given inter mode and compute all information relevant
- * to the input mode. It will compute the best MV,
- * compound parameters (if the mode is a compound mode) and interpolation filter
+ * Do the RD search for a given inter mode and compute all information
+ * relevant to the input mode. It will compute the best MV, compound
+ * parameters (if the mode is a compound mode) and interpolation filter
  * parameters.
  *
  * \param[in]     cpi               Top-level encoder structure.
@@ -3769,31 +4589,25 @@
  * \param[in]     do_tx_search      Parameter to indicate whether or not to do
  *                                  a full transform search. This will compute
  *                                  an estimated RD for the modes without the
- *                                  transform search and later perform the full
- *                                  transform search on the best candidates.
- * \param[in,out] inter_modes_info  InterModesInfo struct to hold inter mode
- *                                  information to perform a full transform
- *                                  search only on winning candidates searched
- *                                  with an estimate for transform coding RD.
- * \param[in,out] motion_mode_cand  A motion_mode_candidate struct to store
- *                                  motion mode information used in a speed
- *                                  feature to search motion modes other than
- *                                  SIMPLE_TRANSLATION only on winning
- *                                  candidates.
- * \param[in,out] skip_rd           A length 2 array, where skip_rd[0] is the
- *                                  best total RD for a skip mode so far, and
- *                                  skip_rd[1] is the best RD for a skip mode so
- *                                  far in luma. This is used as a speed feature
- *                                  to skip the transform search if the computed
- *                                  skip RD for the current mode is not better
- *                                  than the best skip_rd so far.
- * \param[in] best_ref_mode         Parameter to indicate the best mode so far.
- *                                  This is used as a speed feature to skip the
+ *                                  transform search and later perform the
+ * full transform search on the best candidates.
+ * \param[in,out] inter_modes_info  InterModesInfo struct to hold inter mode
+ * information to perform a full transform search only on winning candidates
+ * searched with an estimate for transform coding RD.
+ * \param[in,out] motion_mode_cand  A motion_mode_candidate struct to store
+ * motion mode information used in a speed feature to search motion modes
+ * other than SIMPLE_TRANSLATION only on winning candidates.
+ * \param[in,out] skip_rd  A length 2 array, where skip_rd[0] is the best total
+ * RD for a skip mode so far, and skip_rd[1] is the best RD for a skip mode so
+ * far in luma. This is used as a speed feature to skip the transform search
+ * if the computed skip RD for the current mode is not better than the best
+ * skip_rd so far.
+ * \param[in] best_ref_mode  Best mode so far, used by a speed feature to skip
  *                                  additional scaling factors for joint mvd
  *                                  coding mode.
  * \param[in]     inter_cost_info_from_tpl A PruneInfoFromTpl struct used to
- *                                         narrow down the search based on data
- *                                         collected in the TPL model.
+ *                                         narrow down the search based on
+ *                                         data collected in the TPL model.
  *
  * \return The RD cost for the mode being searched.
  */
@@ -3861,12 +4675,21 @@
   int64_t newmv_ret_val = INT64_MAX;
 #if CONFIG_FLEX_MVRES
 #if CONFIG_BAWP
+#if CONFIG_SEP_COMP_DRL
+  inter_mode_info mode_info[2][NUM_MV_PRECISIONS]
+                           [MAX_REF_MV_SEARCH * MAX_REF_MV_SEARCH];
+#else
   inter_mode_info mode_info[2][NUM_MV_PRECISIONS][MAX_REF_MV_SEARCH];
+#endif
 
   // initialize mode_info
   for (int bawp = 0; bawp < 2; bawp++) {
     for (int prec = 0; prec < NUM_MV_PRECISIONS; prec++) {
+#if CONFIG_SEP_COMP_DRL
+      for (int idx = 0; idx < MAX_REF_MV_SEARCH * MAX_REF_MV_SEARCH; idx++) {
+#else
       for (int idx = 0; idx < MAX_REF_MV_SEARCH; idx++) {
+#endif
         mode_info[bawp][prec][idx].full_search_mv.as_int = INVALID_MV;
         mode_info[bawp][prec][idx].mv.as_int = INVALID_MV;
         mode_info[bawp][prec][idx].rd = INT64_MAX;
@@ -3921,6 +4744,10 @@
 #if CONFIG_BAWP
   mbmi->bawp_flag = 0;
 #endif
+
+#if CONFIG_REFINEMV
+  mbmi->refinemv_flag = 0;
+#endif  // CONFIG_REFINEMV
   // Do not prune the mode based on inter cost from tpl if the current ref
   // frame is the winner ref in neighbouring blocks.
   int ref_match_found_in_above_nb = 0;
@@ -3935,20 +4762,50 @@
   // First, perform a simple translation search for each of the indices. If
   // an index performs well, it will be fully searched in the main loop
   // of this function.
+#if CONFIG_SEP_COMP_DRL
+  int ref_set[2];
+  ref_set[0] = get_drl_refmv_count(cm->features.max_drl_bits, x,
+                                   mbmi->ref_frame, this_mode, 0);
+  ref_set[1] = 1;
+  if (has_second_drl(mbmi)) {
+    ref_set[1] = get_drl_refmv_count(cm->features.max_drl_bits, x,
+                                     mbmi->ref_frame, this_mode, 1);
+
+    if (mbmi->mode == NEAR_NEWMV) {
+      ref_set[0] = AOMMIN(ref_set[0], SEP_COMP_DRL_SIZE);
+      ref_set[1] = AOMMIN(ref_set[1], SEP_COMP_DRL_SIZE);
+    } else {
+      assert(mbmi->mode == NEAR_NEARMV);
+    }
+  }
+#else
   const int ref_set = get_drl_refmv_count(cm->features.max_drl_bits, x,
                                           mbmi->ref_frame, this_mode);
+#endif
 
 #if CONFIG_WARPMV
+#if CONFIG_SEP_COMP_DRL
+  assert(IMPLIES(this_mode == WARPMV, ref_set[0] == 1));
+#else
   assert(IMPLIES(this_mode == WARPMV, ref_set == 1));
+#endif
 #endif  // CONFIG_WARPMV
 
   // Save MV results from first 2 ref_mv_idx.
 #if CONFIG_FLEX_MVRES
+#if CONFIG_SEP_COMP_DRL
+  int_mv save_mv[NUM_MV_PRECISIONS][MAX_REF_MV_SEARCH * MAX_REF_MV_SEARCH][2];
+#else
   int_mv save_mv[NUM_MV_PRECISIONS][MAX_REF_MV_SEARCH - 1][2];
+#endif
 #else
   int_mv save_mv[MAX_REF_MV_SEARCH - 1][2];
 #endif
+#if CONFIG_SEP_COMP_DRL
+  int best_ref_mv_idx[2] = { -1, -1 };
+#else
   int best_ref_mv_idx = -1;
+#endif
   const int16_t mode_ctx =
       av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
 #if !CONFIG_FLEX_MVRES
@@ -3986,7 +4843,11 @@
 #if CONFIG_FLEX_MVRES
   for (int pb_mv_precision = mbmi->max_mv_precision;
        pb_mv_precision >= MV_PRECISION_8_PEL; pb_mv_precision--) {
+#if CONFIG_SEP_COMP_DRL
+    for (i = 0; i < MAX_REF_MV_SEARCH * MAX_REF_MV_SEARCH - 1; ++i) {
+#else
     for (i = 0; i < MAX_REF_MV_SEARCH - 1; ++i) {
+#endif
       save_mv[pb_mv_precision][i][0].as_int = INVALID_MV;
       save_mv[pb_mv_precision][i][1].as_int = INVALID_MV;
     }
@@ -4029,7 +4890,6 @@
       idx_mask[0][pb_mv_precision] = ref_mv_idx_to_search(
           cpi, x, rd_stats, args, ref_best_rd, mode_info[0][pb_mv_precision],
           bsize, ref_set, flex_mv_cost[pb_mv_precision]);
-
       if (cm->features.enable_bawp &&
           av1_allow_bawp(mbmi, xd->mi_row, xd->mi_col)) {
         mbmi->bawp_flag = 1;
@@ -4079,6 +4939,10 @@
   //    4.) Build the inter predictor
   //    5.) Pick the motion mode
   //    6.) Update stats if best so far
+#if CONFIG_REFINEMV
+  mbmi->refinemv_flag =
+      0;  // initialize to 0; later on the default value is assigned
+#endif    // CONFIG_REFINEMV
 #if CONFIG_IMPROVED_JMVD
   const int jmvd_scaling_factor_num =
       is_joint_mvd_coding_mode(mbmi->mode) ? JOINT_NEWMV_SCALE_FACTOR_CNT : 1;
@@ -4096,270 +4960,412 @@
         continue;
     }
 #endif  // CONFIG_IMPROVED_JMVD
-    for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
-#if CONFIG_IMPROVED_JMVD
-      // apply early termination method to jmvd scaling factors
-      if (cpi->sf.inter_sf.early_terminate_jmvd_scale_factor) {
-        if (scale_index > 0 && ref_mv_idx > 0 &&
-            best_mbmi.jmvd_scale_mode == 0 && best_mbmi.ref_mv_idx < ref_mv_idx)
-          continue;
-      }
-#endif  // CONFIG_IMPROVED_JMVD
-#if CONFIG_FLEX_MVRES
-
-      // Initialize compound mode data
-      mbmi->interinter_comp.type = COMPOUND_AVERAGE;
-      mbmi->comp_group_idx = 0;
-      if (mbmi->ref_frame[1] == INTRA_FRAME) mbmi->ref_frame[1] = NONE_FRAME;
-
-      mbmi->num_proj_ref = 0;
-      mbmi->motion_mode = SIMPLE_TRANSLATION;
-      mbmi->ref_mv_idx = ref_mv_idx;
-      set_mv_precision(mbmi, mbmi->max_mv_precision);
-      if (
-#if CONFIG_WARPMV
-          mbmi->mode != WARPMV &&
-#endif  // CONFIG_WARPMV
-          prune_modes_based_on_tpl && !ref_match_found_in_above_nb &&
-          !ref_match_found_in_left_nb && (ref_best_rd != INT64_MAX)) {
-        // Skip mode if TPL model indicates it will not be beneficial.
-        if (prune_modes_based_on_tpl_stats(
-                &cm->features, inter_cost_info_from_tpl, refs, ref_mv_idx,
-                this_mode, cpi->sf.inter_sf.prune_inter_modes_based_on_tpl))
-          continue;
-      }
-      const int drl_cost =
-          get_drl_cost(cm->features.max_drl_bits, mbmi, mbmi_ext, x);
-
-#if CONFIG_FLEX_MVRES
-      MvSubpelPrecision best_precision_so_far = mbmi->max_mv_precision;
-      int64_t best_precision_rd_so_far = INT64_MAX;
-      set_precision_set(cm, xd, mbmi, bsize, ref_mv_idx);
-      set_most_probable_mv_precision(cm, mbmi, bsize);
-      const PRECISION_SET *precision_def =
-          &av1_mv_precision_sets[mbmi->mb_precision_set];
-      for (int precision_dx = precision_def->num_precisions - 1;
-           precision_dx >= 0; precision_dx--) {
-        MvSubpelPrecision pb_mv_precision =
-            precision_def->precision[precision_dx];
-        mbmi->pb_mv_precision = pb_mv_precision;
-        if (!is_pb_mv_precision_active(cm, mbmi, bsize) &&
-            (pb_mv_precision != mbmi->max_mv_precision)) {
-          continue;
-        }
-        assert(pb_mv_precision <= mbmi->max_mv_precision);
+#if CONFIG_CWP
+    int best_cwp_idx = CWP_EQUAL;
+    int64_t best_cwp_cost = INT64_MAX;
+#endif  // CONFIG_CWP
+#if CONFIG_SEP_COMP_DRL
+    int ref_mv_idx[2];
+    for (ref_mv_idx[1] = 0; ref_mv_idx[1] < ref_set[1]; ++ref_mv_idx[1]) {
+      for (ref_mv_idx[0] = 0; ref_mv_idx[0] < ref_set[0]; ++ref_mv_idx[0]) {
+#else
+  for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
+#endif  // CONFIG_SEP_COMP_DRL
 #if CONFIG_IMPROVED_JMVD
         // apply early termination method to jmvd scaling factors
         if (cpi->sf.inter_sf.early_terminate_jmvd_scale_factor) {
-          if (scale_index > 0 && (!is_inter_compound_mode(best_ref_mode)) &&
-              mbmi->pb_mv_precision <= MV_PRECISION_HALF_PEL &&
+#if CONFIG_SEP_COMP_DRL
+          if (scale_index > 0 && (ref_mv_idx[0] > 0 || ref_mv_idx[1] > 0) &&
               best_mbmi.jmvd_scale_mode == 0 &&
-              best_mbmi.pb_mv_precision > MV_PRECISION_HALF_PEL)
+              (best_mbmi.ref_mv_idx[0] < ref_mv_idx[0] ||
+               best_mbmi.ref_mv_idx[1] < ref_mv_idx[1]))
+#else
+          if (scale_index > 0 && ref_mv_idx > 0 &&
+              best_mbmi.jmvd_scale_mode == 0 &&
+              best_mbmi.ref_mv_idx < ref_mv_idx)
+#endif  // CONFIG_SEP_COMP_DRL
             continue;
         }
 #endif  // CONFIG_IMPROVED_JMVD
-
-        if (is_pb_mv_precision_active(cm, mbmi, bsize)) {
-          if (cpi->sf.flexmv_sf.terminate_early_4_pel_precision &&
-              pb_mv_precision < MV_PRECISION_FOUR_PEL &&
-              best_precision_so_far >= MV_PRECISION_QTR_PEL)
-            continue;
-          if (mbmi->ref_mv_idx) {
-            if (cpi->sf.flexmv_sf.do_not_search_8_pel_precision &&
-                mbmi->pb_mv_precision == MV_PRECISION_8_PEL)
-              continue;
-
-            if (cpi->sf.flexmv_sf.do_not_search_4_pel_precision &&
-                mbmi->pb_mv_precision == MV_PRECISION_FOUR_PEL)
-              continue;
-          }
-        }
-
-#endif
-#endif
-
-#if !CONFIG_FLEX_MVRES && !CONFIG_BAWP
-        mode_info[ref_mv_idx].full_search_mv.as_int = INVALID_MV;
-        mode_info[ref_mv_idx].mv.as_int = INVALID_MV;
-        mode_info[ref_mv_idx].rd = INT64_MAX;
-        if (
-#if CONFIG_WARPMV
-            mbmi->mode != WARPMV &&
-#endif  // CONFIG_WARPMV
-
-            !mask_check_bit(idx_mask, ref_mv_idx)) {
-          // MV did not perform well in simple translation search. Skip it.
-          continue;
-        }
-#endif  // !CONFIG_FLEX_MVRES && !CONFIG_BAWP
-#if !CONFIG_FLEX_MVRES
-        if (prune_modes_based_on_tpl && !ref_match_found_in_above_nb &&
-            !ref_match_found_in_left_nb && (ref_best_rd != INT64_MAX)) {
-          // Skip mode if TPL model indicates it will not be beneficial.
-          if (prune_modes_based_on_tpl_stats(
-                  &cm->features, inter_cost_info_from_tpl, refs, ref_mv_idx,
-                  this_mode, cpi->sf.inter_sf.prune_inter_modes_based_on_tpl))
-            continue;
-        }
-        av1_init_rd_stats(rd_stats);
-        // Initialize compound mode data
-        mbmi->interinter_comp.type = COMPOUND_AVERAGE;
-        mbmi->comp_group_idx = 0;
-        if (mbmi->ref_frame[1] == INTRA_FRAME) mbmi->ref_frame[1] = NONE_FRAME;
-
-        mbmi->num_proj_ref = 0;
-        mbmi->motion_mode = SIMPLE_TRANSLATION;
-        mbmi->ref_mv_idx = ref_mv_idx;
-        // Compute cost for signalling this DRL index
-        rd_stats->rate = base_rate;
-        const int drl_cost =
-            get_drl_cost(cm->features.max_drl_bits, mbmi, mbmi_ext, x);
-
-        rd_stats->rate += drl_cost;
-#if CONFIG_BAWP
-        mode_info[0][ref_mv_idx].drl_cost = drl_cost;
-        mode_info[1][ref_mv_idx].drl_cost = drl_cost;
+#if CONFIG_CWP
+        mbmi->cwp_idx = CWP_EQUAL;
+        const int same_side = is_ref_frame_same_side(cm, mbmi);
+        int cwp_loop_num = cm->features.enable_cwp ? MAX_CWP_NUM : 1;
+#if CONFIG_SEP_COMP_DRL
+        if (best_cwp_idx == CWP_EQUAL &&
+            (ref_mv_idx[0] > 0 || ref_mv_idx[1] > 0))
+          cwp_loop_num = 1;
 #else
-      mode_info[ref_mv_idx].drl_cost = drl_cost;
-#endif
-#endif  //! CONFIG_FLEX_MVRES
+        if (best_cwp_idx == CWP_EQUAL && ref_mv_idx > 0) cwp_loop_num = 1;
+#endif  // CONFIG_SEP_COMP_DRL
 
-        int rs = 0;
-        int compmode_interinter_cost = 0;
-        int_mv cur_mv[2];
-        // TODO(Cherma): Extend this speed feature to support compound mode
-        int skip_repeated_ref_mv =
-            is_comp_pred ? 0 : cpi->sf.inter_sf.skip_repeated_ref_mv;
-        // Generate the current mv according to the prediction mode
-        if (
-#if CONFIG_WARPMV
-            mbmi->mode != WARPMV &&
-#endif  // CONFIG_WARPMV
-            !build_cur_mv(cur_mv, this_mode, cm, x, skip_repeated_ref_mv)) {
-          continue;
-        }
-#if CONFIG_WARPMV
-        // For WARPMV mode we will build MV in the later stage
-        // Currently initialize to 0
-        if (mbmi->mode == WARPMV) {
-          cur_mv[0].as_int = 0;
-          cur_mv[1].as_int = 0;
-          assert(ref_mv_idx == 0);
-        }
-#endif  // CONFIG_WARPMV
+        int cwp_search_mask[MAX_CWP_NUM] = { 0 };
+        av1_zero(cwp_search_mask);
+        // Loop all supported weighting factors for CWP
+        for (int cwp_search_idx = 0; cwp_search_idx < cwp_loop_num;
+             cwp_search_idx++) {
+#if CONFIG_SEP_COMP_DRL
+          mbmi->ref_mv_idx[1] = ref_mv_idx[1];
+          mbmi->ref_mv_idx[0] = ref_mv_idx[0];
+#else
+          mbmi->ref_mv_idx = ref_mv_idx;
+#endif  // CONFIG_SEP_COMP_DRL
+          mbmi->interinter_comp.type = COMPOUND_AVERAGE;
+          mbmi->comp_group_idx = 0;
+          mbmi->motion_mode = SIMPLE_TRANSLATION;
 
+          mbmi->cwp_idx = cwp_weighting_factor[same_side][cwp_search_idx];
+
+          if (mbmi->cwp_idx != CWP_EQUAL) {
+            if (!is_cwp_allowed(mbmi)) break;
+            if (cwp_search_mask[cwp_search_idx] == 0) {
+              continue;
+            }
+          }
+          if (mbmi->cwp_idx == -1) {
+            break;
+          }
+#endif  // CONFIG_CWP
 #if CONFIG_FLEX_MVRES
-#if !CONFIG_BAWP
-        mode_info[mbmi->pb_mv_precision][ref_mv_idx].full_search_mv.as_int =
-            INVALID_MV;
-        mode_info[mbmi->pb_mv_precision][ref_mv_idx].mv.as_int = INVALID_MV;
-        mode_info[mbmi->pb_mv_precision][ref_mv_idx].rd = INT64_MAX;
-        mode_info[mbmi->pb_mv_precision][ref_mv_idx].drl_cost = drl_cost;
-        if (
-#if CONFIG_WARPMV
-            mbmi->mode != WARPMV &&
-#endif  // CONFIG_WARPMV
-            !mask_check_bit(idx_mask[mbmi->pb_mv_precision], ref_mv_idx)) {
-          // MV did not perform well in simple translation search. Skip it.
-          continue;
-        }
-#endif
 
-        if (
-#if CONFIG_WARPMV
-            mbmi->mode != WARPMV &&
-#endif  // CONFIG_WARPMV
-            cpi->sf.flexmv_sf.skip_similar_ref_mv &&
-            skip_similar_ref_mv(cpi, x, bsize)) {
-          continue;
-        }
+          // Initialize compound mode data
+          mbmi->interinter_comp.type = COMPOUND_AVERAGE;
+          mbmi->comp_group_idx = 0;
+          if (mbmi->ref_frame[1] == INTRA_FRAME)
+            mbmi->ref_frame[1] = NONE_FRAME;
 
-#if CONFIG_WARPMV
-        assert(IMPLIES(mbmi->mode == WARPMV,
-                       mbmi->pb_mv_precision == mbmi->max_mv_precision));
-#endif  // CONFIG_WARPMV
-#endif
-
-#if CONFIG_BAWP
-        int_mv bawp_off_mv[2];
-        int64_t bawp_off_newmv_ret_val = 0;
-        for (i = 0; i < is_comp_pred + 1; ++i) {
-          bawp_off_mv[i].as_int = cur_mv[i].as_int;
-        }
-        int bawp_eanbled = cm->features.enable_bawp &&
-                           av1_allow_bawp(mbmi, xd->mi_row, xd->mi_col);
-        for (int bawp_flag = 0; bawp_flag <= bawp_eanbled; bawp_flag++) {
-          mbmi->bawp_flag = bawp_flag;
-
-#if CONFIG_FLEX_MVRES
-          mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx]
-              .full_search_mv.as_int = INVALID_MV;
-          mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx].mv.as_int =
-              INVALID_MV;
-          mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx].rd =
-              INT64_MAX;
-          mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx].drl_cost =
-              drl_cost;
-
+          mbmi->num_proj_ref = 0;
+          mbmi->motion_mode = SIMPLE_TRANSLATION;
+#if CONFIG_SEP_COMP_DRL
+          mbmi->ref_mv_idx[1] = ref_mv_idx[1];
+          mbmi->ref_mv_idx[0] = ref_mv_idx[0];
+          int ref_mv_idx_type = av1_ref_mv_idx_type(mbmi, ref_mv_idx);
+#else
+          mbmi->ref_mv_idx = ref_mv_idx;
+#endif  // CONFIG_SEP_COMP_DRL
+          set_mv_precision(mbmi, mbmi->max_mv_precision);
           if (
 #if CONFIG_WARPMV
               mbmi->mode != WARPMV &&
 #endif  // CONFIG_WARPMV
-              !mask_check_bit(idx_mask[bawp_flag][mbmi->pb_mv_precision],
-                              ref_mv_idx)) {
-            // MV did not perform well in simple translation search. Skip it.
-            continue;
-          }
+              prune_modes_based_on_tpl && !ref_match_found_in_above_nb &&
+              !ref_match_found_in_left_nb && (ref_best_rd != INT64_MAX)) {
+            // Skip mode if TPL model indicates it will not be beneficial.
+            if (prune_modes_based_on_tpl_stats(
+#if CONFIG_SEP_COMP_DRL
+                    &cm->features, inter_cost_info_from_tpl, refs,
+                    ref_mv_idx[0],
 #else
-        mode_info[bawp_flag][ref_mv_idx].full_search_mv.as_int = INVALID_MV;
-        mode_info[bawp_flag][ref_mv_idx].mv.as_int = INVALID_MV;
-        mode_info[bawp_flag][ref_mv_idx].rd = INT64_MAX;
-        mode_info[bawp_flag][ref_mv_idx].drl_cost = drl_cost;
+                    &cm->features, inter_cost_info_from_tpl, refs, ref_mv_idx,
+#endif  // CONFIG_SEP_COMP_DRL
+                    this_mode, cpi->sf.inter_sf.prune_inter_modes_based_on_tpl))
+              continue;
+          }
+          const int drl_cost =
+              get_drl_cost(cm->features.max_drl_bits, mbmi, mbmi_ext, x);
 
-        if (
-#if CONFIG_WARPMV
-            mbmi->mode != WARPMV &&
-#endif  // CONFIG_WARPMV
-            !mask_check_bit(idx_mask[bawp_flag], ref_mv_idx)) {
-          // MV did not perform well in simple translation search. Skip it.
-          continue;
-        }
-#endif  // CONFIG_FLEX_MVRES
-          if (mbmi->bawp_flag == 1) {
-            for (i = 0; i < is_comp_pred + 1; ++i) {
-              mbmi->mv[i].as_int = bawp_off_mv[i].as_int;
-              cur_mv[i].as_int = bawp_off_mv[i].as_int;
+#if CONFIG_FLEX_MVRES
+          MvSubpelPrecision best_precision_so_far = mbmi->max_mv_precision;
+          int64_t best_precision_rd_so_far = INT64_MAX;
+          set_precision_set(cm, xd, mbmi, bsize, ref_mv_idx);
+          set_most_probable_mv_precision(cm, mbmi, bsize);
+          const PRECISION_SET *precision_def =
+              &av1_mv_precision_sets[mbmi->mb_precision_set];
+          for (int precision_dx = precision_def->num_precisions - 1;
+               precision_dx >= 0; precision_dx--) {
+            MvSubpelPrecision pb_mv_precision =
+                precision_def->precision[precision_dx];
+            mbmi->pb_mv_precision = pb_mv_precision;
+            if (!is_pb_mv_precision_active(cm, mbmi, bsize) &&
+                (pb_mv_precision != mbmi->max_mv_precision)) {
+              continue;
+            }
+            assert(pb_mv_precision <= mbmi->max_mv_precision);
+#if CONFIG_IMPROVED_JMVD
+            // apply early termination method to jmvd scaling factors
+            if (cpi->sf.inter_sf.early_terminate_jmvd_scale_factor) {
+              if (scale_index > 0 && (!is_inter_compound_mode(best_ref_mode)) &&
+                  mbmi->pb_mv_precision <= MV_PRECISION_HALF_PEL &&
+                  best_mbmi.jmvd_scale_mode == 0 &&
+                  best_mbmi.pb_mv_precision > MV_PRECISION_HALF_PEL)
+                continue;
+            }
+#endif  // CONFIG_IMPROVED_JMVD
+
+            if (is_pb_mv_precision_active(cm, mbmi, bsize)) {
+              if (cpi->sf.flexmv_sf.terminate_early_4_pel_precision &&
+                  pb_mv_precision < MV_PRECISION_FOUR_PEL &&
+                  best_precision_so_far >= MV_PRECISION_QTR_PEL)
+                continue;
+#if CONFIG_SEP_COMP_DRL
+              if (mbmi->ref_mv_idx[0] || mbmi->ref_mv_idx[1]) {
+#else
+              if (mbmi->ref_mv_idx) {
+#endif  // CONFIG_SEP_COMP_DRL
+                if (cpi->sf.flexmv_sf.do_not_search_8_pel_precision &&
+                    mbmi->pb_mv_precision == MV_PRECISION_8_PEL)
+                  continue;
+
+                if (cpi->sf.flexmv_sf.do_not_search_4_pel_precision &&
+                    mbmi->pb_mv_precision == MV_PRECISION_FOUR_PEL)
+                  continue;
+              }
             }
 
-#if CONFIG_FLEX_MVRES
-            mode_info[1][mbmi->pb_mv_precision][ref_mv_idx]
-                .full_search_mv.as_int =
-                mode_info[0][mbmi->pb_mv_precision][ref_mv_idx]
-                    .full_search_mv.as_int;
-            mode_info[1][mbmi->pb_mv_precision][ref_mv_idx].full_mv_rate =
-                mode_info[0][mbmi->pb_mv_precision][ref_mv_idx].full_mv_rate;
+#endif
+#endif
+
+#if CONFIG_REFINEMV
+            // Get the default value of DMVR flag based on mode
+            mbmi->refinemv_flag = get_default_refinemv_flag(cm, mbmi);
+#endif  // CONFIG_REFINEMV
+
+#if !CONFIG_FLEX_MVRES && !CONFIG_BAWP
+            mode_info[ref_mv_idx].full_search_mv.as_int = INVALID_MV;
+            mode_info[ref_mv_idx].mv.as_int = INVALID_MV;
+            mode_info[ref_mv_idx].rd = INT64_MAX;
+            if (
+#if CONFIG_WARPMV
+                mbmi->mode != WARPMV &&
+#endif  // CONFIG_WARPMV
+
+                !mask_check_bit(idx_mask, ref_mv_idx)) {
+              // MV did not perform well in simple translation search. Skip it.
+              continue;
+            }
+#endif  // !CONFIG_FLEX_MVRES && !CONFIG_BAWP
+#if !CONFIG_FLEX_MVRES
+            if (prune_modes_based_on_tpl && !ref_match_found_in_above_nb &&
+                !ref_match_found_in_left_nb && (ref_best_rd != INT64_MAX)) {
+              // Skip mode if TPL model indicates it will not be beneficial.
+              if (prune_modes_based_on_tpl_stats(
+                      &cm->features, inter_cost_info_from_tpl, refs, ref_mv_idx,
+                      this_mode,
+                      cpi->sf.inter_sf.prune_inter_modes_based_on_tpl))
+                continue;
+            }
+            av1_init_rd_stats(rd_stats);
+            // Initialize compound mode data
+            mbmi->interinter_comp.type = COMPOUND_AVERAGE;
+            mbmi->comp_group_idx = 0;
+            if (mbmi->ref_frame[1] == INTRA_FRAME)
+              mbmi->ref_frame[1] = NONE_FRAME;
+
+            mbmi->num_proj_ref = 0;
+            mbmi->motion_mode = SIMPLE_TRANSLATION;
+            mbmi->ref_mv_idx = ref_mv_idx;
+            // Compute cost for signalling this DRL index
+            rd_stats->rate = base_rate;
+            const int drl_cost =
+                get_drl_cost(cm->features.max_drl_bits, mbmi, mbmi_ext, x);
+
+            rd_stats->rate += drl_cost;
+#if CONFIG_BAWP
+            mode_info[0][ref_mv_idx].drl_cost = drl_cost;
+            mode_info[1][ref_mv_idx].drl_cost = drl_cost;
 #else
-          mode_info[1][ref_mv_idx].full_search_mv.as_int =
-              mode_info[0][ref_mv_idx].full_search_mv.as_int;
-          mode_info[1][ref_mv_idx].full_mv_rate =
-              mode_info[0][ref_mv_idx].full_mv_rate;
+          mode_info[ref_mv_idx].drl_cost = drl_cost;
+#endif
+#endif  // !CONFIG_FLEX_MVRES
+
+            int rs = 0;
+            int compmode_interinter_cost = 0;
+            int_mv cur_mv[2];
+            // TODO(Cherma): Extend this speed feature to support compound mode
+            int skip_repeated_ref_mv =
+                is_comp_pred ? 0 : cpi->sf.inter_sf.skip_repeated_ref_mv;
+            // Generate the current mv according to the prediction mode
+            if (
+#if CONFIG_WARPMV
+                mbmi->mode != WARPMV &&
+#endif  // CONFIG_WARPMV
+                !build_cur_mv(cur_mv, this_mode, cm, x, skip_repeated_ref_mv)) {
+              continue;
+            }
+#if CONFIG_WARPMV
+            // For WARPMV mode we will build MV in the later stage
+            // Currently initialize to 0
+            if (mbmi->mode == WARPMV) {
+              cur_mv[0].as_int = 0;
+              cur_mv[1].as_int = 0;
+
+#if CONFIG_SEP_COMP_DRL
+              assert(ref_mv_idx[0] == 0 && ref_mv_idx[1] == 0);
+#else
+            assert(ref_mv_idx == 0);
+#endif  // CONFIG_SEP_COMP_DRL
+            }
+#endif  // CONFIG_WARPMV
+
+#if CONFIG_FLEX_MVRES
+#if !CONFIG_BAWP
+            mode_info[mbmi->pb_mv_precision][ref_mv_idx].full_search_mv.as_int =
+                INVALID_MV;
+            mode_info[mbmi->pb_mv_precision][ref_mv_idx].mv.as_int = INVALID_MV;
+            mode_info[mbmi->pb_mv_precision][ref_mv_idx].rd = INT64_MAX;
+            mode_info[mbmi->pb_mv_precision][ref_mv_idx].drl_cost = drl_cost;
+            if (
+#if CONFIG_WARPMV
+                mbmi->mode != WARPMV &&
+#endif  // CONFIG_WARPMV
+                !mask_check_bit(idx_mask[mbmi->pb_mv_precision], ref_mv_idx)) {
+              // MV did not perform well in simple translation search. Skip it.
+              continue;
+            }
+#endif  // !CONFIG_BAWP
+
+            if (
+#if CONFIG_WARPMV
+                mbmi->mode != WARPMV &&
+#endif  // CONFIG_WARPMV
+                cpi->sf.flexmv_sf.skip_similar_ref_mv &&
+                skip_similar_ref_mv(cpi, x, bsize)) {
+              continue;
+            }
+
+#if CONFIG_WARPMV
+            assert(IMPLIES(mbmi->mode == WARPMV,
+                           mbmi->pb_mv_precision == mbmi->max_mv_precision));
+#endif  // CONFIG_WARPMV
+#endif  // CONFIG_FLEX_MVRES
+
+#if CONFIG_BAWP
+            int_mv bawp_off_mv[2];
+            int64_t bawp_off_newmv_ret_val = 0;
+#if BAWP_BUGFIX
+            int bawp_off_rate_mv = 0;
+#endif
+            for (i = 0; i < is_comp_pred + 1; ++i) {
+              bawp_off_mv[i].as_int = cur_mv[i].as_int;
+            }
+            int bawp_eanbled = cm->features.enable_bawp &&
+#if CONFIG_REFINEMV
+                               !mbmi->refinemv_flag &&
+#endif  // CONFIG_REFINEMV
+                               av1_allow_bawp(mbmi, xd->mi_row, xd->mi_col);
+            for (int bawp_flag = 0; bawp_flag <= bawp_eanbled; bawp_flag++) {
+              mbmi->bawp_flag = bawp_flag;
+
+#if CONFIG_FLEX_MVRES
+#if CONFIG_SEP_COMP_DRL
+              mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx_type]
+                  .full_search_mv.as_int = INVALID_MV;
+              mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx_type]
+                  .mv.as_int = INVALID_MV;
+              mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx_type].rd =
+                  INT64_MAX;
+              mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx_type]
+                  .drl_cost = drl_cost;
+              if (
+#if CONFIG_WARPMV
+                  mbmi->mode != WARPMV &&
+#endif  // CONFIG_WARPMV
+#if CONFIG_REFINEMV
+                  !mbmi->refinemv_flag &&
+#endif  // CONFIG_REFINEMV
+                  !mask_check_bit(idx_mask[bawp_flag][mbmi->pb_mv_precision],
+                                  ref_mv_idx_type)) {
+                // MV did not perform well in simple translation search. Skip
+                // it.
+                continue;
+              }
+#else
+              mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx]
+                  .full_search_mv.as_int = INVALID_MV;
+              mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx]
+                  .mv.as_int = INVALID_MV;
+              mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx].rd =
+                  INT64_MAX;
+              mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx].drl_cost =
+                  drl_cost;
+
+              if (
+#if CONFIG_WARPMV
+                  mbmi->mode != WARPMV &&
+#endif  // CONFIG_WARPMV
+#if CONFIG_REFINEMV
+                  !mbmi->refinemv_flag &&
+#endif  // CONFIG_REFINEMV
+                  !mask_check_bit(idx_mask[bawp_flag][mbmi->pb_mv_precision],
+                                  ref_mv_idx)) {
+                // MV did not perform well in simple translation search. Skip
+                // it.
+                continue;
+              }
+#endif  // CONFIG_SEP_COMP_DRL
+#else
+            mode_info[bawp_flag][ref_mv_idx].full_search_mv.as_int = INVALID_MV;
+            mode_info[bawp_flag][ref_mv_idx].mv.as_int = INVALID_MV;
+            mode_info[bawp_flag][ref_mv_idx].rd = INT64_MAX;
+            mode_info[bawp_flag][ref_mv_idx].drl_cost = drl_cost;
+
+            if (
+#if CONFIG_WARPMV
+                mbmi->mode != WARPMV &&
+#endif  // CONFIG_WARPMV
+#if CONFIG_REFINEMV
+                !mbmi->refinemv_flag &&
+#endif  // CONFIG_REFINEMV
+                !mask_check_bit(idx_mask[bawp_flag], ref_mv_idx)) {
+              // MV did not perform well in simple translation search. Skip it.
+              continue;
+            }
 #endif  // CONFIG_FLEX_MVRES
-            if (bawp_off_newmv_ret_val != 0) continue;
-          } else {
+
+#if CONFIG_REFINEMV
+              assert(!(mbmi->bawp_flag && mbmi->refinemv_flag));
+#endif  // CONFIG_REFINEMV
+
+              if (mbmi->bawp_flag == 1) {
+                for (i = 0; i < is_comp_pred + 1; ++i) {
+                  mbmi->mv[i].as_int = bawp_off_mv[i].as_int;
+                  cur_mv[i].as_int = bawp_off_mv[i].as_int;
+                }
+
+#if CONFIG_FLEX_MVRES
+#if CONFIG_SEP_COMP_DRL
+                mode_info[1][mbmi->pb_mv_precision][ref_mv_idx_type]
+                    .full_search_mv.as_int =
+                    mode_info[0][mbmi->pb_mv_precision][ref_mv_idx_type]
+                        .full_search_mv.as_int;
+                mode_info[1][mbmi->pb_mv_precision][ref_mv_idx_type]
+                    .full_mv_rate =
+                    mode_info[0][mbmi->pb_mv_precision][ref_mv_idx_type]
+                        .full_mv_rate;
+#else
+                mode_info[1][mbmi->pb_mv_precision][ref_mv_idx]
+                    .full_search_mv.as_int =
+                    mode_info[0][mbmi->pb_mv_precision][ref_mv_idx]
+                        .full_search_mv.as_int;
+                mode_info[1][mbmi->pb_mv_precision][ref_mv_idx].full_mv_rate =
+                    mode_info[0][mbmi->pb_mv_precision][ref_mv_idx]
+                        .full_mv_rate;
+#endif  // CONFIG_SEP_COMP_DRL
+#else
+              mode_info[1][ref_mv_idx].full_search_mv.as_int =
+                  mode_info[0][ref_mv_idx].full_search_mv.as_int;
+              mode_info[1][ref_mv_idx].full_mv_rate =
+                  mode_info[0][ref_mv_idx].full_mv_rate;
+#endif  // CONFIG_FLEX_MVRES
+#if BAWP_BUGFIX
+                rate_mv = bawp_off_rate_mv;
 #endif
-            // The above call to build_cur_mv does not handle NEWMV modes. Build
-            // the mv here if we have NEWMV for any predictors.
-            if (have_newmv_in_inter_mode(this_mode)) {
+                if (bawp_off_newmv_ret_val != 0) continue;
+              } else {
+#endif  // CONFIG_BAWP
+        // The above call to build_cur_mv does not handle NEWMV modes.
+        // Build the mv here if we have NEWMV for any predictors.
+                if (have_newmv_in_inter_mode(this_mode)) {
 #if CONFIG_COLLECT_COMPONENT_TIMING
-              start_timing(cpi, handle_newmv_time);
+                  start_timing(cpi, handle_newmv_time);
 #endif
-              newmv_ret_val =
-                  handle_newmv(cpi, x, bsize, cur_mv, &rate_mv, args,
+                  newmv_ret_val =
+                      handle_newmv(cpi, x, bsize, cur_mv, &rate_mv, args,
 #if CONFIG_FLEX_MVRES
 #if CONFIG_BAWP
-                               mode_info[bawp_flag][mbmi->pb_mv_precision]);
+                                   mode_info[bawp_flag][mbmi->pb_mv_precision]);
 #else
-                             mode_info[mbmi->pb_mv_precision]);
+                                 mode_info[mbmi->pb_mv_precision]);
 #endif
 #else
 #if CONFIG_BAWP
@@ -4369,67 +5375,71 @@
 #endif
 #endif
 #if CONFIG_COLLECT_COMPONENT_TIMING
-              end_timing(cpi, handle_newmv_time);
+                  end_timing(cpi, handle_newmv_time);
 #endif
 
 #if CONFIG_BAWP
-              for (i = 0; i < is_comp_pred + 1; ++i) {
-                bawp_off_mv[i].as_int = cur_mv[i].as_int;
+#if BAWP_BUGFIX
+                  bawp_off_rate_mv = rate_mv;
+#endif
+                  for (i = 0; i < is_comp_pred + 1; ++i) {
+                    bawp_off_mv[i].as_int = cur_mv[i].as_int;
+                  }
+                  bawp_off_newmv_ret_val = newmv_ret_val;
+                  if (newmv_ret_val != 0) continue;
+                }
               }
-              bawp_off_newmv_ret_val = newmv_ret_val;
-              if (newmv_ret_val != 0) continue;
-            }
-          }
-          if (have_newmv_in_inter_mode(this_mode)) {
+              if (have_newmv_in_inter_mode(this_mode)) {
 #else
       if (newmv_ret_val != 0) continue;
 #endif
 
 #if CONFIG_C071_SUBBLK_WARPMV && CONFIG_FLEX_MVRES
-            int mv_outlim = 0;
-            for (int ref = 0; ref < is_comp_pred + 1; ref++) {
-              const PREDICTION_MODE single_mode =
-                  get_single_mode(this_mode, ref);
-              if (single_mode == NEWMV) {
-                SUBPEL_MOTION_SEARCH_PARAMS ms_params;
-                MV ref_mv = av1_get_ref_mv(x, ref).as_mv;
-                if (mbmi->pb_mv_precision < MV_PRECISION_HALF_PEL)
-                  lower_mv_precision(&ref_mv, mbmi->pb_mv_precision);
-                av1_make_default_subpel_ms_params(
-                    &ms_params, cpi, x, bsize, &ref_mv, pb_mv_precision, NULL);
-                if (!av1_is_subpelmv_in_range(&ms_params.mv_limits,
-                                              cur_mv[ref].as_mv)) {
-                  mv_outlim = 1;
-                  break;
+                int mv_outlim = 0;
+                for (int ref = 0; ref < is_comp_pred + 1; ref++) {
+                  const PREDICTION_MODE single_mode =
+                      get_single_mode(this_mode, ref);
+                  if (single_mode == NEWMV) {
+                    SUBPEL_MOTION_SEARCH_PARAMS ms_params;
+                    MV ref_mv = av1_get_ref_mv(x, ref).as_mv;
+                    if (mbmi->pb_mv_precision < MV_PRECISION_HALF_PEL)
+                      lower_mv_precision(&ref_mv, mbmi->pb_mv_precision);
+                    av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize,
+                                                      &ref_mv, pb_mv_precision,
+                                                      NULL);
+                    if (!av1_is_subpelmv_in_range(&ms_params.mv_limits,
+                                                  cur_mv[ref].as_mv)) {
+                      mv_outlim = 1;
+                      break;
+                    }
+                  }
                 }
-              }
-            }
-            if (mv_outlim) continue;
+                if (mv_outlim) continue;
 #endif  // CONFIG_C071_SUBBLK_WARPMV && CONFIG_FLEX_MVRES
 
-              // skip NEWMV mode in drl if the motion search result is the same
-              // as a previous result
+                  // skip NEWMV mode in drl if the motion search result is the
+                  // same as a previous result
 #if CONFIG_FLEX_MVRES
-            int skip_new_mv =
-                cpi->sf.inter_sf.skip_repeated_newmv ||
-                (mbmi->pb_mv_precision != mbmi->max_mv_precision &&
-                 cpi->sf.flexmv_sf.skip_repeated_newmv_low_prec);
-            if (skip_new_mv &&
-                skip_repeated_newmv(
-                    cpi, x, bsize, do_tx_search, this_mode,
-                    mbmi->pb_mv_precision,
+                int skip_new_mv =
+                    cpi->sf.inter_sf.skip_repeated_newmv ||
+                    (mbmi->pb_mv_precision != mbmi->max_mv_precision &&
+                     cpi->sf.flexmv_sf.skip_repeated_newmv_low_prec);
+                if (skip_new_mv &&
+                    skip_repeated_newmv(
+                        cpi, x, bsize, do_tx_search, this_mode,
+                        mbmi->pb_mv_precision,
 #if CONFIG_BAWP
-                    mbmi->bawp_flag,
+                        mbmi->bawp_flag,
 #endif
-                    &best_mbmi, motion_mode_cand, &ref_best_rd, &best_rd_stats,
-                    &best_rd_stats_y,
+                        &best_mbmi, motion_mode_cand, &ref_best_rd,
+                        &best_rd_stats, &best_rd_stats_y,
 #if CONFIG_BAWP
-                    &best_rd_stats_uv,
-                    mode_info[bawp_flag][mbmi->pb_mv_precision], args,
+                        &best_rd_stats_uv,
+                        mode_info[bawp_flag][mbmi->pb_mv_precision], args,
 #else
-                  &best_rd_stats_uv, mode_info[mbmi->pb_mv_precision], args,
+                      &best_rd_stats_uv, mode_info[mbmi->pb_mv_precision], args,
 #endif
-                    drl_cost, refs, cur_mv, &best_rd, orig_dst, ref_mv_idx))
+                        drl_cost, refs, cur_mv, &best_rd, orig_dst, ref_mv_idx))
 #else
       if (cpi->sf.inter_sf.skip_repeated_newmv &&
           skip_repeated_newmv(
@@ -4446,52 +5456,99 @@
 #endif
               args, drl_cost, refs, cur_mv, &best_rd, orig_dst, ref_mv_idx))
 #endif
-              continue;
-          }
+                  continue;
+              }
+
+#if CONFIG_REFINEMV
+              const MB_MODE_INFO base_mbmi = *mbmi;
+              for (int refinemv_loop = 0; refinemv_loop < REFINEMV_NUM_MODES;
+                   refinemv_loop++) {
+                *mbmi = base_mbmi;
+                int_mv tmp_cur_mv[2];
+                for (i = 0; i < 2; ++i) {
+                  tmp_cur_mv[i].as_int = cur_mv[i].as_int;
+                }
+                int tmp_rate_mv = rate_mv;
+#endif  // CONFIG_REFINEMV
 
 #if CONFIG_FLEX_MVRES || CONFIG_BAWP
-          av1_init_rd_stats(rd_stats);
-          // Initialize compound mode data
-          mbmi->interinter_comp.type = COMPOUND_AVERAGE;
-          mbmi->comp_group_idx = 0;
-          if (mbmi->ref_frame[1] == INTRA_FRAME)
-            mbmi->ref_frame[1] = NONE_FRAME;
+                av1_init_rd_stats(rd_stats);
+                // Initialize compound mode data
+                mbmi->interinter_comp.type = COMPOUND_AVERAGE;
+                mbmi->comp_group_idx = 0;
+                if (mbmi->ref_frame[1] == INTRA_FRAME)
+                  mbmi->ref_frame[1] = NONE_FRAME;
 
-          mbmi->num_proj_ref = 0;
-          mbmi->motion_mode = SIMPLE_TRANSLATION;
-          mbmi->ref_mv_idx = ref_mv_idx;
+                mbmi->num_proj_ref = 0;
+                mbmi->motion_mode = SIMPLE_TRANSLATION;
+#if CONFIG_SEP_COMP_DRL
+                mbmi->ref_mv_idx[0] = ref_mv_idx[0];
+                mbmi->ref_mv_idx[1] = ref_mv_idx[1];
+#else
+              mbmi->ref_mv_idx = ref_mv_idx;
+#endif  // CONFIG_SEP_COMP_DRL
 
-          // Compute cost for signalling this DRL index
-          rd_stats->rate = base_rate;
+                // Compute cost for signalling this DRL index
+                rd_stats->rate = base_rate;
 #if CONFIG_FLEX_MVRES
-          rd_stats->rate += flex_mv_cost[mbmi->pb_mv_precision];
+                rd_stats->rate += flex_mv_cost[mbmi->pb_mv_precision];
 #endif
-          rd_stats->rate += drl_cost;
+                rd_stats->rate += drl_cost;
 #endif
 
+#if CONFIG_REFINEMV
+                if (refinemv_loop && !switchable_refinemv_flag(cm, mbmi))
+                  continue;
+                mbmi->refinemv_flag = switchable_refinemv_flag(cm, mbmi)
+                                          ? refinemv_loop
+                                          : get_default_refinemv_flag(cm, mbmi);
+                if (mbmi->refinemv_flag &&
+                    !is_refinemv_allowed(cm, mbmi, bsize)) {
+                  continue;
+                }
+#if CONFIG_CWP
+                if (mbmi->refinemv_flag && mbmi->cwp_idx != CWP_EQUAL) continue;
+#endif
+#endif  // CONFIG_REFINEMV
+
 #if CONFIG_IMPROVED_JMVD && CONFIG_JOINT_MVD
-          if (is_joint_mvd_coding_mode(mbmi->mode)) {
-            int jmvd_scale_mode_cost =
+                if (is_joint_mvd_coding_mode(mbmi->mode)) {
+                  int jmvd_scale_mode_cost =
 #if CONFIG_ADAPTIVE_MVD
-                is_joint_amvd_coding_mode(mbmi->mode)
-                    ? mode_costs
-                          ->jmvd_amvd_scale_mode_cost[mbmi->jmvd_scale_mode]
-                    :
+                      is_joint_amvd_coding_mode(mbmi->mode)
+                          ? mode_costs->jmvd_amvd_scale_mode_cost
+                                [mbmi->jmvd_scale_mode]
+                          :
 #endif  // CONFIG_ADAPTIVE_MVD
-                    mode_costs->jmvd_scale_mode_cost[mbmi->jmvd_scale_mode];
-            rd_stats->rate += jmvd_scale_mode_cost;
-          }
+                          mode_costs
+                              ->jmvd_scale_mode_cost[mbmi->jmvd_scale_mode];
+                  rd_stats->rate += jmvd_scale_mode_cost;
+                }
 #endif  // CONFIG_IMPROVED_JMVD && CONFIG_JOINT_MVD
 
-          rd_stats->rate += rate_mv;
+#if CONFIG_REFINEMV
+                rd_stats->rate += tmp_rate_mv;
+                if (switchable_refinemv_flag(cm, mbmi)) {
+                  rd_stats->rate +=
+                      mode_costs->refinemv_flag_cost[av1_get_refinemv_context(
+                          cm, xd, bsize)][mbmi->refinemv_flag];
+                }
+#else
+    rd_stats->rate += rate_mv;
+#endif  // CONFIG_REFINEMV
 
-          // Copy the motion vector for this mode into mbmi struct
-          for (i = 0; i < is_comp_pred + 1; ++i) {
-            mbmi->mv[i].as_int = cur_mv[i].as_int;
-          }
+                // Copy the motion vector for this mode into mbmi struct
+                for (i = 0; i < is_comp_pred + 1; ++i) {
+#if CONFIG_REFINEMV
+                  mbmi->mv[i].as_int = tmp_cur_mv[i].as_int;
+#else
+
+      mbmi->mv[i].as_int = cur_mv[i].as_int;
+#endif  // CONFIG_REFINEMV
+                }
 #if CONFIG_C071_SUBBLK_WARPMV
 #if CONFIG_FLEX_MVRES
-          assert(check_mv_precision(cm, mbmi, x));
+                assert(check_mv_precision(cm, mbmi, x));
 #endif
 #else
 #if CONFIG_FLEX_MVRES
@@ -4499,59 +5556,101 @@
 #endif
 #endif  // CONFIG_C071_SUBBLK_WARPMV
 
-          const int like_nearest = (mbmi->mode == NEARMV ||
+                const int like_nearest = (mbmi->mode == NEARMV ||
 #if CONFIG_WARPMV
-                                    mbmi->mode == WARPMV ||
+                                          mbmi->mode == WARPMV ||
 #endif  // CONFIG_WARPMV
 #if CONFIG_OPTFLOW_REFINEMENT
-                                    mbmi->mode == NEAR_NEARMV_OPTFLOW ||
+                                          mbmi->mode == NEAR_NEARMV_OPTFLOW ||
 #endif  // CONFIG_OPTFLOW_REFINEMENT
-                                    mbmi->mode == NEAR_NEARMV) &&
-                                   mbmi->ref_mv_idx == 0;
-          if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd &&
-              !like_nearest) {
-            continue;
-          }
+                                          mbmi->mode == NEAR_NEARMV) &&
+#if CONFIG_SEP_COMP_DRL
+                                         mbmi->ref_mv_idx[0] == 0 &&
+                                         mbmi->ref_mv_idx[1] == 0;
+#else
+                             mbmi->ref_mv_idx == 0;
+#endif  // CONFIG_SEP_COMP_DRL
+                if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd &&
+                    !like_nearest) {
+                  continue;
+                }
 
-          // Skip the rest of the search if prune_ref_mv_idx_search speed
-          // feature is enabled, and the current MV is similar to a previous
-          // one.
-          if (cpi->sf.inter_sf.prune_ref_mv_idx_search && is_comp_pred &&
-              prune_ref_mv_idx_search(&cm->features, ref_mv_idx,
-                                      best_ref_mv_idx,
+                // Skip the rest of the search if prune_ref_mv_idx_search speed
+                // feature is enabled, and the current MV is similar to a
+                // previous one.
+                if (cpi->sf.inter_sf.prune_ref_mv_idx_search && is_comp_pred &&
+                    prune_ref_mv_idx_search(
+                        &cm->features, ref_mv_idx, best_ref_mv_idx,
 #if CONFIG_FLEX_MVRES
-                                      save_mv[mbmi->pb_mv_precision], mbmi,
+                        save_mv[mbmi->pb_mv_precision], mbmi,
 #else
 
-                                save_mv, mbmi,
+            save_mv, mbmi,
 #endif
-                                      cpi->sf.inter_sf.prune_ref_mv_idx_search))
-            continue;
+                        cpi->sf.inter_sf.prune_ref_mv_idx_search))
+                  continue;
 
 #if CONFIG_COLLECT_COMPONENT_TIMING
-          start_timing(cpi, compound_type_rd_time);
+                start_timing(cpi, compound_type_rd_time);
 #endif
-          int skip_build_pred = 0;
-          const int mi_row = xd->mi_row;
-          const int mi_col = xd->mi_col;
+                int skip_build_pred = 0;
+                const int mi_row = xd->mi_row;
+                const int mi_col = xd->mi_col;
 
-          // Handle a compound predictor, continue if it is determined this
-          // cannot be the best compound mode
-          if (is_comp_pred
+#if CONFIG_CWP
+                // set cwp_search_mask
+                if (is_cwp_allowed(mbmi) && mbmi->cwp_idx == CWP_EQUAL) {
+                  set_cwp_search_mask(cpi, x, bsize, rd_buffers->pred0,
+                                      rd_buffers->pred1, rd_buffers->residual1,
+                                      rd_buffers->diff10,
+                                      block_size_wide[bsize], cwp_search_mask);
+                }
+#endif  // CONFIG_CWP
+
+                // Handle a compound predictor, continue if it is determined
+                // this cannot be the best compound mode
+                if (is_comp_pred
 #if IMPROVED_AMVD && CONFIG_JOINT_MVD
-              && !is_joint_amvd_coding_mode(mbmi->mode)
+                    && !is_joint_amvd_coding_mode(mbmi->mode)
 #endif  // IMPROVED_AMVD && CONFIG_JOINT_MVD
-          ) {
-            const int not_best_mode = process_compound_inter_mode(
-                cpi, x, args, ref_best_rd, cur_mv, bsize,
-                &compmode_interinter_cost, rd_buffers, &orig_dst, &tmp_dst,
-                &rate_mv, rd_stats, skip_rd, &skip_build_pred);
-            if (not_best_mode) continue;
-          }
+#if CONFIG_REFINEMV
+                    && (!mbmi->refinemv_flag ||
+                        !switchable_refinemv_flag(cm, mbmi))
+#endif  // CONFIG_REFINEMV
+                ) {
+                  const int not_best_mode = process_compound_inter_mode(
+                      cpi, x, args, ref_best_rd,
+#if CONFIG_REFINEMV
+                      tmp_cur_mv,
+#else
+          cur_mv,
+#endif  // CONFIG_REFINEMV
+                      bsize, &compmode_interinter_cost, rd_buffers, &orig_dst,
+                      &tmp_dst,
 
+#if CONFIG_REFINEMV
+                      &tmp_rate_mv,
+#else
+
+          &rate_mv,
+#endif  // CONFIG_REFINEMV
+
+                      rd_stats, skip_rd, &skip_build_pred);
+                  if (not_best_mode) continue;
+                }
+
+#if CONFIG_CWP
+                if (cm->features.enable_cwp && is_comp_pred &&
+                    is_joint_amvd_coding_mode(mbmi->mode)) {
+                  if (is_cwp_allowed(mbmi)) {
+                    compmode_interinter_cost =
+                        av1_get_cwp_idx_cost(mbmi->cwp_idx, cm, x);
+                  }
+                }
+#endif  // CONFIG_CWP
 #if CONFIG_C071_SUBBLK_WARPMV
 #if CONFIG_FLEX_MVRES
-          assert(check_mv_precision(cm, mbmi, x));
+                assert(check_mv_precision(cm, mbmi, x));
 #endif
 #else
 #if CONFIG_FLEX_MVRES
@@ -4560,20 +5659,20 @@
 #endif  // CONFIG_C071_SUBBLK_WARPMV
 
 #if CONFIG_COLLECT_COMPONENT_TIMING
-          end_timing(cpi, compound_type_rd_time);
+                end_timing(cpi, compound_type_rd_time);
 #endif
 
 #if CONFIG_COLLECT_COMPONENT_TIMING
-          start_timing(cpi, interpolation_filter_search_time);
+                start_timing(cpi, interpolation_filter_search_time);
 #endif
-          // Determine the interpolation filter for this mode
-          ret_val = av1_interpolation_filter_search(
-              x, cpi, tile_data, bsize, &tmp_dst, &orig_dst, &rd, &rs,
-              &skip_build_pred, args, ref_best_rd);
+                // Determine the interpolation filter for this mode
+                ret_val = av1_interpolation_filter_search(
+                    x, cpi, tile_data, bsize, &tmp_dst, &orig_dst, &rd, &rs,
+                    &skip_build_pred, args, ref_best_rd);
 
 #if CONFIG_C071_SUBBLK_WARPMV
 #if CONFIG_FLEX_MVRES
-          assert(check_mv_precision(cm, mbmi, x));
+                assert(check_mv_precision(cm, mbmi, x));
 #endif
 #else
 #if CONFIG_FLEX_MVRES
@@ -4581,87 +5680,111 @@
 #endif
 #endif  // CONFIG_C071_SUBBLK_WARPMV
 #if CONFIG_COLLECT_COMPONENT_TIMING
-          end_timing(cpi, interpolation_filter_search_time);
+                end_timing(cpi, interpolation_filter_search_time);
 #endif
-          if (args->modelled_rd != NULL && !is_comp_pred) {
-            args->modelled_rd[this_mode][ref_mv_idx][refs[0]] = rd;
-          }
+                if (args->modelled_rd != NULL && !is_comp_pred) {
+#if CONFIG_SEP_COMP_DRL
+                  args->modelled_rd[this_mode][ref_mv_idx_type][refs[0]] = rd;
+#else
+      args->modelled_rd[this_mode][ref_mv_idx][refs[0]] = rd;
+#endif  // CONFIG_SEP_COMP_DRL
+                }
 
 #if CONFIG_WARPMV
-          if (mbmi->mode != WARPMV) {
+                if (mbmi->mode != WARPMV) {
 #endif  // CONFIG_WARPMV
-            if (ret_val != 0) {
-              restore_dst_buf(xd, orig_dst, num_planes);
-              continue;
-            } else if (cpi->sf.inter_sf
-                           .model_based_post_interp_filter_breakout &&
-                       ref_best_rd != INT64_MAX &&
-                       (rd >> 3) * 3 > ref_best_rd) {
-              restore_dst_buf(xd, orig_dst, num_planes);
-              continue;
-            }
+                  if (ret_val != 0) {
+                    restore_dst_buf(xd, orig_dst, num_planes);
+                    continue;
+                  } else if (cpi->sf.inter_sf
+                                 .model_based_post_interp_filter_breakout &&
+                             ref_best_rd != INT64_MAX &&
+                             (rd >> 3) * 3 > ref_best_rd) {
+                    restore_dst_buf(xd, orig_dst, num_planes);
+                    continue;
+                  }
 #if CONFIG_WARPMV
-          }
+                }
 #endif  // CONFIG_WARPMV
         // Compute modelled RD if enabled
-          if (args->modelled_rd != NULL) {
+                if (args->modelled_rd != NULL) {
 #if CONFIG_OPTFLOW_REFINEMENT
-            if (is_comp_pred && this_mode < NEAR_NEARMV_OPTFLOW) {
+                  if (is_comp_pred && this_mode < NEAR_NEARMV_OPTFLOW) {
 #else
       if (is_comp_pred) {
 #endif  // CONFIG_OPTFLOW_REFINEMENT
-              const int mode0 = compound_ref0_mode(this_mode);
-              const int mode1 = compound_ref1_mode(this_mode);
-              const int64_t mrd =
-                  AOMMIN(args->modelled_rd[mode0][ref_mv_idx][refs[0]],
-                         args->modelled_rd[mode1][ref_mv_idx][refs[1]]);
-
-              if ((rd >> 3) * 6 > mrd && ref_best_rd < INT64_MAX) {
-                restore_dst_buf(xd, orig_dst, num_planes);
-                continue;
-              }
-            }
-          }
-          rd_stats->rate += compmode_interinter_cost;
-          if (skip_build_pred != 1
+                    const int mode0 = compound_ref0_mode(this_mode);
+                    const int mode1 = compound_ref1_mode(this_mode);
+                    const int64_t mrd =
+#if CONFIG_SEP_COMP_DRL
+                        AOMMIN(args->modelled_rd[mode0][get_ref_mv_idx(mbmi, 0)]
+                                                [refs[0]],
+                               args->modelled_rd[mode1][get_ref_mv_idx(mbmi, 1)]
+                                                [refs[1]]);
+#else
+            AOMMIN(args->modelled_rd[mode0][ref_mv_idx][refs[0]],
+                   args->modelled_rd[mode1][ref_mv_idx][refs[1]]);
+#endif  // CONFIG_SEP_COMP_DRL
+                    if ((rd >> 3) * 6 > mrd && ref_best_rd < INT64_MAX) {
+                      restore_dst_buf(xd, orig_dst, num_planes);
+                      continue;
+                    }
+                  }
+                }
+                rd_stats->rate += compmode_interinter_cost;
+                if (skip_build_pred != 1
 #if CONFIG_WARPMV
-              && (mbmi->mode != WARPMV)
+                    && (mbmi->mode != WARPMV)
 #endif  // CONFIG_WARPMV
 
-          ) {
-            // Build this inter predictor if it has not been previously built
-            av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst,
-                                          bsize, 0, av1_num_planes(cm) - 1);
-          }
+                ) {
+                  // Build this inter predictor if it has not been previously
+                  // built
+                  av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col,
+                                                &orig_dst, bsize, 0,
+                                                av1_num_planes(cm) - 1);
+                }
 
 #if CONFIG_WARPMV
-          // So far we did not make prediction for WARPMV mode
-          assert(IMPLIES(mbmi->mode == WARPMV, skip_build_pred != 1));
+                // So far we did not make prediction for WARPMV mode
+                assert(IMPLIES(mbmi->mode == WARPMV, skip_build_pred != 1));
 #endif  // CONFIG_WARPMV
 
 #if CONFIG_COLLECT_COMPONENT_TIMING
-          start_timing(cpi, motion_mode_rd_time);
+                start_timing(cpi, motion_mode_rd_time);
 #endif
-          int rate2_nocoeff = rd_stats->rate;
+                int rate2_nocoeff = rd_stats->rate;
 #if CONFIG_WARPMV
-          assert(IMPLIES(mbmi->mode == WARPMV,
-                         (rd_stats->rate == base_rate && rate_mv == 0)));
+#if CONFIG_REFINEMV
+                assert(
+                    IMPLIES(mbmi->mode == WARPMV,
+                            (rd_stats->rate == base_rate && tmp_rate_mv == 0)));
+#else
+              assert(IMPLIES(mbmi->mode == WARPMV,
+                             (rd_stats->rate == base_rate && rate_mv == 0)));
+#endif
 #endif  // CONFIG_WARPMV
         // Determine the motion mode. This will be one of SIMPLE_TRANSLATION,
         // OBMC_CAUSAL or WARPED_CAUSAL or WARP_EXTEND or WARP_DELTA
-          ret_val = motion_mode_rd(cpi, tile_data, x, bsize, rd_stats,
-                                   rd_stats_y, rd_stats_uv, args, ref_best_rd,
-                                   skip_rd, &rate_mv, &orig_dst, best_est_rd,
-                                   do_tx_search, inter_modes_info, 0);
+                ret_val = motion_mode_rd(
+                    cpi, tile_data, x, bsize, rd_stats, rd_stats_y, rd_stats_uv,
+                    args, ref_best_rd, skip_rd,
+#if CONFIG_REFINEMV
+                    &tmp_rate_mv,
+#else
+        &rate_mv,
+#endif  // CONFIG_REFINEMV
+
+                    &orig_dst, best_est_rd, do_tx_search, inter_modes_info, 0);
 #if CONFIG_COLLECT_COMPONENT_TIMING
-          end_timing(cpi, motion_mode_rd_time);
+                end_timing(cpi, motion_mode_rd_time);
 #endif
-          assert(IMPLIES(!av1_check_newmv_joint_nonzero(cm, x),
-                         ret_val == INT64_MAX));
+                assert(IMPLIES(!av1_check_newmv_joint_nonzero(cm, x),
+                               ret_val == INT64_MAX));
 
 #if CONFIG_C071_SUBBLK_WARPMV
 #if CONFIG_FLEX_MVRES
-          assert(check_mv_precision(cm, mbmi, x));
+                assert(check_mv_precision(cm, mbmi, x));
 #endif
 #else
 #if CONFIG_FLEX_MVRES
@@ -4669,85 +5792,152 @@
 #endif
 #endif  // CONFIG_C071_SUBBLK_WARPMV
 
-          if (ret_val != INT64_MAX) {
-            int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
+                if (ret_val != INT64_MAX) {
+                  int64_t tmp_rd =
+                      RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
 
 #if CONFIG_FLEX_MVRES
-            if (is_pb_mv_precision_active(cm, mbmi, bsize) &&
-                tmp_rd < best_precision_rd_so_far) {
-              best_precision_so_far = mbmi->pb_mv_precision;
-              best_precision_rd_so_far = tmp_rd;
-            }
+                  if (is_pb_mv_precision_active(cm, mbmi, bsize) &&
+                      tmp_rd < best_precision_rd_so_far) {
+                    best_precision_so_far = mbmi->pb_mv_precision;
+                    best_precision_rd_so_far = tmp_rd;
+                  }
 #if CONFIG_BAWP
-            if (tmp_rd <
-                mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx].rd) {
-              // Only update mode_info if the new result is actually better.
-              mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx]
-                  .mv.as_int = mbmi->mv[0].as_int;
-              mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx].rate_mv =
-                  rate_mv;
-              mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx].rd =
-                  tmp_rd;
-            }
+#if CONFIG_SEP_COMP_DRL
+                  if (tmp_rd < mode_info[bawp_flag][mbmi->pb_mv_precision]
+                                        [ref_mv_idx_type]
+                                            .rd) {
+                    // Only update mode_info if the new result is actually
+                    // better.
+                    mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx_type]
+                        .mv.as_int = mbmi->mv[0].as_int;
+#if CONFIG_REFINEMV
+                    mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx_type]
+                        .rate_mv = tmp_rate_mv;
 #else
-          if (tmp_rd < mode_info[mbmi->pb_mv_precision][ref_mv_idx].rd) {
-            // Only update mode_info if the new result is actually better.
-            mode_info[mbmi->pb_mv_precision][ref_mv_idx].mv.as_int =
-                mbmi->mv[0].as_int;
-            mode_info[mbmi->pb_mv_precision][ref_mv_idx].rate_mv = rate_mv;
-            mode_info[mbmi->pb_mv_precision][ref_mv_idx].rd = tmp_rd;
-          }
+                    mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx_type]
+                        .rate_mv = rate_mv;
+#endif  // CONFIG_REFINEMV
+                    mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx_type]
+                        .rd = tmp_rd;
+                  }
+#else
+                  if (tmp_rd <
+                      mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx]
+                          .rd) {
+                    // Only update mode_info if the new result is actually
+                    // better.
+                    mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx]
+                        .mv.as_int = mbmi->mv[0].as_int;
+#if CONFIG_REFINEMV
+                    mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx]
+                        .rate_mv = tmp_rate_mv;
+#else
+                    mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx]
+                        .rate_mv = rate_mv;
+#endif  // CONFIG_REFINEMV
+                    mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx].rd =
+                        tmp_rd;
+                  }
+#endif  // CONFIG_SEP_COMP_DRL
+#else
+#if CONFIG_SEP_COMP_DRL
+                if (tmp_rd <
+                    mode_info[mbmi->pb_mv_precision][ref_mv_idx_type].rd) {
+                  // Only update mode_info if the new result is actually better.
+                  mode_info[mbmi->pb_mv_precision][ref_mv_idx_type].mv.as_int =
+                      mbmi->mv[0].as_int;
+#if CONFIG_REFINEMV
+                  mode_info[mbmi->pb_mv_precision][ref_mv_idx_type].rate_mv =
+                      tmp_rate_mv;
+#else
+
+                  mode_info[mbmi->pb_mv_precision][ref_mv_idx_type].rate_mv =
+                      rate_mv;
+#endif  // CONFIG_REFINEMV
+                  mode_info[mbmi->pb_mv_precision][ref_mv_idx_type].rd = tmp_rd;
+                }
+#else
+                if (tmp_rd < mode_info[mbmi->pb_mv_precision][ref_mv_idx].rd) {
+                  // Only update mode_info if the new result is actually better.
+                  mode_info[mbmi->pb_mv_precision][ref_mv_idx].mv.as_int =
+                      mbmi->mv[0].as_int;
+#if CONFIG_REFINEMV
+                  mode_info[mbmi->pb_mv_precision][ref_mv_idx].rate_mv =
+                      tmp_rate_mv;
+#else
+
+                  mode_info[mbmi->pb_mv_precision][ref_mv_idx].rate_mv =
+                      rate_mv;
+#endif  // CONFIG_REFINEMV
+                  mode_info[mbmi->pb_mv_precision][ref_mv_idx].rd = tmp_rd;
+                }
+#endif  // CONFIG_SEP_COMP_DRL
 #endif
 #else
 #if CONFIG_BAWP
       if (tmp_rd < mode_info[bawp_flag][ref_mv_idx].rd) {
         // Only update mode_info if the new result is actually better.
         mode_info[bawp_flag][ref_mv_idx].mv.as_int = mbmi->mv[0].as_int;
+#if CONFIG_REFINEMV
+        mode_info[bawp_flag][ref_mv_idx].rate_mv = tmp_rate_mv;
+#else
         mode_info[bawp_flag][ref_mv_idx].rate_mv = rate_mv;
+#endif  // CONFIG_REFINEMV
         mode_info[bawp_flag][ref_mv_idx].rd = tmp_rd;
       }
 #else
       if (tmp_rd < mode_info[ref_mv_idx].rd) {
         // Only update mode_info if the new result is actually better.
         mode_info[ref_mv_idx].mv.as_int = mbmi->mv[0].as_int;
+#if CONFIG_REFINEMV
+        mode_info[ref_mv_idx].rate_mv = tmp_rate_mv;
+#else
         mode_info[ref_mv_idx].rate_mv = rate_mv;
+#endif  // CONFIG_REFINEMV
         mode_info[ref_mv_idx].rd = tmp_rd;
       }
 #endif  // CONFIG_BAWP
 #endif  // CONFIG_FLEX_MVRES
 
-            // Collect mode stats for multiwinner mode processing
-            store_winner_mode_stats(
-                &cpi->common, x, mbmi, rd_stats, rd_stats_y, rd_stats_uv, refs,
-                mbmi->mode, NULL, bsize, tmp_rd,
-                cpi->sf.winner_mode_sf.multi_winner_mode_type, do_tx_search);
-            if (tmp_rd < best_rd) {
-              // Update the best rd stats if we found the best mode so far
-              best_rd_stats = *rd_stats;
-              best_rd_stats_y = *rd_stats_y;
-              best_rd_stats_uv = *rd_stats_uv;
-              best_rd = tmp_rd;
-              best_mbmi = *mbmi;
+                  // Collect mode stats for multiwinner mode processing
+                  store_winner_mode_stats(
+                      &cpi->common, x, mbmi, rd_stats, rd_stats_y, rd_stats_uv,
+                      refs, mbmi->mode, NULL, bsize, tmp_rd,
+                      cpi->sf.winner_mode_sf.multi_winner_mode_type,
+                      do_tx_search);
+                  if (tmp_rd < best_rd) {
+                    // Update the best rd stats if we found the best mode so far
+                    best_rd_stats = *rd_stats;
+                    best_rd_stats_y = *rd_stats_y;
+                    best_rd_stats_uv = *rd_stats_uv;
+                    best_rd = tmp_rd;
+                    best_mbmi = *mbmi;
 #if CONFIG_C071_SUBBLK_WARPMV
-              if (is_warp_mode(mbmi->motion_mode)) {
-                store_submi(xd, cm, best_submi, bsize);
-              }
+                    if (is_warp_mode(mbmi->motion_mode)) {
+                      store_submi(xd, cm, best_submi, bsize);
+                    }
 #endif  // CONFIG_C071_SUBBLK_WARPMV
-              best_xskip_txfm = txfm_info->skip_txfm;
-              memcpy(best_blk_skip, txfm_info->blk_skip,
-                     sizeof(best_blk_skip[0]) * xd->height * xd->width);
-              av1_copy_array(best_tx_type_map, xd->tx_type_map,
-                             xd->height * xd->width);
+                    best_xskip_txfm = txfm_info->skip_txfm;
+                    memcpy(best_blk_skip, txfm_info->blk_skip,
+                           sizeof(best_blk_skip[0]) * xd->height * xd->width);
+                    av1_copy_array(best_tx_type_map, xd->tx_type_map,
+                                   xd->height * xd->width);
 #if CONFIG_CROSS_CHROMA_TX
-              av1_copy_array(best_cctx_type_map, xd->cctx_type_map,
-                             xd->height * xd->width);
+                    av1_copy_array(best_cctx_type_map, xd->cctx_type_map,
+                                   xd->height * xd->width);
 #endif  // CONFIG_CROSS_CHROMA_TX
-              motion_mode_cand->rate_mv = rate_mv;
-              motion_mode_cand->rate2_nocoeff = rate2_nocoeff;
-            }
+
+#if CONFIG_REFINEMV
+                    motion_mode_cand->rate_mv = tmp_rate_mv;
+#else
+        motion_mode_cand->rate_mv = rate_mv;
+#endif  // CONFIG_REFINEMV
+                    motion_mode_cand->rate2_nocoeff = rate2_nocoeff;
+                  }
 #if CONFIG_C071_SUBBLK_WARPMV
 #if CONFIG_FLEX_MVRES
-            assert(check_mv_precision(cm, mbmi, x));
+                  assert(check_mv_precision(cm, mbmi, x));
 #endif
 #else
 #if CONFIG_FLEX_MVRES
@@ -4755,19 +5945,41 @@
 #endif
 #endif  // CONFIG_C071_SUBBLK_WARPMV
 
-            if (tmp_rd < ref_best_rd) {
-              ref_best_rd = tmp_rd;
-              best_ref_mv_idx = ref_mv_idx;
-            }
-          }
-          restore_dst_buf(xd, orig_dst, num_planes);
+#if CONFIG_CWP
+                  if (is_cwp_allowed(mbmi)) {
+                    if (tmp_rd < best_cwp_cost) {
+                      best_cwp_cost = tmp_rd;
+                      best_cwp_idx = mbmi->cwp_idx;
+                    }
+                  }
+#endif  // CONFIG_CWP
+                  if (tmp_rd < ref_best_rd) {
+                    ref_best_rd = tmp_rd;
+#if CONFIG_SEP_COMP_DRL
+                    best_ref_mv_idx[0] = ref_mv_idx[0];
+                    best_ref_mv_idx[1] = ref_mv_idx[1];
+#else
+        best_ref_mv_idx = ref_mv_idx;
+#endif  // CONFIG_SEP_COMP_DRL
+                  }
+                }
+                restore_dst_buf(xd, orig_dst, num_planes);
+#if CONFIG_REFINEMV
+              }
+#endif  // CONFIG_REFINEMV
 #if CONFIG_BAWP
-        }
+            }
 #endif
 #if CONFIG_FLEX_MVRES
-      }
+          }
 #endif
+#if CONFIG_CWP
+        }
+#endif  // CONFIG_CWP
+      }
+#if CONFIG_SEP_COMP_DRL
     }
+#endif  // CONFIG_SEP_COMP_DRL
 #if CONFIG_IMPROVED_JMVD
   }
 #endif  // CONFIG_IMPROVED_JMVD
@@ -4798,7 +6010,7 @@
   return rd_stats->rdcost;
 }
 
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
 // Check if BV is valid
 static INLINE int is_bv_valid(const FULLPEL_MV *full_mv, const AV1_COMMON *cm,
                               const MACROBLOCKD *xd, int mi_row, int mi_col,
@@ -4926,7 +6138,7 @@
   }
   return 0;
 }
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
 
 /*!\brief Search for the best intrabc predictor
  *
@@ -4956,7 +6168,7 @@
   set_mv_precision(mbmi, MV_PRECISION_ONE_PEL);
   set_default_precision_set(cm, mbmi, bsize);
   set_most_probable_mv_precision(cm, mbmi, bsize);
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
   const int is_ibc_cost = 1;
 #endif
 #endif
@@ -4965,6 +6177,10 @@
   mbmi->bawp_flag = 0;
 #endif
 
+#if CONFIG_REFINEMV
+  mbmi->refinemv_flag = 0;
+#endif  // CONFIG_REFINEMV
+
   const int mi_row = xd->mi_row;
   const int mi_col = xd->mi_col;
   const int w = block_size_wide[bsize];
@@ -4996,18 +6212,22 @@
   av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
 
 #if CONFIG_FLEX_MVRES
+#if CONFIG_SEP_COMP_DRL
+  int_mv dv_ref = av1_find_best_ref_mv_from_stack(mbmi_ext, mbmi, ref_frame,
+#else
   int_mv dv_ref = av1_find_best_ref_mv_from_stack(mbmi_ext, ref_frame,
+#endif
                                                   mbmi->pb_mv_precision);
 #else
   int_mv dv_ref = av1_find_best_ref_mv_from_stack(
       /*allow_hp=*/0, mbmi_ext, ref_frame, /*is_integer=*/0);
 #endif
   dv_ref.as_int = dv_ref.as_int == INVALID_MV ? 0 : dv_ref.as_int;
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
   if (mbmi_ext->ref_mv_count[INTRA_FRAME] == 0) {
     dv_ref.as_int = 0;
   }
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
   if (dv_ref.as_int == 0) {
     av1_find_ref_dv(&dv_ref, tile, cm->mib_size, mi_row);
   }
@@ -5046,7 +6266,7 @@
 #if CONFIG_FLEX_MVRES
   av1_make_default_fullpel_ms_params(&fullms_params, cpi, x, bsize,
                                      &dv_ref.as_mv, mbmi->pb_mv_precision,
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
                                      is_ibc_cost,
 #endif
                                      lookahead_search_sites,
@@ -5056,7 +6276,7 @@
                                      &dv_ref.as_mv, lookahead_search_sites,
                                      /*fine_search_interval=*/0);
 #endif
-#if CONFIG_BVCOST_UPDATE && !CONFIG_FLEX_MVRES
+#if CONFIG_IBC_BV_IMPROVEMENT && !CONFIG_FLEX_MVRES
   // The costs for block vector are stored in x->dv_costs. Assign the costs
   // to mv_cost_params for motion search.
   fullms_params.mv_cost_params.mvjcost = x->dv_costs.joint_mv;
@@ -5064,7 +6284,7 @@
       (int *)&x->dv_costs.mv_component[0][MV_MAX];
   fullms_params.mv_cost_params.mvcost[1] =
       (int *)&x->dv_costs.mv_component[1][MV_MAX];
-#endif  // CONFIG_BVCOST_UPDATE
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
 
   fullms_params.is_intra_mode = 1;
 #if CONFIG_IBC_SR_EXT
@@ -5074,17 +6294,20 @@
   fullms_params.mi_col = mi_col;
   fullms_params.mi_row = mi_row;
 #endif  // CONFIG_IBC_SR_EXT
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
   fullms_params.x = x;
   fullms_params.cm = cm;
   fullms_params.ref_bv_cnt = mbmi_ext->ref_mv_count[INTRA_FRAME];
   mbmi->intrabc_mode = 0;
   mbmi->intrabc_drl_idx = 0;
   mbmi->ref_bv.as_int = 0;
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
 #if CONFIG_WARP_REF_LIST
   mbmi->warp_ref_idx = 0;
   mbmi->max_num_warp_candidates = 0;
+#if CONFIG_CWG_D067_IMPROVED_WARP
+  mbmi->warpmv_with_mvd_flag = 0;
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
 #endif  // CONFIG_WARP_REF_LIST
   mbmi->motion_mode = SIMPLE_TRANSLATION;
   for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE;
@@ -5166,7 +6389,7 @@
     assert(fullms_params.mv_limits.row_min >= fullms_params.mv_limits.row_min);
     assert(fullms_params.mv_limits.row_max <= fullms_params.mv_limits.row_max);
 
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
     FULLPEL_MOTION_SEARCH_PARAMS fullms_params_init = fullms_params;
     int best_ref_bv_cost = INT_MAX;
     int_mv best_bv;
@@ -5190,7 +6413,7 @@
     mbmi->ref_bv = dv_ref;
     int best_intrabc_drl_idx = mbmi->intrabc_drl_idx;
     int best_intrabc_mode = mbmi->intrabc_mode;
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
 
     av1_set_mv_search_range(&fullms_params.mv_limits, &dv_ref.as_mv
 
@@ -5212,7 +6435,7 @@
 
     int bestsme = av1_full_pixel_search(start_mv, &fullms_params, step_param,
                                         NULL, &best_mv.as_fullmv, NULL);
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
     if (bestsme != INT_MAX && is_bv_valid(&best_mv.as_fullmv, cm, xd, mi_row,
                                           mi_col, bsize, fullms_params)) {
       int cur_ref_bv_cost = bestsme;
@@ -5244,12 +6467,12 @@
         best_bv.as_mv = cur_bv.as_mv;
       }
     }
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
 
     const int hashsme = av1_intrabc_hash_search(
         cpi, xd, &fullms_params, intrabc_hash_info, &best_hash_mv.as_fullmv);
 
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
     if (hashsme != INT_MAX &&
         is_bv_valid(&best_hash_mv.as_fullmv, cm, xd, mi_row, mi_col, bsize,
                     fullms_params)) {
@@ -5299,7 +6522,7 @@
       continue;
     if (!av1_is_dv_valid(dv, cm, xd, mi_row, mi_col, bsize, cm->mib_size_log2))
       continue;
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
 
     // DV should not have sub-pel.
     assert((dv.col & 7) == 0);
@@ -5322,10 +6545,16 @@
     mbmi->mv[0].as_mv = dv;
     mbmi->interp_fltr = BILINEAR;
     mbmi->skip_txfm[xd->tree_type == CHROMA_PART] = 0;
+#if CONFIG_CWP
+    mbmi->cwp_idx = CWP_EQUAL;
+#endif  // CONFIG_CWP
 
 #if CONFIG_WARP_REF_LIST
     mbmi->warp_ref_idx = 0;
     mbmi->max_num_warp_candidates = 0;
+#if CONFIG_CWG_D067_IMPROVED_WARP
+    mbmi->warpmv_with_mvd_flag = 0;
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
 #endif  // CONFIG_WARP_REF_LIST
 
     mbmi->motion_mode = SIMPLE_TRANSLATION;
@@ -5336,7 +6565,7 @@
 #if CONFIG_FLEX_MVRES
     const IntraBCMvCosts *const dv_costs = &x->dv_costs;
 #else
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
     const IntraBCMVCosts *const dv_costs = &x->dv_costs;
 #else
     const IntraBCMVCosts *const dv_costs = &cpi->dv_costs;
@@ -5345,7 +6574,7 @@
                        (int *)&dv_costs->mv_component[1][MV_MAX] };
 #endif
 
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
     int rate_mv = 0;
     if (!mbmi->intrabc_mode)
 #if CONFIG_FLEX_MVRES
@@ -5383,7 +6612,7 @@
 #else
     const int rate_mode = x->mode_costs.intrabc_cost[1];
 #endif  // CONFIG_NEW_CONTEXT_MODELING
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
 
     RD_STATS rd_stats_yuv, rd_stats_y, rd_stats_uv;
     if (!av1_txfm_search(cpi, x, bsize, &rd_stats_yuv, &rd_stats_y,
@@ -5405,13 +6634,13 @@
     }
   }
   *mbmi = best_mbmi;
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
   if (mbmi->use_intrabc[xd->tree_type == CHROMA_PART]) {
     mbmi_ext->ref_mv_stack[INTRA_FRAME][0].this_mv = mbmi->ref_bv;
   } else {
     mbmi_ext->ref_mv_stack[INTRA_FRAME][0].this_mv.as_int = 0;
   }
-#endif  // CONFIG_BVP_IMPROVEMENT
+#endif  // CONFIG_IBC_BV_IMPROVEMENT
 
   *rd_stats = best_rdstats;
   memcpy(txfm_info->blk_skip, best_blk_skip,
@@ -5483,9 +6712,13 @@
     }
 
     // Intra block is always coded as non-skip
+#if CONFIG_SKIP_TXFM_OPT
+    rd_cost->rate = rate_y + rate_uv;
+#else
     rd_cost->rate =
         rate_y + rate_uv +
         x->mode_costs.skip_txfm_cost[av1_get_skip_txfm_context(xd)][0];
+#endif  // CONFIG_SKIP_TXFM_OPT
     rd_cost->dist = dist_y + dist_uv;
     rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
     rd_cost->skip_txfm = 0;
@@ -5507,9 +6740,12 @@
   if (xd->tree_type != CHROMA_PART)
     av1_copy_mbmi_ext_to_mbmi_ext_frame(
         &ctx->mbmi_ext_best, x->mbmi_ext,
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SEP_COMP_DRL
+        xd->mi[0],
+#endif
+#if CONFIG_SKIP_MODE_ENHANCEMENT
         mbmi->skip_mode,
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
         av1_ref_frame_type(xd->mi[0]->ref_frame));
   av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
 }
@@ -5551,8 +6787,12 @@
   const MV_REFERENCE_FRAME second_ref_frame = skip_mode_info->ref_frame_idx_1;
 
 #if CONFIG_OPTFLOW_REFINEMENT
-  const PREDICTION_MODE this_mode =
-      cm->features.opfl_refine_type ? NEAR_NEARMV_OPTFLOW : NEAR_NEARMV;
+  const PREDICTION_MODE this_mode = cm->features.opfl_refine_type
+#if CONFIG_CWP
+                                            && !cm->features.enable_cwp
+#endif  // CONFIG_CWP
+                                        ? NEAR_NEARMV_OPTFLOW
+                                        : NEAR_NEARMV;
 #else
   const PREDICTION_MODE this_mode = NEAR_NEARMV;
 #endif  // CONFIG_OPTFLOW_REFINEMENT
@@ -5564,19 +6804,34 @@
   }
 
   mbmi->mode = this_mode;
+#if CONFIG_SEP_COMP_DRL
+  mbmi->ref_mv_idx[0] = 0;
+  mbmi->ref_mv_idx[1] = 0;
+#else
   mbmi->ref_mv_idx = 0;
+#endif
   mbmi->uv_mode = UV_DC_PRED;
   mbmi->ref_frame[0] = ref_frame;
   mbmi->ref_frame[1] = second_ref_frame;
+#if CONFIG_CWP
+  mbmi->cwp_idx = CWP_EQUAL;
+#endif  // CONFIG_CWP
 #if CONFIG_IBC_SR_EXT
   mbmi->use_intrabc[xd->tree_type == CHROMA_PART] = 0;
 #endif  // CONFIG_IBC_SR_EXT
 #if CONFIG_WARP_REF_LIST
   mbmi->warp_ref_idx = 0;
   mbmi->max_num_warp_candidates = 0;
+#if CONFIG_CWG_D067_IMPROVED_WARP
+  mbmi->warpmv_with_mvd_flag = 0;
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
 #endif  // CONFIG_WARP_REF_LIST
 
-#if !CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_REFINEMV
+  mbmi->refinemv_flag = 0;
+#endif  // CONFIG_REFINEMV
+
+#if !CONFIG_SKIP_MODE_ENHANCEMENT
   const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
   if (x->mbmi_ext->ref_mv_count[ref_frame_type] == UINT8_MAX) {
     if (x->mbmi_ext->ref_mv_count[ref_frame] == UINT8_MAX ||
@@ -5600,25 +6855,35 @@
     // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
     av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame_type);
   }
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
 
 #if CONFIG_OPTFLOW_REFINEMENT
+#if CONFIG_CWP
+  assert(this_mode == (cm->features.opfl_refine_type && !cm->features.enable_cwp
+                           ? NEAR_NEARMV_OPTFLOW
+                           : NEAR_NEARMV));
+  assert(mbmi->mode ==
+         (cm->features.opfl_refine_type && !cm->features.enable_cwp
+              ? NEAR_NEARMV_OPTFLOW
+              : NEAR_NEARMV));
+#else   // CONFIG_CWP
   assert(this_mode ==
          (cm->features.opfl_refine_type ? NEAR_NEARMV_OPTFLOW : NEAR_NEARMV));
   assert(mbmi->mode ==
          (cm->features.opfl_refine_type ? NEAR_NEARMV_OPTFLOW : NEAR_NEARMV));
+#endif  // CONFIG_CWP
 #else
   assert(this_mode == NEAR_NEARMV);
   assert(mbmi->mode == NEAR_NEARMV);
 #endif
 
-  assert(mbmi->ref_mv_idx == 0);
-#if !CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if !CONFIG_SKIP_MODE_ENHANCEMENT
   if (!build_cur_mv(mbmi->mv, this_mode, cm, x, 0)) {
     assert(av1_check_newmv_joint_nonzero(cm, x));
     return;
   }
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
+
   mbmi->fsc_mode[xd->tree_type == CHROMA_PART] = 0;
 #if CONFIG_BAWP
   mbmi->bawp_flag = 0;
@@ -5629,7 +6894,12 @@
   mbmi->comp_group_idx = 0;
   mbmi->interinter_comp.type = COMPOUND_AVERAGE;
   mbmi->motion_mode = SIMPLE_TRANSLATION;
+#if CONFIG_SEP_COMP_DRL
+  mbmi->ref_mv_idx[0] = 0;
+  mbmi->ref_mv_idx[1] = 0;
+#else
   mbmi->ref_mv_idx = 0;
+#endif  // CONFIG_SEP_COMP_DRL
   mbmi->skip_mode = mbmi->skip_txfm[xd->tree_type == CHROMA_PART] = 1;
 
 #if CONFIG_FLEX_MVRES
@@ -5642,6 +6912,9 @@
 #if CONFIG_WARP_REF_LIST
   mbmi->warp_ref_idx = 0;
   mbmi->max_num_warp_candidates = 0;
+#if CONFIG_CWG_D067_IMPROVED_WARP
+  mbmi->warpmv_with_mvd_flag = 0;
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
 #endif  // CONFIG_WARP_REF_LIST
 
   set_default_interp_filters(mbmi,
@@ -5650,7 +6923,7 @@
 #endif  // CONFIG_OPTFLOW_REFINEMENT
                              cm->features.interp_filter);
 
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
   const int mi_row = xd->mi_row;
   const int mi_col = xd->mi_col;
 
@@ -5690,17 +6963,54 @@
   // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
   av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame_type);
 
+#if CONFIG_REFINEMV
+  mbmi->mode = this_mode;
+#endif  // CONFIG_REFINEMV
   // loop of ref_mv_idx
+#if CONFIG_SEP_COMP_DRL
+  assert(!has_second_drl(mbmi));
+  int ref_set = get_drl_refmv_count(cm->features.max_drl_bits, x,
+                                    mbmi->ref_frame, this_mode, 0);
+#else
   int ref_set = get_drl_refmv_count(cm->features.max_drl_bits, x,
                                     mbmi->ref_frame, this_mode);
+#endif
 
   for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ref_mv_idx++) {
+#if CONFIG_SEP_COMP_DRL
+    mbmi->ref_mv_idx[0] = ref_mv_idx;
+    mbmi->ref_frame[0] =
+        xd->skip_mvp_candidate_list.ref_frame0[mbmi->ref_mv_idx[0]];
+    mbmi->ref_frame[1] =
+        xd->skip_mvp_candidate_list.ref_frame1[mbmi->ref_mv_idx[0]];
+#else
     mbmi->ref_mv_idx = ref_mv_idx;
 
     mbmi->ref_frame[0] =
         xd->skip_mvp_candidate_list.ref_frame0[mbmi->ref_mv_idx];
     mbmi->ref_frame[1] =
         xd->skip_mvp_candidate_list.ref_frame1[mbmi->ref_mv_idx];
+#endif
+
+#if CONFIG_CWP
+    // Infer the index of compound weighted prediction from DRL list
+    mbmi->cwp_idx =
+#if CONFIG_SEP_COMP_DRL
+        xd->skip_mvp_candidate_list.ref_mv_stack[mbmi->ref_mv_idx[0]].cwp_idx;
+#else
+        xd->skip_mvp_candidate_list.ref_mv_stack[mbmi->ref_mv_idx].cwp_idx;
+#endif
+#endif  // CONFIG_CWP
+
+#if CONFIG_REFINEMV
+    mbmi->refinemv_flag = (
+#if CONFIG_CWP
+                              mbmi->cwp_idx == CWP_EQUAL &&
+#endif
+                              is_refinemv_allowed_skip_mode(cm, mbmi))
+                              ? 1
+                              : 0;
+#endif  // CONFIG_REFINEMV
 
     if (!build_cur_mv(mbmi->mv, this_mode, cm, x, 0)) {
       assert(av1_check_newmv_joint_nonzero(cm, x));
@@ -5723,7 +7033,7 @@
       orig_dst.plane[i] = xd->plane[i].dst.buf;
       orig_dst.stride[i] = xd->plane[i].dst.stride;
     }
-#else  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#else  // CONFIG_SKIP_MODE_ENHANCEMENT
   set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
   for (int i = 0; i < num_planes; i++) {
     xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
@@ -5748,6 +7058,7 @@
     av1_rd_cost_update(x->rdmult, best_rd_cost);
     search_state->best_rd = best_rd_cost->rdcost;
   }
+
   // loop of ref_mv_idx
   const int ref_set = get_drl_refmv_count(cm->features.max_drl_bits, x,
                                           mbmi->ref_frame, this_mode);
@@ -5763,7 +7074,7 @@
       assert(av1_check_newmv_joint_nonzero(cm, x));
       continue;
     }
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
 
     av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize, 0,
                                   av1_num_planes(cm) - 1);
@@ -5776,12 +7087,14 @@
     skip_mode_rd_stats.rate = mode_costs->skip_mode_cost[skip_mode_ctx][1];
 
     // add ref_mv_idx rate
+    // The DRL cost is taken from get_skip_drl_cost() when
+    // CONFIG_SKIP_MODE_ENHANCEMENT is enabled, else from get_drl_cost().
     const int drl_cost =
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
         get_skip_drl_cost(cpi->common.features.max_drl_bits, mbmi, x);
 #else
         get_drl_cost(cpi->common.features.max_drl_bits, mbmi, mbmi_ext, x);
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
     skip_mode_rd_stats.rate += drl_cost;
 
     // Do transform search
@@ -5807,17 +7120,22 @@
 
       search_state->best_mbmode.fsc_mode[xd->tree_type == CHROMA_PART] = 0;
 
-#if CONFIG_OPTFLOW_REFINEMENT
-      search_state->best_mbmode.mode =
-          (cm->features.opfl_refine_type ? NEAR_NEARMV_OPTFLOW : NEAR_NEARMV);
-#else
-      search_state->best_mbmode.mode = NEAR_NEARMV;
-#endif  // CONFIG_OPTFLOW_REFINEMENT
+      search_state->best_mbmode.mode = (cm->features.opfl_refine_type
+#if CONFIG_CWP
+                                                && !cm->features.enable_cwp
+#endif  // CONFIG_CWP
+                                            ? NEAR_NEARMV_OPTFLOW
+                                            : NEAR_NEARMV);
       search_state->best_mbmode.ref_frame[0] = mbmi->ref_frame[0];
       search_state->best_mbmode.ref_frame[1] = mbmi->ref_frame[1];
       search_state->best_mbmode.mv[0].as_int = mbmi->mv[0].as_int;
       search_state->best_mbmode.mv[1].as_int = mbmi->mv[1].as_int;
+#if CONFIG_SEP_COMP_DRL
+      search_state->best_mbmode.ref_mv_idx[0] = mbmi->ref_mv_idx[0];
+      search_state->best_mbmode.ref_mv_idx[1] = mbmi->ref_mv_idx[1];
+#else
       search_state->best_mbmode.ref_mv_idx = mbmi->ref_mv_idx;
+#endif
 
       // Set up tx_size related variables for skip-specific loop filtering.
       if (search_state->best_mbmode.skip_txfm[xd->tree_type == CHROMA_PART]) {
@@ -5837,6 +7155,10 @@
         x->txfm_search_info.skip_txfm = 1;
         search_state->best_mode_skippable = 1;
         search_state->best_skip2 = 1;
+#if CONFIG_SKIP_TXFM_OPT
+        search_state->best_rate_y =
+            x->mode_costs.skip_txfm_cost[av1_get_skip_txfm_context(xd)][1];
+#endif  // CONFIG_SKIP_TXFM_OPT
 
         restore_dst_buf(xd, orig_dst, num_planes);
       } else {
@@ -5873,6 +7195,9 @@
                                  cm,
 #endif  // CONFIG_OPTFLOW_REFINEMENT
                                  cm->features.interp_filter);
+#if CONFIG_REFINEMV
+      search_state->best_mbmode.refinemv_flag = mbmi->refinemv_flag;
+#endif  // CONFIG_REFINEMV
 
       // Update rd_cost
       best_rd_cost->rate = skip_mode_rd_stats.rate;
@@ -5916,7 +7241,12 @@
   }
 
   mbmi->mode = this_mode;
+#if CONFIG_SEP_COMP_DRL
+  mbmi->ref_mv_idx[0] = 0;
+  mbmi->ref_mv_idx[1] = 0;
+#else
   mbmi->ref_mv_idx = 0;
+#endif  // CONFIG_SEP_COMP_DRL
   mbmi->uv_mode = UV_DC_PRED;
   mbmi->ref_frame[0] = ref_frame;
   mbmi->ref_frame[1] = second_ref_frame;
@@ -5960,7 +7290,12 @@
   mbmi->comp_group_idx = 0;
   mbmi->interinter_comp.type = COMPOUND_AVERAGE;
   mbmi->motion_mode = SIMPLE_TRANSLATION;
+#if CONFIG_SEP_COMP_DRL
+  mbmi->ref_mv_idx[0] = 0;
+  mbmi->ref_mv_idx[1] = 0;
+#else
   mbmi->ref_mv_idx = 0;
+#endif  // CONFIG_SEP_COMP_DRL
   mbmi->skip_mode = mbmi->skip_txfm[xd->tree_type == CHROMA_PART] = 1;
 
   set_default_interp_filters(mbmi,
@@ -5972,6 +7307,10 @@
   set_mv_precision(mbmi, mbmi->max_mv_precision);
 #endif
 
+#if CONFIG_REFINEMV
+  mbmi->refinemv_flag = 0;
+#endif  // CONFIG_REFINEMV
+
   set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
   for (int i = 0; i < num_planes; i++) {
     xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
@@ -6011,7 +7350,12 @@
     search_state->best_mbmode.ref_frame[1] = mbmi->ref_frame[1];
     search_state->best_mbmode.mv[0].as_int = mbmi->mv[0].as_int;
     search_state->best_mbmode.mv[1].as_int = mbmi->mv[1].as_int;
+#if CONFIG_SEP_COMP_DRL
+    search_state->best_mbmode.ref_mv_idx[0] = 0;
+    search_state->best_mbmode.ref_mv_idx[1] = 0;
+#else
     search_state->best_mbmode.ref_mv_idx = 0;
+#endif
 
 #if CONFIG_FLEX_MVRES
     search_state->best_mbmode.pb_mv_precision = mbmi->max_mv_precision;
@@ -6179,6 +7523,11 @@
         av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                           INT64_MAX);
       }
+#if CONFIG_ATC_DCTX_ALIGNED
+      // Occasionally TX search will be unable to find a best mode decision.
+      // This case needs to be skipped to avoid integer overflows.
+      if (rd_stats_y.rate == INT_MAX) continue;
+#endif  // CONFIG_ATC_DCTX_ALIGNED
 
       if (num_planes > 1) {
         av1_txfm_uvrd(cpi, x, &rd_stats_uv, INT64_MAX);
@@ -6201,7 +7550,13 @@
         rd_stats_uv.dist = rd_stats_uv.sse;
       } else {
         skip_blk = 0;
+#if CONFIG_SKIP_TXFM_OPT
+        rd_stats_y.rate += is_inter_block(mbmi, xd->tree_type)
+                               ? mode_costs->skip_txfm_cost[skip_ctx][0]
+                               : 0;
+#else
         rd_stats_y.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
+#endif  // CONFIG_SKIP_TXFM_OPT
       }
       int this_rate = rd_stats.rate + rd_stats_y.rate + rd_stats_uv.rate -
                       winner_rate_y - winner_rate_uv;
@@ -6676,6 +8031,7 @@
   av1_zero(search_state->single_newmv);
   av1_zero(search_state->single_newmv_rate);
   av1_zero(search_state->single_newmv_valid);
+
   for (int i = 0; i < MB_MODE_COUNT; ++i) {
     for (int j = 0; j < MAX_REF_MV_SEARCH; ++j) {
       for (int ref_frame = 0; ref_frame < SINGLE_REF_FRAMES; ++ref_frame) {
@@ -6991,7 +8347,12 @@
 ) {
 
   PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
+#if CONFIG_SEP_COMP_DRL
+  mbmi->ref_mv_idx[0] = 0;
+  mbmi->ref_mv_idx[1] = 0;
+#else
   mbmi->ref_mv_idx = 0;
+#endif
   mbmi->mode = curr_mode;
   mbmi->uv_mode = UV_DC_PRED;
   mbmi->ref_frame[0] = ref_frames[0];
@@ -7000,8 +8361,14 @@
   pmi->palette_size[1] = 0;
   mbmi->filter_intra_mode_info.use_filter_intra = 0;
   mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
+#if CONFIG_CWP
+  mbmi->cwp_idx = CWP_EQUAL;
+#endif  // CONFIG_CWP
   mbmi->motion_mode = SIMPLE_TRANSLATION;
   mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
+#if CONFIG_REFINEMV
+  mbmi->refinemv_flag = 0;
+#endif  // CONFIG_REFINEMV
   set_default_interp_filters(mbmi,
 #if CONFIG_OPTFLOW_REFINEMENT
                              cm,
@@ -7020,10 +8387,16 @@
 #if CONFIG_WARP_REF_LIST
   mbmi->warp_ref_idx = 0;
   mbmi->max_num_warp_candidates = 0;
+#if CONFIG_CWG_D067_IMPROVED_WARP
+  mbmi->warpmv_with_mvd_flag = 0;
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
 #endif  // CONFIG_WARP_REF_LIST
 #if CONFIG_BAWP
   mbmi->bawp_flag = 0;
 #endif
+#if CONFIG_IMPROVED_JMVD && CONFIG_JOINT_MVD
+  mbmi->jmvd_scale_mode = 0;
+#endif  // CONFIG_IMPROVED_JMVD && CONFIG_JOINT_MVD
 }
 
 #if CONFIG_C071_SUBBLK_WARPMV
@@ -7045,8 +8418,14 @@
   const MV_REFERENCE_FRAME ref_frame = COMPACT_INDEX0_NRS(mbmi->ref_frame[0]);
   const int dir = get_dir_rank(cm, mbmi->ref_frame[0], NULL);
   const int mode_offset = INTER_OFFSET(this_mode);
+#if CONFIG_SEP_COMP_DRL
+  const int ref_set = get_drl_refmv_count(features->max_drl_bits, x,
+                                          mbmi->ref_frame, this_mode, 0);
+  assert(!has_second_drl(mbmi));
+#else
   const int ref_set = get_drl_refmv_count(features->max_drl_bits, x,
                                           mbmi->ref_frame, this_mode);
+#endif
 
   // Simple rd
   int64_t simple_rd = search_state->simple_rd[this_mode][0][ref_frame];
@@ -7233,13 +8612,19 @@
       }
     }
   }
-
+#if !CONFIG_SEP_COMP_DRL
   const int ref_set = get_drl_refmv_count(cpi->common.features.max_drl_bits, x,
                                           refs, this_mode);
+#endif
   for (i = 0; i < 2; ++i) {
     if (!ref_searched[i] || (mode[i] != NEARMV)) {
       continue;
     }
+#if CONFIG_SEP_COMP_DRL
+    const int ref_set = get_drl_refmv_count(cpi->common.features.max_drl_bits,
+                                            x, refs, this_mode, i);
+#endif
+
     const MV_REFERENCE_FRAME single_refs[2] = { refs[i], NONE_FRAME };
     for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ref_mv_idx++) {
       int_mv single_mv;
@@ -7404,8 +8789,16 @@
   if (txfm_search_done) {
     search_state->best_rate_y =
         new_best_rd_stats_y->rate +
+#if CONFIG_SKIP_TXFM_OPT
+        (mode_is_intra
+             ? 0
+             : (x->mode_costs
+                    .skip_txfm_cost[skip_ctx][new_best_rd_stats->skip_txfm ||
+                                              skip_txfm]));
+#else
         x->mode_costs.skip_txfm_cost[skip_ctx]
                                     [new_best_rd_stats->skip_txfm || skip_txfm];
+#endif  // CONFIG_SKIP_TXFM_OPT
     search_state->best_rate_uv = new_best_rd_stats_uv->rate;
   }
   memcpy(ctx->blk_skip, txfm_info->blk_skip,
@@ -7565,14 +8958,14 @@
   }
 #endif  // CONFIG_TIP
 
-#if CONFIG_WARPMV
-  if (this_mode == WARPMV) return 0;
-#endif  // CONFIG_WARPMV
-
   // Check if this mode should be skipped because it is incompatible with the
   // current frame
   if (inter_mode_compatible_skip(cpi, x, bsize, this_mode, ref_frames))
     return 1;
+
+#if CONFIG_WARPMV
+  if (this_mode == WARPMV) return 0;
+#endif
   const int ret = inter_mode_search_order_independent_skip(
       cpi, x, args->mode_skip_mask, args->search_state,
       args->skip_ref_frame_mask, this_mode, ref_frames);
@@ -7888,22 +9281,34 @@
     INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES
   };
 
-  HandleInterModeArgs args = { { NULL },
-                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
-                               { NULL },
-                               { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
-                                 MAX_SB_SIZE >> 1 },
-                               NULL,
-                               NULL,
-                               NULL,
-                               search_state.modelled_rd,
-                               INT_MAX,
-                               INT_MAX,
-                               search_state.simple_rd,
-                               0,
-                               interintra_modes,
-                               { { 0, { { 0 } }, { 0 }, 0, 0, 0 } },
-                               0 };
+#if CONFIG_SKIP_ME_FOR_OPFL_MODES
+  int_mv comp_newmv[MODE_CTX_REF_FRAMES][4][NUM_MV_PRECISIONS][2];
+  int comp_newmv_valid[MODE_CTX_REF_FRAMES][4][NUM_MV_PRECISIONS];
+  av1_zero(comp_newmv_valid);
+#endif  // CONFIG_SKIP_ME_FOR_OPFL_MODES
+
+  HandleInterModeArgs args = {
+    { NULL },
+    { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
+    { NULL },
+    { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1 },
+    NULL,
+    NULL,
+    NULL,
+    search_state.modelled_rd,
+    INT_MAX,
+    INT_MAX,
+    search_state.simple_rd,
+    0,
+    interintra_modes,
+    { { 0, { { 0 } }, { 0 }, 0, 0, 0 } },
+    0
+#if CONFIG_SKIP_ME_FOR_OPFL_MODES
+    ,
+    comp_newmv,
+    comp_newmv_valid
+#endif  // CONFIG_SKIP_ME_FOR_OPFL_MODES
+  };
 
   // Indicates the appropriate number of simple translation winner modes for
   // exhaustive motion mode evaluation
@@ -8013,6 +9418,13 @@
   mbmi->bawp_flag = 0;
 #endif
 
+#if CONFIG_REFINEMV
+  mbmi->refinemv_flag = 0;
+#endif  // CONFIG_REFINEMV
+
+#if CONFIG_SEP_COMP_DRL
+  mbmi->mode = NEARMV;
+#endif
   // init params, set frame modes, speed features
   set_params_rd_pick_inter_mode(cpi, x, &args, bsize, &mode_skip_mask,
                                 skip_ref_frame_mask, ref_costs_single,
@@ -8289,10 +9701,18 @@
         mbmi->angle_delta[PLANE_TYPE_Y] = 0;
         mbmi->angle_delta[PLANE_TYPE_UV] = 0;
         mbmi->filter_intra_mode_info.use_filter_intra = 0;
+#if CONFIG_SEP_COMP_DRL
+        mbmi->ref_mv_idx[0] = 0;
+        mbmi->ref_mv_idx[1] = 0;
+#else
         mbmi->ref_mv_idx = 0;
+#endif
 #if CONFIG_WARP_REF_LIST
         mbmi->warp_ref_idx = 0;
         mbmi->max_num_warp_candidates = 0;
+#if CONFIG_CWG_D067_IMPROVED_WARP
+        mbmi->warpmv_with_mvd_flag = 0;
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
 #endif  // CONFIG_WARP_REF_LIST
         const int64_t ref_best_rd = search_state.best_rd;
         RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
@@ -8601,23 +10021,24 @@
   set_mode_eval_params(cpi, x, DEFAULT_EVAL);
 
   // Only try palette mode when the best mode so far is an intra mode.
-  int try_palette = cpi->oxcf.tool_cfg.enable_palette &&
-                    av1_allow_palette(features->allow_screen_content_tools,
-                                      mbmi->sb_type[PLANE_TYPE_Y]) &&
-                    !is_inter_mode(search_state.best_mbmode.mode) &&
-                    rd_cost->rate < INT_MAX;
+  const int try_palette =
+      cpi->oxcf.tool_cfg.enable_palette &&
+      av1_allow_palette(features->allow_screen_content_tools,
+                        mbmi->sb_type[PLANE_TYPE_Y]) &&
+      !is_inter_mode(search_state.best_mbmode.mode) && rd_cost->rate < INT_MAX;
+  int search_palette_mode = try_palette;
 #if CONFIG_EXT_RECUR_PARTITIONS
   const MB_MODE_INFO *cached_mode = x->inter_mode_cache;
   if (should_reuse_mode(x, REUSE_INTRA_MODE_IN_INTERFRAME_FLAG) &&
       cached_mode &&
       !(cached_mode->mode == DC_PRED &&
         cached_mode->palette_mode_info.palette_size[0] > 0)) {
-    try_palette = 0;
+    search_palette_mode = 0;
   }
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
   RD_STATS this_rd_cost;
   int this_skippable = 0;
-  if (try_palette) {
+  if (search_palette_mode) {
     this_skippable = av1_search_palette_mode(
         &search_state.intra_search_state, cpi, x, bsize, intra_ref_frame_cost,
         ctx, &this_rd_cost, search_state.best_rd);
@@ -8670,6 +10091,9 @@
 #if CONFIG_WARP_REF_LIST
       mbmi->warp_ref_idx = 0;
       mbmi->max_num_warp_candidates = 0;
+#if CONFIG_CWG_D067_IMPROVED_WARP
+      mbmi->warpmv_with_mvd_flag = 0;
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
 #endif  // CONFIG_WARP_REF_LIST
       rd_pick_intrabc_mode_sb(cpi, x, ctx, &this_rd_cost, bsize, INT64_MAX);
 
@@ -8697,13 +10121,23 @@
 
   // Make sure that the ref_mv_idx is only nonzero when we're
   // using a mode which can support ref_mv_idx
+#if CONFIG_SEP_COMP_DRL
+  if ((search_state.best_mbmode.ref_mv_idx[0] != 0 ||
+       search_state.best_mbmode.ref_mv_idx[1] != 0) &&
+#else
   if (search_state.best_mbmode.ref_mv_idx != 0 &&
+#endif
       !(have_newmv_in_each_reference(search_state.best_mbmode.mode) ||
 #if CONFIG_JOINT_MVD
         is_joint_mvd_coding_mode(search_state.best_mbmode.mode) ||
 #endif  // CONFIG_JOINT_MVD
         have_nearmv_in_inter_mode(search_state.best_mbmode.mode))) {
+#if CONFIG_SEP_COMP_DRL
+    search_state.best_mbmode.ref_mv_idx[0] = 0;
+    search_state.best_mbmode.ref_mv_idx[1] = 0;
+#else
     search_state.best_mbmode.ref_mv_idx = 0;
+#endif
   }
 
   if (search_state.best_mbmode.mode == MODE_INVALID ||
@@ -8883,7 +10317,15 @@
   mbmi->tx_size = max_txsize_lookup[bsize];
   x->txfm_search_info.skip_txfm = 1;
 
+#if CONFIG_SEP_COMP_DRL
+  mbmi->ref_mv_idx[0] = 0;
+  mbmi->ref_mv_idx[1] = 0;
+#else
   mbmi->ref_mv_idx = 0;
+#endif  // CONFIG_SEP_COMP_DRL
+#if CONFIG_CWP
+  mbmi->cwp_idx = CWP_EQUAL;
+#endif  // CONFIG_CWP
 
   mbmi->motion_mode = SIMPLE_TRANSLATION;
 #if CONFIG_FLEX_MVRES
@@ -8896,6 +10338,9 @@
 #if CONFIG_BAWP
   mbmi->bawp_flag = 0;
 #endif
+#if CONFIG_REFINEMV
+  mbmi->refinemv_flag = 0;
+#endif  // CONFIG_REFINEMV
 
   av1_count_overlappable_neighbors(cm, xd);
   if (is_motion_variation_allowed_bsize(bsize, mi_row, mi_col) &&
@@ -9119,9 +10564,9 @@
         AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
     struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, above,
                                                    above_stride, overlap };
-    foreach_overlappable_nb_above(cm, (MACROBLOCKD *)xd,
-                                  max_neighbor_obmc[mi_size_wide_log2[bsize]],
-                                  calc_target_weighted_pred_above, &ctxt);
+    foreach_overlappable_nb_above(
+        cm, (MACROBLOCKD *)xd, max_neighbor_obmc[mi_size_wide_log2[bsize]],
+        calc_target_weighted_pred_above, &ctxt, false);
   }
 
   for (int i = 0; i < bw * bh; ++i) {
diff --git a/av1/encoder/rdopt.h b/av1/encoder/rdopt.h
index 17b043f..de2167f 100644
--- a/av1/encoder/rdopt.h
+++ b/av1/encoder/rdopt.h
@@ -177,17 +177,38 @@
 static INLINE void av1_copy_usable_ref_mv_stack_and_weight(
     const MACROBLOCKD *xd, MB_MODE_INFO_EXT *const mbmi_ext,
     MV_REFERENCE_FRAME ref_frame) {
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
   if (xd->mi[0]->skip_mode) {
     memcpy(&(mbmi_ext->skip_mvp_candidate_list), &(xd->skip_mvp_candidate_list),
            sizeof(xd->skip_mvp_candidate_list));
     return;
   }
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
+#if CONFIG_SEP_COMP_DRL
+  if (has_second_drl(xd->mi[0])) {
+    MV_REFERENCE_FRAME rf[2];
+    av1_set_ref_frame(rf, ref_frame);
+    if (rf[1] < 0) rf[1] = 0;
+    memcpy(mbmi_ext->weight[rf[0]], xd->weight[rf[0]],
+           USABLE_REF_MV_STACK_SIZE * sizeof(xd->weight[0][0]));
+    memcpy(mbmi_ext->ref_mv_stack[rf[0]], xd->ref_mv_stack[rf[0]],
+           USABLE_REF_MV_STACK_SIZE * sizeof(xd->ref_mv_stack[0][0]));
+    memcpy(mbmi_ext->weight[rf[1]], xd->weight[rf[1]],
+           USABLE_REF_MV_STACK_SIZE * sizeof(xd->weight[0][0]));
+    memcpy(mbmi_ext->ref_mv_stack[rf[1]], xd->ref_mv_stack[rf[1]],
+           USABLE_REF_MV_STACK_SIZE * sizeof(xd->ref_mv_stack[0][0]));
+  } else {
+    memcpy(mbmi_ext->weight[ref_frame], xd->weight[ref_frame],
+           USABLE_REF_MV_STACK_SIZE * sizeof(xd->weight[0][0]));
+    memcpy(mbmi_ext->ref_mv_stack[ref_frame], xd->ref_mv_stack[ref_frame],
+           USABLE_REF_MV_STACK_SIZE * sizeof(xd->ref_mv_stack[0][0]));
+  }
+#else
   memcpy(mbmi_ext->weight[ref_frame], xd->weight[ref_frame],
          USABLE_REF_MV_STACK_SIZE * sizeof(xd->weight[0][0]));
   memcpy(mbmi_ext->ref_mv_stack[ref_frame], xd->ref_mv_stack[ref_frame],
          USABLE_REF_MV_STACK_SIZE * sizeof(xd->ref_mv_stack[0][0]));
+#endif  // CONFIG_SEP_COMP_DRL
 }
 
 #define PRUNE_SINGLE_REFS 0
@@ -301,26 +322,52 @@
 static INLINE void av1_copy_mbmi_ext_to_mbmi_ext_frame(
     MB_MODE_INFO_EXT_FRAME *mbmi_ext_best,
     const MB_MODE_INFO_EXT *const mbmi_ext,
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SEP_COMP_DRL
+    MB_MODE_INFO *mbmi,
+#endif  // CONFIG_SEP_COMP_DRL
+#if CONFIG_SKIP_MODE_ENHANCEMENT
     uint8_t skip_mode,
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
     uint8_t ref_frame_type) {
 
-#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#if CONFIG_SKIP_MODE_ENHANCEMENT
   if (skip_mode) {
     memcpy(&(mbmi_ext_best->skip_mvp_candidate_list),
            &(mbmi_ext->skip_mvp_candidate_list),
            sizeof(mbmi_ext->skip_mvp_candidate_list));
     return;
   }
-#endif  // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX
+#endif  // CONFIG_SKIP_MODE_ENHANCEMENT
 
+#if CONFIG_SEP_COMP_DRL
+  MV_REFERENCE_FRAME rf[2];
+  av1_set_ref_frame(rf, ref_frame_type);
+  if (!has_second_drl(mbmi))
+    rf[0] = ref_frame_type;  // TODO: confirm that the encoder has already set
+                             // the mode before has_second_drl() is evaluated.
+  memcpy(mbmi_ext_best->ref_mv_stack[0], mbmi_ext->ref_mv_stack[rf[0]],
+         sizeof(mbmi_ext->ref_mv_stack[USABLE_REF_MV_STACK_SIZE]));
+  memcpy(mbmi_ext_best->weight[0], mbmi_ext->weight[rf[0]],
+         sizeof(mbmi_ext->weight[USABLE_REF_MV_STACK_SIZE]));
+  mbmi_ext_best->ref_mv_count[0] = mbmi_ext->ref_mv_count[rf[0]];
+
+  if (has_second_drl(mbmi)) {
+    assert(rf[0] == mbmi->ref_frame[0]);
+    assert(rf[1] == mbmi->ref_frame[1]);
+    memcpy(mbmi_ext_best->ref_mv_stack[1], mbmi_ext->ref_mv_stack[rf[1]],
+           sizeof(mbmi_ext->ref_mv_stack[USABLE_REF_MV_STACK_SIZE]));
+    memcpy(mbmi_ext_best->weight[1], mbmi_ext->weight[rf[1]],
+           sizeof(mbmi_ext->weight[USABLE_REF_MV_STACK_SIZE]));
+    mbmi_ext_best->ref_mv_count[1] = mbmi_ext->ref_mv_count[rf[1]];
+  }
+#else
   memcpy(mbmi_ext_best->ref_mv_stack, mbmi_ext->ref_mv_stack[ref_frame_type],
          sizeof(mbmi_ext->ref_mv_stack[USABLE_REF_MV_STACK_SIZE]));
   memcpy(mbmi_ext_best->weight, mbmi_ext->weight[ref_frame_type],
          sizeof(mbmi_ext->weight[USABLE_REF_MV_STACK_SIZE]));
-  mbmi_ext_best->mode_context = mbmi_ext->mode_context[ref_frame_type];
   mbmi_ext_best->ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
+#endif  // CONFIG_SEP_COMP_DRL
+  mbmi_ext_best->mode_context = mbmi_ext->mode_context[ref_frame_type];
   memcpy(mbmi_ext_best->global_mvs, mbmi_ext->global_mvs,
          sizeof(mbmi_ext->global_mvs));
 
diff --git a/av1/encoder/rdopt_utils.h b/av1/encoder/rdopt_utils.h
index de13e91..b3861c8 100644
--- a/av1/encoder/rdopt_utils.h
+++ b/av1/encoder/rdopt_utils.h
@@ -459,10 +459,19 @@
 
     winner_mode_stats[mode_idx].rd_cost = *rd_cost;
     if (txfm_search_done) {
+#if CONFIG_SKIP_TXFM_OPT
+      winner_mode_stats[mode_idx].rate_y =
+          rd_cost_y->rate +
+          (!is_intra_mode
+               ? x->mode_costs
+                     .skip_txfm_cost[skip_ctx][rd_cost->skip_txfm || skip_txfm]
+               : 0);
+#else
       winner_mode_stats[mode_idx].rate_y =
           rd_cost_y->rate +
           x->mode_costs
               .skip_txfm_cost[skip_ctx][rd_cost->skip_txfm || skip_txfm];
+#endif  // CONFIG_SKIP_TXFM_OPT
       winner_mode_stats[mode_idx].rate_uv = rd_cost_uv->rate;
     }
   }
diff --git a/av1/encoder/reconinter_enc.c b/av1/encoder/reconinter_enc.c
index 32bb328..655fcec 100644
--- a/av1/encoder/reconinter_enc.c
+++ b/av1/encoder/reconinter_enc.c
@@ -38,6 +38,19 @@
                                    uint16_t **mc_buf, uint16_t **pre,
                                    SubpelParams *subpel_params,
                                    int *src_stride) {
+
+#if CONFIG_REFINEMV
+  if (inter_pred_params->use_ref_padding) {
+    common_calc_subpel_params_and_extend(
+        src_mv, inter_pred_params, xd, mi_x, mi_y, ref,
+#if CONFIG_OPTFLOW_REFINEMENT
+        use_optflow_refinement,
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+        mc_buf, pre, subpel_params, src_stride);
+    return;
+  }
+#endif  // CONFIG_REFINEMV
+
   // These are part of the function signature to use this function through a
   // function pointer. See typedef of 'CalcSubpelParamsFunc'.
   (void)xd;
@@ -87,12 +100,47 @@
     subpel_params->subpel_y = pos_y & SCALE_SUBPEL_MASK;
     subpel_params->xs = sf->x_step_q4;
     subpel_params->ys = sf->y_step_q4;
+
+#if CONFIG_D071_IMP_MSK_BLD
+    if (inter_pred_params->border_data.enable_bacp) {
+      // Get reference block top left coordinate.
+      subpel_params->x0 = pos_x >> SCALE_SUBPEL_BITS;
+      subpel_params->y0 = pos_y >> SCALE_SUBPEL_BITS;
+      // Get reference block bottom right coordinate.
+      subpel_params->x1 =
+          ((pos_x + (inter_pred_params->block_width - 1) * subpel_params->xs) >>
+           SCALE_SUBPEL_BITS) +
+          1;
+      subpel_params->y1 = ((pos_y + (inter_pred_params->block_height - 1) *
+                                        subpel_params->ys) >>
+                           SCALE_SUBPEL_BITS) +
+                          1;
+    }
+#endif  // CONFIG_D071_IMP_MSK_BLD
+
     *pre = pre_buf->buf0 + (pos_y >> SCALE_SUBPEL_BITS) * pre_buf->stride +
            (pos_x >> SCALE_SUBPEL_BITS);
 #if CONFIG_OPTFLOW_REFINEMENT || CONFIG_EXT_RECUR_PARTITIONS
   } else {
     int pos_x = inter_pred_params->pix_col << SUBPEL_BITS;
     int pos_y = inter_pred_params->pix_row << SUBPEL_BITS;
+
+#if CONFIG_REFINEMV
+#if CONFIG_OPTFLOW_REFINEMENT
+    const int bw = inter_pred_params->original_pu_width;
+    const int bh = inter_pred_params->original_pu_height;
+    const MV mv_q4 = clamp_mv_to_umv_border_sb(
+        xd, src_mv, bw, bh, use_optflow_refinement,
+        inter_pred_params->subsampling_x, inter_pred_params->subsampling_y);
+#else
+    const int bw = inter_pred_params->original_pu_width;
+    const int bh = inter_pred_params->original_pu_height;
+    const MV mv_q4 = clamp_mv_to_umv_border_sb(
+        xd, src_mv, bw, bh, inter_pred_params->subsampling_x,
+        inter_pred_params->subsampling_y);
+#endif  // CONFIG_OPTFLOW_REFINEMENT
+
+#else
 #if CONFIG_OPTFLOW_REFINEMENT
     const int bw = use_optflow_refinement ? inter_pred_params->orig_block_width
                                           : inter_pred_params->block_width;
@@ -108,11 +156,25 @@
         xd, src_mv, bw, bh, inter_pred_params->subsampling_x,
         inter_pred_params->subsampling_y);
 #endif  // CONFIG_OPTFLOW_REFINEMENT
+#endif  // CONFIG_REFINEMV
+
     subpel_params->xs = subpel_params->ys = SCALE_SUBPEL_SHIFTS;
     subpel_params->subpel_x = (mv_q4.col & SUBPEL_MASK) << SCALE_EXTRA_BITS;
     subpel_params->subpel_y = (mv_q4.row & SUBPEL_MASK) << SCALE_EXTRA_BITS;
     pos_x += mv_q4.col;
     pos_y += mv_q4.row;
+#if CONFIG_D071_IMP_MSK_BLD
+    if (inter_pred_params->border_data.enable_bacp) {
+      subpel_params->x0 = pos_x >> SUBPEL_BITS;
+      subpel_params->y0 = pos_y >> SUBPEL_BITS;
+
+      // Get reference block bottom right coordinate.
+      subpel_params->x1 =
+          (pos_x >> SUBPEL_BITS) + (inter_pred_params->block_width - 1) + 1;
+      subpel_params->y1 =
+          (pos_y >> SUBPEL_BITS) + (inter_pred_params->block_height - 1) + 1;
+    }
+#endif  // CONFIG_D071_IMP_MSK_BLD
     *pre = pre_buf->buf0 + (pos_y >> SUBPEL_BITS) * pre_buf->stride +
            (pos_x >> SUBPEL_BITS);
   }
@@ -128,18 +190,26 @@
       0 /* mi_y */, 0 /* ref */, NULL /* mc_buf */, enc_calc_subpel_params);
 }
 
-static void enc_build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd,
-                                       int plane, MB_MODE_INFO *mi,
+void enc_build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd,
+                                int plane, MB_MODE_INFO *mi,
 #if CONFIG_BAWP
-                                       const BUFFER_SET *ctx,
+                                const BUFFER_SET *ctx,
 #endif
-                                       int bw, int bh, int mi_x, int mi_y) {
+#if CONFIG_REFINEMV
+                                int build_for_refine_mv_only,
+#endif  // CONFIG_REFINEMV
+                                int bw, int bh, int mi_x, int mi_y) {
   av1_build_inter_predictors(cm, xd, plane, mi,
 #if CONFIG_BAWP
                              ctx,
 #endif
+#if CONFIG_REFINEMV
+                             build_for_refine_mv_only,
+#endif  // CONFIG_REFINEMV
                              0 /* build_for_obmc */, bw, bh, mi_x, mi_y,
-                             NULL /* mc_buf */, enc_calc_subpel_params);
+                             NULL /* mc_buf */,
+
+                             enc_calc_subpel_params);
 }
 
 void av1_enc_build_inter_predictor_y(MACROBLOCKD *xd, int mi_row, int mi_col) {
@@ -168,17 +238,60 @@
                                    int mi_row, int mi_col,
                                    const BUFFER_SET *ctx, BLOCK_SIZE bsize,
                                    int plane_from, int plane_to) {
+#if CONFIG_REFINEMV
+  MB_MODE_INFO *mbmi = xd->mi[0];
+
+  int is_refinemv_supported =
+      mbmi->refinemv_flag && !is_intrabc_block(mbmi, xd->tree_type);
+
+  int need_chroma_dmvr = xd->is_chroma_ref &&
+                         (plane_from != 0 || plane_to != 0) &&
+                         is_refinemv_supported;
+  assert(IMPLIES(need_chroma_dmvr, !is_interintra_pred(mbmi)));
+
+  if (need_chroma_dmvr && default_refinemv_modes(mbmi))
+    need_chroma_dmvr &= (mbmi->comp_group_idx == 0 &&
+                         mbmi->interinter_comp.type == COMPOUND_AVERAGE);
+
+  if (need_chroma_dmvr) {
+    fill_subblock_refine_mv(xd->refinemv_subinfo, xd->plane[0].width,
+                            xd->plane[0].height, mbmi->mv[0].as_mv,
+                            mbmi->mv[1].as_mv);
+
+    // If the luma prediction is not being built in this call, the refined MVs
+    // must still be derived from luma, so run the DMVR search on plane 0 here.
+    if (plane_from != 0) {
+#if CONFIG_BAWP
+      enc_build_inter_predictors(cm, xd, 0, xd->mi[0], ctx, 1,
+                                 xd->plane[0].width, xd->plane[0].height,
+                                 mi_col * MI_SIZE, mi_row * MI_SIZE);
+#else
+      enc_build_inter_predictors(cm, xd, 0, xd->mi[0], 1, xd->plane[0].width,
+                                 xd->plane[0].height, mi_col * MI_SIZE,
+                                 mi_row * MI_SIZE);
+#endif
+    }
+  }
+#endif  // CONFIG_REFINEMV
+
   for (int plane = plane_from; plane <= plane_to; ++plane) {
     if (plane && !xd->is_chroma_ref) break;
     const int mi_x = mi_col * MI_SIZE;
     const int mi_y = mi_row * MI_SIZE;
 #if CONFIG_BAWP
     enc_build_inter_predictors(cm, xd, plane, xd->mi[0], ctx,
+#if CONFIG_REFINEMV
+                               0,
+#endif  // CONFIG_REFINEMV
                                xd->plane[plane].width, xd->plane[plane].height,
                                mi_x, mi_y);
 #else
-    enc_build_inter_predictors(cm, xd, plane, xd->mi[0], xd->plane[plane].width,
-                               xd->plane[plane].height, mi_x, mi_y);
+    enc_build_inter_predictors(cm, xd, plane, xd->mi[0],
+#if CONFIG_REFINEMV
+                               0,
+#endif  // CONFIG_REFINEMV
+                               xd->plane[plane].width, xd->plane[plane].height,
+                               mi_x, mi_y);
 #endif
 
     if (is_interintra_pred(xd->mi[0])) {
@@ -285,7 +398,7 @@
   BLOCK_SIZE bsize = xd->mi[0]->sb_type[PLANE_TYPE_Y];
   foreach_overlappable_nb_above(cm, xd,
                                 max_neighbor_obmc[mi_size_wide_log2[bsize]],
-                                build_obmc_prediction, &ctxt);
+                                build_obmc_prediction, &ctxt, false);
 }
 
 void av1_build_prediction_by_left_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
diff --git a/av1/encoder/reconinter_enc.h b/av1/encoder/reconinter_enc.h
index 2309f16..5f3de5f 100644
--- a/av1/encoder/reconinter_enc.h
+++ b/av1/encoder/reconinter_enc.h
@@ -34,6 +34,16 @@
 
 void av1_enc_build_inter_predictor_y(MACROBLOCKD *xd, int mi_row, int mi_col);
 
+void enc_build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd,
+                                int plane, MB_MODE_INFO *mi,
+#if CONFIG_BAWP
+                                const BUFFER_SET *ctx,
+#endif
+#if CONFIG_REFINEMV
+                                int build_for_refine_mv_only,
+#endif  // CONFIG_REFINEMV
+                                int bw, int bh, int mi_x, int mi_y);
+
 // Build one inter predictor. It is called for building predictor for single
 // reference case, or just the 1st or 2nd reference in compound reference case.
 // Can build both regular and masked predictors.
diff --git a/av1/encoder/segmentation.c b/av1/encoder/segmentation.c
index cabf66e..7da61f4 100644
--- a/av1/encoder/segmentation.c
+++ b/av1/encoder/segmentation.c
@@ -100,6 +100,9 @@
   const int bw = mi_size_wide[bsize], bh = mi_size_high[bsize];
   const int hbw = bw / 2, hbh = bh / 2;
   const int qbw = bw / 4, qbh = bh / 4;
+#if CONFIG_UNEVEN_4WAY
+  const int ebw = bw / 8, ebh = bh / 8;
+#endif  // CONFIG_UNEVEN_4WAY
 #else
   const int bs = mi_size_wide[bsize], hbs = bs / 2;
   const int qbs = bs / 4;
@@ -142,21 +145,55 @@
     case PARTITION_HORZ_3:
       CSEGS_RECURSIVE(0, 0, ptree->sub_tree[tree_idx++]);
       CSEGS_RECURSIVE(qbh, 0, ptree->sub_tree[tree_idx++]);
-#if CONFIG_H_PARTITION
       CSEGS_RECURSIVE(qbh, hbw, ptree->sub_tree[tree_idx++]);
-#endif  // CONFIG_H_PARTITION
       if (mi_row + 3 * qbh < mi_params->mi_rows)
         CSEGS_RECURSIVE(3 * qbh, 0, ptree->sub_tree[tree_idx++]);
       break;
     case PARTITION_VERT_3:
       CSEGS_RECURSIVE(0, 0, ptree->sub_tree[tree_idx++]);
       CSEGS_RECURSIVE(0, qbw, ptree->sub_tree[tree_idx++]);
-#if CONFIG_H_PARTITION
       CSEGS_RECURSIVE(hbh, qbw, ptree->sub_tree[tree_idx++]);
-#endif  // CONFIG_H_PARTITION
       if (mi_col + 3 * qbw < mi_params->mi_cols)
         CSEGS_RECURSIVE(0, 3 * qbw, ptree->sub_tree[tree_idx++]);
       break;
+#if CONFIG_UNEVEN_4WAY
+    case PARTITION_HORZ_4A:
+      CSEGS_RECURSIVE(0, 0, ptree->sub_tree[tree_idx++]);
+      if (mi_row + ebh < mi_params->mi_rows)
+        CSEGS_RECURSIVE(ebh, 0, ptree->sub_tree[tree_idx++]);
+      if (mi_row + 3 * ebh < mi_params->mi_rows)
+        CSEGS_RECURSIVE(3 * ebh, 0, ptree->sub_tree[tree_idx++]);
+      if (mi_row + 7 * ebh < mi_params->mi_rows)
+        CSEGS_RECURSIVE(7 * ebh, 0, ptree->sub_tree[tree_idx++]);
+      break;
+    case PARTITION_HORZ_4B:
+      CSEGS_RECURSIVE(0, 0, ptree->sub_tree[tree_idx++]);
+      if (mi_row + ebh < mi_params->mi_rows)
+        CSEGS_RECURSIVE(ebh, 0, ptree->sub_tree[tree_idx++]);
+      if (mi_row + 5 * ebh < mi_params->mi_rows)
+        CSEGS_RECURSIVE(5 * ebh, 0, ptree->sub_tree[tree_idx++]);
+      if (mi_row + 7 * ebh < mi_params->mi_rows)
+        CSEGS_RECURSIVE(7 * ebh, 0, ptree->sub_tree[tree_idx++]);
+      break;
+    case PARTITION_VERT_4A:
+      CSEGS_RECURSIVE(0, 0, ptree->sub_tree[tree_idx++]);
+      if (mi_col + ebw < mi_params->mi_cols)
+        CSEGS_RECURSIVE(0, ebw, ptree->sub_tree[tree_idx++]);
+      if (mi_col + 3 * ebw < mi_params->mi_cols)
+        CSEGS_RECURSIVE(0, 3 * ebw, ptree->sub_tree[tree_idx++]);
+      if (mi_col + 7 * ebw < mi_params->mi_cols)
+        CSEGS_RECURSIVE(0, 7 * ebw, ptree->sub_tree[tree_idx++]);
+      break;
+    case PARTITION_VERT_4B:
+      CSEGS_RECURSIVE(0, 0, ptree->sub_tree[tree_idx++]);
+      if (mi_col + ebw < mi_params->mi_cols)
+        CSEGS_RECURSIVE(0, ebw, ptree->sub_tree[tree_idx++]);
+      if (mi_col + 5 * ebw < mi_params->mi_cols)
+        CSEGS_RECURSIVE(0, 5 * ebw, ptree->sub_tree[tree_idx++]);
+      if (mi_col + 7 * ebw < mi_params->mi_cols)
+        CSEGS_RECURSIVE(0, 7 * ebw, ptree->sub_tree[tree_idx++]);
+      break;
+#endif  // CONFIG_UNEVEN_4WAY
 #else   // CONFIG_EXT_RECUR_PARTITIONS
     case PARTITION_NONE: CSEGS(bs, bs, 0, 0); break;
     case PARTITION_HORZ:
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c
index 93c839a..fa88d44 100644
--- a/av1/encoder/speed_features.c
+++ b/av1/encoder/speed_features.c
@@ -123,16 +123,6 @@
                                                               { 1, 1, 0 },
                                                               { 1, 1, 1 } };
 
-// This table holds the maximum number of reference frames for global motion.
-// The table is indexed as per the speed feature 'gm_search_type'.
-// 0 : All reference frames are allowed.
-// 1 : All reference frames except L2 and L3 are allowed.
-// 2 : All reference frames except L2, L3 and ARF2 are allowed.
-// 3 : No reference frame is allowed.
-static int gm_available_reference_frames[GM_DISABLE_SEARCH + 1] = {
-  INTER_REFS_PER_FRAME, INTER_REFS_PER_FRAME - 2, INTER_REFS_PER_FRAME - 3, 0
-};
-
 // Intra only frames, golden frames (except alt ref overlays) and
 // alt ref frames tend to be coded at a higher than ambient quality
 static int frame_is_boosted(const AV1_COMP *cpi) {
@@ -353,8 +343,9 @@
 #endif
 
   // Speed 0 for all speed features that give neutral coding performance change.
-  sf->gm_sf.gm_disable_recode = 1;
-  sf->gm_sf.gm_search_type = GM_REDUCED_REF_SEARCH_SKIP_LEV2;
+  sf->gm_sf.max_ref_frames = boosted ? 4 : 2;
+  sf->gm_sf.prune_ref_frame_for_gm_search = boosted ? 0 : 1;
+  sf->gm_sf.disable_gm_search_based_on_stats = 1;
 
   sf->part_sf.less_rectangular_check_level = 1;
 #if CONFIG_EXT_RECUR_PARTITIONS
@@ -410,9 +401,6 @@
   sf->hl_sf.superres_auto_search_type = SUPERRES_AUTO_DUAL;
 
   if (speed >= 1) {
-    sf->gm_sf.gm_search_type = GM_REDUCED_REF_SEARCH_SKIP_LEV3;
-    sf->gm_sf.prune_ref_frame_for_gm_search = boosted ? 0 : 1;
-
 #if CONFIG_EXT_RECUR_PARTITIONS
     sf->part_sf.intra_cnn_split = 0;
 #else   // CONFIG_EXT_RECUR_PARTITIONS
@@ -514,7 +502,7 @@
     sf->hl_sf.high_precision_mv_usage = CURRENT_Q;
     sf->hl_sf.recode_loop = ALLOW_RECODE_KFARFGF;
 
-    sf->gm_sf.gm_search_type = GM_DISABLE_SEARCH;
+    sf->gm_sf.max_ref_frames = 0;
 
     sf->part_sf.less_rectangular_check_level = 2;
     sf->part_sf.simple_motion_search_prune_agg = 1;
@@ -715,10 +703,9 @@
 }
 
 static AOM_INLINE void init_gm_sf(GLOBAL_MOTION_SPEED_FEATURES *gm_sf) {
-  gm_sf->selective_ref_gm = 1;
-  gm_sf->gm_search_type = GM_FULL_SEARCH;
-  gm_sf->gm_disable_recode = 0;
+  gm_sf->max_ref_frames = INTER_REFS_PER_FRAME;
   gm_sf->prune_ref_frame_for_gm_search = 0;
+  gm_sf->disable_gm_search_based_on_stats = 0;
 }
 
 static AOM_INLINE void init_part_sf(PARTITION_SPEED_FEATURES *part_sf) {
@@ -759,13 +746,19 @@
   part_sf->early_term_after_none_split = 0;
 #if CONFIG_EXT_RECUR_PARTITIONS
   part_sf->prune_rect_with_none_rd = 0;
-  part_sf->prune_part_3_with_part_none = 0;
-  part_sf->prune_part_3_with_part_rect = 0;
+  part_sf->prune_ext_part_with_part_none = 0;
+  part_sf->prune_ext_part_with_part_rect = 0;
+#if CONFIG_UNEVEN_4WAY
+  part_sf->prune_part_4_with_partition_boundary = 0;
+  part_sf->prune_part_4_horz_or_vert = 0;
+  part_sf->prune_part_4_with_part_3 = 0;
+#endif  // CONFIG_UNEVEN_4WAY
   part_sf->two_pass_partition_search = 0;
   part_sf->prune_rect_with_ml = 0;
   part_sf->end_part_search_after_consec_failures = 0;
   part_sf->ext_recur_depth = INT_MAX;
   part_sf->prune_rect_with_split_depth = 0;
+  part_sf->prune_part_h_with_partition_boundary = 0;
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
 }
 
@@ -853,6 +846,10 @@
 #if CONFIG_EXT_RECUR_PARTITIONS
   inter_sf->reuse_erp_mode_flag = 0;
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
+
+#if CONFIG_CWG_D067_IMPROVED_WARP
+  inter_sf->prune_warpmv_prob_thresh = 32;
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
 }
 
 static AOM_INLINE void init_interp_sf(INTERP_FILTER_SPEED_FEATURES *interp_sf) {
@@ -1053,17 +1050,22 @@
       sf->part_sf.simple_motion_search_early_term_none = 1;
       AOM_FALLTHROUGH_INTENDED;
     case 5:
+      sf->part_sf.prune_part_h_with_partition_boundary = true;
+      sf->part_sf.adaptive_partition_search_order = true;
       sf->tx_sf.use_largest_tx_size_for_small_bsize = true;
       // TODO(chiyotsai@google.com): This speed feature causes large regression
       // on b2 testset. Disable this for now until we figure out how to avoid
       // the loss.
       // sf->part_sf.end_part_search_after_consec_failures = 1;
       AOM_FALLTHROUGH_INTENDED;
-    case 4:
-      sf->part_sf.prune_part_3_with_part_rect = 1;
+    case 4: sf->part_sf.prune_ext_part_with_part_rect = 1;
+#if CONFIG_UNEVEN_4WAY
+      sf->part_sf.prune_part_4_horz_or_vert = 1;
+      sf->part_sf.prune_part_4_with_part_3 = 1;
+#endif  // CONFIG_UNEVEN_4WAY
       AOM_FALLTHROUGH_INTENDED;
     case 3:
-      sf->part_sf.prune_part_3_with_part_none = 1;
+      sf->part_sf.prune_ext_part_with_part_none = 1;
       AOM_FALLTHROUGH_INTENDED;
     case 2:
       sf->inter_sf.prune_ref_frame_for_rect_partitions =
@@ -1249,9 +1251,8 @@
     // Disable the speed feature 'prune_ref_frame_for_gm_search' to achieve
     // better parallelism when number of threads available are greater than or
     // equal to maximum number of reference frames allowed for global motion.
-    if (sf->gm_sf.gm_search_type != GM_DISABLE_SEARCH &&
-        (cpi->oxcf.max_threads >=
-         gm_available_reference_frames[sf->gm_sf.gm_search_type]))
+    if (sf->gm_sf.max_ref_frames > 0 &&
+        cpi->oxcf.max_threads >= sf->gm_sf.max_ref_frames)
       sf->gm_sf.prune_ref_frame_for_gm_search = 0;
   }
 }
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h
index 61e21d1..e3a6391 100644
--- a/av1/encoder/speed_features.h
+++ b/av1/encoder/speed_features.h
@@ -33,13 +33,6 @@
 } MESH_PATTERN;
 
 enum {
-  GM_FULL_SEARCH,
-  GM_REDUCED_REF_SEARCH_SKIP_LEV2,
-  GM_REDUCED_REF_SEARCH_SKIP_LEV3,
-  GM_DISABLE_SEARCH
-} UENUM1BYTE(GM_SEARCH_TYPE);
-
-enum {
   INTRA_ALL = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED) | (1 << D45_PRED) |
               (1 << D135_PRED) | (1 << D113_PRED) | (1 << D157_PRED) |
               (1 << D203_PRED) | (1 << D67_PRED) | (1 << SMOOTH_PRED) |
@@ -369,20 +362,16 @@
 } TPL_SPEED_FEATURES;
 
 typedef struct GLOBAL_MOTION_SPEED_FEATURES {
-  // Do not compute the global motion parameters for a LAST2_FRAME or
-  // LAST3_FRAME if the GOLDEN_FRAME is closer and it has a non identity
-  // global model.
-  int selective_ref_gm;
-
-  GM_SEARCH_TYPE gm_search_type;
-
-  // whether to disable the global motion recode loop
-  int gm_disable_recode;
+  int max_ref_frames;
 
   // During global motion estimation, prune remaining reference frames in a
   // given direction(past/future), if the evaluated ref_frame in that direction
   // yields gm_type as INVALID/TRANSLATION/IDENTITY
   int prune_ref_frame_for_gm_search;
+
+  // Disable global motion estimation based on stats of previous frames in the
+  // GF group
+  int disable_gm_search_based_on_stats;
 } GLOBAL_MOTION_SPEED_FEATURES;
 
 typedef struct PARTITION_SPEED_FEATURES {
@@ -496,11 +485,23 @@
   // Prunes PARTITION_3 if PARTITION_NONE is used instead of PARTITION_HORZ|VERT
   int prune_rect_with_none_rd;
 
-  // Prunes PARTITION_3 if PARTITION_NONE is used instead of PARTITION_HORZ|VERT
-  int prune_part_3_with_part_none;
+  // Prunes extended partitions if PARTITION_NONE is used instead of
+  // PARTITION_HORZ|VERT.
+  int prune_ext_part_with_part_none;
 
-  // Prunes PARTITION_3 partition 3 doesn't split in the same direction
-  int prune_part_3_with_part_rect;
+  // Prunes extended partitions if rect sub-partitions don't further split in
+  // the same direction.
+  int prune_ext_part_with_part_rect;
+
+#if CONFIG_UNEVEN_4WAY
+  // Prunes PARTITION_HORZ_4A/4B if vertical is the best partition, and
+  // prunes PARTITION_VERT_4A/4B if horizontal is the best partition.
+  int prune_part_4_horz_or_vert;
+
+  // Prunes PARTITION_HORZ_4A/4B based on PARTITION_HORZ_3 search result, and
+  // prunes PARTITION_VERT_4A/4B based on PARTITION_VERT_3 search result.
+  int prune_part_4_with_part_3;
+#endif  // CONFIG_UNEVEN_4WAY
 
   int two_pass_partition_search;
 
@@ -516,6 +517,21 @@
 
   // Prune rect partitions if PARTITION_SPLIT goes deep.
   int prune_rect_with_split_depth;
+
+  // Search horizontal and vertical split before PARTITION_NONE if the neighbor
+  // blocks are much smaller than the current block size.
+  int adaptive_partition_search_order;
+
+  // Prune H partition types if their resulting boundary does not agree with
+  // the current best partition's boundary after searching NONE, HORZ, and VERT.
+  int prune_part_h_with_partition_boundary;
+
+#if CONFIG_UNEVEN_4WAY
+  // Prune 4-way partition types if their resulting boundary does not agree with
+  // the current best partition's boundary after searching NONE, HORZ, VERT, and
+  // H-parts.
+  int prune_part_4_with_partition_boundary;
+#endif  // CONFIG_UNEVEN_4WAY
 #endif  // CONFIG_EXT_RECUR_PARTITIONS
 } PARTITION_SPEED_FEATURES;
 
@@ -725,6 +741,11 @@
   // Prune warped motion search using previous frame stats.
   int prune_warped_prob_thresh;
 
+#if CONFIG_CWG_D067_IMPROVED_WARP
+  // Prune warpmv with mvd search using previous frame stats.
+  int prune_warpmv_prob_thresh;
+#endif  // CONFIG_CWG_D067_IMPROVED_WARP
+
   // Enable/disable interintra wedge search.
   int disable_wedge_interintra_search;
 
diff --git a/av1/encoder/subgop.c b/av1/encoder/subgop.c
index 025fb82..9240d6c 100644
--- a/av1/encoder/subgop.c
+++ b/av1/encoder/subgop.c
@@ -195,9 +195,15 @@
   switch (code) {
     case FRAME_TYPE_INO_VISIBLE:
     case FRAME_TYPE_INO_REPEAT:
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+    case FRAME_TYPE_OOO_UNFILTERED: return 1;
+    case FRAME_TYPE_INO_SHOWEXISTING:
+    case FRAME_TYPE_OOO_FILTERED: return 0;
+#else   // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
     case FRAME_TYPE_INO_SHOWEXISTING: return 1;
     case FRAME_TYPE_OOO_FILTERED:
     case FRAME_TYPE_OOO_UNFILTERED: return 0;
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
     default: assert(0 && "Invalid frame type code"); return 0;
   }
 }
@@ -211,9 +217,24 @@
   // Each disp frame index must be shown exactly once and in ascending order
   int last_visible = 0;
   for (int s = 0; s < config->num_steps; ++s) {
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+    if (config->step[s].type_code == FRAME_TYPE_INO_VISIBLE ||
+        config->step[s].type_code == FRAME_TYPE_INO_REPEAT) {
+      int updated_last_visible = config->step[s].disp_frame_idx;
+      do {
+        last_visible = updated_last_visible;
+        for (int k = 0; k < s; ++k) {
+          if (is_visible(config->step[k].type_code) &&
+              config->step[k].disp_frame_idx == last_visible + 1) {
+            updated_last_visible = config->step[k].disp_frame_idx;
+          }
+        }
+      } while (last_visible != updated_last_visible);
+#else   // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
     if (is_visible(config->step[s].type_code)) {
       if (config->step[s].disp_frame_idx != last_visible + 1) return 0;
       last_visible = config->step[s].disp_frame_idx;
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
     }
   }
   if (last_visible != config->num_frames) return 0;
diff --git a/av1/encoder/temporal_filter.c b/av1/encoder/temporal_filter.c
index f00c3e2..0e7fdce 100644
--- a/av1/encoder/temporal_filter.c
+++ b/av1/encoder/temporal_filter.c
@@ -98,7 +98,7 @@
   // Save input state.
 #if CONFIG_FLEX_MVRES
   const AV1_COMMON *cm = &cpi->common;
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
   const int is_ibc_cost = 0;
 #endif
 #endif
@@ -153,7 +153,7 @@
                                      &baseline_mv,
 #if CONFIG_FLEX_MVRES
                                      pb_mv_precision,
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
                                      is_ibc_cost,
 #endif
 #endif
@@ -226,7 +226,7 @@
                                            subblock_size, &baseline_mv,
 #if CONFIG_FLEX_MVRES
                                            pb_mv_precision,
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
                                            is_ibc_cost,
 #endif
 #endif
@@ -1111,9 +1111,19 @@
 
   // Set showable frame.
   if (filter_frame_lookahead_idx >= 0) {
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+    // When enable_frame_output_order == 1, it is intended to set showable_frame
+    // to one only for the coded frames to be outputted. When enable_overlay ==
+    // 1, showable_frame of the filtered frame is set to zero by default.
+    cpi->common.showable_frame =
+        (!cpi->oxcf.ref_frm_cfg.enable_frame_output_order &&
+         (num_frames_for_filtering == 1 || is_second_arf)) ||
+        cpi->oxcf.ref_frm_cfg.enable_frame_output_order ||
+#else   // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
     cpi->common.showable_frame = num_frames_for_filtering == 1 ||
                                  is_second_arf ||
-                                 (cpi->oxcf.algo_cfg.enable_overlay == 0);
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+        (cpi->oxcf.algo_cfg.enable_overlay == 0);
   }
 
   // Do filtering.
diff --git a/av1/encoder/tokenize.c b/av1/encoder/tokenize.c
index 8f95c72..221054e 100644
--- a/av1/encoder/tokenize.c
+++ b/av1/encoder/tokenize.c
@@ -28,7 +28,7 @@
 #include "av1/encoder/rdopt.h"
 #include "av1/encoder/tokenize.h"
 
-#if CONFIG_NEW_COLOR_MAP_CODING
+#if CONFIG_PALETTE_IMPROVEMENTS
 static int cost_and_tokenize_map(Av1ColorMapParam *param, TokenExtra **t,
                                  int plane, int calc_rate, int allow_update_cdf,
                                  FRAME_COUNTS *counts, MapCdf map_pb_cdf,
@@ -157,7 +157,7 @@
   if (calc_rate) return this_rate;
   return 0;
 }
-#endif  // CONFIG_NEW_COLOR_MAP_CODING
+#endif  // CONFIG_PALETTE_IMPROVEMENTS
 
 static void get_palette_params(const MACROBLOCK *const x, int plane,
                                BLOCK_SIZE bsize, Av1ColorMapParam *params) {
@@ -167,12 +167,12 @@
   params->color_map = xd->plane[plane].color_index_map;
   params->map_cdf = plane ? xd->tile_ctx->palette_uv_color_index_cdf
                           : xd->tile_ctx->palette_y_color_index_cdf;
-#if CONFIG_NEW_COLOR_MAP_CODING
+#if CONFIG_PALETTE_IMPROVEMENTS
   params->identity_row_cdf = plane ? xd->tile_ctx->identity_row_cdf_uv
                                    : xd->tile_ctx->identity_row_cdf_y;
   params->identity_row_cost = plane ? &x->mode_costs.palette_uv_row_flag_cost
                                     : &x->mode_costs.palette_y_row_flag_cost;
-#endif  // CONFIG_NEW_COLOR_MAP_CODING
+#endif  // CONFIG_PALETTE_IMPROVEMENTS
   params->color_cost = plane ? &x->mode_costs.palette_uv_color_cost
                              : &x->mode_costs.palette_y_color_cost;
   params->n_colors = pmi->palette_size[plane];
@@ -200,7 +200,7 @@
   get_color_map_params(x, plane, bsize, tx_size, type, &color_map_params);
   MapCdf map_pb_cdf = plane ? x->tile_pb_ctx->palette_uv_color_index_cdf
                             : x->tile_pb_ctx->palette_y_color_index_cdf;
-#if CONFIG_NEW_COLOR_MAP_CODING
+#if CONFIG_PALETTE_IMPROVEMENTS
   IdentityRowCdf eq_row_pb_cdf = plane ? x->tile_pb_ctx->identity_row_cdf_uv
                                        : x->tile_pb_ctx->identity_row_cdf_y;
   return cost_and_tokenize_map(&color_map_params, NULL, plane, 1, 0, NULL,
@@ -208,7 +208,7 @@
 #else
   return cost_and_tokenize_map(&color_map_params, NULL, plane, 1, 0, NULL,
                                map_pb_cdf);
-#endif  // CONFIG_NEW_COLOR_MAP_CODING
+#endif  // CONFIG_PALETTE_IMPROVEMENTS
 }
 
 void av1_tokenize_color_map(const MACROBLOCK *const x, int plane,
@@ -218,7 +218,7 @@
   assert(plane == 0 || plane == 1);
   Av1ColorMapParam color_map_params;
   get_color_map_params(x, plane, bsize, tx_size, type, &color_map_params);
-#if CONFIG_NEW_COLOR_MAP_CODING
+#if CONFIG_PALETTE_IMPROVEMENTS
   MapCdf map_pb_cdf = plane ? x->tile_pb_ctx->palette_uv_color_index_cdf
                             : x->tile_pb_ctx->palette_y_color_index_cdf;
   IdentityRowCdf eq_row_pb_cdf = plane ? x->tile_pb_ctx->identity_row_cdf_uv
@@ -234,7 +234,7 @@
                             : x->tile_pb_ctx->palette_y_color_index_cdf;
   cost_and_tokenize_map(&color_map_params, t, plane, 0, allow_update_cdf,
                         counts, map_pb_cdf);
-#endif  // CONFIG_NEW_COLOR_MAP_CODING
+#endif  // CONFIG_PALETTE_IMPROVEMENTS
 }
 
 static void tokenize_vartx(ThreadData *td, TX_SIZE tx_size,
diff --git a/av1/encoder/tokenize.h b/av1/encoder/tokenize.h
index 675b1f1..b886ea6 100644
--- a/av1/encoder/tokenize.h
+++ b/av1/encoder/tokenize.h
@@ -24,10 +24,10 @@
 typedef struct {
   aom_cdf_prob *color_map_cdf;
   uint8_t token;
-#if CONFIG_NEW_COLOR_MAP_CODING
+#if CONFIG_PALETTE_IMPROVEMENTS
   aom_cdf_prob *identity_row_cdf;
   uint8_t identity_row_flag;
-#endif  // CONFIG_NEW_COLOR_MAP_CODING
+#endif  // CONFIG_PALETTE_IMPROVEMENTS
 } TokenExtra;
 
 typedef struct {
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c
index f91ed5b..a09040c 100644
--- a/av1/encoder/tpl_model.c
+++ b/av1/encoder/tpl_model.c
@@ -156,7 +156,7 @@
 #if CONFIG_FLEX_MVRES
   const MvSubpelPrecision pb_mv_precision = cm->features.fr_mv_precision;
   full_pel_lower_mv_precision(&start_mv, pb_mv_precision);
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
   const int is_ibc_cost = 0;
 #endif
 #endif
@@ -165,7 +165,7 @@
   av1_make_default_fullpel_ms_params(&full_ms_params, cpi, x, bsize, &center_mv,
 #if CONFIG_FLEX_MVRES
                                      pb_mv_precision,
-#if CONFIG_BVCOST_UPDATE
+#if CONFIG_IBC_BV_IMPROVEMENT
                                      is_ibc_cost,
 #endif
 #endif
diff --git a/av1/encoder/tx_search.c b/av1/encoder/tx_search.c
index fe75770..0f8f819 100644
--- a/av1/encoder/tx_search.c
+++ b/av1/encoder/tx_search.c
@@ -1192,6 +1192,12 @@
         best_tx_type != DCT_DCT) {
       update_txk_array(xd, blk_row, blk_col, tx_size, DCT_DCT);
     }
+#if CONFIG_ATC_DCTX_ALIGNED
+    if (plane == 0 && x->plane[plane].eobs[block] == 1 &&
+        best_tx_type != DCT_DCT && !is_inter) {
+      update_txk_array(xd, blk_row, blk_col, tx_size, DCT_DCT);
+    }
+#endif  // CONFIG_ATC_DCTX_ALIGNED
   }
 }
 
@@ -1357,10 +1363,10 @@
 static uint32_t get_intra_txb_hash(MACROBLOCK *x, int plane, int blk_row,
                                    int blk_col, BLOCK_SIZE plane_bsize,
                                    TX_SIZE tx_size
-#if CONFIG_ATC_NEWTXSETS
+#if CONFIG_ATC
                                    ,
                                    PREDICTION_MODE intra_dir
-#endif  // CONFIG_ATC_NEWTXSETS
+#endif  // CONFIG_ATC
 ) {
   int16_t tmp_data[64 * 64];
   const int diff_stride = block_size_wide[plane_bsize];
@@ -1380,11 +1386,11 @@
   }
   CRC32C *crc = &x->txfm_search_info.mb_rd_record.crc_calculator;
   const uint32_t hash = av1_get_crc32c_value(crc, hash_data, 2 * txb_w * txb_h);
-#if CONFIG_ATC_NEWTXSETS
+#if CONFIG_ATC
   return (hash << 9) + (tx_size << 4) + (intra_dir);
 #else
   return (hash << 5) + tx_size;
-#endif  // CONFIG_ATC_NEWTXSETS
+#endif  // CONFIG_ATC
 }
 
 // pruning thresholds for prune_txk_type and prune_txk_type_separ
@@ -1404,7 +1410,7 @@
          frame_is_intra_only(&cpi->common) &&
          !is_inter_block(xd->mi[0], xd->tree_type) && plane == 0 &&
          tx_size_wide[tx_size] == tx_size_high[tx_size]);
-#if CONFIG_ATC_NEWTXSETS
+#if CONFIG_ATC
   MB_MODE_INFO *mbmi = xd->mi[0];
   PREDICTION_MODE intra_dir;
   if (mbmi->filter_intra_mode_info.use_filter_intra)
@@ -1417,7 +1423,7 @@
 #else
   const uint32_t intra_hash =
       get_intra_txb_hash(x, plane, blk_row, blk_col, plane_bsize, tx_size);
-#endif  // CONFIG_ATC_NEWTXSETS
+#endif  // CONFIG_ATC
   const int intra_hash_idx =
       find_tx_size_rd_info(&txfm_info->txb_rd_record_intra, intra_hash);
   *intra_txb_rd_info =
@@ -2087,14 +2093,14 @@
   const AV1_COMMON *cm = &cpi->common;
   MACROBLOCKD *xd = &x->e_mbd;
   MB_MODE_INFO *mbmi = xd->mi[0];
-#if CONFIG_ATC_NEWTXSETS
+#if CONFIG_ATC
   PREDICTION_MODE intra_dir;
   if (mbmi->filter_intra_mode_info.use_filter_intra)
     intra_dir =
         fimode_to_intradir[mbmi->filter_intra_mode_info.filter_intra_mode];
   else
     intra_dir = mbmi->mode;
-#endif  // CONFIG_ATC_NEWTXSETS
+#endif  // CONFIG_ATC
   const TxfmSearchParams *txfm_params = &x->txfm_search_params;
   const int is_inter = is_inter_block(mbmi, xd->tree_type);
   const int fast_tx_search = ftxs_mode & FTXS_DCT_AND_1D_DCT_ONLY;
@@ -2120,12 +2126,12 @@
         av1_get_tx_type(xd, get_plane_type(plane), blk_row, blk_col, tx_size,
                         cm->features.reduced_tx_set_used);
   }
-#if !CONFIG_ATC_NEWTXSETS
+#if !CONFIG_ATC
   PREDICTION_MODE intra_dir =
       mbmi->filter_intra_mode_info.use_filter_intra
           ? fimode_to_intradir[mbmi->filter_intra_mode_info.filter_intra_mode]
           : mbmi->mode;
-#endif  // !CONFIG_ATC_NEWTXSETS
+#endif  // !CONFIG_ATC
   uint16_t ext_tx_used_flag =
       cpi->sf.tx_sf.tx_type_search.use_reduced_intra_txset &&
               tx_set_type == EXT_TX_SET_DTT4_IDTX_1DDCT
@@ -2138,7 +2144,7 @@
     txk_allowed = DCT_DCT;
   }
 
-#if CONFIG_ATC_NEWTXSETS
+#if CONFIG_ATC
   if (!is_inter) {
     uint16_t mdtx_mask =
         av1_md_trfm_used_flag[av1_size_class[tx_size]]
@@ -2150,6 +2156,9 @@
           (1 << DCT_DCT) | (1 << ADST_ADST);  // DCT_DCT, ADST_ADST
     }
 #endif  // CONFIG_ATC_REDUCED_TXSET
+#if CONFIG_ATC_DCTX_ALIGNED
+    if (txsize_sqr_up_map[tx_size] == TX_32X32) ext_tx_used_flag |= (1 << IDTX);
+#endif  // CONFIG_ATC_DCTX_ALIGNED
   }
 #if CONFIG_ATC_REDUCED_TXSET
   else {
@@ -2158,7 +2167,7 @@
     }
   }
 #endif  // CONFIG_ATC_REDUCED_TXSET
-#endif  // CONFIG_ATC_NEWTXSETS
+#endif  // CONFIG_ATC
   if (cpi->oxcf.txfm_cfg.enable_flip_idtx == 0)
     ext_tx_used_flag &= DCT_ADST_TX_MASK;
 
@@ -2207,6 +2216,7 @@
     if (plane) {
       const CctxType cctx_type = av1_get_cctx_type(xd, blk_row, blk_col);
       assert(cctx_type == CCTX_NONE);
+      (void)cctx_type;
     }
 #endif  // CONFIG_DEBUG && CONFIG_CROSS_CHROMA_TX
 
@@ -2242,7 +2252,12 @@
   }
 
   if (mbmi->fsc_mode[xd->tree_type == CHROMA_PART] &&
-      txsize_sqr_up_map[tx_size] < TX_32X32 && plane == PLANE_TYPE_Y) {
+#if CONFIG_ATC_DCTX_ALIGNED
+      txsize_sqr_up_map[tx_size] <= TX_32X32
+#else
+      txsize_sqr_up_map[tx_size] < TX_32X32
+#endif  // CONFIG_ATC_DCTX_ALIGNED
+      && plane == PLANE_TYPE_Y) {
     txk_allowed = IDTX;
     allowed_tx_mask = (1 << txk_allowed);
   }
@@ -2381,6 +2396,9 @@
     best_rd_stats->skip_txfm = 1;
 
     x->plane[plane].eobs[block] = 0;
+#if CONFIG_ATC_DCTX_ALIGNED
+    x->plane[plane].bobs[block] = 0;
+#endif  // CONFIG_ATC_DCTX_ALIGNED
 
     *block_sse = ROUND_POWER_OF_TWO((*block_sse), (xd->bd - 8) * 2);
 
@@ -2485,6 +2503,9 @@
       best_rd_stats->sse = intra_txb_rd_info->sse;
       best_rd_stats->skip_txfm = intra_txb_rd_info->eob == 0;
       x->plane[plane].eobs[block] = intra_txb_rd_info->eob;
+#if CONFIG_ATC_DCTX_ALIGNED
+      x->plane[plane].bobs[block] = intra_txb_rd_info->bob;
+#endif  // CONFIG_ATC_DCTX_ALIGNED
       x->plane[plane].txb_entropy_ctx[block] =
           intra_txb_rd_info->txb_entropy_ctx;
       best_eob = intra_txb_rd_info->eob;
@@ -2597,6 +2618,9 @@
   av1_setup_quant(tx_size, !skip_trellis,
                   skip_trellis ? xform_quant_b : AV1_XFORM_QUANT_FP,
                   cpi->oxcf.q_cfg.quant_b_adapt, &quant_param);
+#if CONFIG_ATC_DCTX_ALIGNED
+  int eob_found = 0;
+#endif  // CONFIG_ATC_DCTX_ALIGNED
 
   // Iterate through all transform type candidates.
   for (int idx = 0; idx < TX_TYPES; ++idx) {
@@ -2622,8 +2646,13 @@
     bool skip_idx = false;
     xd->enable_ist = cm->seq_params.enable_ist &&
                      !cpi->sf.tx_sf.tx_type_search.skip_stx_search &&
-                     !mbmi->fsc_mode[xd->tree_type == CHROMA_PART];
+                     !mbmi->fsc_mode[xd->tree_type == CHROMA_PART] &&
+                     !xd->lossless[mbmi->segment_id];
+#if CONFIG_ATC_DCTX_ALIGNED
+    const int max_stx = xd->enable_ist && !(eob_found) ? 4 : 1;
+#else
     const int max_stx = xd->enable_ist ? 4 : 1;
+#endif  // CONFIG_ATC_DCTX_ALIGNED
     for (int stx = 0; stx < max_stx; ++stx) {
       TX_TYPE tx_type = (TX_TYPE)txk_map[idx];
       if (!(allowed_tx_mask & (1 << tx_type))) continue;
@@ -2635,6 +2664,9 @@
           ((tx_type != DCT_DCT && tx_type != ADST_ADST) || plane != 0 ||
            is_inter_block(mbmi, xd->tree_type) || dc_only_blk ||
            intra_mode >= PAETH_PRED || filter || !is_depth0 ||
+#if CONFIG_ATC_DCTX_ALIGNED
+           (eob_found) ||
+#endif  // CONFIG_ATC_DCTX_ALIGNED
            mbmi->fsc_mode[xd->tree_type == CHROMA_PART] ||
            xd->lossless[mbmi->segment_id]);
       if (skip_stx && stx) continue;
@@ -2675,6 +2707,17 @@
           if (*eob != 0) *eob = av1_get_max_eob(txfm_param.tx_size);
         }
       }
+#if CONFIG_ATC_DCTX_ALIGNED
+      // pre-skip DC only case to make things faster
+      uint16_t *const eob = &p->eobs[block];
+      if (*eob == 1 && plane == PLANE_TYPE_Y && !is_inter) {
+        if (tx_type == DCT_DCT) eob_found = 1;
+        if (tx_type != DCT_DCT || (stx && get_primary_tx_type(tx_type))) {
+          update_txk_array(xd, blk_row, blk_col, tx_size, DCT_DCT);
+          continue;
+        }
+      }
+#endif  // CONFIG_ATC_DCTX_ALIGNED
       // Calculate rate cost of quantized coefficients.
       if (quant_param.use_optimize_b) {
         av1_optimize_b(cpi, x, plane, block, tx_size, tx_type,
@@ -2699,6 +2742,19 @@
                                 txb_ctx, cm->features.reduced_tx_set_used);
       }
 
+#if CONFIG_ATC_DCTX_ALIGNED
+      if (*eob == 1 && plane == PLANE_TYPE_Y && !is_inter) {
+        // post quant-skip DC only case
+        if (tx_type == DCT_DCT) eob_found = 1;
+        if (tx_type != DCT_DCT || (stx && get_primary_tx_type(tx_type))) {
+          if (plane == 0)
+            update_txk_array(xd, blk_row, blk_col, tx_size, DCT_DCT);
+          continue;
+        }
+        if (get_secondary_tx_type(tx_type) > 0) continue;
+        if (txfm_param.sec_tx_type > 0) continue;
+      }
+#endif  // CONFIG_ATC_DCTX_ALIGNED
       // If rd cost based on coeff rate alone is already more than best_rd,
       // terminate early.
       if (RDCOST(x->rdmult, rate_cost, 0) > best_rd) continue;
@@ -2831,6 +2887,14 @@
     if (skip_idx) break;
   }
 
+#if CONFIG_ATC_DCTX_ALIGNED
+  if (((best_eob == 1 && best_tx_type != DCT_DCT && plane == 0) ||
+       best_rd == INT64_MAX) &&
+      !is_inter) {
+    best_tx_type = DCT_DCT;
+    if (plane == 0) update_txk_array(xd, blk_row, blk_col, tx_size, DCT_DCT);
+  }
+#endif  // CONFIG_ATC_DCTX_ALIGNED
   best_rd_stats->skip_txfm = best_eob == 0;
   if (plane == 0) update_txk_array(xd, blk_row, blk_col, tx_size, best_tx_type);
   x->plane[plane].txb_entropy_ctx[block] = best_txb_ctx;
@@ -2851,6 +2915,13 @@
     best_rd_stats->sse = block_sse;
   }
 
+#if CONFIG_ATC_DCTX_ALIGNED
+  if (plane == 0 && x->plane[plane].eobs[block] == 1 &&
+      best_tx_type != DCT_DCT && !is_inter) {
+    av1_invalid_rd_stats(best_rd_stats);
+  }
+#endif  // CONFIG_ATC_DCTX_ALIGNED
+
   if (intra_txb_rd_info != NULL) {
     intra_txb_rd_info->valid = 1;
     intra_txb_rd_info->entropy_context = cur_joint_ctx;
@@ -2878,6 +2949,12 @@
   // can use them for prediction.
   recon_intra(cpi, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
               txb_ctx, skip_trellis, best_tx_type, 0, &rate_cost, best_eob);
+#if CONFIG_ATC_DCTX_ALIGNED
+  if (plane == 0 && x->plane[plane].eobs[block] == 1 &&
+      best_tx_type != DCT_DCT && !is_inter) {
+    av1_invalid_rd_stats(best_rd_stats);
+  }
+#endif  // CONFIG_ATC_DCTX_ALIGNED
   p->dqcoeff = orig_dqcoeff;
 #endif  // CONFIG_CROSS_CHROMA_TX
 }
@@ -2894,6 +2971,9 @@
   MB_MODE_INFO *mbmi = xd->mi[0];
   struct macroblock_plane *const p_c1 = &x->plane[AOM_PLANE_U];
   struct macroblock_plane *const p_c2 = &x->plane[AOM_PLANE_V];
+#if CONFIG_ATC_DCTX_ALIGNED
+  const int is_inter = is_inter_block(mbmi, xd->tree_type);
+#endif  // CONFIG_ATC_DCTX_ALIGNED
 
   const int max_eob = av1_get_max_eob(tx_size);
   int64_t best_rd = RDCOST(x->rdmult, best_rd_stats->rate, best_rd_stats->dist);
@@ -3020,6 +3100,11 @@
     if (eobs_ptr_c1[block] == 0 || sse_dqcoeff_c2 > sse_dqcoeff_c1) {
       continue;
     }
+#if CONFIG_ATC_DCTX_ALIGNED
+    if (eobs_ptr_c1[block] == 1 && !is_inter && cctx_type != CCTX_NONE) {
+      continue;
+    }
+#endif  // CONFIG_ATC_DCTX_ALIGNED
 
     // If rd cost based on coeff rate alone is already more than best_rd,
     // terminate early.
@@ -3061,7 +3146,16 @@
   assert(best_rd != INT64_MAX);
 
   best_rd_stats->skip_txfm = (best_eob_c1 == 0 && best_eob_c2 == 0);
+#if CONFIG_ATC_DCTX_ALIGNED
+  if (best_eob_c1 == 1 && !is_inter && best_cctx_type != CCTX_NONE) {
+    best_cctx_type = CCTX_NONE;
+    update_cctx_array(xd, blk_row, blk_col, 0, 0, TX_4X4, CCTX_NONE);
+  } else {
+    update_cctx_array(xd, blk_row, blk_col, 0, 0, TX_4X4, best_cctx_type);
+  }
+#else
   update_cctx_array(xd, blk_row, blk_col, 0, 0, TX_4X4, best_cctx_type);
+#endif  // CONFIG_ATC_DCTX_ALIGNED
   p_c1->txb_entropy_ctx[block] = best_txb_ctx_c1;
   p_c2->txb_entropy_ctx[block] = best_txb_ctx_c2;
   p_c1->eobs[block] = best_eob_c1;
@@ -3144,6 +3238,11 @@
 #endif  // CONFIG_CROSS_CHROMA_TX
                  txb_ctx, ftxs_mode, skip_trellis, ref_rdcost, &this_rd_stats);
 
+#if CONFIG_ATC_DCTX_ALIGNED
+  if (this_rd_stats.dist == INT64_MAX || this_rd_stats.rate == INT_MAX) {
+    return;
+  }
+#endif  // CONFIG_ATC_DCTX_ALIGNED
   av1_merge_rd_stats(rd_stats, &this_rd_stats);
 
 #if !CONFIG_NEW_TX_PARTITION
@@ -3201,6 +3300,9 @@
     rd_stats->rate = zero_blk_rate;
     rd_stats->dist = rd_stats->sse;
     p->eobs[block] = 0;
+#if CONFIG_ATC_DCTX_ALIGNED
+    p->bobs[block] = 0;
+#endif  // CONFIG_ATC_DCTX_ALIGNED
     update_txk_array(xd, blk_row, blk_col, tx_size, DCT_DCT);
   }
   rd_stats->skip_txfm = pick_skip_txfm;
@@ -3648,7 +3750,13 @@
   const int64_t skip_txfm_rd = is_inter_block(mbmi, xd->tree_type)
                                    ? RDCOST(x->rdmult, skip_txfm_rate, 0)
                                    : INT64_MAX;
+#if CONFIG_SKIP_TXFM_OPT
+  const int64_t no_skip_txfm_rd = is_inter_block(mbmi, xd->tree_type)
+                                      ? RDCOST(x->rdmult, no_skip_txfm_rate, 0)
+                                      : 0;
+#else
   const int64_t no_skip_txfm_rd = RDCOST(x->rdmult, no_skip_txfm_rate, 0);
+#endif  // CONFIG_SKIP_TXFM_OPT
   const int skip_trellis = 0;
   av1_txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd,
                        AOMMIN(no_skip_txfm_rd, skip_txfm_rd), AOM_PLANE_Y, bs,
@@ -3873,7 +3981,11 @@
                  &txb_ctx, args->ftxs_mode, args->skip_trellis,
                  args->best_rd - args->current_rd, &this_rd_stats);
 
-  if (this_rd_stats.dist == INT64_MAX) {
+  if (this_rd_stats.dist == INT64_MAX
+#if CONFIG_ATC_DCTX_ALIGNED
+      || this_rd_stats.rate == INT_MAX
+#endif  // CONFIG_ATC_DCTX_ALIGNED
+  ) {
     args->exit_early = 1;
     args->incomplete_exit = 1;
     return;
@@ -3883,11 +3995,7 @@
     assert(!is_inter || plane_bsize < BLOCK_8X8);
 #if CONFIG_ADAPTIVE_DS_FILTER
     cfl_store_tx(xd, blk_row, blk_col, tx_size,
-#if DS_FRAME_LEVEL
-                 cm->features.ds_filter_type);
-#else
                  cm->seq_params.enable_cfl_ds_filter);
-#endif  // DS_FRAME_LEVEL
 #else
     cfl_store_tx(xd, blk_row, blk_col, tx_size);
 #endif  // CONFIG_ADAPTIVE_DS_FILTER
@@ -3963,8 +4071,13 @@
   const int skip_txfm_rate = mode_costs->skip_txfm_cost[skip_ctx][1];
   const int64_t skip_txfm_rd =
       is_inter ? RDCOST(x->rdmult, skip_txfm_rate, 0) : INT64_MAX;
+#if CONFIG_SKIP_TXFM_OPT
+  const int64_t no_this_rd =
+      is_inter ? RDCOST(x->rdmult, no_skip_txfm_rate + tx_size_rate, 0) : 0;
+#else
   const int64_t no_this_rd =
       RDCOST(x->rdmult, no_skip_txfm_rate + tx_size_rate, 0);
+#endif  // CONFIG_SKIP_TXFM_OPT
 
   mbmi->tx_size = tx_size;
   av1_txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd,
@@ -4221,7 +4334,11 @@
 #endif  // CONFIG_CROSS_CHROMA_TX
                    txb_ctx, args->ftxs_mode, args->skip_trellis,
                    args->best_rd - args->current_rd, this_rd_stats);
-    if (this_rd_stats->dist == INT64_MAX) {
+    if (this_rd_stats->dist == INT64_MAX
+#if CONFIG_ATC_DCTX_ALIGNED
+        || this_rd_stats->rate == INT_MAX
+#endif  // CONFIG_ATC_DCTX_ALIGNED
+    ) {
       args->exit_early = 1;
       args->incomplete_exit = 1;
     }
diff --git a/av1/encoder/x86/encodetxb_sse2.c b/av1/encoder/x86/encodetxb_sse2.c
index 93f4032..35c664b 100644
--- a/av1/encoder/x86/encodetxb_sse2.c
+++ b/av1/encoder/x86/encodetxb_sse2.c
@@ -544,11 +544,17 @@
 // Note: levels[] must be in the range [0, 127], inclusive.
 void av1_get_nz_map_contexts_skip_sse2(const uint8_t *const levels,
                                        const int16_t *const scan,
+#if CONFIG_ATC_DCTX_ALIGNED
+                                       const uint16_t bob,
+#endif  // CONFIG_ATC_DCTX_ALIGNED
                                        const uint16_t eob,
                                        const TX_SIZE tx_size,
                                        int8_t *const coeff_contexts) {
   (void)scan;
   (void)eob;
+#if CONFIG_ATC_DCTX_ALIGNED
+  (void)bob;
+#endif  // CONFIG_ATC_DCTX_ALIGNED
   const int width = get_txb_wide(tx_size);
   const int height = get_txb_high(tx_size);
   // coeff_contexts must be 16 byte aligned.
diff --git a/build/cmake/aom_config_defaults.cmake b/build/cmake/aom_config_defaults.cmake
index 4fc3b51..4ad1bf5 100644
--- a/build/cmake/aom_config_defaults.cmake
+++ b/build/cmake/aom_config_defaults.cmake
@@ -137,25 +137,22 @@
                    "Use zero offset for non-normative bit upshift")
 
 # AV2 experiment flags.
-set_aom_config_var(
-  CONFIG_ATC_COEFCODING 1
-  "AV2 enable adaptive transform coefficient coding improvement.")
-set_aom_config_var(CONFIG_ATC_NEWTXSETS 1
-                   "AV2 enable adaptive transform coding and new TX sets.")
+set_aom_config_var(CONFIG_ATC 1 "AV2 enable adaptive transform coding.")
 set_aom_config_var(CONFIG_ATC_REDUCED_TXSET 1
                    "AV2 enable reduced transform set.")
+set_aom_config_var(
+  CONFIG_ATC_DCTX_ALIGNED 1
+  "AV2 TX signaling restriction for DC blocks with EOB alignment.")
 set_aom_config_var(CONFIG_BYPASS_IMPROVEMENT 1
                    "AV2 enable entropy bypass improvement.")
-set_aom_config_var(CONFIG_C043_MVP_IMPROVEMENTS 1
-                   "AV2 enable MVP list improvements.")
-set_aom_config_var(CONFIG_C063_TMVP_IMPROVEMENT 1
-                   "AV2 experiment flag for improved TMVP derivation.")
-set_aom_config_var(CONFIG_EXT_RECUR_PARTITIONS 1 NUMBER
-                   "AV2 Fully recursive partitions experiment flag")
-set_aom_config_var(CONFIG_H_PARTITION 1 NUMBER
-                   "AV2 H partition mode experiment flag")
+set_aom_config_var(CONFIG_EXT_DIR 1 "AV2 extended intra prediction angles.")
+set_aom_config_var(
+  CONFIG_EXT_RECUR_PARTITIONS 1 NUMBER
+  "AV2 Fully recursive partitions including H partitions experiment flag")
 set_aom_config_var(CONFIG_BLOCK_256 1 NUMBER "AV2 BLOCK_256 experiment flag")
 set_aom_config_var(CONFIG_ERP_TFLITE 0 NUMBER "Build ERP with TFLite")
+set_aom_config_var(CONFIG_UNEVEN_4WAY 1 NUMBER
+                   "AV2 uneven 4-way partition experiment flag")
 set_aom_config_var(CONFIG_COMPOUND_WARP_SAMPLES 1 NUMBER
                    "AV2 compound warped motion samples experiment flag")
 set_aom_config_var(CONFIG_NEW_TX_PARTITION 1
@@ -163,12 +160,14 @@
 set_aom_config_var(
   CONFIG_ORIP 1
   "AV2 experiment flag to enable offset based refinement of intra prediction.")
+set_aom_config_var(
+  CONFIG_IDIF 1
+  "AV2 experiment flag to enable Intra Directional Interpolation Filter.")
 set_aom_config_var(CONFIG_ORIP_DC_DISABLED 0
                    "AV2 experiment flag to disable ORIP for DC mode.")
 set_aom_config_var(CONFIG_ORIP_NONDC_DISABLED 0
                    "AV2 experiment flag to disable ORIP for non-DC modes.")
-set_aom_config_var(CONFIG_SMVP_IMPROVEMENT 1 "Enable SMVP improvement")
-set_aom_config_var(CONFIG_TMVP_IMPROVEMENT 1 "Enable TMVP improvement")
+set_aom_config_var(CONFIG_MVP_IMPROVEMENT 1 "Enable MVP improvement")
 set_aom_config_var(CONFIG_REF_MV_BANK 1 "AV2 ref mv bank experiment flag")
 set_aom_config_var(
   CONFIG_CCSO 1 "AV2 experiment flag to enable cross component sample offset.")
@@ -178,8 +177,6 @@
   CONFIG_IBP_DC 1
   "AV2 experiment flag to enable intra bi-prediction for DC mode.")
 set_aom_config_var(CONFIG_AIMC 1 "AV2 adaptive intra mode coding flag.")
-set_aom_config_var(CONFIG_COMPLEXITY_SCALABLE_MVP 1
-                   "Enable complexity scalable mvp")
 set_aom_config_var(
   CONFIG_CONTEXT_DERIVATION 1
   "AV2 experiment flag to enable modified context derivation : CWG-B065.")
@@ -199,31 +196,40 @@
 set_aom_config_var(CONFIG_PC_WIENER 1 NUMBER
                    "AV2 pixel-classified Wiener filter experiment flag")
 # End: CWG-C016
+set_aom_config_var(CONFIG_HIGH_PASS_CROSS_WIENER_FILTER 1
+                   "AV2 high pass cross non-sep wiener filter experiment flag")
+set_aom_config_var(
+  CONFIG_FLEXIBLE_RU_SIZE 1
+  "AV2 experiment flag to choose RU size between 128x128, 256x256 and 512x512")
 
 # Source of throughput analysis : CWG-B065
 set_aom_config_var(CONFIG_THROUGHPUT_ANALYSIS 0
                    "AV2 experiment flag to measure throughput.")
 set_aom_config_var(CONFIG_IBC_SR_EXT 1 "Enables IntraBC search range extension")
-set_aom_config_var(CONFIG_BVP_IMPROVEMENT 1 "Enables BVP improvements")
-set_aom_config_var(CONFIG_BVCOST_UPDATE 1 "Enables sb-level update for bv cost")
+set_aom_config_var(CONFIG_IBC_BV_IMPROVEMENT 1
+                   "Enables BV improvements for IBC")
 set_aom_config_var(CONFIG_CCSO_EXT 1
                    "AV2 experiment flag to enable extended CCSO.")
+set_aom_config_var(CONFIG_CFL_IMPROVEMENTS 1
+                   "AV2 Cfl improvements from CWG-D029.")
+set_aom_config_var(CONFIG_ADPTIVE_DS_422 1
+                   "AV2 adaptive downsampling in CfL for 422 from CWG-D028.")
 set_aom_config_var(CONFIG_ADAPTIVE_MVD 1 "Enable adaptive MVD resolution")
 set_aom_config_var(CONFIG_JOINT_MVD 1 "Enable joint MVD coding")
 set_aom_config_var(CONFIG_IMPROVED_JMVD 1
                    "Enable joint MVD coding with multiple scaling factors")
-set_aom_config_var(CONFIG_INDEP_PALETTE_PARSING 1
-                   "AV2 experiment flag for palette parsing independency.")
-set_aom_config_var(CONFIG_NEW_COLOR_MAP_CODING 1
-                   "AV2 experiment flag to enable improved palette coding.")
+set_aom_config_var(
+  CONFIG_PALETTE_IMPROVEMENTS
+  1
+  "AV2 experiment flag for palette parsing independency and improved palette color map coding."
+)
 set_aom_config_var(CONFIG_SKIP_MODE_SSE_BUG_FIX 1
                    "AV2 experiment flag to fix the SSE calc bug for skip mode.")
 set_aom_config_var(CONFIG_SKIP_MODE_ENHANCEMENT 1
-                   "AV2 experiment flag to enable skip mode enhancement: C019.")
-set_aom_config_var(
-  CONFIG_SKIP_MODE_DRL_WITH_REF_IDX 1
-  "AV2 experiment flag to enable DRL with ref_MV_idx for skip mode.")
+                   "AV2 experiment flag to enable skip mode enhancement.")
 set_aom_config_var(CONFIG_TIP 1 "Enable temporal interpolated prediction (TIP)")
+set_aom_config_var(CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT 0
+                   "Enable frame output order derivation from order hint")
 set_aom_config_var(CONFIG_OPTFLOW_ON_TIP 1
                    "Enable optical flow refinement on top of TIP")
 set_aom_config_var(CONFIG_FLEX_MVRES 1
@@ -236,6 +242,28 @@
                    "Enable parity hiding for coefficients coding. (PH)")
 set_aom_config_var(CONFIG_BAWP 1 "Enable block adaptive weighted prediction")
 set_aom_config_var(CONFIG_WARPMV 1 "Enable warpmv modes")
+set_aom_config_var(CONFIG_IMPROVED_ANGULAR_INTRA 1
+                   "Improved angular intra prediction mode")
+set_aom_config_var(CONFIG_D071_IMP_MSK_BLD 1
+                   "Enable single reference mode for frame boundary")
+
+set_aom_config_var(CONFIG_SKIP_TXFM_OPT 1
+                   "Enable to optimize the signaling of skip_txfm")
+set_aom_config_var(CONFIG_CWP 1 "Enables compound weighted prediction.")
+set_aom_config_var(CONFIG_REFINEMV 1 "Enable refinemv modes")
+
+set_aom_config_var(CONFIG_EXPLICIT_TEMPORAL_DIST_CALC 1
+                   "Enable to explicit temporal distance calculation")
+
+set_aom_config_var(CONFIG_IMPROVED_GLOBAL_MOTION 1
+                   "New global motion syntax for AV2")
+set_aom_config_var(CONFIG_SEP_COMP_DRL 1
+                   "Use separate drl list for compound modes")
+set_aom_config_var(CONFIG_SKIP_ME_FOR_OPFL_MODES 1
+                   "Reuse the mvs of compound mode from non-opfl path")
+
+set_aom_config_var(CONFIG_CWG_D067_IMPROVED_WARP 1
+                   "Improvement of warp motions")
 
 # This is an encode-only change.
 set_aom_config_var(CONFIG_MV_SEARCH_RANGE 1
@@ -244,6 +272,9 @@
                    "AV2 experiment flag to fix CDEF syntax.")
 set_aom_config_var(CONFIG_IMPROVED_CFL 1
                    "Enable improved CfL mode from CWG-C044")
+set_aom_config_var(CONFIG_BLEND_MODE 1
+                   "Enable improved intra blend mode from CWG-D046")
+
 set_aom_config_var(
   CONFIG_PEF 1 "AV2 experiment flag to enable prediction enhancement filter")
 
@@ -264,6 +295,10 @@
 set_aom_config_var(CONFIG_CROSS_CHROMA_TX 1
                    "AV2 cross chroma component transform experiment flag.")
 set_aom_config_var(CONFIG_WEDGE_MOD_EXT 1 "AV2 wedge modes extensions.")
+
+set_aom_config_var(CONFIG_MF_IMPROVEMENT 1
+                   "Enable to improve temporal motion projection")
+
 #
 # Variables in this section control optional features of the build system.
 #
diff --git a/build/cmake/aom_experiment_deps.cmake b/build/cmake/aom_experiment_deps.cmake
index 760ed4b..e9460f2 100644
--- a/build/cmake/aom_experiment_deps.cmake
+++ b/build/cmake/aom_experiment_deps.cmake
@@ -37,11 +37,10 @@
     change_config_and_warn(CONFIG_CCSO_EXT 0 !CONFIG_CCSO)
   endif()
 
-  # CONFIG_ATC_REDUCED_TXSET depends on CONFIG_ATC_NEWTXSETS. If
-  # CONFIG_ATC_NEWTXSETS is off, then CONFIG_ATC_REDUCED_TXSET needs to be
-  # disabled.
-  if(NOT CONFIG_ATC_NEWTXSETS AND CONFIG_ATC_REDUCED_TXSET)
-    change_config_and_warn(CONFIG_ATC_REDUCED_TXSET 0 !CONFIG_ATC_NEWTXSETS)
+  # CONFIG_ATC_REDUCED_TXSET depends on CONFIG_ATC. If CONFIG_ATC is off, then
+  # CONFIG_ATC_REDUCED_TXSET needs to be disabled.
+  if(NOT CONFIG_ATC AND CONFIG_ATC_REDUCED_TXSET)
+    change_config_and_warn(CONFIG_ATC_REDUCED_TXSET 0 !CONFIG_ATC)
   endif()
 
   # CONFIG_OPTFLOW_ON_TIP is dependent on CONFIG_OPTFLOW_REFINEMENT and
@@ -71,6 +70,17 @@
     change_config_and_warn(CONFIG_WARPMV 0 !CONFIG_WARP_REF_LIST)
   endif()
 
+  # CONFIG_CWG_D067_IMPROVED_WARP depends on CONFIG_WARP_REF_LIST
+  if(NOT CONFIG_WARP_REF_LIST AND CONFIG_CWG_D067_IMPROVED_WARP)
+    change_config_and_warn(CONFIG_CWG_D067_IMPROVED_WARP 0
+                           !CONFIG_WARP_REF_LIST)
+  endif()
+
+  # CONFIG_CWG_D067_IMPROVED_WARP depends on CONFIG_WARPMV
+  if(NOT CONFIG_WARPMV AND CONFIG_CWG_D067_IMPROVED_WARP)
+    change_config_and_warn(CONFIG_CWG_D067_IMPROVED_WARP 0 !CONFIG_WARPMV)
+  endif()
+
   # Begin: CWG-C016.
   if(CONFIG_WIENER_NONSEP_CROSS_FILT)
     change_config_and_warn(CONFIG_WIENER_NONSEP 1
@@ -78,11 +88,11 @@
   endif()
   # End: CWG-C016.
 
-  # CONFIG_H_PARTITION is dependent on CONFIG_EXT_RECUR_PARTITIONS. If
-  # CONFIG_EXT_RECUR_PARTITIONS is off, CONFIG_H_PARTITION needs to be turned
+  # CONFIG_UNEVEN_4WAY is dependent on CONFIG_EXT_RECUR_PARTITIONS. If
+  # CONFIG_EXT_RECUR_PARTITIONS is off, CONFIG_UNEVEN_4WAY needs to be turned
   # off.
-  if(NOT CONFIG_EXT_RECUR_PARTITIONS AND CONFIG_H_PARTITION)
-    change_config_and_warn(CONFIG_H_PARTITION 0 !CONFIG_EXT_RECUR_PARTITIONS)
+  if(NOT CONFIG_EXT_RECUR_PARTITIONS AND CONFIG_UNEVEN_4WAY)
+    change_config_and_warn(CONFIG_UNEVEN_4WAY 0 !CONFIG_EXT_RECUR_PARTITIONS)
   endif()
 
 endmacro()
diff --git a/common/args.c b/common/args.c
index ffec73b..82a7e2a 100644
--- a/common/args.c
+++ b/common/args.c
@@ -96,10 +96,19 @@
 #if CONFIG_BAWP
     GET_PARAMS(enable_bawp);
 #endif  // CONFIG_BAWP
+#if CONFIG_CWP
+    GET_PARAMS(enable_cwp);
+#endif  // CONFIG_CWP
+#if CONFIG_D071_IMP_MSK_BLD
+    GET_PARAMS(enable_imp_msk_bld);
+#endif  // CONFIG_D071_IMP_MSK_BLD
     GET_PARAMS(enable_fsc);
 #if CONFIG_ORIP
     GET_PARAMS(enable_orip);
 #endif
+#if CONFIG_IDIF
+    GET_PARAMS(enable_idif);
+#endif  // CONFIG_IDIF
     GET_PARAMS(enable_ist);
 #if CONFIG_CROSS_CHROMA_TX
     GET_PARAMS(enable_cctx);
@@ -117,6 +126,9 @@
 #if CONFIG_JOINT_MVD
     GET_PARAMS(enable_joint_mvd);
 #endif  // CONFIG_JOINT_MVD
+#if CONFIG_REFINEMV
+    GET_PARAMS(enable_refinemv);
+#endif  // CONFIG_REFINEMV
     GET_PARAMS(enable_flip_idtx);
     GET_PARAMS(enable_deblocking);
     GET_PARAMS(enable_cdef);
diff --git a/common/av1_config.c b/common/av1_config.c
index e8a9215..3d00147 100644
--- a/common/av1_config.c
+++ b/common/av1_config.c
@@ -9,6 +9,7 @@
  * source code in the PATENTS file, you can obtain it at
  * aomedia.org/license/patent-license/.
  */
+#include <stdbool.h>
 #include <stdio.h>
 #include <string.h>
 
@@ -234,7 +235,10 @@
 }
 
 // Parse Sequence Header OBU for coding tools beyond AV1
-int parse_sequence_header_beyond_av1(struct aom_read_bit_buffer *reader) {
+int parse_sequence_header_beyond_av1(struct aom_read_bit_buffer *reader,
+                                     bool reduced_still_picture_header) {
+  (void)reduced_still_picture_header;
+
   int result = 0;
 #if CONFIG_REF_MV_BANK
   AV1C_READ_BIT_OR_RETURN_ERROR(enable_refmvbank);
@@ -244,6 +248,10 @@
     AV1C_READ_BITS_OR_RETURN_ERROR(max_reference_frames, 2);
   }
   AV1C_READ_BIT_OR_RETURN_ERROR(explicit_ref_frame_map);
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+  // 0: show_existing_frame, 1: implicit derivation
+  AV1C_READ_BIT_OR_RETURN_ERROR(enable_frame_output_order);
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
   AV1C_READ_BIT_OR_RETURN_ERROR(enable_sdp);
   AV1C_READ_BIT_OR_RETURN_ERROR(enable_ist);
 #if CONFIG_CROSS_CHROMA_TX
@@ -259,6 +267,12 @@
 #if CONFIG_BAWP
   AV1C_READ_BIT_OR_RETURN_ERROR(enable_bawp);
 #endif  // CONFIG_BAWP
+#if CONFIG_CWP
+  AV1C_READ_BIT_OR_RETURN_ERROR(enable_cwp);
+#endif  // CONFIG_CWP
+#if CONFIG_D071_IMP_MSK_BLD
+  AV1C_READ_BIT_OR_RETURN_ERROR(enable_imp_msk_bld);
+#endif  // CONFIG_D071_IMP_MSK_BLD
   AV1C_READ_BIT_OR_RETURN_ERROR(enable_fsc);
 #if CONFIG_CCSO
   AV1C_READ_BIT_OR_RETURN_ERROR(enable_ccso);
@@ -269,10 +283,16 @@
 #if CONFIG_ORIP
   AV1C_READ_BIT_OR_RETURN_ERROR(enable_orip);
 #endif
+#if CONFIG_IDIF
+  AV1C_READ_BIT_OR_RETURN_ERROR(enable_idif);
+#endif  // CONFIG_IDIF
   AV1C_READ_BIT_OR_RETURN_ERROR(enable_ibp);
 #if CONFIG_ADAPTIVE_MVD
   AV1C_READ_BIT_OR_RETURN_ERROR(enable_adaptive_mvd);
 #endif  // CONFIG_ADAPTIVE_MVD
+#if CONFIG_REFINEMV
+  AV1C_READ_BIT_OR_RETURN_ERROR(enable_refinemv);
+#endif  // CONFIG_REFINEMV
 #if CONFIG_FLEX_MVRES
   AV1C_READ_BIT_OR_RETURN_ERROR(enable_flex_mvres);
 #endif  // CONFIG_FLEX_MVRES
@@ -282,6 +302,11 @@
 #if CONFIG_PAR_HIDING
   AV1C_READ_BIT_OR_RETURN_ERROR(enable_parity_hiding);
 #endif  // CONFIG_PAR_HIDING
+#if CONFIG_IMPROVED_GLOBAL_MOTION
+  if (!reduced_still_picture_header) {
+    AV1C_READ_BIT_OR_RETURN_ERROR(enable_global_motion);
+  }
+#endif  // CONFIG_IMPROVED_GLOBAL_MOTION
 
   return 0;
 }
@@ -447,7 +472,7 @@
   AV1C_READ_BIT_OR_RETURN_ERROR(film_grain_params_present);
 
   // Sequence header for coding tools beyond AV1
-  parse_sequence_header_beyond_av1(reader);
+  parse_sequence_header_beyond_av1(reader, reduced_still_picture_header);
 
   return 0;
 }
diff --git a/common/y4minput.c b/common/y4minput.c
index d24bf56..aec02e5 100644
--- a/common/y4minput.c
+++ b/common/y4minput.c
@@ -24,12 +24,13 @@
 // Reads 'size' bytes from 'file' into 'buf' with some fault tolerance.
 // Returns true on success.
 static int file_read(void *buf, size_t size, FILE *file) {
-  const int kMaxRetries = 5;
-  int retry_count = 0;
-  int file_error;
+  const int kMaxTries = 5;
+  int try_count = 0;
+  int file_error = 0;
   size_t len = 0;
-  do {
+  while (!feof(file) && len < size && try_count < kMaxTries) {
     const size_t n = fread((uint8_t *)buf + len, 1, size - len, file);
+    ++try_count;
     len += n;
     file_error = ferror(file);
     if (file_error) {
@@ -42,13 +43,13 @@
         return 0;
       }
     }
-  } while (!feof(file) && len < size && ++retry_count < kMaxRetries);
+  }
 
   if (!feof(file) && len != size) {
     fprintf(stderr,
             "Error reading file: %u of %u bytes read,"
-            " error: %d, retries: %d, %d: %s\n",
-            (uint32_t)len, (uint32_t)size, file_error, retry_count, errno,
+            " error: %d, tries: %d, %d: %s\n",
+            (uint32_t)len, (uint32_t)size, file_error, try_count, errno,
             strerror(errno));
   }
   return len == size;
@@ -1142,9 +1143,15 @@
     y4m_ctx->dst_buf = (unsigned char *)malloc(y4m_ctx->dst_buf_sz);
   else
     y4m_ctx->dst_buf = (unsigned char *)malloc(2 * y4m_ctx->dst_buf_sz);
+  if (!y4m_ctx->dst_buf) return -1;
 
-  if (y4m_ctx->aux_buf_sz > 0)
+  if (y4m_ctx->aux_buf_sz > 0) {
     y4m_ctx->aux_buf = (unsigned char *)malloc(y4m_ctx->aux_buf_sz);
+    if (!y4m_ctx->aux_buf) {
+      free(y4m_ctx->dst_buf);
+      return -1;
+    }
+  }
   return 0;
 }
 
diff --git a/examples/inspect.c b/examples/inspect.c
index 4e98dca..1cc05e8 100644
--- a/examples/inspect.c
+++ b/examples/inspect.c
@@ -601,13 +601,39 @@
   }
   const int num_syms = accounting->syms.num_syms;
   const int num_strs = accounting->syms.dictionary.num_strs;
-  buf += put_str(buf, "  \"symbolsMap\": [");
+  buf += put_str(buf, "  \"symbolsFileMap\": [");
   for (i = 0; i < num_strs; i++) {
-    buf += snprintf(buf, MAX_BUFFER, "\"%s\"",
-                    accounting->syms.dictionary.strs[i]);
+    buf += snprintf(buf, MAX_BUFFER, "\"%s:%d\"",
+                    accounting->syms.dictionary.acct_infos[i].c_file,
+                    accounting->syms.dictionary.acct_infos[i].c_line);
     if (i < num_strs - 1) *(buf++) = ',';
   }
   buf += put_str(buf, "],\n");
+
+  buf += put_str(buf, "  \"symbolsMap\": [");
+  for (i = 0; i < num_strs; i++) {
+    buf += snprintf(buf, MAX_BUFFER, "\"%s\"",
+                    accounting->syms.dictionary.acct_infos[i].c_func);
+    if (i < num_strs - 1) *(buf++) = ',';
+  }
+  buf += put_str(buf, "],\n");
+
+  buf += put_str(buf, "  \"symbolsTagsMap\": [");
+  for (i = 0; i < num_strs; i++) {
+    buf += put_str(buf, "[");
+    for (int j = 0; j < AOM_ACCOUNTING_MAX_TAGS; j++) {
+      if (accounting->syms.dictionary.acct_infos[i].tags[j] == NULL) break;
+      if (j > 0) {
+        *(buf++) = ',';
+      }
+      buf += snprintf(buf, MAX_BUFFER, "\"%s\"",
+                      accounting->syms.dictionary.acct_infos[i].tags[j]);
+    }
+    buf += put_str(buf, "]");
+    if (i < num_strs - 1) *(buf++) = ',';
+  }
+  buf += put_str(buf, "],\n");
+
   buf += put_str(buf, "  \"symbols\": [\n    ");
   AccountingSymbolContext context;
   context.x = -2;
@@ -618,11 +644,12 @@
     if (memcmp(&context, &sym->context, sizeof(AccountingSymbolContext)) != 0) {
       buf += put_num(buf, '[', sym->context.x, 0);
       buf += put_num(buf, ',', sym->context.y, ']');
-    } else {
-      buf += put_num(buf, '[', sym->id, 0);
-      buf += put_num(buf, ',', sym->bits, 0);
-      buf += put_num(buf, ',', sym->samples, ']');
+      *(buf++) = ',';
     }
+    buf += put_num(buf, '[', sym->id, 0);
+    buf += put_num(buf, ',', sym->bits, 0);
+    buf += put_num(buf, ',', sym->value, 0);
+    buf += put_num(buf, ',', sym->coding_mode, ']');
     context = sym->context;
     if (i < num_syms - 1) *(buf++) = ',';
   }
@@ -748,6 +775,8 @@
                   frame_data.delta_q_present_flag);
   buf += snprintf(buf, MAX_BUFFER, "  \"deltaQRes\": %d,\n",
                   frame_data.delta_q_res);
+  buf += snprintf(buf, MAX_BUFFER, "  \"superblockSize\": %d,\n",
+                  frame_data.superblock_size);
   buf += put_str(buf, "  \"config\": {");
   buf += put_map(buf, config_map);
   buf += put_str(buf, "},\n");
@@ -764,6 +793,7 @@
 void ifd_init_cb() {
   aom_inspect_init ii;
   ii.inspect_cb = inspect;
+  ii.inspect_sb_cb = NULL;
   ii.inspect_ctx = NULL;
   aom_codec_control(&codec, AV1_SET_INSPECTION_CALLBACK, &ii);
 }
diff --git a/test/accounting_test.cc b/test/accounting_test.cc
index 8d8d964..1e640fa 100644
--- a/test/accounting_test.cc
+++ b/test/accounting_test.cc
@@ -42,11 +42,9 @@
   aom_accounting_init(&accounting);
   br.accounting = &accounting;
   for (int i = 0; i < kSymbols; i++) {
-    aom_read(&br, 32, "A");
+    aom_read(&br, 32, ACCT_INFO("A"));
   }
-  // Consecutive symbols that are the same are coalesced.
-  GTEST_ASSERT_EQ(accounting.syms.num_syms, 1);
-  GTEST_ASSERT_EQ(accounting.syms.syms[0].samples, (unsigned int)kSymbols);
+  GTEST_ASSERT_EQ(accounting.syms.num_syms, kSymbols);
 
   aom_accounting_reset(&accounting);
   GTEST_ASSERT_EQ(accounting.syms.num_syms, 0);
@@ -55,9 +53,9 @@
   aom_reader_init(&br, bw_buffer, bw.pos);
   br.accounting = &accounting;
   for (int i = 0; i < kSymbols; i++) {
-    aom_read(&br, 32, "A");
-    aom_read(&br, 32, "B");
-    aom_read(&br, 32, "B");
+    aom_read(&br, 32, ACCT_INFO("A"));
+    aom_read(&br, 32, ACCT_INFO("B"));
+    aom_read(&br, 32, ACCT_INFO("B"));
   }
   GTEST_ASSERT_EQ(accounting.syms.num_syms, kSymbols * 2);
   uint32_t tell_frac = aom_reader_tell_frac(&br);
@@ -66,11 +64,15 @@
   }
   GTEST_ASSERT_EQ(tell_frac, 0U);
 
-  GTEST_ASSERT_EQ(aom_accounting_dictionary_lookup(&accounting, "A"),
-                  aom_accounting_dictionary_lookup(&accounting, "A"));
+  AccountingSymbolInfo a1 = ACCT_INFO("A");
+  AccountingSymbolInfo a2 = ACCT_INFO("A");
+  GTEST_ASSERT_EQ(aom_accounting_dictionary_lookup(&accounting, &a1),
+                  aom_accounting_dictionary_lookup(&accounting, &a2));
 
   // Check for collisions. The current aom_accounting_hash function returns
   // the same hash code for AB and BA.
-  GTEST_ASSERT_NE(aom_accounting_dictionary_lookup(&accounting, "AB"),
-                  aom_accounting_dictionary_lookup(&accounting, "BA"));
+  AccountingSymbolInfo ab = ACCT_INFO("AB");
+  AccountingSymbolInfo ba = ACCT_INFO("BA");
+  GTEST_ASSERT_NE(aom_accounting_dictionary_lookup(&accounting, &ab),
+                  aom_accounting_dictionary_lookup(&accounting, &ba));
 }
diff --git a/test/altref_test.cc b/test/altref_test.cc
index cf7ccd3..f5dfb4a 100644
--- a/test/altref_test.cc
+++ b/test/altref_test.cc
@@ -185,7 +185,11 @@
 
   virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) {
     (void)pkt;
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+    frame_num_ += pkt->data.frame.frame_count;
+#else   // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
     ++frame_num_;
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
   }
 
   const gfIntervalParam gf_interval_param_;
diff --git a/test/av1_encoder_parms_get_to_decoder.cc b/test/av1_encoder_parms_get_to_decoder.cc
index 29043b1..47b4448 100644
--- a/test/av1_encoder_parms_get_to_decoder.cc
+++ b/test/av1_encoder_parms_get_to_decoder.cc
@@ -24,8 +24,6 @@
 
 namespace {
 
-const int kMaxPsnr = 100;
-
 struct ParamPassingTestVideo {
   const char *name;
   uint32_t width;
@@ -99,6 +97,7 @@
   virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
                                   ::libaom_test::Encoder *encoder) {
     if (video->frame() == 0) {
+      encoder->Control(AOME_SET_CPUUSED, 5);
       encoder->Control(AV1E_SET_COLOR_PRIMARIES, encode_parms.color_primaries);
       encoder->Control(AV1E_SET_TRANSFER_CHARACTERISTICS,
                        encode_parms.transfer_characteristics);
@@ -130,7 +129,9 @@
 
   virtual void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) {
     if (encode_parms.lossless) {
-      EXPECT_EQ(kMaxPsnr, pkt->data.psnr.psnr[0]);
+      const double lossless_psnr =
+          get_lossless_psnr(test_video_.width, test_video_.height, 8, false);
+      EXPECT_EQ(lossless_psnr, pkt->data.psnr.psnr[0]);
     }
   }
 
diff --git a/test/binary_codes_test.cc b/test/binary_codes_test.cc
index 68cb79c..1f99f20 100644
--- a/test/binary_codes_test.cc
+++ b/test/binary_codes_test.cc
@@ -25,8 +25,6 @@
 #include "aom_dsp/binary_codes_reader.h"
 #include "aom_dsp/binary_codes_writer.h"
 
-#define ACCT_STR __func__
-
 using libaom_test::ACMRandom;
 
 namespace {
@@ -73,7 +71,7 @@
           assert(k == enc_values[n][k][r][v][1]);
           const uint16_t ref = enc_values[n][k][r][v][2];
           const uint16_t value =
-              aom_read_primitive_refsubexpfin(&br, range, k, ref, ACCT_STR);
+              aom_read_primitive_refsubexpfin(&br, range, k, ref, ACCT_INFO());
           GTEST_ASSERT_EQ(value, enc_values[n][k][r][v][3]);
         }
       }
diff --git a/test/boolcoder_test.cc b/test/boolcoder_test.cc
index 9c9eac8..983afd9 100644
--- a/test/boolcoder_test.cc
+++ b/test/boolcoder_test.cc
@@ -78,7 +78,7 @@
           } else if (bit_method == 3) {
             bit = bit_rnd(2);
           }
-          GTEST_ASSERT_EQ(aom_read(&br, probas[i], NULL), bit)
+          GTEST_ASSERT_EQ(aom_read(&br, probas[i], {}), bit)
               << "pos: " << i << " / " << kBitsToTest
               << " bit_method: " << bit_method << " method: " << method;
         }
@@ -105,25 +105,26 @@
     aom_reader br;
     aom_reader_init(&br, bw_buffer, bw.pos);
     uint32_t last_tell = aom_reader_tell(&br);
-    uint32_t last_tell_frac = aom_reader_tell_frac(&br);
+    uint64_t last_tell_frac = aom_reader_tell_frac(&br);
     double frac_diff_total = 0;
     GTEST_ASSERT_GE(aom_reader_tell(&br), 0u);
     GTEST_ASSERT_LE(aom_reader_tell(&br), 1u);
     ASSERT_FALSE(aom_reader_has_overflowed(&br));
     for (int i = 0; i < kSymbols; i++) {
-      aom_read(&br, p, NULL);
+      aom_read(&br, p, {});
       uint32_t tell = aom_reader_tell(&br);
-      uint32_t tell_frac = aom_reader_tell_frac(&br);
+      uint64_t tell_frac = aom_reader_tell_frac(&br);
       GTEST_ASSERT_GE(tell, last_tell)
           << "tell: " << tell << ", last_tell: " << last_tell;
       GTEST_ASSERT_GE(tell_frac, last_tell_frac)
           << "tell_frac: " << tell_frac
           << ", last_tell_frac: " << last_tell_frac;
       // Frac tell should round up to tell.
-      GTEST_ASSERT_EQ(tell, (tell_frac + 7) >> 3);
+      GTEST_ASSERT_EQ(tell, (tell_frac + (1 << OD_BITRES) - 1) >> OD_BITRES);
       last_tell = tell;
       frac_diff_total +=
-          fabs(((tell_frac - last_tell_frac) / 8.0) + log2(probability));
+          fabs(((tell_frac - last_tell_frac) / (double)(1 << OD_BITRES)) +
+               log2(probability));
       last_tell_frac = tell_frac;
     }
     const uint32_t expected = (uint32_t)(-kSymbols * log2(probability));
@@ -152,7 +153,7 @@
     aom_reader_init(&br, bw_buffer, bw.pos);
     ASSERT_FALSE(aom_reader_has_overflowed(&br));
     for (int i = 0; i < kSymbols; i++) {
-      GTEST_ASSERT_EQ(aom_read(&br, p, NULL), 1);
+      GTEST_ASSERT_EQ(aom_read(&br, p, {}), 1);
       ASSERT_FALSE(aom_reader_has_overflowed(&br));
     }
     // In the worst case, the encoder uses just a tiny fraction of the last
@@ -171,11 +172,11 @@
     // additional bits; therefore the number of reads should be increased;
     // 174 * 8 will be enough to consume more than this number of bits.
     for (int i = 0; i < 174 * 8; i++) {
-      aom_read(&br, p, NULL);
+      aom_read(&br, p, {});
     }
 #else
     for (int i = 0; i < 174; i++) {
-      aom_read(&br, p, NULL);
+      aom_read(&br, p, {});
     }
 #endif
     ASSERT_TRUE(aom_reader_has_overflowed(&br));
diff --git a/test/cpu_speed_test.cc b/test/cpu_speed_test.cc
index bddd8ab..e9984ad 100644
--- a/test/cpu_speed_test.cc
+++ b/test/cpu_speed_test.cc
@@ -19,15 +19,13 @@
 
 namespace {
 
-const int kMaxPSNR = 100;
-
 class CpuSpeedTest
     : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int>,
       public ::libaom_test::EncoderTest {
  protected:
   CpuSpeedTest()
       : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
-        set_cpu_used_(GET_PARAM(2)), min_psnr_(kMaxPSNR),
+        set_cpu_used_(GET_PARAM(2)), min_psnr_(DBL_MAX),
         tune_content_(AOM_CONTENT_DEFAULT) {}
   virtual ~CpuSpeedTest() {}
 
@@ -38,7 +36,7 @@
     cfg_.rc_end_usage = AOM_VBR;
   }
 
-  virtual void BeginPassHook(unsigned int /*pass*/) { min_psnr_ = kMaxPSNR; }
+  virtual void BeginPassHook(unsigned int /*pass*/) { min_psnr_ = DBL_MAX; }
 
   virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
                                   ::libaom_test::Encoder *encoder) {
@@ -75,14 +73,19 @@
   cfg_.rc_target_bitrate = 400;
   cfg_.rc_max_quantizer = 0;
   cfg_.rc_min_quantizer = 0;
+  const unsigned int width = 208;
+  const unsigned int height = 144;
+  const unsigned int bit_depth = 8;
 
-  ::libaom_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
-                                       10);
+  ::libaom_test::I420VideoSource video("hantro_odd.yuv", width, height, 30, 1,
+                                       0, 10);
 
   init_flags_ = AOM_CODEC_USE_PSNR;
 
   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  EXPECT_GE(min_psnr_, kMaxPSNR);
+  const double lossless_psnr =
+      get_lossless_psnr(width, height, bit_depth, false);
+  EXPECT_EQ(min_psnr_, lossless_psnr);
 }
 
 void CpuSpeedTest::TestScreencastQ0() {
@@ -91,11 +94,17 @@
   cfg_.rc_target_bitrate = 400;
   cfg_.rc_max_quantizer = 0;
   cfg_.rc_min_quantizer = 0;
+  const unsigned int width = 640;
+  const unsigned int height = 480;
+  const unsigned int bit_depth = 8;
 
   init_flags_ = AOM_CODEC_USE_PSNR;
 
   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  EXPECT_GE(min_psnr_, kMaxPSNR);
+
+  const double lossless_psnr =
+      get_lossless_psnr(width, height, bit_depth, false);
+  EXPECT_EQ(min_psnr_, lossless_psnr);
 }
 
 void CpuSpeedTest::TestTuneScreen() {
diff --git a/test/decode_perf_test.cc b/test/decode_perf_test.cc
index 5e5380e..4a85357 100644
--- a/test/decode_perf_test.cc
+++ b/test/decode_perf_test.cc
@@ -148,7 +148,11 @@
   }
 
   virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) {
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+    out_frames_ += pkt->data.frame.frame_count;
+#else   // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
     ++out_frames_;
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
 
     // Write initial file header if first frame.
     if (pkt->data.frame.pts == 0)
diff --git a/test/ec_test.cc b/test/ec_test.cc
index d89c991..6b30c66 100644
--- a/test/ec_test.cc
+++ b/test/ec_test.cc
@@ -44,7 +44,7 @@
 #if CONFIG_BYPASS_IMPROVEMENT
     unsigned *mode;
 #endif  // CONFIG_BYPASS_IMPROVEMENT
-    unsigned *tell;
+    unsigned long *tell;
     unsigned *enc_method;
     int j;
     sz = rand() / ((RAND_MAX >> (rand() % 9U)) + 1U);
@@ -54,7 +54,7 @@
 #if CONFIG_BYPASS_IMPROVEMENT
     mode = (unsigned *)malloc(sz * sizeof(*mode));
 #endif  // CONFIG_BYPASS_IMPROVEMENT
-    tell = (unsigned *)malloc((sz + 1) * sizeof(*tell));
+    tell = (unsigned long *)malloc((sz + 1) * sizeof(*tell));
     enc_method = (unsigned *)malloc(sz * sizeof(*enc_method));
     od_ec_enc_reset(&enc);
     tell[0] = od_ec_enc_tell_frac(&enc);
@@ -139,7 +139,7 @@
     EXPECT_EQ(od_ec_dec_tell_frac(&dec), tell[0])
         << "od_ec_dec_tell() mismatch between encoder and decoder "
            "at symbol 0: "
-        << (unsigned)od_ec_dec_tell_frac(&dec) << " instead of " << tell[0]
+        << (unsigned long)od_ec_dec_tell_frac(&dec) << " instead of " << tell[0]
         << " (Random seed: " << seed << ").\n";
     for (j = 0; j < sz; j++) {
       int dec_method;
@@ -208,7 +208,7 @@
       EXPECT_EQ(od_ec_dec_tell_frac(&dec), tell[j + 1])
           << "od_ec_dec_tell() mismatch between encoder and "
              "decoder at symbol "
-          << j + 1 << ": " << (unsigned)od_ec_dec_tell_frac(&dec)
+          << j + 1 << ": " << (unsigned long)od_ec_dec_tell_frac(&dec)
           << " instead of " << tell[j + 1] << " (Random seed: " << seed
           << ").\n";
     }
diff --git a/test/encode_test_driver.cc b/test/encode_test_driver.cc
index 2d191cd..74045a4 100644
--- a/test/encode_test_driver.cc
+++ b/test/encode_test_driver.cc
@@ -202,6 +202,10 @@
     number_spatial_layers_ = GetNumSpatialLayers();
 
     bool again;
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+    unsigned int rec_frame_cnt = 0;
+    unsigned int failed_frame_cnt = 0;
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
     for (again = true; again; video->Next()) {
       again = (video->img() != NULL);
 
@@ -236,6 +240,9 @@
                 if (!HandleDecodeResult(res_dec, decoder.get())) break;
 
                 has_dxdata = true;
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+                rec_frame_cnt += pkt->data.frame.frame_count;
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
               }
               ASSERT_GE(pkt->data.frame.pts, last_pts_);
               if (sl == number_spatial_layers_) last_pts_ = pkt->data.frame.pts;
@@ -260,7 +267,22 @@
             }
           }
           if (img_dec) DecompressedFrameHook(*img_dec, video->pts());
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+          failed_frame_cnt = 0;
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
         }
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+        // Continue the encoding process when an empty packet is received
+        // (because an OBU with show_existing_frame == 1 was skipped) and
+        // no input frames remain due to lag_in_frames.
+        // However, if 10 consecutive packets are empty/failed, stop the
+        // encoding.
+        else if (rec_frame_cnt < video->limit() && !again &&
+                 failed_frame_cnt < 10) {
+          again = true;
+          failed_frame_cnt++;
+        }
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
         if (!Continue()) break;
       }  // Loop over spatial layers
     }
diff --git a/test/encodetxb_test.cc b/test/encodetxb_test.cc
index 58f1711..84ebbad 100644
--- a/test/encodetxb_test.cc
+++ b/test/encodetxb_test.cc
@@ -61,7 +61,7 @@
     libaom_test::ClearSystemState();
   }
 
-#if !CONFIG_ATC_COEFCODING
+#if !CONFIG_ATC
   void GetNzMapContextsRun() {
     const int kNumTests = 10;
     int result = 0;
@@ -149,7 +149,7 @@
              (elapsed_time_ref * 1.0) / (elapsed_time * 1.0));
     }
   }
-#endif  // !CONFIG_ATC_COEFCODING
+#endif  // !CONFIG_ATC
 
  private:
   void InitDataWithEob(const int16_t *const scan, const int bwl,
@@ -193,7 +193,7 @@
 };
 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(EncodeTxbTest);
 
-#if !CONFIG_ATC_COEFCODING
+#if !CONFIG_ATC
 TEST_P(EncodeTxbTest, GetNzMapContexts) { GetNzMapContextsRun(); }
 
 TEST_P(EncodeTxbTest, DISABLED_SpeedTestGetNzMapContexts) {
@@ -209,7 +209,7 @@
 INSTANTIATE_TEST_SUITE_P(NEON, EncodeTxbTest,
                          ::testing::Values(av1_get_nz_map_contexts_neon));
 #endif
-#endif  // !CONFIG_ATC_COEFCODING
+#endif  // !CONFIG_ATC
 
 typedef void (*av1_txb_init_levels_func)(const tran_low_t *const coeff,
                                          const int width, const int height,
diff --git a/test/error_resilience_test.cc b/test/error_resilience_test.cc
index cd9ca58..dc63465 100644
--- a/test/error_resilience_test.cc
+++ b/test/error_resilience_test.cc
@@ -82,6 +82,9 @@
     if (video->frame() == 0) {
       encoder->Control(AOME_SET_CPUUSED, kCpuUsed);
       encoder->Control(AOME_SET_ENABLEAUTOALTREF, enable_altref_);
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+      encoder->Control(AV1E_SET_FRAME_OUTPUT_ORDER_DERIVATION, 0);
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
     }
     frame_flags_ &= ~(AOM_EFLAG_NO_REF_FRAME_MVS | AOM_EFLAG_ERROR_RESILIENT |
                       AOM_EFLAG_NO_UPD_ALL | AOM_EFLAG_SET_S_FRAME |
@@ -502,6 +505,9 @@
                                   ::libaom_test::Encoder *encoder) {
     if (video->frame() == 0) {
       encoder->Control(AOME_SET_CPUUSED, 5);
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+      encoder->Control(AV1E_SET_FRAME_OUTPUT_ORDER_DERIVATION, 0);
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
       if (rc_end_usage_ == AOM_Q) {
         encoder->Control(AOME_SET_QP, 210);
       }
diff --git a/test/fwd_kf_test.cc b/test/fwd_kf_test.cc
index db429da..6c0050b 100644
--- a/test/fwd_kf_test.cc
+++ b/test/fwd_kf_test.cc
@@ -26,7 +26,7 @@
 } FwdKfTestParam;
 
 const FwdKfTestParam kTestParams[] = {
-  { 4, 31.1 },  { 6, 31.1 },  { 8, 32.6 },
+  { 4, 31.1 },  { 6, 31.1 },  { 8, 32.3 },
   { 12, 31.7 }, { 16, 32.3 }, { 18, 32.1 }
 };
 
@@ -183,7 +183,7 @@
   is_fwd_kf_present_ = 0;
   libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                      cfg_.g_timebase.den, cfg_.g_timebase.num,
-                                     0, 150);
+                                     0, 60);
   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
   ASSERT_EQ(is_fwd_kf_present_, 1);
 }
diff --git a/test/gf_pyr_height_test.cc b/test/gf_pyr_height_test.cc
index 7b8c8e3..8cfd996 100644
--- a/test/gf_pyr_height_test.cc
+++ b/test/gf_pyr_height_test.cc
@@ -68,19 +68,19 @@
 }
 
 // Params: encoding mode, rate control mode and GFPyrHeightTestParam object.
-class GFPyrHeightTest
+class GFPyrHeightTestLarge
     : public ::libaom_test::CodecTestWith3Params<
           libaom_test::TestMode, aom_rc_mode, GFPyrHeightTestParam>,
       public ::libaom_test::EncoderTest {
  protected:
-  GFPyrHeightTest()
+  GFPyrHeightTestLarge()
       : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
         rc_mode_(GET_PARAM(2)) {
     gf_min_pyr_height_ = GET_PARAM(3).gf_min_pyr_height;
     gf_max_pyr_height_ = GET_PARAM(3).gf_max_pyr_height;
     psnr_threshold_ = GET_PARAM(3).psnr_thresh;
   }
-  virtual ~GFPyrHeightTest() {}
+  virtual ~GFPyrHeightTestLarge() {}
 
   virtual void SetUp() {
     InitializeConfig();
@@ -139,7 +139,7 @@
   double psnr_;
 };
 
-TEST_P(GFPyrHeightTest, EncodeAndVerifyPSNR) {
+TEST_P(GFPyrHeightTestLarge, EncodeAndVerifyPSNR) {
   libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                      cfg_.g_timebase.den, cfg_.g_timebase.num,
                                      0, 32);
@@ -149,7 +149,7 @@
       << "GF Max Pyramid Height = " << gf_max_pyr_height_;
 }
 
-AV1_INSTANTIATE_TEST_SUITE(GFPyrHeightTest, GOODQUALITY_TEST_MODES,
+AV1_INSTANTIATE_TEST_SUITE(GFPyrHeightTestLarge, GOODQUALITY_TEST_MODES,
                            ::testing::Values(AOM_Q, AOM_VBR),
                            ::testing::ValuesIn(kTestParams));
 }  // namespace
diff --git a/test/horz_superres_test.cc b/test/horz_superres_test.cc
index c7f1e54..be61740 100644
--- a/test/horz_superres_test.cc
+++ b/test/horz_superres_test.cc
@@ -54,9 +54,9 @@
 
 const TestVideoParam kTestVideoVectors[] = {
   { "park_joy_90p_8_420.y4m", AOM_IMG_FMT_I420, AOM_BITS_8, 0, 5, 0, 25.0,
-    44.7 },
+    43.0 },
   { "park_joy_90p_10_444.y4m", AOM_IMG_FMT_I44416, AOM_BITS_10, 1, 5, 0, 28.0,
-    47.7 },
+    47.5 },
   { "screendata.y4m", AOM_IMG_FMT_I420, AOM_BITS_8, 0, 4, 1, 16.0, 56.0 },
   // Image coding (single frame).
   { "niklas_1280_720_30.y4m", AOM_IMG_FMT_I420, AOM_BITS_8, 0, 1, 0, 25.0,
diff --git a/test/kf_test.cc b/test/kf_test.cc
index 4e290e3..4a2910c 100644
--- a/test/kf_test.cc
+++ b/test/kf_test.cc
@@ -73,6 +73,20 @@
     }
   }
 
+  virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) {
+    if (kf_dist_ != -1) {
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+      kf_dist_ += pkt->data.frame.frame_count;
+#else   // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+      (void)pkt;
+      ++kf_dist_;
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+      if (kf_dist_ > (int)kf_dist_param_.max_kf_dist) {
+        is_kf_interval_violated_ = true;
+      }
+    }
+  }
+
   virtual bool HandleDecodeResult(const aom_codec_err_t res_dec,
                                   libaom_test::Decoder *decoder) {
     EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError();
@@ -81,12 +95,6 @@
       int frame_flags = 0;
       AOM_CODEC_CONTROL_TYPECHECKED(ctx_dec, AOMD_GET_FRAME_FLAGS,
                                     &frame_flags);
-      if (kf_dist_ != -1) {
-        kf_dist_++;
-        if (kf_dist_ > (int)kf_dist_param_.max_kf_dist) {
-          is_kf_interval_violated_ = true;
-        }
-      }
       if ((frame_flags & AOM_FRAME_IS_KEY) ==
           static_cast<aom_codec_frame_flags_t>(AOM_FRAME_IS_KEY)) {
         if (kf_dist_ != -1 && kf_dist_ < (int)kf_dist_param_.min_kf_dist) {
@@ -172,7 +180,9 @@
           is_kf_placement_violated_ = true;
         }
       }
+#if !CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
       ++frame_num_;
+#endif  // !CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
     }
     return AOM_CODEC_OK == res_dec;
   }
diff --git a/test/level_test.cc b/test/level_test.cc
index 7c053d1..b5e0869 100644
--- a/test/level_test.cc
+++ b/test/level_test.cc
@@ -26,21 +26,21 @@
 const int kLevelKeepStats = 24;
 // Speed settings tested
 static const int kCpuUsedVectors[] = {
-  1,
   2,
   3,
   4,
+  5,
 };
 
-class LevelTest
+class LevelTestLarge
     : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int>,
       public ::libaom_test::EncoderTest {
  protected:
-  LevelTest()
+  LevelTestLarge()
       : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
         cpu_used_(GET_PARAM(2)), target_level_(31) {}
 
-  virtual ~LevelTest() {}
+  virtual ~LevelTestLarge() {}
 
   virtual void SetUp() {
     InitializeConfig();
@@ -70,7 +70,7 @@
   int level_[32];
 };
 
-TEST_P(LevelTest, TestTargetLevelApi) {
+TEST_P(LevelTestLarge, TestTargetLevelApi) {
   static aom_codec_iface_t *codec = &aom_codec_av1_cx_algo;
   aom_codec_ctx_t enc;
   aom_codec_enc_cfg_t cfg;
@@ -96,7 +96,7 @@
   EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc));
 }
 
-TEST_P(LevelTest, TestTargetLevel19) {
+TEST_P(LevelTestLarge, TestTargetLevel19) {
   std::unique_ptr<libaom_test::VideoSource> video;
   video.reset(new libaom_test::Y4mVideoSource("park_joy_90p_8_420.y4m", 0, 10));
   ASSERT_TRUE(video.get() != NULL);
@@ -105,9 +105,9 @@
   ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
 }
 
-TEST_P(LevelTest, TestLevelMonitoringLowBitrate) {
+TEST_P(LevelTestLarge, TestLevelMonitoringLowBitrate) {
-  // To save run time, we only test speed 4.
+  // To save run time, we only test speed 5.
-  if (cpu_used_ == 4) {
+  if (cpu_used_ == 5) {
     libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                        30, 1, 0, 40);
     target_level_ = kLevelKeepStats;
@@ -118,22 +118,23 @@
   }
 }
 
-TEST_P(LevelTest, TestLevelMonitoringHighBitrate) {
+TEST_P(LevelTestLarge, TestLevelMonitoringHighBitrate) {
-  // To save run time, we only test speed 4.
+  // To save run time, we only test speed 5.
-  if (cpu_used_ == 4) {
+  if (cpu_used_ == 5) {
+    const int num_frames = 17;
     libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 40);
+                                       30, 1, 0, num_frames);
     target_level_ = kLevelKeepStats;
     cfg_.rc_target_bitrate = 4000;
-    cfg_.g_limit = 40;
+    cfg_.g_limit = num_frames;
     ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
     ASSERT_EQ(level_[0], 4);
   }
 }
 
-TEST_P(LevelTest, TestTargetLevel0) {
+TEST_P(LevelTestLarge, TestTargetLevel0) {
-  // To save run time, we only test speed 4.
+  // To save run time, we only test speed 5.
-  if (cpu_used_ == 4) {
+  if (cpu_used_ == 5) {
     libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                        30, 1, 0, 50);
     const int target_level = 0;
@@ -144,7 +145,7 @@
   }
 }
 
-AV1_INSTANTIATE_TEST_SUITE(LevelTest,
+AV1_INSTANTIATE_TEST_SUITE(LevelTestLarge,
                            ::testing::Values(::libaom_test::kOnePassGood),
                            ::testing::ValuesIn(kCpuUsedVectors));
 }  // namespace
diff --git a/test/lossless_test.cc b/test/lossless_test.cc
index a5cf3d7..69f3ff4 100644
--- a/test/lossless_test.cc
+++ b/test/lossless_test.cc
@@ -22,15 +22,13 @@
 
 namespace {
 
-const int kMaxPsnr = 100;
-
 class LosslessTestLarge
     : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode,
                                                  aom_rc_mode>,
       public ::libaom_test::EncoderTest {
  protected:
   LosslessTestLarge()
-      : EncoderTest(GET_PARAM(0)), psnr_(kMaxPsnr), nframes_(0),
+      : EncoderTest(GET_PARAM(0)), psnr_(DBL_MAX), nframes_(0),
         encoding_mode_(GET_PARAM(1)), rc_end_usage_(GET_PARAM(2)) {}
 
   virtual ~LosslessTestLarge() {}
@@ -53,7 +51,7 @@
   }
 
   virtual void BeginPassHook(unsigned int /*pass*/) {
-    psnr_ = kMaxPsnr;
+    psnr_ = DBL_MAX;
     nframes_ = 0;
   }
 
@@ -95,11 +93,17 @@
   init_flags_ = AOM_CODEC_USE_PSNR;
 
   // intentionally changed the dimension for better testing coverage
-  libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                     timebase.den, timebase.num, 0, 5);
+  const unsigned int width = 352;
+  const unsigned int height = 288;
+  const unsigned int bit_depth = 8;
+  libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", width,
+                                     height, timebase.den, timebase.num, 0, 5);
   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  const double psnr_lossless = GetMinPsnr();
-  EXPECT_GE(psnr_lossless, kMaxPsnr);
+
+  const double min_psnr = GetMinPsnr();
+  const double lossless_psnr =
+      get_lossless_psnr(width, height, bit_depth, false);
+  EXPECT_EQ(min_psnr, lossless_psnr);
 }
 
 TEST_P(LosslessTestLarge, TestLossLessEncoding444) {
@@ -115,8 +119,10 @@
   init_flags_ = AOM_CODEC_USE_PSNR;
 
   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  const double psnr_lossless = GetMinPsnr();
-  EXPECT_GE(psnr_lossless, kMaxPsnr);
+
+  const double min_psnr = GetMinPsnr();
+  const double lossless_psnr = get_lossless_psnr(352, 288, 8, true);
+  EXPECT_EQ(min_psnr, lossless_psnr);
 }
 
 TEST_P(LosslessTestLarge, TestLossLessEncodingCtrl) {
@@ -131,11 +137,17 @@
 
   init_flags_ = AOM_CODEC_USE_PSNR;
 
-  libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                     timebase.den, timebase.num, 0, 5);
+  const unsigned int width = 352;
+  const unsigned int height = 288;
+  const unsigned int bit_depth = 8;
+  libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", width,
+                                     height, timebase.den, timebase.num, 0, 5);
   ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  const double psnr_lossless = GetMinPsnr();
-  EXPECT_GE(psnr_lossless, kMaxPsnr);
+
+  const double min_psnr = GetMinPsnr();
+  const double lossless_psnr =
+      get_lossless_psnr(width, height, bit_depth, false);
+  EXPECT_EQ(min_psnr, lossless_psnr);
 }
 
 AV1_INSTANTIATE_TEST_SUITE(LosslessTestLarge, GOODQUALITY_TEST_MODES,
diff --git a/test/resize_test.cc b/test/resize_test.cc
index f4fc0ee..46a17a1 100644
--- a/test/resize_test.cc
+++ b/test/resize_test.cc
@@ -279,8 +279,11 @@
 
 #if WRITE_COMPRESSED_STREAM
   virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) {
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+    out_frames_ += pkt->data.frame.frame_count;
+#else   // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
     ++out_frames_;
-
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
     // Write initial file header if first frame.
     if (pkt->data.frame.pts == 0) write_ivf_file_header(&cfg_, 0, outfile_);
 
@@ -372,6 +375,9 @@
     if (video->frame() == 0) {
       encoder->Control(AOME_SET_CPUUSED, cpu_used_);
       encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+      encoder->Control(AV1E_SET_FRAME_OUTPUT_ORDER_DERIVATION, 0);
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
     }
   }
 
diff --git a/test/sad_test.cc b/test/sad_test.cc
index 340e1e8..fd21490 100644
--- a/test/sad_test.cc
+++ b/test/sad_test.cc
@@ -1495,8 +1495,10 @@
   make_tuple(32, 32, &aom_highbd_sad32x32_sse2, 8),
   make_tuple(32, 16, &aom_highbd_sad32x16_sse2, 8),
   make_tuple(16, 32, &aom_highbd_sad16x32_sse2, 8),
+#if !CONFIG_UNEVEN_4WAY
   make_tuple(16, 16, &aom_highbd_sad16x16_sse2, 8),
   make_tuple(16, 8, &aom_highbd_sad16x8_sse2, 8),
+#endif  // !CONFIG_UNEVEN_4WAY
   make_tuple(8, 16, &aom_highbd_sad8x16_sse2, 8),
   make_tuple(8, 8, &aom_highbd_sad8x8_sse2, 8),
   make_tuple(8, 4, &aom_highbd_sad8x4_sse2, 8),
@@ -1508,8 +1510,10 @@
   make_tuple(32, 32, &aom_highbd_sad32x32_sse2, 10),
   make_tuple(32, 16, &aom_highbd_sad32x16_sse2, 10),
   make_tuple(16, 32, &aom_highbd_sad16x32_sse2, 10),
+#if !CONFIG_UNEVEN_4WAY
   make_tuple(16, 16, &aom_highbd_sad16x16_sse2, 10),
   make_tuple(16, 8, &aom_highbd_sad16x8_sse2, 10),
+#endif  // !CONFIG_UNEVEN_4WAY
   make_tuple(8, 16, &aom_highbd_sad8x16_sse2, 10),
   make_tuple(8, 8, &aom_highbd_sad8x8_sse2, 10),
   make_tuple(8, 4, &aom_highbd_sad8x4_sse2, 10),
@@ -1521,8 +1525,10 @@
   make_tuple(32, 32, &aom_highbd_sad32x32_sse2, 12),
   make_tuple(32, 16, &aom_highbd_sad32x16_sse2, 12),
   make_tuple(16, 32, &aom_highbd_sad16x32_sse2, 12),
+#if !CONFIG_UNEVEN_4WAY
   make_tuple(16, 16, &aom_highbd_sad16x16_sse2, 12),
   make_tuple(16, 8, &aom_highbd_sad16x8_sse2, 12),
+#endif  // !CONFIG_UNEVEN_4WAY
   make_tuple(8, 16, &aom_highbd_sad8x16_sse2, 12),
   make_tuple(8, 8, &aom_highbd_sad8x8_sse2, 12),
   make_tuple(8, 4, &aom_highbd_sad8x4_sse2, 12),
@@ -1543,11 +1549,13 @@
   make_tuple(32, 8, &aom_highbd_sad32x8_sse2, 12),
   make_tuple(8, 32, &aom_highbd_sad8x32_sse2, 12),
 
+#if !CONFIG_UNEVEN_4WAY
   make_tuple(16, 4, &aom_highbd_sad16x4_sse2, 8),
-  make_tuple(4, 16, &aom_highbd_sad4x16_sse2, 8),
   make_tuple(16, 4, &aom_highbd_sad16x4_sse2, 10),
-  make_tuple(4, 16, &aom_highbd_sad4x16_sse2, 10),
   make_tuple(16, 4, &aom_highbd_sad16x4_sse2, 12),
+#endif  // !CONFIG_UNEVEN_4WAY
+  make_tuple(4, 16, &aom_highbd_sad4x16_sse2, 8),
+  make_tuple(4, 16, &aom_highbd_sad4x16_sse2, 10),
   make_tuple(4, 16, &aom_highbd_sad4x16_sse2, 12),
 };
 INSTANTIATE_TEST_SUITE_P(SSE2, SADTest, ::testing::ValuesIn(sse2_tests));
@@ -1559,8 +1567,10 @@
   make_tuple(32, 32, &aom_highbd_sad_skip_32x32_sse2, 8),
   make_tuple(32, 16, &aom_highbd_sad_skip_32x16_sse2, 8),
   make_tuple(16, 32, &aom_highbd_sad_skip_16x32_sse2, 8),
+#if !CONFIG_UNEVEN_4WAY
   make_tuple(16, 16, &aom_highbd_sad_skip_16x16_sse2, 8),
   make_tuple(16, 8, &aom_highbd_sad_skip_16x8_sse2, 8),
+#endif  // !CONFIG_UNEVEN_4WAY
   make_tuple(8, 16, &aom_highbd_sad_skip_8x16_sse2, 8),
   make_tuple(8, 8, &aom_highbd_sad_skip_8x8_sse2, 8),
   make_tuple(4, 8, &aom_highbd_sad_skip_4x8_sse2, 8),
@@ -1576,8 +1586,10 @@
   make_tuple(32, 32, &aom_highbd_sad_skip_32x32_sse2, 10),
   make_tuple(32, 16, &aom_highbd_sad_skip_32x16_sse2, 10),
   make_tuple(16, 32, &aom_highbd_sad_skip_16x32_sse2, 10),
+#if !CONFIG_UNEVEN_4WAY
   make_tuple(16, 16, &aom_highbd_sad_skip_16x16_sse2, 10),
   make_tuple(16, 8, &aom_highbd_sad_skip_16x8_sse2, 10),
+#endif  // !CONFIG_UNEVEN_4WAY
   make_tuple(8, 16, &aom_highbd_sad_skip_8x16_sse2, 10),
   make_tuple(8, 8, &aom_highbd_sad_skip_8x8_sse2, 10),
   make_tuple(4, 8, &aom_highbd_sad_skip_4x8_sse2, 10),
@@ -1593,8 +1605,10 @@
   make_tuple(32, 32, &aom_highbd_sad_skip_32x32_sse2, 12),
   make_tuple(32, 16, &aom_highbd_sad_skip_32x16_sse2, 12),
   make_tuple(16, 32, &aom_highbd_sad_skip_16x32_sse2, 12),
+#if !CONFIG_UNEVEN_4WAY
   make_tuple(16, 16, &aom_highbd_sad_skip_16x16_sse2, 12),
   make_tuple(16, 8, &aom_highbd_sad_skip_16x8_sse2, 12),
+#endif  // !CONFIG_UNEVEN_4WAY
   make_tuple(8, 16, &aom_highbd_sad_skip_8x16_sse2, 12),
   make_tuple(8, 8, &aom_highbd_sad_skip_8x8_sse2, 12),
   make_tuple(4, 8, &aom_highbd_sad_skip_4x8_sse2, 12),
@@ -1614,8 +1628,10 @@
   make_tuple(32, 32, &aom_highbd_sad32x32_avg_sse2, 8),
   make_tuple(32, 16, &aom_highbd_sad32x16_avg_sse2, 8),
   make_tuple(16, 32, &aom_highbd_sad16x32_avg_sse2, 8),
+#if !CONFIG_UNEVEN_4WAY
   make_tuple(16, 16, &aom_highbd_sad16x16_avg_sse2, 8),
   make_tuple(16, 8, &aom_highbd_sad16x8_avg_sse2, 8),
+#endif  // !CONFIG_UNEVEN_4WAY
   make_tuple(8, 16, &aom_highbd_sad8x16_avg_sse2, 8),
   make_tuple(8, 8, &aom_highbd_sad8x8_avg_sse2, 8),
   make_tuple(8, 4, &aom_highbd_sad8x4_avg_sse2, 8),
@@ -1627,8 +1643,10 @@
   make_tuple(32, 32, &aom_highbd_sad32x32_avg_sse2, 10),
   make_tuple(32, 16, &aom_highbd_sad32x16_avg_sse2, 10),
   make_tuple(16, 32, &aom_highbd_sad16x32_avg_sse2, 10),
+#if !CONFIG_UNEVEN_4WAY
   make_tuple(16, 16, &aom_highbd_sad16x16_avg_sse2, 10),
   make_tuple(16, 8, &aom_highbd_sad16x8_avg_sse2, 10),
+#endif  // !CONFIG_UNEVEN_4WAY
   make_tuple(8, 16, &aom_highbd_sad8x16_avg_sse2, 10),
   make_tuple(8, 8, &aom_highbd_sad8x8_avg_sse2, 10),
   make_tuple(8, 4, &aom_highbd_sad8x4_avg_sse2, 10),
@@ -1640,8 +1658,10 @@
   make_tuple(32, 32, &aom_highbd_sad32x32_avg_sse2, 12),
   make_tuple(32, 16, &aom_highbd_sad32x16_avg_sse2, 12),
   make_tuple(16, 32, &aom_highbd_sad16x32_avg_sse2, 12),
+#if !CONFIG_UNEVEN_4WAY
   make_tuple(16, 16, &aom_highbd_sad16x16_avg_sse2, 12),
   make_tuple(16, 8, &aom_highbd_sad16x8_avg_sse2, 12),
+#endif  // !CONFIG_UNEVEN_4WAY
   make_tuple(8, 16, &aom_highbd_sad8x16_avg_sse2, 12),
   make_tuple(8, 8, &aom_highbd_sad8x8_avg_sse2, 12),
   make_tuple(8, 4, &aom_highbd_sad8x4_avg_sse2, 12),
diff --git a/test/scan_test.cc b/test/scan_test.cc
index cf3c317..e356c50 100644
--- a/test/scan_test.cc
+++ b/test/scan_test.cc
@@ -110,7 +110,7 @@
       SCAN_MODE scan_mode;
       TX_CLASS tx_class = tx_type_to_class[(TX_TYPE)tx_type];
       if (tx_class == TX_CLASS_2D) {
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
         scan_mode = SCAN_MODE_COL_DIAG;
 #else
         if (rows == cols) {
@@ -120,7 +120,7 @@
         } else {
           scan_mode = SCAN_MODE_COL_DIAG;
         }
-#endif  // CONFIG_ATC_COEFCODING
+#endif  // CONFIG_ATC
       } else if (tx_class == TX_CLASS_VERT) {
         scan_mode = SCAN_MODE_ROW_1D;
       } else {
diff --git a/test/sef_test.cc b/test/sef_test.cc
new file mode 100644
index 0000000..e1bfa72
--- /dev/null
+++ b/test/sef_test.cc
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2023, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 3-Clause Clear License
+ * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear
+ * License was not distributed with this source code in the LICENSE file, you
+ * can obtain it at aomedia.org/license/software-license/bsd-3-c-c/.  If the
+ * Alliance for Open Media Patent License 1.0 was not distributed with this
+ * source code in the PATENTS file, you can obtain it at
+ * aomedia.org/license/patent-license/.
+ */
+
+#include <ostream>
+
+#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+
+namespace {
+
+static const struct SEFTestParam {
+  int enable_frame_output_order_derivation;
+  double psnr_thresh;
+} sefTestParams[] = {
+  // enable_frame_output_order_derivation = 0
+  { 0, 30.0 },
+  // enable_frame_output_order_derivation = 1
+  { 1, 30.0 },
+};
+
+// The compiler may add padding to the struct above for alignment, and gtest
+// may try to print those bytes (on error, for example). That would cause
+// valgrind to complain that the padding is uninitialized. To avoid that, we
+// provide our own function to print the struct.
+// This also makes the '--gtest_list_tests' output more understandable.
+std::ostream &operator<<(std::ostream &os, const SEFTestParam &p) {
+  os << "SEFTestParam { "
+     << "frame_output_order_derivation = "
+     << p.enable_frame_output_order_derivation << ", "
+     << "psnr_thresh = " << p.psnr_thresh << " }";
+  return os;
+}
+
+// Params: encoding mode, rate control mode and SEFTestParam object.
+class SEFTest
+    : public ::libaom_test::CodecTestWith3Params<libaom_test::TestMode,
+                                                 aom_rc_mode, SEFTestParam>,
+      public ::libaom_test::EncoderTest {
+ protected:
+  SEFTest()
+      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
+        rc_mode_(GET_PARAM(2)) {
+    enable_frame_output_order_derivation_ =
+        GET_PARAM(3).enable_frame_output_order_derivation;
+    psnr_threshold_ = GET_PARAM(3).psnr_thresh;
+  }
+  virtual ~SEFTest() {}
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(encoding_mode_);
+    const aom_rational timebase = { 1, 30 };
+    cfg_.g_timebase = timebase;
+    cpu_used_ = 4;
+    cfg_.rc_end_usage = rc_mode_;
+    cfg_.g_lag_in_frames = 19;
+    cfg_.g_threads = 0;
+    init_flags_ = AOM_CODEC_USE_PSNR;
+  }
+
+  virtual void BeginPassHook(unsigned int) {
+    psnr_ = 0.0;
+    nframes_ = 0;
+  }
+
+  virtual void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) {
+    psnr_ += pkt->data.psnr.psnr[0];
+    nframes_++;
+  }
+
+  virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
+                                  ::libaom_test::Encoder *encoder) {
+    if (video->frame() == 0) {
+      encoder->Control(AOME_SET_CPUUSED, cpu_used_);
+      if (rc_mode_ == AOM_Q) {
+        encoder->Control(AOME_SET_QP, 210);
+      }
+      encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1);
+      encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7);
+      encoder->Control(AOME_SET_ARNR_STRENGTH, 5);
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+      encoder->Control(AV1E_SET_FRAME_OUTPUT_ORDER_DERIVATION,
+                       enable_frame_output_order_derivation_);
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+    }
+  }
+
+  double GetAveragePsnr() const {
+    if (nframes_) return psnr_ / nframes_;
+    return 0.0;
+  }
+
+  double GetPsnrThreshold() { return psnr_threshold_; }
+
+  ::libaom_test::TestMode encoding_mode_;
+  aom_rc_mode rc_mode_;
+  int enable_frame_output_order_derivation_;
+  double psnr_threshold_;
+  int cpu_used_;
+  int nframes_;
+  double psnr_;
+};
+
+TEST_P(SEFTest, TestShowExistingFrame) {
+  libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                     cfg_.g_timebase.den, cfg_.g_timebase.num,
+                                     0, 32);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  EXPECT_GT(GetAveragePsnr(), GetPsnrThreshold())
+      << "Frame output order derivation = "
+      << enable_frame_output_order_derivation_ << ", ";
+}
+
+AV1_INSTANTIATE_TEST_SUITE(SEFTest, GOODQUALITY_TEST_MODES,
+                           ::testing::Values(AOM_Q),
+                           ::testing::ValuesIn(sefTestParams));
+}  // namespace
diff --git a/test/subgop_test.cc b/test/subgop_test.cc
index c77b5c0..be62bd3 100644
--- a/test/subgop_test.cc
+++ b/test/subgop_test.cc
@@ -62,6 +62,9 @@
 // low delay config without references
 extern "C" const char subgop_config_str_ld[];
 
+const int kCpuUsed = 5;
+const unsigned int kFrames = 70;
+
 typedef enum {
   DEFAULT,
   ENHANCE,
@@ -88,7 +91,6 @@
   int max_gf_interval;
   int frame_w;
   int frame_h;
-  int cpu_used;
   int lag_in_frames;
   int use_fixed_qp_offsets;
 } SubgopTestParams;
@@ -104,69 +106,69 @@
 static const SubgopTestParams SubGopTestVectors[] = {
   // Default subgop config
   { subgop_config_str_preset_map[DEFAULT].preset_tag,
-    "hantro_collage_w352h288.yuv", 0, 16, 352, 288, 5, 35, 0 },
+    "hantro_collage_w352h288.yuv", 0, 16, 352, 288, 35, 0 },
   { subgop_config_str_preset_map[DEFAULT].preset_tag, "desktop1.320_180.yuv", 0,
-    16, 320, 180, 5, 35, 0 },
+    16, 320, 180, 35, 0 },
   { subgop_config_str_preset_map[DEFAULT].preset_tag,
-    "pixel_capture_w320h240.yuv", 16, 16, 320, 240, 5, 35, 1 },
+    "pixel_capture_w320h240.yuv", 16, 16, 320, 240, 35, 1 },
   { subgop_config_str_preset_map[DEFAULT].preset_tag,
-    "hantro_collage_w352h288.yuv", 0, 32, 352, 288, 5, 35, 0 },
+    "hantro_collage_w352h288.yuv", 0, 32, 352, 288, 35, 0 },
   { subgop_config_str_preset_map[DEFAULT].preset_tag,
-    "pixel_capture_w320h240.yuv", 32, 32, 320, 240, 5, 35, 1 },
+    "pixel_capture_w320h240.yuv", 32, 32, 320, 240, 35, 1 },
 
   // Enhanced subgop config
   { subgop_config_str_preset_map[ENHANCE].preset_tag, "niklas_640_480_30.yuv",
-    0, 15, 640, 480, 5, 35, 0 },
+    0, 15, 640, 480, 35, 0 },
   { subgop_config_str_preset_map[ENHANCE].preset_tag, "paris_352_288_30.y4m", 0,
-    6, 352, 288, 5, 35, 0 },
+    6, 352, 288, 35, 0 },
   { subgop_config_str_preset_map[ENHANCE].preset_tag,
-    "hantro_collage_w352h288.yuv", 0, 16, 352, 288, 5, 35, 0 },
+    "hantro_collage_w352h288.yuv", 0, 16, 352, 288, 35, 0 },
   { subgop_config_str_preset_map[ENHANCE].preset_tag,
-    "pixel_capture_w320h240.yuv", 0, 12, 320, 240, 5, 35, 0 },
+    "pixel_capture_w320h240.yuv", 0, 12, 320, 240, 35, 0 },
   { subgop_config_str_preset_map[ENHANCE].preset_tag, "niklas_1280_720_30.y4m",
-    0, 11, 1280, 720, 5, 35, 0 },
+    0, 11, 1280, 720, 35, 0 },
   { subgop_config_str_preset_map[ENHANCE].preset_tag, "screendata.y4m", 0, 16,
-    640, 480, 5, 35, 0 },
+    640, 480, 35, 0 },
   { subgop_config_str_preset_map[ENHANCE].preset_tag,
-    "pixel_capture_w320h240.yuv", 0, 14, 320, 240, 5, 35, 0 },
+    "pixel_capture_w320h240.yuv", 0, 14, 320, 240, 35, 0 },
   { subgop_config_str_preset_map[ENHANCE].preset_tag, "desktop1.320_180.yuv", 0,
-    10, 320, 180, 5, 35, 0 },
+    10, 320, 180, 35, 0 },
   { subgop_config_str_preset_map[ENHANCE].preset_tag, "paris_352_288_30.y4m", 0,
-    13, 352, 288, 5, 35, 0 },
+    13, 352, 288, 35, 0 },
   { subgop_config_str_preset_map[ENHANCE].preset_tag,
-    "pixel_capture_w320h240.yuv", 0, 8, 320, 240, 5, 35, 0 },
+    "pixel_capture_w320h240.yuv", 0, 8, 320, 240, 35, 0 },
 
   // Asymmetric subgop config
   { subgop_config_str_preset_map[ASYMMETRIC].preset_tag,
-    "pixel_capture_w320h240.yuv", 0, 16, 320, 240, 5, 35, 0 },
+    "pixel_capture_w320h240.yuv", 0, 16, 320, 240, 35, 0 },
   { subgop_config_str_preset_map[ASYMMETRIC].preset_tag, "desktop1.320_180.yuv",
-    0, 16, 320, 180, 5, 35, 0 },
+    0, 16, 320, 180, 35, 0 },
 
   // Temporal scalable subgop config
   { subgop_config_str_preset_map[TEMPORAL_SCALABLE].preset_tag,
-    "pixel_capture_w320h240.yuv", 0, 16, 320, 240, 5, 35, 0 },
+    "pixel_capture_w320h240.yuv", 0, 16, 320, 240, 35, 0 },
   { subgop_config_str_preset_map[TEMPORAL_SCALABLE].preset_tag,
-    "hantro_collage_w352h288.yuv", 0, 16, 352, 288, 5, 35, 0 },
+    "hantro_collage_w352h288.yuv", 0, 16, 352, 288, 35, 0 },
 
   // Low delay subgop config
   { subgop_config_str_preset_map[LOW_DELAY].preset_tag, "paris_352_288_30.y4m",
-    0, 16, 352, 288, 5, 0, 0 },
+    0, 16, 352, 288, 0, 0 },
   { subgop_config_str_preset_map[LOW_DELAY].preset_tag, "desktop1.320_180.yuv",
-    16, 16, 320, 180, 5, 0, 1 },
+    16, 16, 320, 180, 0, 1 },
   { subgop_config_str_preset_map[LOW_DELAY].preset_tag, "paris_352_288_30.y4m",
-    0, 32, 352, 288, 5, 0, 0 },
+    0, 32, 352, 288, 0, 0 },
   { subgop_config_str_preset_map[LOW_DELAY].preset_tag, "desktop1.320_180.yuv",
-    32, 32, 320, 180, 5, 0, 1 },
+    32, 32, 320, 180, 0, 1 },
 
   // Non-default subgop config
   { subgop_config_str_nondef[0], "pixel_capture_w320h240.yuv", 0, 4, 320, 240,
-    5, 35, 0 },
-  { subgop_config_str_nondef[0], "desktop1.320_180.yuv", 0, 5, 320, 180, 5, 35,
+    35, 0 },
+  { subgop_config_str_nondef[0], "desktop1.320_180.yuv", 0, 5, 320, 180, 35,
     0 },
   { subgop_config_str_nondef[0], "pixel_capture_w320h240.yuv", 0, 7, 320, 240,
-    5, 35, 0 },
+    35, 0 },
   { subgop_config_str_nondef[0], "hantro_collage_w352h288.yuv", 0, 9, 352, 288,
-    5, 35, 0 },
+    35, 0 },
 };
 
 std::ostream &operator<<(std::ostream &os, const SubgopTestParams &test_arg) {
@@ -175,8 +177,7 @@
             << " min_gf_interval:" << test_arg.min_gf_interval
             << " max_gf_interval:" << test_arg.max_gf_interval
             << " frame_width:" << test_arg.frame_w
-            << " frame_height:" << test_arg.frame_h
-            << " cpu_used:" << test_arg.cpu_used
+            << " frame_height:" << test_arg.frame_h << " cpu_used:" << kCpuUsed
             << " lag_in_frames:" << test_arg.lag_in_frames
             << " use_fixed_qp_offsets:" << test_arg.use_fixed_qp_offsets
             << " }";
@@ -229,7 +230,7 @@
   virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
                                   ::libaom_test::Encoder *encoder) {
     if (video->frame() == 0) {
-      encoder->Control(AOME_SET_CPUUSED, subgop_test_params_.cpu_used);
+      encoder->Control(AOME_SET_CPUUSED, kCpuUsed);
       if (rc_end_usage_ == AOM_Q) {
         encoder->Control(AOME_SET_QP, 210);
       }
@@ -262,7 +263,6 @@
     ResetSubgop();
     is_first_frame_in_subgop_key_ = 0;
     frames_from_key_ = 0;
-    frame_num_ = 0;
     enable_subgop_stats_ = 1;
     memset(&subgop_last_step_, 0, sizeof(subgop_last_step_));
   }
@@ -420,12 +420,18 @@
   // Validates frametype(along with temporal filtering), frame coding order
   bool ValidateSubgopFrametype() {
     for (int idx = 0; idx < subgop_cfg_ref_->num_steps; idx++) {
-      EXPECT_EQ(subgop_cfg_ref_->step[idx].disp_frame_idx,
-                subgop_cfg_test_.step[idx].disp_frame_idx)
-          << "Error:display_index doesn't match";
-      EXPECT_EQ(subgop_cfg_ref_->step[idx].type_code,
-                subgop_cfg_test_.step[idx].type_code)
-          << "Error:frame type doesn't match";
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+      if (subgop_cfg_ref_->step[idx].type_code != FRAME_TYPE_INO_SHOWEXISTING) {
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+        EXPECT_EQ(subgop_cfg_ref_->step[idx].disp_frame_idx,
+                  subgop_cfg_test_.step[idx].disp_frame_idx)
+            << "Error:display_index doesn't match";
+        EXPECT_EQ(subgop_cfg_ref_->step[idx].type_code,
+                  subgop_cfg_test_.step[idx].type_code)
+            << "Error:frame type doesn't match";
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+      }
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
     }
     return 1;
   }
@@ -438,12 +444,18 @@
         max_pyramid_level = subgop_cfg_ref_->step[idx].pyr_level;
     }
     for (int idx = 0; idx < subgop_cfg_ref_->num_steps; idx++) {
-      int8_t ref_pyramid_level =
-          (subgop_cfg_ref_->step[idx].pyr_level == max_pyramid_level)
-              ? MAX_ARF_LAYERS
-              : subgop_cfg_ref_->step[idx].pyr_level;
-      EXPECT_EQ(subgop_cfg_test_.step[idx].pyr_level, ref_pyramid_level)
-          << "Error:pyramid level doesn't match";
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+      if (subgop_cfg_ref_->step[idx].type_code != FRAME_TYPE_INO_SHOWEXISTING) {
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+        int8_t ref_pyramid_level =
+            (subgop_cfg_ref_->step[idx].pyr_level == max_pyramid_level)
+                ? MAX_ARF_LAYERS
+                : subgop_cfg_ref_->step[idx].pyr_level;
+        EXPECT_EQ(subgop_cfg_test_.step[idx].pyr_level, ref_pyramid_level)
+            << "Error:pyramid level doesn't match";
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+      }
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
     }
   }
 
@@ -454,13 +466,19 @@
     int pyramid_level;
     for (int idx = 0; idx < subgop_cfg_ref_->num_steps; idx++) {
       pyramid_level = subgop_cfg_test_.step[idx].pyr_level;
-      if (level_qindex[pyramid_level] < 0) {
-        level_qindex[pyramid_level] = subgop_data_.step[idx].qindex;
-      } else if (!subgop_data_.step[idx].show_existing_frame &&
-                 !subgop_data_.step[idx].is_filtered) {
-        EXPECT_EQ(level_qindex[pyramid_level], subgop_data_.step[idx].qindex)
-            << "Error:qindex in a pyramid level doesn't match";
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+      if (subgop_cfg_ref_->step[idx].type_code != FRAME_TYPE_INO_SHOWEXISTING) {
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+        if (level_qindex[pyramid_level] < 0) {
+          level_qindex[pyramid_level] = subgop_data_.step[idx].qindex;
+        } else if (!subgop_data_.step[idx].show_existing_frame &&
+                   !subgop_data_.step[idx].is_filtered) {
+          EXPECT_EQ(level_qindex[pyramid_level], subgop_data_.step[idx].qindex)
+              << "Error:qindex in a pyramid level doesn't match";
+        }
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
       }
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
     }
     for (pyramid_level = 1; pyramid_level <= MAX_ARF_LAYERS; pyramid_level++) {
       if (level_qindex[pyramid_level] >= 0) {
@@ -486,13 +504,22 @@
       int refresh_frame_flags = curr_step_data->refresh_frame_flags;
       // Validates user-defined refresh_flag with decoder
       if (subgop_cfg_ref_->step[idx].refresh != -1 &&
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+          subgop_cfg_ref_->step[idx].type_code != FRAME_TYPE_INO_SHOWEXISTING) {
+#else   // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
           !curr_step_data->show_existing_frame) {
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
         EXPECT_EQ(subgop_cfg_ref_->step[idx].refresh,
                   (int8_t)refresh_frame_flags)
             << "Error: refresh flag mismatch";
       }
       // Validates reference picture management w.r.t refresh_flags
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+      if (refresh_frame_flags &&
+          subgop_cfg_ref_->step[idx].type_code != FRAME_TYPE_INO_SHOWEXISTING) {
+#else   // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
       if (refresh_frame_flags && !curr_step_data->show_existing_frame) {
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
         for (int mask = refresh_frame_flags; mask; mask >>= 1) {
           if (mask & 1)
             EXPECT_EQ(curr_step_data->disp_frame_idx,
@@ -521,7 +548,11 @@
       unsigned int *ref_frame_map =
           (idx > 0) ? subgop_data_.step[idx - 1].ref_frame_map
                     : subgop_last_step_.ref_frame_map;
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+      if (subgop_cfg_ref_->step[idx].type_code != FRAME_TYPE_INO_SHOWEXISTING) {
+#else   // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
       if (!subgop_data_.step[idx].show_existing_frame) {
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
         EXPECT_EQ(subgop_cfg_ref_->step[idx].num_references,
                   subgop_cfg_test_.step[idx].num_references)
             << "Error:Reference frames count doesn't match";
@@ -530,7 +561,13 @@
       // config.
       for (int ref = 0; ref < subgop_cfg_test_.step[idx].num_references;
            ref++) {
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+        if (subgop_cfg_ref_->step[idx].type_code !=
+                FRAME_TYPE_INO_SHOWEXISTING &&
+            subgop_data_.step[idx].is_valid_ref_frame[ref]) {
+#else   // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
         if (subgop_data_.step[idx].is_valid_ref_frame[ref]) {
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
           EXPECT_EQ(subgop_cfg_ref_->step[idx].references[ref],
                     subgop_cfg_test_.step[idx].references[ref])
               << "Error:Reference frame level doesn't match";
@@ -574,12 +611,21 @@
     }
   }
 
+  virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) {
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+    frame_num_in_subgop_ += pkt->data.frame.frame_count;
+#else   // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+    (void)pkt;  // Unused here; every packet counts as a single frame.
+    ++frame_num_in_subgop_;
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+  }
+
   virtual bool HandleDecodeResult(const aom_codec_err_t res_dec,
                                   libaom_test::Decoder *decoder) {
     EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError();
     if (AOM_CODEC_OK != res_dec) return 0;
     aom_codec_ctx_t *ctx_dec = decoder->GetDecoder();
-    frame_num_in_subgop_++;
+
     int is_last_frame_in_subgop = (frame_num_in_subgop_ == subgop_info_.size);
 
     if (subgop_info_.is_user_specified ||
@@ -614,7 +660,6 @@
       }
       ResetSubgop();
     }
-    frame_num_++;
     return AOM_CODEC_OK == res_dec;
   }
 
@@ -642,59 +687,62 @@
     libaom_test::I420VideoSource video(
         subgop_test_params_.input_file, subgop_test_params_.frame_w,
         subgop_test_params_.frame_h, cfg_.g_timebase.den, cfg_.g_timebase.num,
-        0, 200);
+        0, kFrames);
     ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
   } else {
-    ::libaom_test::Y4mVideoSource video(subgop_test_params_.input_file, 0, 200);
+    ::libaom_test::Y4mVideoSource video(subgop_test_params_.input_file, 0,
+                                        kFrames);
     ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
   }
 }
 
 AV1_INSTANTIATE_TEST_SUITE(SubGopTestLarge,
                            ::testing::ValuesIn(SubGopTestVectors),
-                           ::testing::Values(AOM_Q, AOM_VBR, AOM_CQ, AOM_CBR));
+                           ::testing::Values(AOM_Q, AOM_VBR
+                                             // Disabled to reduce combinations.
+                                             //, AOM_CQ, AOM_CBR
+                                             ));
 
 typedef struct {
   const char *subgop_str;
   const char *input_file;
   int frame_w;
   int frame_h;
-  int cpu_used;
   int lag_in_frames;
 } SubgopPsnrTestParams;
 
 static const SubgopPsnrTestParams SubGopPsnrTestVectors[] = {
   { subgop_config_str_preset_map[DEFAULT].preset_tag,
-    "hantro_collage_w352h288.yuv", 352, 288, 3, 35 },
+    "hantro_collage_w352h288.yuv", 352, 288, 35 },
   { subgop_config_str_preset_map[DEFAULT].preset_tag, "desktop1.320_180.yuv",
-    320, 180, 5, 35 },
+    320, 180, 35 },
 
   { subgop_config_str_preset_map[ENHANCE].preset_tag,
-    "hantro_collage_w352h288.yuv", 352, 288, 3, 35 },
+    "hantro_collage_w352h288.yuv", 352, 288, 35 },
   { subgop_config_str_preset_map[ENHANCE].preset_tag,
-    "pixel_capture_w320h240.yuv", 320, 240, 5, 35 },
+    "pixel_capture_w320h240.yuv", 320, 240, 35 },
   // TODO(any): Enable after fix
   /* { subgop_config_str_preset_map[ENHANCE].preset_tag, "paris_352_288_30.y4m",
-     352, 288, 3, 35 },
+     352, 288, 35 },
      { subgop_config_str_preset_map[ENHANCE].preset_tag, "screendata.y4m", 640,
-     480, 5, 35 },
+     480, 35 },
      { subgop_config_str_preset_map[ENHANCE].preset_tag, "paris_352_288_30.y4m",
-     352, 288, 5, 35 }, */
+     352, 288, 35 }, */
 
   { subgop_config_str_preset_map[ASYMMETRIC].preset_tag,
-    "pixel_capture_w320h240.yuv", 320, 240, 5, 35 },
+    "pixel_capture_w320h240.yuv", 320, 240, 35 },
   // TODO(any): Enable after fix
   /* { subgop_config_str_preset_map[ASYMMETRIC].preset_tag,
-    "desktop1.320_180.yuv", 320, 180, 3, 35 }, */
+    "desktop1.320_180.yuv", 320, 180, 35 }, */
 
   { subgop_config_str_preset_map[TEMPORAL_SCALABLE].preset_tag,
-    "hantro_collage_w352h288.yuv", 352, 288, 5, 35 },
+    "hantro_collage_w352h288.yuv", 352, 288, 35 },
 
   // TODO(any): Enable after fix
   /* { subgop_config_str_preset_map[LOW_DELAY].preset_tag,
-     "paris_352_288_30.y4m", 352, 288, 5, 0 },
+     "paris_352_288_30.y4m", 352, 288, 0 },
      { subgop_config_str_preset_map[LOW_DELAY].preset_tag,
-     "desktop1.320_180.yuv", 320, 180, 3, 0 }, */
+     "desktop1.320_180.yuv", 320, 180, 0 }, */
 };
 
 std::ostream &operator<<(std::ostream &os,
@@ -702,8 +750,7 @@
   return os << "SubgopPsnrTestParams { sub_gop_config:" << test_arg.subgop_str
             << " source_file:" << test_arg.input_file
             << " frame_width:" << test_arg.frame_w
-            << " frame_height:" << test_arg.frame_h
-            << " cpu_used:" << test_arg.cpu_used
+            << " frame_height:" << test_arg.frame_h << " cpu_used:" << kCpuUsed
             << " lag_in_frames:" << test_arg.lag_in_frames << " }";
 }
 
@@ -748,7 +795,10 @@
   virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
                                   ::libaom_test::Encoder *encoder) {
     if (video->frame() == 0) {
-      encoder->Control(AOME_SET_CPUUSED, test_params_.cpu_used);
+      encoder->Control(AOME_SET_CPUUSED, kCpuUsed);
+      if (rc_end_usage_ == AOM_Q) {
+        encoder->Control(AOME_SET_QP, 210);
+      }
       if (enable_subgop_)
         encoder->Control(AV1E_SET_SUBGOP_CONFIG_STR, test_params_.subgop_str);
     }
@@ -764,8 +814,7 @@
 
 TEST_P(SubGopPSNRCheckTestLarge, SubGopPSNRCheck) {
   std::unique_ptr<libaom_test::VideoSource> video;
-  const unsigned int kFrames = 100;
-  const double psnr_diff_thresh = 0.3;
+  const double psnr_diff_thresh = 0.5;
   if (is_extension_y4m(test_params_.input_file)) {
     video.reset(
         new libaom_test::Y4mVideoSource(test_params_.input_file, 0, kFrames));
@@ -800,7 +849,6 @@
   const char *input_file;
   int frame_w;
   int frame_h;
-  int cpu_used;
   int lag_in_frames;
   int max_gf_interval;
 } SubGopSwitchTestParams;
@@ -810,30 +858,29 @@
   return os << "SubGopSwitchTestParams { sub_gop_config:" << test_arg.subgop_str
             << " source_file:" << test_arg.input_file
             << " frame_width:" << test_arg.frame_w
-            << " frame_height:" << test_arg.frame_h
-            << " cpu_used:" << test_arg.cpu_used
+            << " frame_height:" << test_arg.frame_h << " cpu_used:" << kCpuUsed
             << " lag_in_frames:" << test_arg.lag_in_frames
             << " max_gf_interval:" << test_arg.max_gf_interval << " }";
 }
 
 static const SubGopSwitchTestParams SubgopSwitchTestVectors[] = {
   { subgop_config_str_preset_map[DEFAULT].preset_tag, "niklas_640_480_30.yuv",
-    640, 480, 5, 35, 16 },
+    640, 480, 35, 16 },
   /* TODO(sarahparker/debargha): Enable after adding default 32 subgop config.
    { subgop_config_str_preset_map[DEFAULT].preset_tag, "niklas_640_480_30.yuv",
-    640, 480, 5, 35, 32 },*/
+    640, 480, 35, 32 },*/
   { subgop_config_str_preset_map[ENHANCE].preset_tag, "desktop1.320_180.yuv",
-    320, 180, 3, 35, 16 },
+    320, 180, 35, 16 },
   { subgop_config_str_preset_map[ENHANCE].preset_tag,
-    "hantro_collage_w352h288.yuv", 352, 288, 5, 35, 16 },
+    "hantro_collage_w352h288.yuv", 352, 288, 35, 16 },
   { subgop_config_str_preset_map[ASYMMETRIC].preset_tag,
-    "pixel_capture_w320h240.yuv", 320, 240, 3, 35, 16 },
+    "pixel_capture_w320h240.yuv", 320, 240, 35, 16 },
   { subgop_config_str_preset_map[TEMPORAL_SCALABLE].preset_tag,
-    "paris_352_288_30.y4m", 352, 288, 3, 35, 16 },
+    "paris_352_288_30.y4m", 352, 288, 35, 16 },
   { subgop_config_str_preset_map[LOW_DELAY].preset_tag, "screendata.y4m", 640,
-    480, 5, 0, 16 },
+    480, 0, 16 },
   { subgop_config_str_preset_map[LOW_DELAY].preset_tag, "screendata.y4m", 640,
-    480, 5, 0, 32 },
+    480, 0, 32 },
 };
 
 using libaom_test::ACMRandom;
@@ -897,18 +944,26 @@
     // Set max gf interval
     if (subgop_str) encoder->Control(AV1E_SET_MAX_GF_INTERVAL, max_gf_interval);
 
+    // Keep min gf interval same as max gf interval in most cases, to ensure
+    // that user-provided subgop config is used.
+    int min_gf_interval = max_gf_interval;
+    // In case of no subgop config / enhanced subgop config, test arbitrary gf
+    // intervals by setting a lower min gf interval.
+    if (!subgop_str || !strcmp(subgop_str, "enh")) min_gf_interval = 6;
+
+    // Set min gf interval
+    encoder->Control(AV1E_SET_MIN_GF_INTERVAL, min_gf_interval);
+
     last_subgop_str_ = subgop_str;
   }
 
   virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video,
                                   ::libaom_test::Encoder *encoder) {
     if (video->frame() == 0) {
-      encoder->Control(AOME_SET_CPUUSED, test_params_.cpu_used);
+      encoder->Control(AOME_SET_CPUUSED, kCpuUsed);
       if (rc_end_usage_ == AOM_Q) {
         encoder->Control(AOME_SET_QP, 210);
       }
-      // Set min gf interval
-      encoder->Control(AV1E_SET_MIN_GF_INTERVAL, 6);
       set_subgop_config(encoder);
     }
 
@@ -943,13 +998,20 @@
     return 1;
   }
 
+  virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) {
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+    frame_num_in_subgop_ += pkt->data.frame.frame_count;
+#else   // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+    (void)pkt;  // Unused here; every packet counts as a single frame.
+    ++frame_num_in_subgop_;
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+  }
+
   virtual bool HandleDecodeResult(const aom_codec_err_t res_dec,
                                   libaom_test::Decoder *decoder) {
     EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError();
     if (AOM_CODEC_OK != res_dec) return 0;
 
-    frame_num_in_subgop_++;
-
     return AOM_CODEC_OK == res_dec;
   }
   SubGopSwitchTestParams test_params_;
@@ -966,8 +1028,6 @@
 
 TEST_P(SubGopSwitchingTestLarge, SubGopSwitching) {
   std::unique_ptr<libaom_test::VideoSource> video;
-  const unsigned int kFrames = 175;
-
   if (is_extension_y4m(test_params_.input_file)) {
     video.reset(
         new libaom_test::Y4mVideoSource(test_params_.input_file, 0, kFrames));
diff --git a/test/test.cmake b/test/test.cmake
index a402306..7642949 100644
--- a/test/test.cmake
+++ b/test/test.cmake
@@ -132,6 +132,7 @@
       "${AOM_ROOT}/test/segment_binarization_sync.cc"
       "${AOM_ROOT}/test/still_picture_test.cc"
       "${AOM_ROOT}/test/subgop_test.cc"
+      "${AOM_ROOT}/test/sef_test.cc"
       "${AOM_ROOT}/test/superframe_test.cc"
       "${AOM_ROOT}/test/tile_config_test.cc"
       "${AOM_ROOT}/test/tile_independence_test.cc"
diff --git a/test/tile_config_test.cc b/test/tile_config_test.cc
index 2bed6cb..8588c53 100644
--- a/test/tile_config_test.cc
+++ b/test/tile_config_test.cc
@@ -324,6 +324,9 @@
                        tile_group_config_params_.num_tile_cols);
       encoder->Control(AV1E_SET_TILE_ROWS,
                        tile_group_config_params_.num_tile_rows);
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+      encoder->Control(AV1E_SET_FRAME_OUTPUT_ORDER_DERIVATION, 0);
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
     }
   }
 
diff --git a/test/tile_independence_test.cc b/test/tile_independence_test.cc
index aca8c8d..9a6689d 100644
--- a/test/tile_independence_test.cc
+++ b/test/tile_independence_test.cc
@@ -62,6 +62,9 @@
     if (video->frame() == 0) {
       encoder->Control(AV1E_SET_TILE_COLUMNS, n_tile_cols_);
       encoder->Control(AV1E_SET_TILE_ROWS, n_tile_rows_);
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+      encoder->Control(AV1E_SET_FRAME_OUTPUT_ORDER_DERIVATION, 0);
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
       SetCpuUsed(encoder);
     } else if (video->frame() == 3) {
       encoder->Control(AV1E_SET_NUM_TG, n_tile_groups_);
diff --git a/test/util.h b/test/util.h
index e311207..e0ffe04 100644
--- a/test/util.h
+++ b/test/util.h
@@ -22,27 +22,49 @@
 // Macros
 #define GET_PARAM(k) std::get<k>(GetParam())
 
+// PSNR in dB for `sse` over `samples` samples; same as 'aom_sse_to_psnr'.
+inline double sse_to_psnr(double samples, double peak, double sse) {
+  static const double kMinSSE = 0.5;  // Floor that keeps log10() finite.
+  const bool zero_sse = (sse < kMinSSE);
+  if (zero_sse) sse = kMinSSE;
+  assert(sse > 0.0);
+  double psnr = 10.0 * log10(samples * peak * peak / sse);
+  if (zero_sse) psnr = ceil(psnr);  // Clamped case: round up to whole dB.
+  return psnr;
+}
+
 inline double compute_psnr(const aom_image_t *img1, const aom_image_t *img2) {
   assert((img1->fmt == img2->fmt) && (img1->d_w == img2->d_w) &&
          (img1->d_h == img2->d_h));
 
   const unsigned int width_y = img1->d_w;
   const unsigned int height_y = img1->d_h;
-  unsigned int i, j;
 
-  int64_t sqrerr = 0;
-  for (i = 0; i < height_y; ++i)
-    for (j = 0; j < width_y; ++j) {
-      int64_t d = img1->planes[AOM_PLANE_Y][i * img1->stride[AOM_PLANE_Y] + j] -
-                  img2->planes[AOM_PLANE_Y][i * img2->stride[AOM_PLANE_Y] + j];
-      sqrerr += d * d;
+  double sse = 0;
+  for (unsigned int i = 0; i < height_y; ++i) {
+    for (unsigned int j = 0; j < width_y; ++j) {
+      const double d =
+          img1->planes[AOM_PLANE_Y][i * img1->stride[AOM_PLANE_Y] + j] -
+          img2->planes[AOM_PLANE_Y][i * img2->stride[AOM_PLANE_Y] + j];
+      sse += d * d;
     }
-  double mse = static_cast<double>(sqrerr) / (width_y * height_y);
-  double psnr = 100.0;
-  if (mse > 0.0) {
-    psnr = 10 * log10(255.0 * 255.0 / mse);
   }
-  return psnr;
+  return sse_to_psnr(width_y * height_y, 255.0, sse);
+}
+
+// Returns the expected total PSNR for the zero distortion case, based on frame
+// dimensions and `bit_depth` (which sets the peak sample value).
+// If `is_yuv444` is true: assumes YUV4:4:4 format, otherwise assumes YUV4:2:0.
+inline double get_lossless_psnr(unsigned int width, unsigned int height,
+                                unsigned int bit_depth, bool is_yuv444) {
+#if CONFIG_AV2CTC_PSNR_PEAK
+  const double peak = (double)(255 << (bit_depth - 8));
+#else
+  const double peak = (double)((1 << bit_depth) - 1);
+#endif  // CONFIG_AV2CTC_PSNR_PEAK
+  const double y_samples = width * height;
+  const double uv_samples = is_yuv444 ? 2 * y_samples : 2 * y_samples / 4;
+  return sse_to_psnr(y_samples + uv_samples, peak, 0);
 }
 
 static INLINE double get_time_mark(aom_usec_timer *t) {
diff --git a/test/variance_test.cc b/test/variance_test.cc
index df15fcd..4e1942a 100644
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -1436,9 +1436,11 @@
 const SubpelVarianceParams kArrayHBDSubpelVariance_avx2[] = {
 #if CONFIG_BLOCK_256X256
 //  SubpelVarianceParams(8, 8, &aom_highbd_12_sub_pixel_variance256x256_avx2,
-//  12), SubpelVarianceParams(8, 7,
-//  &aom_highbd_12_sub_pixel_variance256x128_avx2, 12), SubpelVarianceParams(7,
-//  8, &aom_highbd_12_sub_pixel_variance128x256_avx2, 12),
+//  12),
+//  SubpelVarianceParams(8, 7, &aom_highbd_12_sub_pixel_variance256x128_avx2,
+//  12),
+//  SubpelVarianceParams(7, 8, &aom_highbd_12_sub_pixel_variance128x256_avx2,
+//  12),
 #endif  // CONFIG_BLOCK_256X256
   // SubpelVarianceParams(7, 7, &aom_highbd_12_sub_pixel_variance128x128_avx2,
   // 12),
@@ -1528,8 +1530,10 @@
   SubpelVarianceParams(5, 5, &aom_highbd_12_sub_pixel_variance32x32_sse2, 12),
   SubpelVarianceParams(5, 4, &aom_highbd_12_sub_pixel_variance32x16_sse2, 12),
   SubpelVarianceParams(4, 5, &aom_highbd_12_sub_pixel_variance16x32_sse2, 12),
+#if !CONFIG_UNEVEN_4WAY
   SubpelVarianceParams(4, 4, &aom_highbd_12_sub_pixel_variance16x16_sse2, 12),
   SubpelVarianceParams(4, 3, &aom_highbd_12_sub_pixel_variance16x8_sse2, 12),
+#endif  // !CONFIG_UNEVEN_4WAY
   SubpelVarianceParams(3, 4, &aom_highbd_12_sub_pixel_variance8x16_sse2, 12),
   SubpelVarianceParams(3, 3, &aom_highbd_12_sub_pixel_variance8x8_sse2, 12),
   SubpelVarianceParams(3, 2, &aom_highbd_12_sub_pixel_variance8x4_sse2, 12),
@@ -1542,8 +1546,10 @@
   SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_sse2, 10),
   SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_sse2, 10),
   SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_sse2, 10),
+#if !CONFIG_UNEVEN_4WAY
   SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_sse2, 10),
   SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_sse2, 10),
+#endif  // !CONFIG_UNEVEN_4WAY
   SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_sse2, 10),
   SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_sse2, 10),
   SubpelVarianceParams(3, 2, &aom_highbd_10_sub_pixel_variance8x4_sse2, 10),
@@ -1556,8 +1562,10 @@
   SubpelVarianceParams(5, 5, &aom_highbd_8_sub_pixel_variance32x32_sse2, 8),
   SubpelVarianceParams(5, 4, &aom_highbd_8_sub_pixel_variance32x16_sse2, 8),
   SubpelVarianceParams(4, 5, &aom_highbd_8_sub_pixel_variance16x32_sse2, 8),
+#if !CONFIG_UNEVEN_4WAY
   SubpelVarianceParams(4, 4, &aom_highbd_8_sub_pixel_variance16x16_sse2, 8),
   SubpelVarianceParams(4, 3, &aom_highbd_8_sub_pixel_variance16x8_sse2, 8),
+#endif  // !CONFIG_UNEVEN_4WAY
   SubpelVarianceParams(3, 4, &aom_highbd_8_sub_pixel_variance8x16_sse2, 8),
   SubpelVarianceParams(3, 3, &aom_highbd_8_sub_pixel_variance8x8_sse2, 8),
   SubpelVarianceParams(3, 2, &aom_highbd_8_sub_pixel_variance8x4_sse2, 8),
@@ -1566,19 +1574,25 @@
   SubpelVarianceParams(4, 6, &aom_highbd_12_sub_pixel_variance16x64_sse2, 12),
   SubpelVarianceParams(5, 3, &aom_highbd_12_sub_pixel_variance32x8_sse2, 12),
   SubpelVarianceParams(3, 5, &aom_highbd_12_sub_pixel_variance8x32_sse2, 12),
+#if !CONFIG_UNEVEN_4WAY
   SubpelVarianceParams(4, 2, &aom_highbd_12_sub_pixel_variance16x4_sse2, 12),
+#endif  // !CONFIG_UNEVEN_4WAY
   // SubpelVarianceParams(2, 4, &aom_highbd_12_sub_pixel_variance4x16_sse2, 12),
   SubpelVarianceParams(6, 4, &aom_highbd_10_sub_pixel_variance64x16_sse2, 10),
   SubpelVarianceParams(4, 6, &aom_highbd_10_sub_pixel_variance16x64_sse2, 10),
   SubpelVarianceParams(5, 3, &aom_highbd_10_sub_pixel_variance32x8_sse2, 10),
   SubpelVarianceParams(3, 5, &aom_highbd_10_sub_pixel_variance8x32_sse2, 10),
+#if !CONFIG_UNEVEN_4WAY
   SubpelVarianceParams(4, 2, &aom_highbd_10_sub_pixel_variance16x4_sse2, 10),
+#endif  // !CONFIG_UNEVEN_4WAY
   // SubpelVarianceParams(2, 4, &aom_highbd_10_sub_pixel_variance4x16_sse2, 10),
   SubpelVarianceParams(6, 4, &aom_highbd_8_sub_pixel_variance64x16_sse2, 8),
   SubpelVarianceParams(4, 6, &aom_highbd_8_sub_pixel_variance16x64_sse2, 8),
   SubpelVarianceParams(5, 3, &aom_highbd_8_sub_pixel_variance32x8_sse2, 8),
   SubpelVarianceParams(3, 5, &aom_highbd_8_sub_pixel_variance8x32_sse2, 8),
+#if !CONFIG_UNEVEN_4WAY
   SubpelVarianceParams(4, 2, &aom_highbd_8_sub_pixel_variance16x4_sse2, 8),
+#endif  // !CONFIG_UNEVEN_4WAY
   // SubpelVarianceParams(2, 4, &aom_highbd_8_sub_pixel_variance4x16_sse2, 8),
 };
 INSTANTIATE_TEST_SUITE_P(SSE2, AvxHBDSubpelVarianceTest,
@@ -1597,10 +1611,12 @@
                           12),
   SubpelAvgVarianceParams(4, 5, &aom_highbd_12_sub_pixel_avg_variance16x32_sse2,
                           12),
+#if !CONFIG_UNEVEN_4WAY
   SubpelAvgVarianceParams(4, 4, &aom_highbd_12_sub_pixel_avg_variance16x16_sse2,
                           12),
   SubpelAvgVarianceParams(4, 3, &aom_highbd_12_sub_pixel_avg_variance16x8_sse2,
                           12),
+#endif  // !CONFIG_UNEVEN_4WAY
   SubpelAvgVarianceParams(3, 4, &aom_highbd_12_sub_pixel_avg_variance8x16_sse2,
                           12),
   SubpelAvgVarianceParams(3, 3, &aom_highbd_12_sub_pixel_avg_variance8x8_sse2,
@@ -1619,10 +1635,12 @@
                           10),
   SubpelAvgVarianceParams(4, 5, &aom_highbd_10_sub_pixel_avg_variance16x32_sse2,
                           10),
+#if !CONFIG_UNEVEN_4WAY
   SubpelAvgVarianceParams(4, 4, &aom_highbd_10_sub_pixel_avg_variance16x16_sse2,
                           10),
   SubpelAvgVarianceParams(4, 3, &aom_highbd_10_sub_pixel_avg_variance16x8_sse2,
                           10),
+#endif  // !CONFIG_UNEVEN_4WAY
   SubpelAvgVarianceParams(3, 4, &aom_highbd_10_sub_pixel_avg_variance8x16_sse2,
                           10),
   SubpelAvgVarianceParams(3, 3, &aom_highbd_10_sub_pixel_avg_variance8x8_sse2,
@@ -1641,10 +1659,12 @@
                           8),
   SubpelAvgVarianceParams(4, 5, &aom_highbd_8_sub_pixel_avg_variance16x32_sse2,
                           8),
+#if !CONFIG_UNEVEN_4WAY
   SubpelAvgVarianceParams(4, 4, &aom_highbd_8_sub_pixel_avg_variance16x16_sse2,
                           8),
   SubpelAvgVarianceParams(4, 3, &aom_highbd_8_sub_pixel_avg_variance16x8_sse2,
                           8),
+#endif  // !CONFIG_UNEVEN_4WAY
   SubpelAvgVarianceParams(3, 4, &aom_highbd_8_sub_pixel_avg_variance8x16_sse2,
                           8),
   SubpelAvgVarianceParams(3, 3, &aom_highbd_8_sub_pixel_avg_variance8x8_sse2,
@@ -1660,8 +1680,10 @@
                           12),
   SubpelAvgVarianceParams(3, 5, &aom_highbd_12_sub_pixel_avg_variance8x32_sse2,
                           12),
+#if !CONFIG_UNEVEN_4WAY
   SubpelAvgVarianceParams(4, 2, &aom_highbd_12_sub_pixel_avg_variance16x4_sse2,
                           12),
+#endif  // !CONFIG_UNEVEN_4WAY
   // SubpelAvgVarianceParams(2, 4,
   // &aom_highbd_12_sub_pixel_avg_variance4x16_sse2, 12),
   SubpelAvgVarianceParams(6, 4, &aom_highbd_10_sub_pixel_avg_variance64x16_sse2,
@@ -1672,8 +1694,10 @@
                           10),
   SubpelAvgVarianceParams(3, 5, &aom_highbd_10_sub_pixel_avg_variance8x32_sse2,
                           10),
+#if !CONFIG_UNEVEN_4WAY
   SubpelAvgVarianceParams(4, 2, &aom_highbd_10_sub_pixel_avg_variance16x4_sse2,
                           10),
+#endif  // !CONFIG_UNEVEN_4WAY
   // SubpelAvgVarianceParams(2, 4,
   // &aom_highbd_10_sub_pixel_avg_variance4x16_sse2, 10),
   SubpelAvgVarianceParams(6, 4, &aom_highbd_8_sub_pixel_avg_variance64x16_sse2,
@@ -1684,8 +1708,10 @@
                           8),
   SubpelAvgVarianceParams(3, 5, &aom_highbd_8_sub_pixel_avg_variance8x32_sse2,
                           8),
+#if !CONFIG_UNEVEN_4WAY
   SubpelAvgVarianceParams(4, 2, &aom_highbd_8_sub_pixel_avg_variance16x4_sse2,
                           8),
+#endif  // !CONFIG_UNEVEN_4WAY
   // SubpelAvgVarianceParams(2, 4,
   // &aom_highbd_8_sub_pixel_avg_variance4x16_sse2, 8),
 };
diff --git a/tools/aom_entropy_optimizer.c b/tools/aom_entropy_optimizer.c
index 11b187c..c71fb75 100644
--- a/tools/aom_entropy_optimizer.c
+++ b/tools/aom_entropy_optimizer.c
@@ -420,6 +420,15 @@
                    "default_uv_mode_cdf[CFL_ALLOWED_TYPES][INTRA_MODES]"
                    "[CDF_SIZE(UV_INTRA_MODES)]");
 
+#if CONFIG_EXT_DIR
+  /* MRL index */
+  cts_each_dim[0] = MRL_INDEX_CONTEXTS;
+  cts_each_dim[1] = MRL_LINE_NUMBER;
+  optimize_cdf_table(&fc.mrl_index[0][0], probsfile, 2, cts_each_dim,
+                     "static const aom_cdf_prob default_mrl_index_cdf"
+                     "[MRL_INDEX_CONTEXTS][CDF_SIZE(MRL_LINE_NUMBER)]");
+#endif  // CONFIG_EXT_DIR
+
 #if CONFIG_CROSS_CHROMA_TX
   /* cctx type */
   cts_each_dim[0] = EXT_TX_SIZES;
@@ -461,6 +470,26 @@
                      "static aom_cdf_prob default_do_ext_partition_cdf"
                      "[PARTITION_STRUCTURE_NUM][NUM_RECT_PARTS][PARTITION_"
                      "CONTEXTS][CDF_SIZE(2)]");
+#if CONFIG_UNEVEN_4WAY
+  cts_each_dim[0] = PARTITION_STRUCTURE_NUM;
+  cts_each_dim[1] = NUM_RECT_PARTS;
+  cts_each_dim[2] = PARTITION_CONTEXTS;
+  cts_each_dim[3] = 2;
+  optimize_cdf_table(&fc.do_uneven_4way_partition[0][0][0][0], probsfile, 4,
+                     cts_each_dim,
+                     "static aom_cdf_prob default_do_uneven_4way_partition_cdf"
+                     "[PARTITION_STRUCTURE_NUM][NUM_RECT_PARTS][PARTITION_"
+                     "CONTEXTS][CDF_SIZE(2)]");
+  cts_each_dim[0] = PARTITION_STRUCTURE_NUM;
+  cts_each_dim[1] = NUM_RECT_PARTS;
+  cts_each_dim[2] = PARTITION_CONTEXTS;
+  cts_each_dim[3] = NUM_UNEVEN_4WAY_PARTS;
+  optimize_cdf_table(
+      &fc.uneven_4way_partition_type[0][0][0][0], probsfile, 4, cts_each_dim,
+      "static aom_cdf_prob default_uneven_4way_partition_type_cdf"
+      "[PARTITION_STRUCTURE_NUM][NUM_RECT_PARTS][PARTITION_"
+      "CONTEXTS][CDF_SIZE(NUM_UNEVEN_4WAY_PARTS)]");
+#endif  // CONFIG_UNEVEN_4WAY
 #else
   /* block partition */
   cts_each_dim[0] = PARTITION_STRUCTURE_NUM;
@@ -481,7 +510,7 @@
   cts_each_dim[1] = EXT_TX_SIZES;
   cts_each_dim[2] = INTRA_MODES;
   cts_each_dim[3] = TX_TYPES;
-#if CONFIG_ATC_NEWTXSETS
+#if CONFIG_ATC
   int intra_ext_tx_types_each_ctx[EXT_TX_SETS_INTRA] = { 0, INTRA_TX_SET1 };
   optimize_cdf_table_var_modes_4d(
       &fc.intra_ext_tx[0][0][0][0], probsfile, 4, cts_each_dim,
@@ -498,6 +527,19 @@
       "[EXT_TX_SIZES][INTRA_MODES][CDF_SIZE(TX_TYPES)]");
 #endif
 
+#if CONFIG_ATC_DCTX_ALIGNED
+  cts_each_dim[0] = EXT_TX_SETS_INTER;
+  cts_each_dim[1] = EOB_TX_CTXS;
+  cts_each_dim[2] = EXT_TX_SIZES;
+  cts_each_dim[3] = TX_TYPES;
+  int inter_ext_tx_types_each_ctx[EXT_TX_SETS_INTER] = { 0, 16, 12, 2 };
+  optimize_cdf_table_var_modes_4d(
+      &fc.inter_ext_tx[0][0][0][0], probsfile, 4, cts_each_dim,
+      inter_ext_tx_types_each_ctx,
+      "static const aom_cdf_prob "
+      "default_inter_ext_tx_cdf[EXT_TX_SETS_INTER][EOB_TX_CTXS]"
+      "[EXT_TX_SIZES][CDF_SIZE(TX_TYPES)]");
+#else
   cts_each_dim[0] = EXT_TX_SETS_INTER;
   cts_each_dim[1] = EXT_TX_SIZES;
   cts_each_dim[2] = TX_TYPES;
@@ -507,6 +549,7 @@
       inter_ext_tx_types_each_ctx,
       "static const aom_cdf_prob default_inter_ext_tx_cdf[EXT_TX_SETS_INTER]"
       "[EXT_TX_SIZES][CDF_SIZE(TX_TYPES)]");
+#endif  // CONFIG_ATC_DCTX_ALIGNED
 
   /* Chroma from Luma */
 #if CONFIG_IMPROVED_CFL
@@ -699,7 +742,7 @@
                      "default_bawp_cdf[CDF_SIZE(2)]");
 #endif
   /* Intra/inter flag */
-#if CONFIG_CONTEXT_DERIVATION
+#if CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT
   cts_each_dim[0] = INTRA_INTER_SKIP_TXFM_CONTEXTS;
   cts_each_dim[1] = INTRA_INTER_CONTEXTS;
   cts_each_dim[2] = 2;
@@ -714,7 +757,7 @@
       &fc.intra_inter[0][0], probsfile, 2, cts_each_dim,
       "static const aom_cdf_prob\n"
       "default_intra_inter_cdf[INTRA_INTER_CONTEXTS][CDF_SIZE(2)]");
-#endif  // CONFIG_CONTEXT_DERIVATION
+#endif  // CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT
   /* Single/comp ref flag */
   cts_each_dim[0] = COMP_INTER_CONTEXTS;
   cts_each_dim[1] = 2;
@@ -877,7 +920,7 @@
       "static const aom_cdf_prob default_intrabc_cdf[CDF_SIZE(2)]");
 #endif  // CONFIG_NEW_CONTEXT_MODELING
 
-#if CONFIG_BVP_IMPROVEMENT
+#if CONFIG_IBC_BV_IMPROVEMENT
   /* intrabc mode flag*/
   cts_each_dim[0] = 2;
   optimize_cdf_table(&fc.intrabc_mode[0], probsfile, 1, cts_each_dim,
@@ -962,6 +1005,17 @@
                      "av1_default_idtx_sign_cdfs[TOKEN_CDF_Q_CTXS]"
                      "[IDTX_SIGN_CONTEXTS][CDF_SIZE(2)]");
 
+#if CONFIG_ATC_DCTX_ALIGNED
+  cts_each_dim[0] = TOKEN_CDF_Q_CTXS;
+  cts_each_dim[1] = SIG_COEF_CONTEXTS_BOB;
+  cts_each_dim[2] = NUM_BASE_LEVELS + 1;
+  optimize_cdf_table(
+      &fc.coeff_base_bob_multi[0][0][0], probsfile, 3, cts_each_dim,
+      "static const aom_cdf_prob av1_default_coeff_base_bob_multi_cdfs"
+      "[TOKEN_CDF_Q_CTXS][SIG_COEF_CONTEXTS_BOB]"
+      "[CDF_SIZE(NUM_BASE_LEVELS + 1)]");
+#endif  // CONFIG_ATC_DCTX_ALIGNED
+
 #if CONFIG_CONTEXT_DERIVATION
   cts_each_dim[0] = TOKEN_CDF_Q_CTXS;
   cts_each_dim[1] = V_TXB_SKIP_CONTEXTS;
@@ -983,6 +1037,62 @@
       "[TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES][EOB_COEF_CONTEXTS]"
       "[CDF_SIZE(2)]");
 
+#if CONFIG_ATC_DCTX_ALIGNED
+  cts_each_dim[0] = TOKEN_CDF_Q_CTXS;
+  cts_each_dim[1] = PLANE_TYPES;
+  cts_each_dim[2] = EOB_MAX_SYMS - 6;
+  optimize_cdf_table(
+      &fc.eob_multi16[0][0][0], probsfile, 3, cts_each_dim,
+      "static const aom_cdf_prob av1_default_eob_multi16_cdfs"
+      "[TOKEN_CDF_Q_CTXS][PLANE_TYPES][CDF_SIZE(EOB_MAX_SYMS - 6)]");
+
+  cts_each_dim[0] = TOKEN_CDF_Q_CTXS;
+  cts_each_dim[1] = PLANE_TYPES;
+  cts_each_dim[2] = EOB_MAX_SYMS - 5;
+  optimize_cdf_table(
+      &fc.eob_multi32[0][0][0], probsfile, 3, cts_each_dim,
+      "static const aom_cdf_prob av1_default_eob_multi32_cdfs"
+      "[TOKEN_CDF_Q_CTXS][PLANE_TYPES][CDF_SIZE(EOB_MAX_SYMS - 5)]");
+
+  cts_each_dim[0] = TOKEN_CDF_Q_CTXS;
+  cts_each_dim[1] = PLANE_TYPES;
+  cts_each_dim[2] = EOB_MAX_SYMS - 4;
+  optimize_cdf_table(
+      &fc.eob_multi64[0][0][0], probsfile, 3, cts_each_dim,
+      "static const aom_cdf_prob av1_default_eob_multi64_cdfs"
+      "[TOKEN_CDF_Q_CTXS][PLANE_TYPES][CDF_SIZE(EOB_MAX_SYMS - 4)]");
+
+  cts_each_dim[0] = TOKEN_CDF_Q_CTXS;
+  cts_each_dim[1] = PLANE_TYPES;
+  cts_each_dim[2] = EOB_MAX_SYMS - 3;
+  optimize_cdf_table(
+      &fc.eob_multi128[0][0][0], probsfile, 3, cts_each_dim,
+      "static const aom_cdf_prob av1_default_eob_multi128_cdfs"
+      "[TOKEN_CDF_Q_CTXS][PLANE_TYPES][CDF_SIZE(EOB_MAX_SYMS - 3)]");
+
+  cts_each_dim[0] = TOKEN_CDF_Q_CTXS;
+  cts_each_dim[1] = PLANE_TYPES;
+  cts_each_dim[2] = EOB_MAX_SYMS - 2;
+  optimize_cdf_table(
+      &fc.eob_multi256[0][0][0], probsfile, 3, cts_each_dim,
+      "static const aom_cdf_prob av1_default_eob_multi256_cdfs"
+      "[TOKEN_CDF_Q_CTXS][PLANE_TYPES][CDF_SIZE(EOB_MAX_SYMS - 2)]");
+
+  cts_each_dim[0] = TOKEN_CDF_Q_CTXS;
+  cts_each_dim[1] = PLANE_TYPES;
+  cts_each_dim[2] = EOB_MAX_SYMS - 1;
+  optimize_cdf_table(
+      &fc.eob_multi512[0][0][0], probsfile, 3, cts_each_dim,
+      "static const aom_cdf_prob av1_default_eob_multi512_cdfs"
+      "[TOKEN_CDF_Q_CTXS][PLANE_TYPES][CDF_SIZE(EOB_MAX_SYMS - 1)]");
+
+  cts_each_dim[0] = TOKEN_CDF_Q_CTXS;
+  cts_each_dim[1] = PLANE_TYPES;
+  cts_each_dim[2] = EOB_MAX_SYMS;
+  optimize_cdf_table(&fc.eob_multi1024[0][0][0], probsfile, 3, cts_each_dim,
+                     "static const aom_cdf_prob av1_default_eob_multi1024_cdfs"
+                     "[TOKEN_CDF_Q_CTXS][PLANE_TYPES][CDF_SIZE(EOB_MAX_SYMS)]");
+#else
   cts_each_dim[0] = TOKEN_CDF_Q_CTXS;
   cts_each_dim[1] = PLANE_TYPES;
   cts_each_dim[2] = 2;
@@ -1038,8 +1148,9 @@
   optimize_cdf_table(&fc.eob_multi1024[0][0][0][0], probsfile, 4, cts_each_dim,
                      "static const aom_cdf_prob av1_default_eob_multi1024_cdfs"
                      "[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][CDF_SIZE(11)]");
+#endif  // CONFIG_ATC_DCTX_ALIGNED
 
-#if CONFIG_ATC_COEFCODING
+#if CONFIG_ATC
   cts_each_dim[0] = TOKEN_CDF_Q_CTXS;
   cts_each_dim[1] = TX_SIZES;
   cts_each_dim[2] = PLANE_TYPES;
diff --git a/tools/convexhull_framework/bin/AOM_CWG_AS_CTC_v9.7.1.xlsm b/tools/convexhull_framework/bin/AOM_CWG_AS_CTC_v9.7.1.xlsm
new file mode 100644
index 0000000..ce27081
--- /dev/null
+++ b/tools/convexhull_framework/bin/AOM_CWG_AS_CTC_v9.7.1.xlsm
Binary files differ
diff --git a/tools/convexhull_framework/bin/AOM_CWG_Regular_CTCv4_v7.3.2.xlsm b/tools/convexhull_framework/bin/AOM_CWG_Regular_CTCv4_v7.3.2.xlsm
new file mode 100644
index 0000000..6311b9b
--- /dev/null
+++ b/tools/convexhull_framework/bin/AOM_CWG_Regular_CTCv4_v7.3.2.xlsm
Binary files differ
diff --git a/tools/convexhull_framework/src/AV2CTCProgress.py b/tools/convexhull_framework/src/AV2CTCProgress.py
index 97ef091..f422cb5 100644
--- a/tools/convexhull_framework/src/AV2CTCProgress.py
+++ b/tools/convexhull_framework/src/AV2CTCProgress.py
@@ -22,6 +22,7 @@
 import matplotlib.pyplot as plt
 from matplotlib.backends.backend_pdf import PdfPages
 from CalcBDRate import BD_RATE
+from itertools import cycle
 
 qtys = ["psnr_y", "psnr_u", "psnr_v", "overall_psnr", "ssim_y", "ms_ssim_y",
         "vmaf", "vmaf_neg", "psnr_hvs","ciede2k", "apsnr_y", "apsnr_u",
@@ -50,51 +51,19 @@
 csv_files = {
     "v1.0.0":
     {
-        "AI":     "D:\\AV2-CTC\\AV2-CTC-v1.0.0-Final\\analysis\\rdresult\\RDResults_aom_av2_AI_Preset_0.csv",
-        "LD":     "D:\\AV2-CTC\\AV2-CTC-v1.0.0-Final\\analysis\\rdresult\\RDResults_aom_av2_LD_Preset_0.csv",
-        "RA":     "D:\\AV2-CTC\\AV2-CTC-v1.0.0-Final\\analysis\\rdresult\\RDResults_aom_av2_RA_Preset_0.csv",
-        "Still":  "D:\\AV2-CTC\\AV2-CTC-v1.0.0-Final\\analysis\\rdresult\\RDResults_aom_av2_STILL_Preset_0.csv",
-        "AS":     "D:\\AV2-CTC\\AV2-CTC-v1.0.0-Final\\analysis\\rdresult\\RDResults_aom_av2_AS_Preset_0.csv",
+        "AI":     "F:\\Av2-CTC-v4-ToolOffTest\\v1.0-alt\\analysis\\rdresult\\RDResults_aom_av2_AI_Preset_0.csv",
+        "LD":     "F:\\Av2-CTC-v4-ToolOffTest\\v1.0-alt\\analysis\\rdresult\\RDResults_aom_av2_LD_Preset_0.csv",
+        "RA":     "F:\\Av2-CTC-v4-ToolOffTest\\v1.0-alt\\analysis\\rdresult\\RDResults_aom_av2_RA_Preset_0.csv",
+        "Still":  "F:\\Av2-CTC-v4-ToolOffTest\\v1.0-alt\\analysis\\rdresult\\RDResults_aom_av2_STILL_Preset_0.csv",
+        "AS":     "F:\\Av2-CTC-v4-ToolOffTest\\v1.0-alt\\analysis\\rdresult\\RDResults_aom_av2_AS_Preset_0.csv",
     },
-    "v1.0.1":
+    "v4.0.0":
     {
-        "AI":     "D:\\AV2-CTC\\AV2-CTC-v1.0.1\\analysis\\rdresult\\RDResults_aom_av2_AI_Preset_0.csv",
-        "LD":     "D:\\AV2-CTC\\AV2-CTC-v1.0.1\\analysis\\rdresult\\RDResults_aom_av2_LD_Preset_0.csv",
-        "RA":     "D:\\AV2-CTC\\AV2-CTC-v1.0.1\\analysis\\rdresult\\RDResults_aom_av2_RA_Preset_0.csv",
-        "Still":  "D:\\AV2-CTC\\AV2-CTC-v1.0.1\\analysis\\rdresult\\RDResults_aom_av2_STILL_Preset_0.csv",
-        "AS":     "D:\\AV2-CTC\\AV2-CTC-v1.0.1\\analysis\\rdresult\\RDResults_aom_av2_AS_Preset_0.csv",
-    },
-    "B034":
-    {
-        "AI":     "D:\\AV2-CTC\\AV2-CTC-B034\\analysis\\rdresult\\RDResults_aom_av2_AI_Preset_0.csv",
-        "LD":     "D:\\AV2-CTC\\AV2-CTC-B034\\analysis\\rdresult\\RDResults_aom_av2_LD_Preset_0.csv",
-        "RA":     "D:\\AV2-CTC\\AV2-CTC-B034\\analysis\\rdresult\\RDResults_aom_av2_RA_Preset_0.csv",
-        "Still":  "D:\\AV2-CTC\\AV2-CTC-B034\\analysis\\rdresult\\RDResults_aom_av2_STILL_Preset_0.csv",
-        "AS":     "D:\\AV2-CTC\\AV2-CTC-B034\\analysis\\rdresult\\RDResults_aom_av2_AS_Preset_0.csv",
-    },
-    "ext-quant":
-    {
-        "AI":     "D:\\AV2-CTC\\AV2-CTC-ExtQuant\\analysis\\rdresult\\RDResults_aom_av2_AI_Preset_0.csv",
-        "LD":     "D:\\AV2-CTC\\AV2-CTC-ExtQuant\\analysis\\rdresult\\RDResults_aom_av2_LD_Preset_0.csv",
-        "RA":     "D:\\AV2-CTC\\AV2-CTC-ExtQuant\\analysis\\rdresult\\RDResults_aom_av2_RA_Preset_0.csv",
-        "Still":  "D:\\AV2-CTC\\AV2-CTC-ExtQuant\\analysis\\rdresult\\RDResults_aom_av2_STILL_Preset_0.csv",
-        "AS":     "D:\\AV2-CTC\\AV2-CTC-ExtQuant\\analysis\\rdresult\\RDResults_aom_av2_AS_Preset_0.csv",
-    },
-    "sdp-off":
-    {
-        "AI":     "D:\\AV2-CTC\\AV2-CTC-SDP-OFF\\analysis\\rdresult\\RDResults_aom_av2_AI_Preset_0.csv",
-        "LD":     "D:\\AV2-CTC\\AV2-CTC-SDP-OFF\\analysis\\rdresult\\RDResults_aom_av2_LD_Preset_0.csv",
-        "RA":     "D:\\AV2-CTC\\AV2-CTC-SDP-OFF\\analysis\\rdresult\\RDResults_aom_av2_RA_Preset_0.csv",
-        "Still":  "D:\\AV2-CTC\\AV2-CTC-SDP-OFF\\analysis\\rdresult\\RDResults_aom_av2_STILL_Preset_0.csv",
-        "AS":     "D:\\AV2-CTC\\AV2-CTC-SDP-OFF\\analysis\\rdresult\\RDResults_aom_av2_AS_Preset_0.csv",
-    },
-    "sdp-on":
-    {
-        "AI":     "D:\\AV2-CTC\\AV2-CTC-SDP-ON\\analysis\\rdresult\\RDResults_aom_av2_AI_Preset_0.csv",
-        "LD":     "D:\\AV2-CTC\\AV2-CTC-SDP-ON\\analysis\\rdresult\\RDResults_aom_av2_LD_Preset_0.csv",
-        "RA":     "D:\\AV2-CTC\\AV2-CTC-SDP-ON\\analysis\\rdresult\\RDResults_aom_av2_RA_Preset_0.csv",
-        "Still":  "D:\\AV2-CTC\\AV2-CTC-SDP-ON\\analysis\\rdresult\\RDResults_aom_av2_STILL_Preset_0.csv",
-        "AS":     "D:\\AV2-CTC\\AV2-CTC-SDP-ON\\analysis\\rdresult\\RDResults_aom_av2_AS_Preset_0.csv",
+        "AI":     "F:\\Av2-CTC-v4-ToolOffTest\\v4.0\\analysis\\rdresult\\RDResults_aom_av2_AI_Preset_0.csv",
+        "LD":     "F:\\Av2-CTC-v4-ToolOffTest\\v4.0\\analysis\\rdresult\\RDResults_aom_av2_LD_Preset_0.csv",
+        "RA":     "F:\\Av2-CTC-v4-ToolOffTest\\v4.0\\analysis\\rdresult\\RDResults_aom_av2_RA_Preset_0.csv",
+        "Still":  "F:\\Av2-CTC-v4-ToolOffTest\\v4.0\\analysis\\rdresult\\RDResults_aom_av2_STILL_Preset_0.csv",
+        "AS":     "F:\\Av2-CTC-v4-ToolOffTest\\v4.0\\analysis\\rdresult\\RDResults_aom_av2_AS_Preset_0.csv",
     },
 }
 
@@ -108,15 +77,7 @@
 
 formats = {
     "v1.0.0":       ['r', '-', 'o'],
-    "v1.0.1":       ['g', '-', '*'],
-    "B034":         ['k', '-', '^'],
-    "ext-quant":    ['r', '-', '*'],
-    "sdp-off":      ['b', '-', '+'],
-    "sdp-on":       ['r', '-', '<'],
-    "HM_CloseGOP":     ['r', '-', 'o'],
-    "HM_OpenGOP":      ['b', '-', '+'],
-    "AV1_CloseGOP":     ['g', '-', '>'],
-    "AV1_OpenGOP":      ['k', '-', '*'],
+    "v4.0.0":       ['g', '-', '*'],
 }
 
 AS_formats = {
@@ -130,6 +91,8 @@
 
 anchor = "v1.0.0"
 rd_curve_pdf = "rdcurve.pdf"
+colors = cycle('bgrycmk')
+markers = cycle('o*^+<x')
 
 def WriteSheet(csv_file, sht, start_row):
     csv = open(csv_file, 'rt')
@@ -179,7 +142,7 @@
 
                 wb.save(xls_file)
 
-def DrawRDCurve(records, anchor, pdf):
+def DrawIndividualRDCurve(records, anchor, pdf):
     with PdfPages(pdf) as export_pdf:
         for cfg in records[anchor].keys():
             videos = records[anchor][cfg].keys()
@@ -212,7 +175,7 @@
                             Int_RDPoints[tag] += int_rdpnts
                             plot_rd_curve(br[res], apsnr[res], "overall_apsnr", res, "bitrate(Kbps)",
                                           AS_formats[res][0], AS_formats[res][1], AS_formats[res][2])
-                        plt.legend()
+                        plt.legend(loc='lower right')
                         plt.grid(True)
                         export_pdf.savefig()
                         plt.close()
@@ -224,9 +187,9 @@
                         lower, upper = convex_hull(Int_RDPoints[tag])
                         br    = [h[0] for h in upper]
                         apsnr = [h[1] for h in upper]
-                        plot_rd_curve(br, apsnr, "overall_apsnr", tag, "bitrate(Kbps)",
+                        plot_rd_curve(br, apsnr, "overall_apsnr(dB)", tag, "bitrate(kbps)",
                                       formats[tag][0], formats[tag][1], formats[tag][2])
-                    plt.legend()
+                    plt.legend(loc='lower right')
                     plt.grid(True)
                     export_pdf.savefig()
                     plt.close()
@@ -238,13 +201,102 @@
                         record = records[tag][cfg][video]
                         br    = [record[key].bitrate for key in record.keys()]
                         apsnr = [record[key].overall_apsnr for key in record.keys()]
-                        plot_rd_curve(br, apsnr, "overall_apsnr", tag, "bitrate(Kbps)",
+                        plot_rd_curve(br, apsnr, "overall_apsnr(dB)", tag, "bitrate(kbps)",
                                       formats[tag][0], formats[tag][1], formats[tag][2])
-                    plt.legend()
+                    plt.legend(loc='lower right')
                     plt.grid(True)
                     export_pdf.savefig()
                     plt.close()
 
+
+def DrawCombinedRDCurve(records):
+    pdf = "combined_rdcurve.pdf"
+    with PdfPages(pdf) as export_pdf:
+        for tag in csv_files.keys():
+            for cfg in csv_files[tag].keys():
+                videos = records[tag][cfg].keys()
+                plt.figure(figsize=(30, 30))
+                plt.suptitle("%s : %s" % (tag, cfg))
+
+                for video in videos:
+                    short_name = video.split('_')[0]
+                    if cfg == "AS":
+                        Int_RDPoints = []
+                        record = records[tag][cfg][video]
+                        br = {};
+                        apsnr = {}
+                        for key in record.keys():
+                            res = re.split('_', key)[0]
+                            if res not in br.keys():
+                                br[res] = []
+                                apsnr[res] = []
+                            br[res].append(record[key].bitrate)
+                            apsnr[res].append(record[key].overall_apsnr)
+
+                        for res in br.keys():
+                            rdpnts = [(brt, qty) for brt, qty in zip(br[res], apsnr[res])]
+                            if UsePCHIPInterpolation:
+                                int_rdpnts = Interpolate_PCHIP(rdpnts, QPs['AS'][:], InterpolatePieces, True)
+                            else:
+                                int_rdpnts = Interpolate_Bilinear(rdpnts, QPs['AS'][:], InterpolatePieces, True)
+                            Int_RDPoints += int_rdpnts
+
+                        # draw convex hull
+                        lower, upper = convex_hull(Int_RDPoints)
+                        br = [h[0] for h in upper]
+                        apsnr = [h[1] for h in upper]
+                        plot_rd_curve(br, apsnr, "overall_apsnr(dB)", short_name, "bitrate(kbps)",
+                                      next(colors), '-', next(markers))
+                    else:
+                        record = records[tag][cfg][video]
+                        br = [record[key].bitrate for key in record.keys()]
+                        apsnr = [record[key].overall_apsnr for key in record.keys()]
+                        plot_rd_curve(br, apsnr, "overall_apsnr(dB)", short_name, "bitrate(kbps)",
+                                      next(colors), '-', next(markers))
+
+                plt.legend(loc='lower right')
+                plt.grid(True)
+                export_pdf.savefig()
+                plt.close()
+
+def DrawCombinedRuntime(records):
+    pdf = "combined_runtime.pdf"
+    with PdfPages(pdf) as export_pdf:
+        for tag in csv_files.keys():
+            for cfg in csv_files[tag].keys():
+                videos = records[tag][cfg].keys()
+                plt.figure(figsize=(30, 30))
+                plt.suptitle("%s : %s" % (tag, cfg))
+
+                for video in videos:
+                    short_name = video.split('_')[0]
+                    if cfg == "AS":
+                        record = records[tag][cfg][video]
+                        br = {};
+                        enc_time = {}
+                        for key in record.keys():
+                            res = re.split('_', key)[0]
+                            if res not in br.keys():
+                                br[res] = []
+                                enc_time[res] = []
+                            br[res].append(record[key].bitrate)
+                            enc_time[res].append(record[key].enc_time)
+
+                        for res in br.keys():
+                            plot_rd_curve(br[res], enc_time[res], "enc_time(s)", short_name+'_'+res, "bitrate(kbps)",
+                                          next(colors), '-', next(markers))
+                    else:
+                        record = records[tag][cfg][video]
+                        br = [record[key].bitrate for key in record.keys()]
+                        enc_time = [record[key].enc_time for key in record.keys()]
+                        plot_rd_curve(br, enc_time, "enc_time(s)", short_name, "bitrate(kbps)",
+                                      next(colors), '-', next(markers))
+
+                plt.legend(loc='lower right')
+                plt.grid(True)
+                export_pdf.savefig()
+                plt.close()
+
 def GetQty(record, qty):
     qtys = []
     for key in record.keys():
@@ -399,11 +451,13 @@
             records[tag][test_cfg] = ParseCSVFile(csv_files[tag][test_cfg])
 
     FillXlsFile()
-    DrawRDCurve(records, anchor, rd_curve_pdf)
+    DrawCombinedRDCurve(records)
+    DrawCombinedRuntime(records)
+    DrawIndividualRDCurve(records, anchor, rd_curve_pdf)
 
     #Calculate BDRate and collect total time
     for test_cfg in csv_files[anchor].keys():
         (bdrate, seq_time, seq_instr) = CalcFullBDRate(test_cfg)
         #Write output summary xls file
-        filename = "Summary-HEVC-AV1-%s"%test_cfg
+        filename = "Summary-AV1-vs-AV2_v4.0_%s"%test_cfg
         WriteSummaryXlsFile(bdrate, seq_time, seq_instr, filename)
diff --git a/tools/convexhull_framework/src/AV2CTCTest.py b/tools/convexhull_framework/src/AV2CTCTest.py
index f8fcc50..42ad0f7 100644
--- a/tools/convexhull_framework/src/AV2CTCTest.py
+++ b/tools/convexhull_framework/src/AV2CTCTest.py
@@ -119,13 +119,10 @@
 
     csv_file, perframe_csvfile = GetRDResultCsvFile(EncodeMethod, CodecName, EncodePreset, test_cfg)
     csv = open(csv_file, 'wt')
-    # "TestCfg,EncodeMethod,CodecName,EncodePreset,Class,OrigRes,Name,FPS,Bit Depth,CodedRes,QP,Bitrate(kbps)")
+    # "TestCfg,EncodeMethod,CodecName,EncodePreset,Class,OrigRes,Name,FPS,BitDepth,CodedRes,QP,Bitrate(kbps)")
     csv.write("TestCfg,EncodeMethod,CodecName,EncodePreset,Class,Name,OrigRes,FPS,"\
-              "Bit Depth,CodecRes,QP,")
-    if (test_cfg == "STILL"):
-        csv.write("FileSize(bytes)")
-    else:
-        csv.write("Bitrate(kbps)")
+              "BitDepth,CodedRes,QP,")
+    csv.write("Bitrate(kbps)")
     for qty in QualityList:
         csv.write(',' + qty)
     csv.write(",EncT[s],DecT[s]")
@@ -138,7 +135,7 @@
     perframe_csv = open(perframe_csvfile, 'wt')
 
     perframe_csv.write("TestCfg,EncodeMethod,CodecName,EncodePreset,Class,Name,Res,FPS," \
-                       "Bit Depth,QP,POC,FrameType,Level,qindex,FrameSize")
+                       "BitDepth,QP,POC,FrameType,Level,qindex,FrameSize")
     for qty in QualityList:
         if (qty != "Overall_PSNR" and qty != "Overall_APSNR" and not qty.startswith("APSNR")):
             perframe_csv.write(',' + qty)
diff --git a/tools/convexhull_framework/src/AV2CTCVideo.py b/tools/convexhull_framework/src/AV2CTCVideo.py
index 400dfdf..36cb305 100644
--- a/tools/convexhull_framework/src/AV2CTCVideo.py
+++ b/tools/convexhull_framework/src/AV2CTCVideo.py
@@ -255,7 +255,7 @@
                                                           "/A1_4k_720p/NocturneDance_1280x720p_10bit_60fps.y4m",
                                                           "/A1_4k_540p/NocturneDance_960x540p_10bit_60fps.y4m",
                                                           "/A1_4k_360p/NocturneDance_640x360p_10bit_60fps.y4m"],
-"PierSeaSide_3840x2160_2997fps_10bit_420"               :["/A1_4k_1440p/PierSeaSide_2560x1440_2997fps_10bit_420_v2.y4m",
+"PierSeaSide_3840x2160_2997fps_10bit_420_v2"            :["/A1_4k_1440p/PierSeaSide_2560x1440_2997fps_10bit_420_v2.y4m",
                                                           "/A1_4k_1080p/PierSeaSide_1920x1080_2997fps_10bit_420_v2.y4m",
                                                           "/A1_4k_720p/PierSeaSide_1280x720_2997fps_10bit_420_v2.y4m",
                                                           "/A1_4k_540p/PierSeaSide_960x540_2997fps_10bit_420_v2.y4m",
diff --git a/tools/convexhull_framework/src/Config.py b/tools/convexhull_framework/src/Config.py
index 47b6ba0..5482c2c 100644
--- a/tools/convexhull_framework/src/Config.py
+++ b/tools/convexhull_framework/src/Config.py
@@ -46,8 +46,8 @@
 APSNR_V_WEIGHT = 1.0
 
 if CTC_VERSION == '4.0':
-    CTC_RegularXLSTemplate = os.path.join(BinPath, 'AOM_CWG_Regular_CTCv4_v7.3.xlsm')
-    CTC_ASXLSTemplate = os.path.join(BinPath, 'AOM_CWG_AS_CTC_v9.8.xlsm')
+    CTC_RegularXLSTemplate = os.path.join(BinPath, 'AOM_CWG_Regular_CTCv4_v7.3.2.xlsm')
+    CTC_ASXLSTemplate = os.path.join(BinPath, 'AOM_CWG_AS_CTC_v9.7.1.xlsm')
 elif CTC_VERSION == '3.0':
     CTC_RegularXLSTemplate = os.path.join(BinPath, 'AOM_CWG_Regular_CTC_v7.2.xlsm')
     CTC_ASXLSTemplate = os.path.join(BinPath, 'AOM_CWG_AS_CTC_v9.7.xlsm')
diff --git a/tools/convexhull_framework/src/ConvexHullTest.py b/tools/convexhull_framework/src/ConvexHullTest.py
index 72bcf94..abbc931 100755
--- a/tools/convexhull_framework/src/ConvexHullTest.py
+++ b/tools/convexhull_framework/src/ConvexHullTest.py
@@ -284,7 +284,7 @@
                 quality, perframe_vmaf_log = GatherQualityMetrics(reconyuv, Path_QualityLog)
                 qualities.append(quality)
 
-                #"TestCfg,EncodeMethod,CodecName,EncodePreset,Class,OrigRes,Name,FPS,Bit Depth,CodedRes,QP,Bitrate(kbps)")
+                #"TestCfg,EncodeMethod,CodecName,EncodePreset,Class,OrigRes,Name,FPS,BitDepth,CodedRes,QP,Bitrate(kbps)")
                 csv.write("%s,%s,%s,%s,%s,%s,%s,%.4f,%d,%s,%d,%f"%
                           ("AS", EncodeMethod, CodecName, EncodePreset, clip.file_class,contentname,
                            str(clip.width)+"x"+str(clip.height), clip.fps,clip.bit_depth,
@@ -460,7 +460,7 @@
         csv_file, perframe_csvfile = GetRDResultCsvFile(EncodeMethod, CodecName, EncodePreset, "AS")
         csv = open(csv_file, "wt")
         csv.write("TestCfg,EncodeMethod,CodecName,EncodePreset,Class,Name,OrigRes,FPS," \
-                  "Bit Depth,CodedRes,QP,Bitrate(kbps)")
+                  "BitDepth,CodedRes,QP,Bitrate(kbps)")
         for qty in QualityList:
             csv.write(',' + qty)
         csv.write(",EncT[s],DecT[s]")
@@ -473,7 +473,7 @@
 
         perframe_csv = open(perframe_csvfile, 'wt')
         perframe_csv.write("TestCfg,EncodeMethod,CodecName,EncodePreset,Class,Name,Res,FPS," \
-                           "Bit Depth,QP,POC,FrameType,Level,qindex,FrameSize")
+                           "BitDepth,QP,POC,FrameType,Level,qindex,FrameSize")
         for qty in QualityList:
             if not qty.startswith("APSNR"):
                 perframe_csv.write(',' + qty)
diff --git a/tools/convexhull_framework/src/Utils.py b/tools/convexhull_framework/src/Utils.py
index 7d19e34..418ff0f 100755
--- a/tools/convexhull_framework/src/Utils.py
+++ b/tools/convexhull_framework/src/Utils.py
@@ -13,6 +13,7 @@
 import os
 import re
 import sys
+from csv import DictReader
 import subprocess
 import time
 import logging
@@ -81,6 +82,7 @@
     apsnr_u = 0.0
     apsnr_v = 0.0
     overall_apsnr = 0.0
+    cambi = 0.0
     enc_time = 0.0
     dec_time = 0.0
     enc_instr = 0.0
@@ -91,7 +93,7 @@
     def __init__(self, test_cfg, encode_mode , codec_name, encode_preset, file_class, file_name,
                  orig_res, fps, bit_depth, coded_res, qp, bitrate, psnr_y, psnr_u, psnr_v,
                  ssim_y, ms_ssim_y, vmaf_y, vmaf_y_neg, psnr_hvs, ciede2k, apsnr_y, apsnr_u,
-                 apsnr_v, enc_time, dec_time, enc_instr, dec_instr, enc_cycle, dec_cycle):
+                 apsnr_v, cambi, enc_time, dec_time, enc_instr, dec_instr, enc_cycle, dec_cycle):
 
         self.test_cfg = test_cfg
         self.encode_mode = encode_mode
@@ -124,6 +126,7 @@
                                                    APSNR_U_WEIGHT/pow(10, (self.apsnr_u / 10)) +
                                                    APSNR_V_WEIGHT/pow(10, (self.apsnr_v / 10))) /
                                               (APSNR_Y_WEIGHT + APSNR_U_WEIGHT + APSNR_V_WEIGHT)))
+        self.cambi = float(cambi)
         self.enc_time = float(enc_time)
         self.dec_time = float(dec_time)
         self.enc_instr = float(enc_instr)
@@ -133,20 +136,22 @@
 
 def ParseCSVFile(csv_file):
     records = {}
-    csv = open(csv_file, 'rt')
-    for line in csv:
-        if not line.startswith('TestCfg'):
-            words = re.split(',', line.strip())
-            record = Record(words[0], words[1], words[2], words[3], words[4], words[5], words[6], words[7], words[8],
-                            words[9], words[10], words[11], words[12], words[13], words[14], words[15], words[16],
-                            words[17],words[18], words[19], words[20], words[21], words[22], words[23], words[24],
-                            words[25], words[26],words[27], words[28],words[29])
-            key = record.coded_res + "_" + record.qp
-            if record.file_name not in records.keys():
-                records[record.file_name] = {}
-            records[record.file_name][key] = record
+    with open(csv_file, 'r') as f:
+        list_of_data = list(DictReader(f))
+        for data in list_of_data:
+            key = data['CodedRes'] + "_" + data['QP']
+            name = data['Name']
+            record = Record(data['TestCfg'], data['EncodeMethod'], data['CodecName'], data['EncodePreset'],
+                            data['Class'], data['Name'], data['OrigRes'], data['FPS'], data['BitDepth'],
+                            data['CodedRes'], data['QP'], data['Bitrate(kbps)'], data['PSNR_Y'], data['PSNR_U'],
+                            data['PSNR_V'], data['SSIM_Y(dB)'], data['MS-SSIM_Y(dB)'], data['VMAF_Y'],
+                            data['VMAF_Y-NEG'], data['PSNR-HVS'], data['CIEDE2000'], data['APSNR_Y'],
+                            data['APSNR_U'], data['APSNR_V'], data['CAMBI'], data['EncT[s]'], data['DecT[s]'],
+                            data['EncInstr'], data['DecInstr'], data['EncCycles'], data['DecCycles'])
 
-    csv.close()
+            if name not in records.keys():
+                records[name] = {}
+            records[name][key] = record
     return records
 
 def Cleanfolder(folder):