Merge v5 anchor into research-block256. Performance relative to v5 anchor: | CONFIG | b2 | PSNR_YUV | Enc Time | Dec Time | | :----: | :--: | :------: | :------: | :------: | | AI | w/o | +0.04% | 102% | 109% | | | only | +0.02% | 104% | 109% | | | | | | | | RA | w/o | -0.47% | 116% | 105% | | | only | -0.16% | 132% | 104% | | | | | | | | LD | w/o | -0.41% | 118% | 107% | | | only | -0.47% | 129% | 103% |
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 4ea7422..7b4fbe2 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml
@@ -39,13 +39,23 @@ # Run clang-format check. for f in $(git diff --diff-filter=ACMR --name-only $DIFF_REF '*.[hc]pp' '*.cc' '*.[ch]' \ | grep -v third_party); do - clang-format -i --style=file $f -n -Werror + clang-format -i --style=file $f -n -Werror || exit_code=$? + if [ ${exit_code} -ne 0 ]; then + echo "Please format your code by following instructions here:" + echo "https://gitlab.com/AOMediaCodec/avm/-/wikis/Reproducing-CI-Test-Failures-Locally#style-check" + exit 1 + fi done - | # Run cmake-format check. for f in $(git diff --diff-filter=ACMR --name-only $DIFF_REF '*.cmake' 'CMakeLists.txt' \ | grep -v third_party); do - cmake-format --check $f + cmake-format --check $f || exit_code=$? + if [ ${exit_code} -ne 0 ]; then + echo "Please format your code by following instructions here:" + echo "https://gitlab.com/AOMediaCodec/avm/-/wikis/Reproducing-CI-Test-Failures-Locally#style-check" + exit 1 + fi done rules: - if: '$CI_PIPELINE_SOURCE == "schedule"' @@ -237,7 +247,16 @@ echo "Extra CMake Flags: $EXTRA_CMAKE_FLAGS" echo "Configuration: $AOM_BUILD_CONFIG" - cmake -B aom_build -S . -GNinja -DCMAKE_BUILD_TYPE=Release $CMAKE_FLAGS $EXTRA_CMAKE_FLAGS - - cmake --build aom_build -j 2 + - cmake --build aom_build -j 2 || exit_code=$? + - | + if [ ${exit_code} -ne 0 ]; then + echo "You may reproduce the compile failure by following instructions here:" + echo "https://gitlab.com/AOMediaCodec/avm/-/wikis/Reproducing-CI-Test-Failures-Locally#build-avm-in-various-configurations" + echo "Using following values:" + echo "CMAKE_FLAGS = ${CMAKE_FLAGS}" + echo "EXTRA_CMAKE_FLAGS = ${EXTRA_CMAKE_FLAGS}" + exit 1 + fi - cmake --build aom_build --target dist - DESTDIR="${CI_PROJECT_DIR}/${INSTALLROOT_FOLDER}" cmake --build aom_build --target install/strip needs: [] @@ -549,14 +568,20 @@ - | # Looking for sanitizer output in log... 
grep -q "\(ERROR\|WARNING\): \(Address\|Thread\|Memory\|Leak\)Sanitizer:" sanitizer.log && { - echo "Found sanitizer errors or warnings, check the log:" + echo "Found sanitizer errors or warnings, check the log below:" cat sanitizer.log + echo "You may reproduce sanitizer builds and tests by following instructions below: " + echo "https://gitlab.com/AOMediaCodec/avm/-/wikis/Reproducing-CI-Test-Failures-Locally#build-unit-tests-with-sanitizers and" + echo "https://gitlab.com/AOMediaCodec/avm/-/wikis/Reproducing-CI-Test-Failures-Locally#run-unit-tests-with-sanitizers" exit 1 } # Looking for UBSan output in log (differs from the common format) grep -q ":[[:digit:]]\+:[[:digit:]]\+: runtime error:" sanitizer.log && { - echo "Found sanitizer errors or warnings, check the log:" + echo "Found sanitizer errors or warnings, check the log below:" cat sanitizer.log + echo "You may reproduce sanitizer builds and tests by following instructions below: " + echo "https://gitlab.com/AOMediaCodec/avm/-/wikis/Reproducing-CI-Test-Failures-Locally#build-unit-tests-with-sanitizers and" + echo "https://gitlab.com/AOMediaCodec/avm/-/wikis/Reproducing-CI-Test-Failures-Locally#run-unit-tests-with-sanitizers" exit 1 } echo "No sanitizer errors found" @@ -621,7 +646,7 @@ interruptible: true variables: AOMENC_LIMIT: 30 - AOMENC_QP: 128 + AOMENC_QP: 210 AOMENC_INPUT: Vertical_Bayshore_270x480_2997.y4m AOMENC: installroot/usr/local/bin/aomenc before_script: @@ -757,6 +782,67 @@ needs: - 'Previous Build (x86_64-linux-gcc): [encode-only]' +# Decode encoded streams and verify that number of frames is as expected. 
+.dec-run-common: + stage: test + interruptible: true + variables: + AOMENC_LIMIT: 30 # Should match the same variable in `.enc-run-common` + AOMDEC: installroot/usr/local/bin/aomdec + script: + - ${AOMDEC} ${AOMENC_OUTPUT}.obu -o ${AOMDEC_OUTPUT}.decoded.y4m --summary + 2>&1 | tee "${AOMDEC_OUTPUT}.summary.log" + - '[ -f "${AOMDEC_OUTPUT}.decoded.y4m" ] || exit 1' + - '[ -f "${AOMDEC_OUTPUT}.summary.log" ] || exit 1' + - | + for str in 'decoded frames' 'showed frames'; do + frame_count=$(grep -E -o "[0-9]+ ${str}" "${AOMDEC_OUTPUT}.summary.log" | sed -E "s/([0-9]+) ${str}/\1/g") + echo "${str} = ${frame_count}" + if [[ ${frame_count} -ne ${AOMENC_LIMIT} ]]; then + echo "ERROR: Unexpected number of ${str}. Got ${frame_count}, expected ${AOMENC_LIMIT}" + exit 1 + fi + done + artifacts: + when: always + paths: + - ${AOMDEC_OUTPUT}.* + rules: + - if: '$CI_PIPELINE_SOURCE == "merge_request_event"' + - if: '$CI_PIPELINE_SOURCE == "schedule"' + when: never + + +Dec Run (All-intra): + extends: .dec-run-common + variables: + AOMENC_OUTPUT: all-intra + AOMDEC_OUTPUT: dec-all-intra + needs: + - 'Enc Run (All-intra)' + - 'Build (x86_64-linux-gcc): [decode-only]' + + +Dec Run (Random Access): + extends: .dec-run-common + variables: + AOMENC_OUTPUT: random-access + AOMDEC_OUTPUT: dec-random-access + needs: + - 'Enc Run (Random Access)' + - 'Build (x86_64-linux-gcc): [decode-only]' + + +Dec Run (Low-delay): + extends: .dec-run-common + variables: + AOMENC_OUTPUT: low-delay + AOMDEC_OUTPUT: dec-low-delay + needs: + - 'Enc Run (Low-delay)' + - 'Build (x86_64-linux-gcc): [decode-only]' + + Enc compare: stage: report interruptible: true
diff --git a/.gitlab/UBSan.supp b/.gitlab/UBSan.supp index b0000e8..357d7d1 100644 --- a/.gitlab/UBSan.supp +++ b/.gitlab/UBSan.supp
@@ -38,8 +38,6 @@ implicit-signed-integer-truncation:av1_fdct8x64_new_sse2 # nullptr-with-offset warnings. -pointer-overflow:file_read -pointer-overflow:av1_pack_bitstream pointer-overflow:vfilter8 pointer-overflow:highbd_vfilter8
diff --git a/.gitlab/ci_nightly.yml b/.gitlab/ci_nightly.yml index d5adad8..fd0544d 100644 --- a/.gitlab/ci_nightly.yml +++ b/.gitlab/ci_nightly.yml
@@ -261,6 +261,7 @@ Linux Sanitizer (thread) Test Nightly: extends: .sanitizer-common-nigtly + parallel: 16 variables: AOM_SANITIZER_TYPE: thread needs:
diff --git a/aom/aom_encoder.h b/aom/aom_encoder.h index 63aa16c..b544fa3 100644 --- a/aom/aom_encoder.h +++ b/aom/aom_encoder.h
@@ -122,8 +122,12 @@ int partition_id; /*!\brief size of the visible frame in this packet */ size_t vis_frame_size; - } frame; /**< data for compressed frame packet */ - aom_fixed_buf_t twopass_stats; /**< data for two-pass packet */ +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + /*!\brief the number of frames in this packet */ + int frame_count; +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + } frame; /**< data for compressed frame packet */ + aom_fixed_buf_t twopass_stats; /**< data for two-pass packet */ aom_fixed_buf_t firstpass_mb_stats; /**< first pass mb packet */ struct aom_psnr_pkt { unsigned int samples[4]; /**< Number of samples, total/y/u/v */ @@ -324,6 +328,18 @@ */ unsigned int enable_bawp; #endif // CONFIG_BAWP +#if CONFIG_CWP + /*!\brief enable compound weighted prediction + * + */ + unsigned int enable_cwp; +#endif // CONFIG_CWP +#if CONFIG_D071_IMP_MSK_BLD + /*!\brief enable implicit masked blending + * + */ + unsigned int enable_imp_msk_bld; +#endif // CONFIG_D071_IMP_MSK_BLD /*!\brief enable Forward skip coding * */ @@ -334,6 +350,12 @@ */ unsigned int enable_orip; #endif // CONFIG_ORIP +#if CONFIG_IDIF + /*!\brief enable Intra Directional Interpolation Filter + * + */ + unsigned int enable_idif; +#endif // CONFIG_IDIF /*!\brief enable Intra secondary transform * */ @@ -372,6 +394,13 @@ */ unsigned int enable_joint_mvd; #endif +#if CONFIG_REFINEMV + /*!\brief enable refine MV mode + * + */ + unsigned int enable_refinemv; +#endif // CONFIG_REFINEMV + /*!\brief enable flip and identity transform type * */ @@ -539,6 +568,12 @@ * */ unsigned int explicit_ref_frame_map; +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + /*!\brief enable frame output order derivation based on order hint + * + */ + unsigned int enable_frame_output_order; +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT /*!\brief use reduced transform type set * */
diff --git a/aom/aomcx.h b/aom/aomcx.h index 3d1150f..d8ed487 100644 --- a/aom/aomcx.h +++ b/aom/aomcx.h
@@ -1261,6 +1261,11 @@ /*!\brief Control to get frame info */ AV1E_GET_FRAME_INFO = 165, +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + /*!\brief Control to set frame output order derivation method + */ + AV1E_SET_FRAME_OUTPUT_ORDER_DERIVATION = 166, +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT }; /*!\brief aom 1-D scaling mode @@ -1751,6 +1756,11 @@ AOM_CTRL_USE_TYPE(AV1E_SET_SUBGOP_CONFIG_PATH, const char *) #define AOM_CTRL_AV1E_SET_SUBGOP_CONFIG_PATH +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT +AOM_CTRL_USE_TYPE(AV1E_SET_FRAME_OUTPUT_ORDER_DERIVATION, int) +#define AOM_CTRL_AV1E_SET_FRAME_OUTPUT_ORDER_DERIVATION +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + /*!\endcond */ /*! @} - end defgroup aom_encoder */ #ifdef __cplusplus
diff --git a/aom/aomdx.h b/aom/aomdx.h index d8f3579..3755132 100644 --- a/aom/aomdx.h +++ b/aom/aomdx.h
@@ -63,8 +63,10 @@ * context. */ typedef struct aom_inspect_init { - /*! Inspection callback. */ + /*! Inspection callback (per frame). */ aom_inspect_cb inspect_cb; + /*! Inspection callback (per superblock). */ + aom_inspect_cb inspect_sb_cb; /*! Inspection context. */ void *inspect_ctx;
diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl index e340390..89b6b7b 100755 --- a/aom_dsp/aom_dsp_rtcd_defs.pl +++ b/aom_dsp/aom_dsp_rtcd_defs.pl
@@ -263,6 +263,22 @@ specialize "aom_highbd_sad8x8", qw/sse2/; specialize qw/aom_highbd_sad8x8 sse2/; +add_proto qw/unsigned int/, "aom_highbd_sad8x16", "const uint16_t *src_ptr, int src_stride, const uint16_t *ref_ptr, int ref_stride"; +specialize "aom_highbd_sad8x16", qw/sse2/; +specialize qw/aom_highbd_sad8x16 sse2/; + +add_proto qw/unsigned int/, "aom_highbd_sad16x8", "const uint16_t *src_ptr, int src_stride, const uint16_t *ref_ptr, int ref_stride"; +if (aom_config("CONFIG_UNEVEN_4WAY") ne "yes") { +specialize "aom_highbd_sad16x8", qw/sse2/; +specialize qw/aom_highbd_sad16x8 sse2/; +} + +add_proto qw/unsigned int/, "aom_highbd_sad16x16", "const uint16_t *src_ptr, int src_stride, const uint16_t *ref_ptr, int ref_stride"; +if (aom_config("CONFIG_UNEVEN_4WAY") ne "yes") { +specialize "aom_highbd_sad16x16", qw/sse2/; +specialize qw/aom_highbd_sad16x16 sse2/; +} + if (aom_config("CONFIG_AV1_ENCODER") eq "yes") { add_proto qw/void/, "aom_get_blk_sse_sum", "const int16_t *data, int stride, int bw, int bh, int *x_sum, int64_t *x2_sum"; specialize qw/aom_get_blk_sse_sum sse2 avx2/; @@ -296,8 +312,15 @@ add_proto qw/unsigned int/, "aom_highbd_sad_skip_${w}x${h}", "const uint16_t *src_ptr, int src_stride, const uint16_t *ref_ptr, int ref_stride"; add_proto qw/unsigned int/, "aom_highbd_sad${w}x${h}_avg", "const uint16_t *src_ptr, int src_stride, const uint16_t *ref_ptr, int ref_stride, const uint16_t *second_pred"; if ($w != 128 && $h != 128 && $w != 4 && $w != 256 && $h != 256) { - specialize "aom_highbd_sad${w}x${h}", qw/sse2/; - specialize "aom_highbd_sad${w}x${h}_avg", qw/sse2/; + if (aom_config("CONFIG_UNEVEN_4WAY") eq "yes") { + if (!($w == 16 && $h == 16) && !($w == 16 && $h == 8) && !($w == 16 && $h == 4)) { + specialize "aom_highbd_sad${w}x${h}", qw/sse2/; + specialize "aom_highbd_sad${w}x${h}_avg", qw/sse2/; + } + } else { + specialize "aom_highbd_sad${w}x${h}", qw/sse2/; + specialize "aom_highbd_sad${w}x${h}_avg", qw/sse2/; + } # CONFIG_UNEVEN_4WAY } add_proto 
qw/unsigned int/, "aom_highbd_dist_wtd_sad${w}x${h}_avg", "const uint16_t *src_ptr, int src_stride, const uint16_t *ref_ptr, int ref_stride, const uint16_t *second_pred, const DIST_WTD_COMP_PARAMS* jcp_param"; } @@ -313,8 +336,15 @@ specialize qw/aom_highbd_sad32x32 avx2 sse2/; specialize qw/aom_highbd_sad32x16 avx2 sse2/; specialize qw/aom_highbd_sad16x32 avx2 sse2/; - specialize qw/aom_highbd_sad16x16 avx2 sse2/; - specialize qw/aom_highbd_sad16x8 avx2 sse2/; + if (aom_config("CONFIG_UNEVEN_4WAY") eq "yes") { + specialize qw/aom_highbd_sad16x16 avx2/; + specialize qw/aom_highbd_sad16x8 avx2/; + specialize qw/aom_highbd_sad16x4 avx2/; + } else { + specialize qw/aom_highbd_sad16x16 avx2 sse2/; + specialize qw/aom_highbd_sad16x8 avx2 sse2/; + specialize qw/aom_highbd_sad16x4 avx2 sse2/; + } # CONFIG_UNEVEN_4WAY specialize qw/aom_highbd_sad8x16 sse2/; specialize qw/aom_highbd_sad8x8 sse2/; specialize qw/aom_highbd_sad8x4 sse2/; @@ -322,7 +352,6 @@ specialize qw/aom_highbd_sad4x4 sse2/; specialize qw/aom_highbd_sad4x16 sse2/; - specialize qw/aom_highbd_sad16x4 avx2 sse2/; specialize qw/aom_highbd_sad8x32 sse2/; specialize qw/aom_highbd_sad32x8 avx2 sse2/; specialize qw/aom_highbd_sad16x64 avx2 sse2/; @@ -340,8 +369,13 @@ specialize qw/aom_highbd_sad_skip_32x32 avx2 sse2/; specialize qw/aom_highbd_sad_skip_32x16 avx2 sse2/; specialize qw/aom_highbd_sad_skip_16x32 avx2 sse2/; - specialize qw/aom_highbd_sad_skip_16x16 avx2 sse2/; - specialize qw/aom_highbd_sad_skip_16x8 avx2 sse2/; + if (aom_config("CONFIG_UNEVEN_4WAY") eq "yes") { + specialize qw/aom_highbd_sad_skip_16x16 avx2/; + specialize qw/aom_highbd_sad_skip_16x8 avx2/; + } else { + specialize qw/aom_highbd_sad_skip_16x16 avx2 sse2/; + specialize qw/aom_highbd_sad_skip_16x8 avx2 sse2/; + } specialize qw/aom_highbd_sad_skip_8x16 sse2/; specialize qw/aom_highbd_sad_skip_8x8 sse2/; specialize qw/aom_highbd_sad_skip_4x8 sse2/; @@ -364,8 +398,13 @@ specialize qw/aom_highbd_sad32x32_avg avx2 sse2/; specialize 
qw/aom_highbd_sad32x16_avg avx2 sse2/; specialize qw/aom_highbd_sad16x32_avg avx2 sse2/; - specialize qw/aom_highbd_sad16x16_avg avx2 sse2/; - specialize qw/aom_highbd_sad16x8_avg avx2 sse2/; + if (aom_config("CONFIG_UNEVEN_4WAY") eq "yes") { + specialize qw/aom_highbd_sad16x16_avg avx2/; + specialize qw/aom_highbd_sad16x8_avg avx2/; + } else { + specialize qw/aom_highbd_sad16x16_avg avx2 sse2/; + specialize qw/aom_highbd_sad16x8_avg avx2 sse2/; + } specialize qw/aom_highbd_sad8x4_avg sse2/; specialize qw/aom_highbd_sad4x8_avg sse2/; specialize qw/aom_highbd_sad4x4_avg sse2/; @@ -558,14 +597,22 @@ # TODO(rachelbarker): When ext-partition-types is enabled, we currently # don't have vectorized 4x16 highbd variance functions if ($w == 4 && $h == 4) { - specialize "aom_highbd_${bd}_variance${w}x${h}", "sse4_1"; + specialize "aom_highbd_${bd}_variance${w}x${h}", "sse4_1"; specialize "aom_highbd_${bd}_sub_pixel_variance${w}x${h}", "sse4_1"; specialize "aom_highbd_${bd}_sub_pixel_avg_variance${w}x${h}", "sse4_1"; } - if ($w != 128 && $h != 128 && $w != 4 && $w != 256 && $h != 256) { - specialize "aom_highbd_${bd}_sub_pixel_variance${w}x${h}", qw/sse2/; - specialize "aom_highbd_${bd}_sub_pixel_avg_variance${w}x${h}", qw/sse2/; - } + if (aom_config("CONFIG_UNEVEN_4WAY") eq "yes") { + if ($w != 128 && $h != 128 && $w != 4 && !($w == 16 && $h == 16) && + !($w == 16 && $h == 8) && !($w == 16 && $h == 4) && $w != 256 && $h != 256) { + specialize "aom_highbd_${bd}_sub_pixel_variance${w}x${h}", qw/sse2/; + specialize "aom_highbd_${bd}_sub_pixel_avg_variance${w}x${h}", qw/sse2/; + } + } else { + if ($w != 128 && $h != 128 && $w != 4 && $w != 256 && $h != 256) { + specialize "aom_highbd_${bd}_sub_pixel_variance${w}x${h}", qw/sse2/; + specialize "aom_highbd_${bd}_sub_pixel_avg_variance${w}x${h}", qw/sse2/; + } + } # CONFIG_UNEVEN_4WAY add_proto qw/uint32_t/, "aom_highbd_${bd}_dist_wtd_sub_pixel_avg_variance${w}x${h}", "const uint16_t *src_ptr, int source_stride, int xoffset, int 
yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse, const uint16_t *second_pred, const DIST_WTD_COMP_PARAMS* jcp_param"; } @@ -874,11 +921,19 @@ specialize qw/aom_highbd_12_sub_pixel_variance16x32 sse2/; add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance16x16/, "const uint16_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse"; - # specialize qw/aom_highbd_12_sub_pixel_variance16x16 sse2 avx2/; - specialize qw/aom_highbd_12_sub_pixel_variance16x16 sse2/; + if (aom_config("CONFIG_UNEVEN_4WAY") eq "yes") { + # specialize qw/aom_highbd_12_sub_pixel_variance16x16 sse2/; + } else { + # specialize qw/aom_highbd_12_sub_pixel_variance16x16 sse2 avx2/; + specialize qw/aom_highbd_12_sub_pixel_variance16x16 sse2/; + } add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance16x8/, "const uint16_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/aom_highbd_12_sub_pixel_variance16x8 sse2 avx2/; + if (aom_config("CONFIG_UNEVEN_4WAY") eq "yes") { + specialize qw/aom_highbd_12_sub_pixel_variance16x8 avx2/; + } else { + specialize qw/aom_highbd_12_sub_pixel_variance16x8 sse2 avx2/; + } add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance8x16/, "const uint16_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse"; specialize qw/aom_highbd_12_sub_pixel_variance8x16 sse2/; @@ -906,8 +961,11 @@ specialize qw/aom_highbd_12_sub_pixel_variance32x8 sse2/; add_proto qw/uint32_t aom_highbd_12_sub_pixel_variance16x4/, "const uint16_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/aom_highbd_12_sub_pixel_variance16x4 sse2 avx2/; - + if (aom_config("CONFIG_UNEVEN_4WAY") eq "yes") { + specialize qw/aom_highbd_12_sub_pixel_variance16x4 avx2/; + } else { + specialize qw/aom_highbd_12_sub_pixel_variance16x4 
sse2 avx2/; + } if (aom_config("CONFIG_BLOCK_256") eq "yes"){ specialize qw/aom_highbd_10_sub_pixel_variance256x256 avx2/; @@ -943,10 +1001,18 @@ specialize qw/aom_highbd_10_sub_pixel_variance16x32 sse2 avx2/; add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance16x16/, "const uint16_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/aom_highbd_10_sub_pixel_variance16x16 sse2 avx2/; + if (aom_config("CONFIG_UNEVEN_4WAY") eq "yes") { + specialize qw/aom_highbd_10_sub_pixel_variance16x16 avx2/; + } else { + specialize qw/aom_highbd_10_sub_pixel_variance16x16 sse2 avx2/; + } add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance16x8/, "const uint16_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/aom_highbd_10_sub_pixel_variance16x8 sse2 avx2/; + if (aom_config("CONFIG_UNEVEN_4WAY") eq "yes") { + specialize qw/aom_highbd_10_sub_pixel_variance16x8 avx2/; + } else { + specialize qw/aom_highbd_10_sub_pixel_variance16x8 sse2 avx2/; + } add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance8x16/, "const uint16_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse"; specialize qw/aom_highbd_10_sub_pixel_variance8x16 sse2/; @@ -971,7 +1037,11 @@ specialize qw/aom_highbd_10_sub_pixel_variance32x8 sse2 avx2/; add_proto qw/uint32_t aom_highbd_10_sub_pixel_variance16x4/, "const uint16_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/aom_highbd_10_sub_pixel_variance16x4 sse2 avx2/; + if (aom_config("CONFIG_UNEVEN_4WAY") eq "yes") { + specialize qw/aom_highbd_10_sub_pixel_variance16x4 avx2/; + } else { + specialize qw/aom_highbd_10_sub_pixel_variance16x4 sse2 avx2/; + } if (aom_config("CONFIG_BLOCK_256") eq "yes"){ @@ -1008,10 +1078,18 @@ specialize 
qw/aom_highbd_8_sub_pixel_variance16x32 sse2 avx2/; add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance16x16/, "const uint16_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/aom_highbd_8_sub_pixel_variance16x16 sse2 avx2/; + if (aom_config("CONFIG_UNEVEN_4WAY") eq "yes") { + specialize qw/aom_highbd_8_sub_pixel_variance16x16 avx2/; + } else { + specialize qw/aom_highbd_8_sub_pixel_variance16x16 sse2 avx2/; + } add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance16x8/, "const uint16_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/aom_highbd_8_sub_pixel_variance16x8 sse2 avx2/; + if (aom_config("CONFIG_UNEVEN_4WAY") eq "yes") { + specialize qw/aom_highbd_8_sub_pixel_variance16x8 avx2/; + } else { + specialize qw/aom_highbd_8_sub_pixel_variance16x8 sse2 avx2/; + } add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance8x16/, "const uint16_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse"; specialize qw/aom_highbd_8_sub_pixel_variance8x16 sse2/; @@ -1036,7 +1114,11 @@ specialize qw/aom_highbd_8_sub_pixel_variance32x8 sse2 avx2/; add_proto qw/uint32_t aom_highbd_8_sub_pixel_variance16x4/, "const uint16_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse"; - specialize qw/aom_highbd_8_sub_pixel_variance16x4 sse2 avx2/; + if (aom_config("CONFIG_UNEVEN_4WAY") eq "yes") { + specialize qw/aom_highbd_8_sub_pixel_variance16x4 avx2/; + } else { + specialize qw/aom_highbd_8_sub_pixel_variance16x4 sse2 avx2/; + } # # Subpixel Avg Variance @@ -1061,10 +1143,14 @@ specialize qw/aom_highbd_12_sub_pixel_avg_variance16x32 sse2/; add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance16x16/, "const uint16_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint16_t *ref_ptr, int ref_stride, 
uint32_t *sse, const uint16_t *second_pred"; - specialize qw/aom_highbd_12_sub_pixel_avg_variance16x16 sse2/; + if (aom_config("CONFIG_UNEVEN_4WAY") ne "yes") { + specialize qw/aom_highbd_12_sub_pixel_avg_variance16x16 sse2/; + } add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance16x8/, "const uint16_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse, const uint16_t *second_pred"; - specialize qw/aom_highbd_12_sub_pixel_avg_variance16x8 sse2/; + if (aom_config("CONFIG_UNEVEN_4WAY") ne "yes") { + specialize qw/aom_highbd_12_sub_pixel_avg_variance16x8 sse2/; + } add_proto qw/uint32_t aom_highbd_12_sub_pixel_avg_variance8x16/, "const uint16_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse, const uint16_t *second_pred"; specialize qw/aom_highbd_12_sub_pixel_avg_variance8x16 sse2/; @@ -1097,10 +1183,14 @@ specialize qw/aom_highbd_10_sub_pixel_avg_variance16x32 sse2/; add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance16x16/, "const uint16_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse, const uint16_t *second_pred"; - specialize qw/aom_highbd_10_sub_pixel_avg_variance16x16 sse2/; + if (aom_config("CONFIG_UNEVEN_4WAY") ne "yes") { + specialize qw/aom_highbd_10_sub_pixel_avg_variance16x16 sse2/; + } add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance16x8/, "const uint16_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse, const uint16_t *second_pred"; - specialize qw/aom_highbd_10_sub_pixel_avg_variance16x8 sse2/; + if (aom_config("CONFIG_UNEVEN_4WAY") ne "yes") { + specialize qw/aom_highbd_10_sub_pixel_avg_variance16x8 sse2/; + } add_proto qw/uint32_t aom_highbd_10_sub_pixel_avg_variance8x16/, "const uint16_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint16_t *ref_ptr, int ref_stride, 
uint32_t *sse, const uint16_t *second_pred"; specialize qw/aom_highbd_10_sub_pixel_avg_variance8x16 sse2/; @@ -1133,10 +1223,14 @@ specialize qw/aom_highbd_8_sub_pixel_avg_variance16x32 sse2/; add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance16x16/, "const uint16_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse, const uint16_t *second_pred"; - specialize qw/aom_highbd_8_sub_pixel_avg_variance16x16 sse2/; + if (aom_config("CONFIG_UNEVEN_4WAY") ne "yes") { + specialize qw/aom_highbd_8_sub_pixel_avg_variance16x16 sse2/; + } add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance16x8/, "const uint16_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse, const uint16_t *second_pred"; - specialize qw/aom_highbd_8_sub_pixel_avg_variance16x8 sse2/; + if (aom_config("CONFIG_UNEVEN_4WAY") ne "yes") { + specialize qw/aom_highbd_8_sub_pixel_avg_variance16x8 sse2/; + } add_proto qw/uint32_t aom_highbd_8_sub_pixel_avg_variance8x16/, "const uint16_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint16_t *ref_ptr, int ref_stride, uint32_t *sse, const uint16_t *second_pred"; specialize qw/aom_highbd_8_sub_pixel_avg_variance8x16 sse2/;
diff --git a/aom_dsp/binary_codes_reader.c b/aom_dsp/binary_codes_reader.c index b9172b0..486b623 100644 --- a/aom_dsp/binary_codes_reader.c +++ b/aom_dsp/binary_codes_reader.c
@@ -15,18 +15,18 @@ #include "av1/common/common.h" uint16_t aom_read_primitive_quniform_(aom_reader *r, - uint16_t n ACCT_STR_PARAM) { + uint16_t n ACCT_INFO_PARAM) { if (n <= 1) return 0; const int l = get_msb(n) + 1; const int m = (1 << l) - n; - const int v = aom_read_literal(r, l - 1, ACCT_STR_NAME); - return v < m ? v : (v << 1) - m + aom_read_bit(r, ACCT_STR_NAME); + const int v = aom_read_literal(r, l - 1, ACCT_INFO_NAME); + return v < m ? v : (v << 1) - m + aom_read_bit(r, ACCT_INFO_NAME); } // Decode finite subexponential code that for a symbol v in [0, n-1] with // parameter k uint16_t aom_read_primitive_subexpfin_(aom_reader *r, uint16_t n, - uint16_t k ACCT_STR_PARAM) { + uint16_t k ACCT_INFO_PARAM) { int i = 0; int mk = 0; @@ -35,11 +35,11 @@ int a = (1 << b); if (n <= mk + 3 * a) { - return aom_read_primitive_quniform(r, n - mk, ACCT_STR_NAME) + mk; + return aom_read_primitive_quniform(r, n - mk, ACCT_INFO_NAME) + mk; } - if (!aom_read_bit(r, ACCT_STR_NAME)) { - return aom_read_literal(r, b, ACCT_STR_NAME) + mk; + if (!aom_read_bit(r, ACCT_INFO_NAME)) { + return aom_read_literal(r, b, ACCT_INFO_NAME) + mk; } i = i + 1; @@ -51,7 +51,7 @@ } uint16_t aom_read_primitive_refsubexpfin_(aom_reader *r, uint16_t n, uint16_t k, - uint16_t ref ACCT_STR_PARAM) { + uint16_t ref ACCT_INFO_PARAM) { return inv_recenter_finite_nonneg( - n, ref, aom_read_primitive_subexpfin(r, n, k, ACCT_STR_NAME)); + n, ref, aom_read_primitive_subexpfin(r, n, k, ACCT_INFO_NAME)); }
diff --git a/aom_dsp/binary_codes_reader.h b/aom_dsp/binary_codes_reader.h index 4559ff2..4e35483 100644 --- a/aom_dsp/binary_codes_reader.h +++ b/aom_dsp/binary_codes_reader.h
@@ -25,18 +25,19 @@ #include "aom_dsp/bitreader.h" #include "aom_dsp/bitreader_buffer.h" -#define aom_read_primitive_quniform(r, n, ACCT_STR_NAME) \ - aom_read_primitive_quniform_(r, n ACCT_STR_ARG(ACCT_STR_NAME)) -#define aom_read_primitive_subexpfin(r, n, k, ACCT_STR_NAME) \ - aom_read_primitive_subexpfin_(r, n, k ACCT_STR_ARG(ACCT_STR_NAME)) -#define aom_read_primitive_refsubexpfin(r, n, k, ref, ACCT_STR_NAME) \ - aom_read_primitive_refsubexpfin_(r, n, k, ref ACCT_STR_ARG(ACCT_STR_NAME)) +#define aom_read_primitive_quniform(r, n, ACCT_INFO_NAME) \ + aom_read_primitive_quniform_(r, n ACCT_INFO_ARG(ACCT_INFO_NAME)) +#define aom_read_primitive_subexpfin(r, n, k, ACCT_INFO_NAME) \ + aom_read_primitive_subexpfin_(r, n, k ACCT_INFO_ARG(ACCT_INFO_NAME)) +#define aom_read_primitive_refsubexpfin(r, n, k, ref, ACCT_INFO_NAME) \ + aom_read_primitive_refsubexpfin_(r, n, k, ref ACCT_INFO_ARG(ACCT_INFO_NAME)) -uint16_t aom_read_primitive_quniform_(aom_reader *r, uint16_t n ACCT_STR_PARAM); +uint16_t aom_read_primitive_quniform_(aom_reader *r, + uint16_t n ACCT_INFO_PARAM); uint16_t aom_read_primitive_subexpfin_(aom_reader *r, uint16_t n, - uint16_t k ACCT_STR_PARAM); + uint16_t k ACCT_INFO_PARAM); uint16_t aom_read_primitive_refsubexpfin_(aom_reader *r, uint16_t n, uint16_t k, - uint16_t ref ACCT_STR_PARAM); + uint16_t ref ACCT_INFO_PARAM); #ifdef __cplusplus } // extern "C"
diff --git a/aom_dsp/bitreader.c b/aom_dsp/bitreader.c index 96fc999..b1c346e 100644 --- a/aom_dsp/bitreader.c +++ b/aom_dsp/bitreader.c
@@ -31,7 +31,7 @@ uint32_t aom_reader_tell(const aom_reader *r) { return od_ec_dec_tell(&r->ec); } -uint32_t aom_reader_tell_frac(const aom_reader *r) { +uint64_t aom_reader_tell_frac(const aom_reader *r) { return od_ec_dec_tell_frac(&r->ec); }
diff --git a/aom_dsp/bitreader.h b/aom_dsp/bitreader.h index ee999ba..1a03823 100644 --- a/aom_dsp/bitreader.h +++ b/aom_dsp/bitreader.h
@@ -33,41 +33,42 @@ #if CONFIG_ACCOUNTING #include "av1/decoder/accounting.h" -#define ACCT_STR_NAME acct_str -#define ACCT_STR_PARAM , const char *ACCT_STR_NAME -#define ACCT_STR_ARG(s) , s +#define ACCT_INFO_NAME acct_info +#define ACCT_INFO_PARAM , AccountingSymbolInfo acct_info +#define ACCT_INFO_ARG(s) , s #else -#define ACCT_STR_PARAM -#define ACCT_STR_ARG(s) +#define ACCT_INFO_PARAM +#define ACCT_INFO_ARG(s) #endif -#define aom_read(r, prob, ACCT_STR_NAME) \ - aom_read_(r, prob ACCT_STR_ARG(ACCT_STR_NAME)) +#define aom_read(r, prob, ACCT_INFO_NAME) \ + aom_read_(r, prob ACCT_INFO_ARG(ACCT_INFO_NAME)) #if CONFIG_BYPASS_IMPROVEMENT -#define aom_read_bypass(r, ACCT_STR_NAME) \ - aom_read_bypass_(r ACCT_STR_ARG(ACCT_STR_NAME)) +#define aom_read_bypass(r, ACCT_INFO_NAME) \ + aom_read_bypass_(r ACCT_INFO_ARG(ACCT_INFO_NAME)) #endif // CONFIG_BYPASS_IMPROVEMENT -#define aom_read_bit(r, ACCT_STR_NAME) \ - aom_read_bit_(r ACCT_STR_ARG(ACCT_STR_NAME)) -#define aom_read_tree(r, tree, probs, ACCT_STR_NAME) \ - aom_read_tree_(r, tree, probs ACCT_STR_ARG(ACCT_STR_NAME)) -#define aom_read_literal(r, bits, ACCT_STR_NAME) \ - aom_read_literal_(r, bits ACCT_STR_ARG(ACCT_STR_NAME)) -#define aom_read_cdf(r, cdf, nsymbs, ACCT_STR_NAME) \ - aom_read_cdf_(r, cdf, nsymbs ACCT_STR_ARG(ACCT_STR_NAME)) -#define aom_read_symbol(r, cdf, nsymbs, ACCT_STR_NAME) \ - aom_read_symbol_(r, cdf, nsymbs ACCT_STR_ARG(ACCT_STR_NAME)) +#define aom_read_bit(r, ACCT_INFO_NAME) \ + aom_read_bit_(r ACCT_INFO_ARG(ACCT_INFO_NAME)) +#define aom_read_tree(r, tree, probs, ACCT_INFO_NAME) \ + aom_read_tree_(r, tree, probs ACCT_INFO_ARG(ACCT_INFO_NAME)) +#define aom_read_literal(r, bits, ACCT_INFO_NAME) \ + aom_read_literal_(r, bits ACCT_INFO_ARG(ACCT_INFO_NAME)) +#define aom_read_cdf(r, cdf, nsymbs, ACCT_INFO_NAME) \ + aom_read_cdf_(r, cdf, nsymbs ACCT_INFO_ARG(ACCT_INFO_NAME)) +#define aom_read_symbol(r, cdf, nsymbs, ACCT_INFO_NAME) \ + aom_read_symbol_(r, cdf, nsymbs ACCT_INFO_ARG(ACCT_INFO_NAME)) #if 
CONFIG_BYPASS_IMPROVEMENT -#define aom_read_unary(r, bits, ACCT_STR_NAME) \ - aom_read_unary_(r, bits ACCT_STR_ARG(ACCT_STR_NAME)) +#define aom_read_unary(r, bits, ACCT_INFO_NAME) \ + aom_read_unary_(r, bits ACCT_INFO_ARG(ACCT_INFO_NAME)) #endif // CONFIG_BYPASS_IMPROVEMENT #if ENABLE_LR_4PART_CODE -#define aom_read_4part(r, cdf, nsymb_bits, ACCT_STR_NAME) \ - aom_read_4part_(r, cdf, nsymb_bits ACCT_STR_ARG(ACCT_STR_NAME)) -#define aom_read_4part_wref(r, ref_symb, cdf, nsymb_bits, ACCT_STR_NAME) \ - aom_read_4part_wref_(r, ref_symb, cdf, nsymb_bits ACCT_STR_ARG(ACCT_STR_NAME)) +#define aom_read_4part(r, cdf, nsymb_bits, ACCT_INFO_NAME) \ + aom_read_4part_(r, cdf, nsymb_bits ACCT_INFO_ARG(ACCT_INFO_NAME)) +#define aom_read_4part_wref(r, ref_symb, cdf, nsymb_bits, ACCT_INFO_NAME) \ + aom_read_4part_wref_(r, ref_symb, cdf, \ + nsymb_bits ACCT_INFO_ARG(ACCT_INFO_NAME)) #endif // ENABLE_LR_4PART_CODE #ifdef __cplusplus @@ -99,15 +100,17 @@ // Returns the position in the bit reader in bits. uint32_t aom_reader_tell(const aom_reader *r); -// Returns the position in the bit reader in 1/8th bits. -uint32_t aom_reader_tell_frac(const aom_reader *r); +// Returns the position in the bit reader in 1/65536th bits. 
+uint64_t aom_reader_tell_frac(const aom_reader *r); #if CONFIG_ACCOUNTING -static INLINE void aom_process_accounting(const aom_reader *r ACCT_STR_PARAM) { +static INLINE void aom_process_accounting(const aom_reader *r, int value, + SYMBOL_CODING_MODE coding_mode + ACCT_INFO_PARAM) { if (r->accounting != NULL) { - uint32_t tell_frac; + uint64_t tell_frac; tell_frac = aom_reader_tell_frac(r); - aom_accounting_record(r->accounting, ACCT_STR_NAME, + aom_accounting_record(r->accounting, value, coding_mode, ACCT_INFO_NAME, tell_frac - r->accounting->last_tell_frac); r->accounting->last_tell_frac = tell_frac; } @@ -134,7 +137,7 @@ } #endif -static INLINE int aom_read_(aom_reader *r, int prob ACCT_STR_PARAM) { +static INLINE int aom_read_(aom_reader *r, int prob ACCT_INFO_PARAM) { int p = (0x7FFFFF - (prob << 15) + prob) >> 8; int bit = od_ec_decode_bool_q15(&r->ec, p); @@ -173,7 +176,8 @@ #endif // CONFIG_BITSTREAM_DEBUG #if CONFIG_ACCOUNTING - if (ACCT_STR_NAME) aom_process_accounting(r, ACCT_STR_NAME); + if (ACCT_INFO_NAME.c_file) + aom_process_accounting(r, bit, SYMBOL_BIT, ACCT_INFO_NAME); #if CONFIG_THROUGHPUT_ANALYSIS aom_update_symb_counts(r, 1, 0, 1); #else @@ -223,13 +227,14 @@ #endif // CONFIG_BITSTREAM_DEBUG #if CONFIG_BYPASS_IMPROVEMENT -static INLINE int aom_read_bypass_(aom_reader *r ACCT_STR_PARAM) { +static INLINE int aom_read_bypass_(aom_reader *r ACCT_INFO_PARAM) { int ret = od_ec_decode_literal_bypass(&r->ec, 1); #if CONFIG_BITSTREAM_DEBUG bitstream_queue_pop_literal(ret, 1); #endif // CONFIG_BITSTREAM_DEBUG #if CONFIG_ACCOUNTING - if (ACCT_STR_NAME) aom_process_accounting(r, ACCT_STR_NAME); + if (ACCT_INFO_NAME.c_file) + aom_process_accounting(r, ret, SYMBOL_BIT_BYPASS, ACCT_INFO_NAME); #if CONFIG_THROUGHPUT_ANALYSIS aom_update_symb_counts(r, 1, 0, 1); #else @@ -240,20 +245,21 @@ } #endif // CONFIG_BYPASS_IMPROVEMENT -static INLINE int aom_read_bit_(aom_reader *r ACCT_STR_PARAM) { +static INLINE int aom_read_bit_(aom_reader *r ACCT_INFO_PARAM) { int 
ret; #if CONFIG_BYPASS_IMPROVEMENT - ret = aom_read_bypass(r, NULL); + ret = aom_read_bypass(r, ACCT_INFO_NAME); #else - ret = aom_read(r, 128, NULL); // aom_prob_half + ret = aom_read(r, 128, ACCT_INFO_NAME); // aom_prob_half #endif // CONFIG_BYPASS_IMPROVEMENT #if CONFIG_ACCOUNTING - if (ACCT_STR_NAME) aom_process_accounting(r, ACCT_STR_NAME); + if (ACCT_INFO_NAME.c_file) + aom_process_accounting(r, ret, SYMBOL_BIT_BYPASS, ACCT_INFO_NAME); #endif return ret; } -static INLINE int aom_read_literal_(aom_reader *r, int bits ACCT_STR_PARAM) { +static INLINE int aom_read_literal_(aom_reader *r, int bits ACCT_INFO_PARAM) { #if CONFIG_BYPASS_IMPROVEMENT int literal = 0; int n_bits = bits; @@ -268,7 +274,8 @@ bitstream_queue_pop_literal(literal, bits); #endif // CONFIG_BITSTREAM_DEBUG #if CONFIG_ACCOUNTING - if (ACCT_STR_NAME) aom_process_accounting(r, ACCT_STR_NAME); + if (ACCT_INFO_NAME.c_file) + aom_process_accounting(r, literal, SYMBOL_LITERAL_BYPASS, ACCT_INFO_NAME); #if CONFIG_THROUGHPUT_ANALYSIS aom_update_symb_counts(r, 1, 0, bits); #else @@ -285,7 +292,8 @@ #if CONFIG_BYPASS_IMPROVEMENT // Deocode unary coded symbol with truncation at max_nbits. -static INLINE int aom_read_unary_(aom_reader *r, int max_nbits ACCT_STR_PARAM) { +static INLINE int aom_read_unary_(aom_reader *r, + int max_nbits ACCT_INFO_PARAM) { int ret = od_ec_decode_unary_bypass(&r->ec, max_nbits); #if CONFIG_BITSTREAM_DEBUG int nbits = ret < max_nbits ? ret + 1 : max_nbits; @@ -294,7 +302,8 @@ #endif // CONFIG_BITSTREAM_DEBUG #if CONFIG_ACCOUNTING int n_bits = ret < max_nbits ? 
ret + 1 : max_nbits; - if (ACCT_STR_NAME) aom_process_accounting(r, ACCT_STR_NAME); + if (ACCT_INFO_NAME.c_file) + aom_process_accounting(r, ret, SYMBOL_UNARY, ACCT_INFO_NAME); #if CONFIG_THROUGHPUT_ANALYSIS aom_update_symb_counts(r, 1, 0, n_bits); #else @@ -306,7 +315,7 @@ #endif // CONFIG_BYPASS_IMPROVEMENT static INLINE int aom_read_cdf_(aom_reader *r, const aom_cdf_prob *cdf, - int nsymbs ACCT_STR_PARAM) { + int nsymbs ACCT_INFO_PARAM) { int symb; assert(cdf != NULL); symb = od_ec_decode_cdf_q15(&r->ec, cdf, nsymbs); @@ -351,7 +360,8 @@ #endif // CONFIG_BITSTREAM_DEBUG #if CONFIG_ACCOUNTING - if (ACCT_STR_NAME) aom_process_accounting(r, ACCT_STR_NAME); + if (ACCT_INFO_NAME.c_file) + aom_process_accounting(r, symb, SYMBOL_CDF, ACCT_INFO_NAME); #if CONFIG_THROUGHPUT_ANALYSIS aom_update_symb_counts(r, (nsymbs == 2), 1, 1); #else @@ -362,9 +372,9 @@ } static INLINE int aom_read_symbol_(aom_reader *r, aom_cdf_prob *cdf, - int nsymbs ACCT_STR_PARAM) { + int nsymbs ACCT_INFO_PARAM) { int ret; - ret = aom_read_cdf(r, cdf, nsymbs, ACCT_STR_NAME); + ret = aom_read_cdf(r, cdf, nsymbs, ACCT_INFO_NAME); if (r->allow_update_cdf) update_cdf(cdf, ret, nsymbs); return ret; } @@ -378,22 +388,22 @@ // (nsymb_bits - 3), (nsymb_bits - 3), (nsymb_bits - 2) or (nsymb_bits - 1) // bits, depending on the part. 
static INLINE int aom_read_4part_(aom_reader *r, aom_cdf_prob *cdf, - int nsymb_bits ACCT_STR_PARAM) { + int nsymb_bits ACCT_INFO_PARAM) { assert(nsymb_bits >= 3); int part_bits[4] = { (nsymb_bits - 3), (nsymb_bits - 3), (nsymb_bits - 2), (nsymb_bits - 1) }; int part_offs[4] = { 0, 1 << (nsymb_bits - 3), 1 << (nsymb_bits - 2), 1 << (nsymb_bits - 1) }; - const int part = aom_read_symbol(r, cdf, 4, ACCT_STR_NAME); - return aom_read_literal(r, part_bits[part], ACCT_STR_NAME) + part_offs[part]; + const int part = aom_read_symbol(r, cdf, 4, ACCT_INFO_NAME); + return aom_read_literal(r, part_bits[part], ACCT_INFO_NAME) + part_offs[part]; } // Implements a nsymb_bits bit 4-part code that codes a symbol symb given a // reference ref_symb after recentering symb around ref_symb. static INLINE int aom_read_4part_wref_(aom_reader *r, int ref_symb, aom_cdf_prob *cdf, - int nsymb_bits ACCT_STR_PARAM) { - const int symb = aom_read_4part(r, cdf, nsymb_bits, ACCT_STR_NAME); + int nsymb_bits ACCT_INFO_PARAM) { + const int symb = aom_read_4part(r, cdf, nsymb_bits, ACCT_INFO_NAME); return inv_recenter_finite_nonneg(1 << nsymb_bits, ref_symb, symb); } #endif // ENABLE_LR_4PART_CODE
diff --git a/aom_dsp/bitwriter_buffer.c b/aom_dsp/bitwriter_buffer.c index 7e41949..5f76f15 100644 --- a/aom_dsp/bitwriter_buffer.c +++ b/aom_dsp/bitwriter_buffer.c
@@ -92,6 +92,13 @@ void aom_wb_write_primitive_quniform(struct aom_write_bit_buffer *wb, uint16_t n, uint16_t v) { if (n <= 1) return; + assert(v < n); + // Split the valid range into two. + // The encoded value is in the range [0, n), but in order to map a range + // which may not be a power of 2 onto a binary code, we split into the + // sub-ranges [0, m) and [m, n), where m is an intermediate point. + // Values in the range [0, m) then use one fewer bit than values in + // the range [m, n). const int l = get_msb(n) + 1; const int m = (1 << l) - n; if (v < m) {
diff --git a/aom_dsp/entcode.c b/aom_dsp/entcode.c index c794edb..a49ae0d 100644 --- a/aom_dsp/entcode.c +++ b/aom_dsp/entcode.c
@@ -21,9 +21,9 @@ Return: The number of bits scaled by 2**OD_BITRES. This will always be slightly larger than the exact value (e.g., all rounding error is in the positive direction).*/ -uint32_t od_ec_tell_frac(uint32_t nbits_total, uint32_t rng) { - uint32_t nbits; - int l; +uint64_t od_ec_tell_frac(uint32_t nbits_total, uint32_t rng) { + uint64_t nbits; + int64_t l; int i; /*To handle the non-integral number of bits still left in the encoder/decoder state, we compute the worst-case number of bits of val that must be @@ -37,7 +37,7 @@ probability of 1/(1 << n) might sometimes appear to use more than n bits. This may help explain the surprising result that a newly initialized encoder or decoder claims to have used 1 bit.*/ - nbits = nbits_total << OD_BITRES; + nbits = (uint64_t)nbits_total << OD_BITRES; l = 0; for (i = OD_BITRES; i-- > 0;) { int b;
diff --git a/aom_dsp/entcode.h b/aom_dsp/entcode.h index 0227c52..2e95945 100644 --- a/aom_dsp/entcode.h +++ b/aom_dsp/entcode.h
@@ -33,14 +33,14 @@ #define OD_EC_WINDOW_SIZE ((int)sizeof(od_ec_window) * CHAR_BIT) /*The resolution of fractional-precision bit usage measurements, i.e., - 3 => 1/8th bits.*/ -#define OD_BITRES (3) + 16 => 1/65536th bits.*/ +#define OD_BITRES (16) #define OD_ICDF AOM_ICDF /*See entcode.c for further documentation.*/ -OD_WARN_UNUSED_RESULT uint32_t od_ec_tell_frac(uint32_t nbits_total, +OD_WARN_UNUSED_RESULT uint64_t od_ec_tell_frac(uint32_t nbits_total, uint32_t rng); #endif // AOM_AOM_DSP_ENTCODE_H_
diff --git a/aom_dsp/entdec.c b/aom_dsp/entdec.c index 02188ec..8cfb826 100644 --- a/aom_dsp/entdec.c +++ b/aom_dsp/entdec.c
@@ -352,6 +352,6 @@ Return: The number of bits scaled by 2**OD_BITRES. This will always be slightly larger than the exact value (e.g., all rounding error is in the positive direction).*/ -uint32_t od_ec_dec_tell_frac(const od_ec_dec *dec) { +uint64_t od_ec_dec_tell_frac(const od_ec_dec *dec) { return od_ec_tell_frac(od_ec_dec_tell(dec), dec->rng); }
diff --git a/aom_dsp/entdec.h b/aom_dsp/entdec.h index 7ca2f29..442ae2e 100644 --- a/aom_dsp/entdec.h +++ b/aom_dsp/entdec.h
@@ -82,7 +82,7 @@ OD_WARN_UNUSED_RESULT int od_ec_dec_tell(const od_ec_dec *dec) OD_ARG_NONNULL(1); -OD_WARN_UNUSED_RESULT uint32_t od_ec_dec_tell_frac(const od_ec_dec *dec) +OD_WARN_UNUSED_RESULT uint64_t od_ec_dec_tell_frac(const od_ec_dec *dec) OD_ARG_NONNULL(1); #ifdef __cplusplus
diff --git a/aom_dsp/entenc.c b/aom_dsp/entenc.c index 8517cee..5f19c0e 100644 --- a/aom_dsp/entenc.c +++ b/aom_dsp/entenc.c
@@ -467,7 +467,7 @@ Return: The number of bits scaled by 2**OD_BITRES. This will always be slightly larger than the exact value (e.g., all rounding error is in the positive direction).*/ -uint32_t od_ec_enc_tell_frac(const od_ec_enc *enc) { +uint64_t od_ec_enc_tell_frac(const od_ec_enc *enc) { return od_ec_tell_frac(od_ec_enc_tell(enc), enc->rng); }
diff --git a/aom_dsp/entenc.h b/aom_dsp/entenc.h index fbb35a0..9831601 100644 --- a/aom_dsp/entenc.h +++ b/aom_dsp/entenc.h
@@ -78,7 +78,7 @@ OD_WARN_UNUSED_RESULT int od_ec_enc_tell(const od_ec_enc *enc) OD_ARG_NONNULL(1); -OD_WARN_UNUSED_RESULT uint32_t od_ec_enc_tell_frac(const od_ec_enc *enc) +OD_WARN_UNUSED_RESULT uint64_t od_ec_enc_tell_frac(const od_ec_enc *enc) OD_ARG_NONNULL(1); void od_ec_enc_checkpoint(od_ec_enc *dst, const od_ec_enc *src);
diff --git a/aom_dsp/intrapred.c b/aom_dsp/intrapred.c index 5d39cd0..225d5a4 100644 --- a/aom_dsp/intrapred.c +++ b/aom_dsp/intrapred.c
@@ -296,11 +296,51 @@ } } +#if CONFIG_BLEND_MODE +#define BLEND_WEIGHT_MAX 32 +static const uint8_t blk_size_log2[65] = { + 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6 +}; +#endif // CONFIG_BLEND_MODE + static INLINE void highbd_smooth_predictor(uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int bd) { (void)bd; +#if CONFIG_BLEND_MODE + const uint16_t bl = left[bh]; // estimated by bottom-left pixel + const uint16_t tr = above[bw]; // estimated by top-right pixel + + uint16_t *pred = dst; + const int scale = + ROUND_POWER_OF_TWO((blk_size_log2[bh] - 2 + blk_size_log2[bw] - 2), 2); + assert(scale >= 0 && scale <= BLEND_WEIGHT_MAX - 1); + for (int r = 0; r < bh; r++) { + const int s_top = + BLEND_WEIGHT_MAX >> + AOMMIN(blk_size_log2[BLEND_WEIGHT_MAX << 1], ((r << 1) >> scale)); + const uint32_t l = left[r]; + for (int c = 0; c < bw; c++) { + const int s_left = + BLEND_WEIGHT_MAX >> + AOMMIN(blk_size_log2[BLEND_WEIGHT_MAX << 1], ((c << 1) >> scale)); + const uint32_t top = above[c]; + uint32_t predv = (above[c] * (bh - 1 - r) + bl * (r + 1)) * bw; + uint32_t predh = (left[r] * (bw - 1 - c) + tr * (c + 1)) * bh; + predv = (s_top * top * bw * bh + (BLEND_WEIGHT_MAX * 2 - s_top) * predv); + assert(predv < UINT_MAX); + predh = (s_left * l * bw * bh + (BLEND_WEIGHT_MAX * 2 - s_left) * predh); + assert(predh < UINT_MAX); + + const int bits = 1 + 6 + blk_size_log2[bh] + blk_size_log2[bw]; + pred[c] = divide_round((predv + predh), bits); + } + pred += stride; + } +#else const uint16_t below_pred = left[bh - 1]; // estimated by bottom-left pixel const uint16_t right_pred = above[bw - 1]; // estimated by top-right pixel const uint8_t *const sm_weights_w = sm_weight_arrays + bw; @@ -327,6 +367,7 @@ } dst += stride; } +#endif // CONFIG_BLEND_MODE } static INLINE void 
highbd_smooth_v_predictor(uint16_t *dst, ptrdiff_t stride, @@ -334,6 +375,29 @@ const uint16_t *above, const uint16_t *left, int bd) { (void)bd; +#if CONFIG_BLEND_MODE + const uint16_t bl = left[bh]; // estimated by bottom-left pixel + + uint16_t *pred = dst; + const int scale = + ROUND_POWER_OF_TWO((blk_size_log2[bh] - 2 + blk_size_log2[bw] - 2), 2); + assert(scale >= 0 && scale <= BLEND_WEIGHT_MAX - 1); + for (int r = 0; r < bh; ++r) { + const int s_top = + BLEND_WEIGHT_MAX >> + AOMMIN(blk_size_log2[BLEND_WEIGHT_MAX << 1], ((r << 1) >> scale)); + for (int c = 0; c < bw; ++c) { + const uint32_t top = above[c]; + uint32_t predv = (above[c] * (bh - 1 - r) + bl * (r + 1)) * bw; + assert(predv < UINT_MAX); + const int bits = 6 + blk_size_log2[bh] + blk_size_log2[bw]; + pred[c] = divide_round( + (s_top * top * bw * bh + (BLEND_WEIGHT_MAX * 2 - s_top) * predv), + bits); + } + pred += stride; + } +#else const uint16_t below_pred = left[bh - 1]; // estimated by bottom-left pixel const uint8_t *const sm_weights = sm_weight_arrays + bh; // scale = 2^sm_weight_log2_scale @@ -358,6 +422,7 @@ } dst += stride; } +#endif // CONFIG_BLEND_MODE } static INLINE void highbd_smooth_h_predictor(uint16_t *dst, ptrdiff_t stride, @@ -365,6 +430,29 @@ const uint16_t *above, const uint16_t *left, int bd) { (void)bd; +#if CONFIG_BLEND_MODE + const uint16_t tr = above[bw]; // estimated by top-right pixel + + uint16_t *pred = dst; + const int scale = + ROUND_POWER_OF_TWO((blk_size_log2[bh] - 2 + blk_size_log2[bw] - 2), 2); + assert(scale >= 0 && scale <= BLEND_WEIGHT_MAX - 1); + for (int r = 0; r < bh; r++) { + const uint32_t l = left[r]; + for (int c = 0; c < bw; c++) { + const int s_left = + BLEND_WEIGHT_MAX >> + AOMMIN(blk_size_log2[BLEND_WEIGHT_MAX << 1], ((c << 1) >> scale)); + uint32_t predh = (left[r] * (bw - 1 - c) + tr * (c + 1)) * bh; + assert(predh < UINT_MAX); + const int bits = 6 + blk_size_log2[bh] + blk_size_log2[bw]; + pred[c] = divide_round( + (s_left * l * (bw * bh) + 
(BLEND_WEIGHT_MAX * 2 - s_left) * predh), + bits); + } + pred += stride; + } +#else const uint16_t right_pred = above[bw - 1]; // estimated by top-right pixel const uint8_t *const sm_weights = sm_weight_arrays + bw; // scale = 2^sm_weight_log2_scale @@ -389,6 +477,7 @@ } dst += stride; } +#endif // CONFIG_BLEND_MODE } static INLINE void highbd_dc_128_predictor(uint16_t *dst, ptrdiff_t stride,
diff --git a/aom_dsp/psnr.c b/aom_dsp/psnr.c index d6241e5..3220358 100644 --- a/aom_dsp/psnr.c +++ b/aom_dsp/psnr.c
@@ -18,13 +18,15 @@ #include "aom_dsp/psnr.h" #include "aom_scale/yv12config.h" +#define MIN_SSE 0.5 + double aom_sse_to_psnr(double samples, double peak, double sse) { - if (sse > 0.0) { - const double psnr = 10.0 * log10(samples * peak * peak / sse); - return psnr > MAX_PSNR ? MAX_PSNR : psnr; - } else { - return MAX_PSNR; - } + const bool zero_sse = (sse < MIN_SSE); + if (zero_sse) sse = MIN_SSE; + assert(sse > 0.0); + double psnr = 10.0 * log10(samples * peak * peak / sse); + if (zero_sse) psnr = ceil(psnr); + return psnr; } static void encoder_highbd_variance64(const uint16_t *a, int a_stride,
diff --git a/aom_dsp/x86/highbd_intrapred_asm_sse2.asm b/aom_dsp/x86/highbd_intrapred_asm_sse2.asm index 8ea91d2..022c60e 100644 --- a/aom_dsp/x86/highbd_intrapred_asm_sse2.asm +++ b/aom_dsp/x86/highbd_intrapred_asm_sse2.asm
@@ -106,14 +106,14 @@ pshuflw m0, m0, 0x0 punpcklqdq m0, m0 .loop: - mova [dstq ], m0 - mova [dstq +16], m0 - mova [dstq+strideq*2 ], m0 - mova [dstq+strideq*2 +16], m0 - mova [dstq+strideq*4 ], m0 - mova [dstq+strideq*4 +16], m0 - mova [dstq+stride3q*2 ], m0 - mova [dstq+stride3q*2+16], m0 + movu [dstq ], m0 + movu [dstq +16], m0 + movu [dstq+strideq*2 ], m0 + movu [dstq+strideq*2 +16], m0 + movu [dstq+strideq*4 ], m0 + movu [dstq+strideq*4 +16], m0 + movu [dstq+stride3q*2 ], m0 + movu [dstq+stride3q*2+16], m0 lea dstq, [dstq+strideq*8] dec lines4d jnz .loop @@ -214,14 +214,14 @@ lea stride3q, [strideq*3] mov nlines4d, 4 .loop: - mova [dstq ], m0 - mova [dstq +16], m1 - mova [dstq+strideq*2 ], m0 - mova [dstq+strideq*2 +16], m1 - mova [dstq+strideq*4 ], m0 - mova [dstq+strideq*4 +16], m1 - mova [dstq+stride3q*2 ], m0 - mova [dstq+stride3q*2+16], m1 + movu [dstq ], m0 + movu [dstq +16], m1 + movu [dstq+strideq*2 ], m0 + movu [dstq+strideq*2 +16], m1 + movu [dstq+strideq*4 ], m0 + movu [dstq+strideq*4 +16], m1 + movu [dstq+stride3q*2 ], m0 + movu [dstq+stride3q*2+16], m1 lea dstq, [dstq+strideq*8] dec nlines4d jnz .loop
diff --git a/aom_dsp/x86/highbd_intrapred_sse2.c b/aom_dsp/x86/highbd_intrapred_sse2.c index 4cc07a9..da7d2f3 100644 --- a/aom_dsp/x86/highbd_intrapred_sse2.c +++ b/aom_dsp/x86/highbd_intrapred_sse2.c
@@ -107,16 +107,16 @@ static INLINE void h_store_16_unpacklo(uint16_t **dst, const ptrdiff_t stride, const __m128i *row) { const __m128i val = _mm_unpacklo_epi64(*row, *row); - _mm_store_si128((__m128i *)*dst, val); - _mm_store_si128((__m128i *)(*dst + 8), val); + _mm_storeu_si128((__m128i *)*dst, val); + _mm_storeu_si128((__m128i *)(*dst + 8), val); *dst += stride; } static INLINE void h_store_16_unpackhi(uint16_t **dst, const ptrdiff_t stride, const __m128i *row) { const __m128i val = _mm_unpackhi_epi64(*row, *row); - _mm_store_si128((__m128i *)(*dst), val); - _mm_store_si128((__m128i *)(*dst + 8), val); + _mm_storeu_si128((__m128i *)(*dst), val); + _mm_storeu_si128((__m128i *)(*dst + 8), val); *dst += stride; } @@ -483,8 +483,8 @@ const __m128i dc_dup = _mm_unpacklo_epi64(dc_dup_lo, dc_dup_lo); int i; for (i = 0; i < height; ++i, dst += stride) { - _mm_store_si128((__m128i *)dst, dc_dup); - _mm_store_si128((__m128i *)(dst + 8), dc_dup); + _mm_storeu_si128((__m128i *)dst, dc_dup); + _mm_storeu_si128((__m128i *)(dst + 8), dc_dup); } } @@ -739,17 +739,17 @@ const __m128i above1_u16 = _mm_load_si128((const __m128i *)(above + 8)); int i; for (i = 0; i < 2; ++i) { - _mm_store_si128((__m128i *)dst, above0_u16); - _mm_store_si128((__m128i *)(dst + 8), above1_u16); + _mm_storeu_si128((__m128i *)dst, above0_u16); + _mm_storeu_si128((__m128i *)(dst + 8), above1_u16); dst += stride; - _mm_store_si128((__m128i *)dst, above0_u16); - _mm_store_si128((__m128i *)(dst + 8), above1_u16); + _mm_storeu_si128((__m128i *)dst, above0_u16); + _mm_storeu_si128((__m128i *)(dst + 8), above1_u16); dst += stride; - _mm_store_si128((__m128i *)dst, above0_u16); - _mm_store_si128((__m128i *)(dst + 8), above1_u16); + _mm_storeu_si128((__m128i *)dst, above0_u16); + _mm_storeu_si128((__m128i *)(dst + 8), above1_u16); dst += stride; - _mm_store_si128((__m128i *)dst, above0_u16); - _mm_store_si128((__m128i *)(dst + 8), above1_u16); + _mm_storeu_si128((__m128i *)dst, above0_u16); + 
_mm_storeu_si128((__m128i *)(dst + 8), above1_u16); dst += stride; } } @@ -901,17 +901,17 @@ const __m128i row = _mm_set1_epi16((uint16_t)sum32); int i; for (i = 0; i < 2; ++i) { - _mm_store_si128((__m128i *)dst, row); - _mm_store_si128((__m128i *)(dst + 8), row); + _mm_storeu_si128((__m128i *)dst, row); + _mm_storeu_si128((__m128i *)(dst + 8), row); dst += stride; - _mm_store_si128((__m128i *)dst, row); - _mm_store_si128((__m128i *)(dst + 8), row); + _mm_storeu_si128((__m128i *)dst, row); + _mm_storeu_si128((__m128i *)(dst + 8), row); dst += stride; - _mm_store_si128((__m128i *)dst, row); - _mm_store_si128((__m128i *)(dst + 8), row); + _mm_storeu_si128((__m128i *)dst, row); + _mm_storeu_si128((__m128i *)(dst + 8), row); dst += stride; - _mm_store_si128((__m128i *)dst, row); - _mm_store_si128((__m128i *)(dst + 8), row); + _mm_storeu_si128((__m128i *)dst, row); + _mm_storeu_si128((__m128i *)(dst + 8), row); dst += stride; } }
diff --git a/aom_dsp/x86/highbd_variance_sse2.c b/aom_dsp/x86/highbd_variance_sse2.c index 1e29924..d288ef7 100644 --- a/aom_dsp/x86/highbd_variance_sse2.c +++ b/aom_dsp/x86/highbd_variance_sse2.c
@@ -251,6 +251,7 @@ const uint16_t *src, ptrdiff_t src_stride, int x_offset, int y_offset, \ const uint16_t *dst, ptrdiff_t dst_stride, int height, \ unsigned int *sse, void *unused0, void *unused); + #define DECLS(opt) \ DECL(8, opt); \ DECL(16, opt) @@ -394,6 +395,26 @@ return (var >= 0) ? (uint32_t)var : 0; \ } +#if CONFIG_UNEVEN_4WAY +// TODO(any): Add back 16X16, 16X8, 16X4 after fixing alignment issues. +#define FNS(opt) \ + FN(128, 128, 16, 7, 7, opt, (int64_t)); \ + FN(128, 64, 16, 7, 6, opt, (int64_t)); \ + FN(64, 128, 16, 6, 7, opt, (int64_t)); \ + FN(64, 64, 16, 6, 6, opt, (int64_t)); \ + FN(64, 32, 16, 6, 5, opt, (int64_t)); \ + FN(32, 64, 16, 5, 6, opt, (int64_t)); \ + FN(32, 32, 16, 5, 5, opt, (int64_t)); \ + FN(32, 16, 16, 5, 4, opt, (int64_t)); \ + FN(16, 32, 16, 4, 5, opt, (int64_t)); \ + FN(8, 16, 8, 3, 4, opt, (int64_t)); \ + FN(8, 8, 8, 3, 3, opt, (int64_t)); \ + FN(8, 4, 8, 3, 2, opt, (int64_t)); \ + FN(8, 32, 8, 3, 5, opt, (int64_t)); \ + FN(32, 8, 16, 5, 3, opt, (int64_t)); \ + FN(16, 64, 16, 4, 6, opt, (int64_t)); \ + FN(64, 16, 16, 6, 4, opt, (int64_t)) +#else #define FNS(opt) \ FN(128, 128, 16, 7, 7, opt, (int64_t)); \ FN(128, 64, 16, 7, 6, opt, (int64_t)); \ @@ -414,6 +435,7 @@ FN(32, 8, 16, 5, 3, opt, (int64_t)); \ FN(16, 64, 16, 4, 6, opt, (int64_t)); \ FN(64, 16, 16, 6, 4, opt, (int64_t)) +#endif // CONFIG_UNEVEN_4WAY FNS(sse2); @@ -552,6 +574,23 @@ return (var >= 0) ? (uint32_t)var : 0; \ } +#if CONFIG_UNEVEN_4WAY +// TODO(any): Add back 16X16, 16X8, 16X4 after fixing alignment issues. 
+#define FNS(opt) \ + FN(64, 64, 16, 6, 6, opt, (int64_t)); \ + FN(64, 32, 16, 6, 5, opt, (int64_t)); \ + FN(32, 64, 16, 5, 6, opt, (int64_t)); \ + FN(32, 32, 16, 5, 5, opt, (int64_t)); \ + FN(32, 16, 16, 5, 4, opt, (int64_t)); \ + FN(16, 32, 16, 4, 5, opt, (int64_t)); \ + FN(8, 16, 8, 3, 4, opt, (int64_t)); \ + FN(8, 8, 8, 3, 3, opt, (int64_t)); \ + FN(8, 4, 8, 3, 2, opt, (int64_t)); \ + FN(8, 32, 8, 3, 5, opt, (int64_t)); \ + FN(32, 8, 16, 5, 3, opt, (int64_t)); \ + FN(16, 64, 16, 4, 6, opt, (int64_t)); \ + FN(64, 16, 16, 6, 4, opt, (int64_t)); +#else #define FNS(opt) \ FN(64, 64, 16, 6, 6, opt, (int64_t)); \ FN(64, 32, 16, 6, 5, opt, (int64_t)); \ @@ -569,6 +608,7 @@ FN(32, 8, 16, 5, 3, opt, (int64_t)); \ FN(16, 64, 16, 4, 6, opt, (int64_t)); \ FN(64, 16, 16, 6, 4, opt, (int64_t)); +#endif // CONFIG_UNEVEN_4WAY FNS(sse2);
diff --git a/aom_dsp/x86/intrapred_avx2.c b/aom_dsp/x86/intrapred_avx2.c index bbc2d51..aeb08e3 100644 --- a/aom_dsp/x86/intrapred_avx2.c +++ b/aom_dsp/x86/intrapred_avx2.c
@@ -15,6 +15,9 @@ #include "config/aom_dsp_rtcd.h" #include "aom_dsp/x86/intrapred_x86.h" #include "aom_dsp/x86/lpf_common_sse2.h" +#if CONFIG_IDIF +#include "av1/common/reconintra.h" +#endif // CONFIG_IDIF static DECLARE_ALIGNED(16, uint8_t, HighbdLoadMaskx[8][16]) = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, @@ -2692,3 +2695,2059 @@ } return; } + +#if CONFIG_IDIF + +static INLINE __m256i highbd_clamp_epi16_avx2(__m256i u, int bd) { + const __m256i zero = _mm256_setzero_si256(); + const int max_i = ((1 << bd) - 1) << POWER_DR_INTERP_FILTER; + const __m256i max = _mm256_set1_epi16(max_i); + __m256i t, clamped; + + t = _mm256_max_epi16(u, zero); + clamped = _mm256_min_epi16(t, max); + + return clamped; +} + +static INLINE __m256i highbd_clamp_epi32_avx2(__m256i u, int bd) { + const __m256i zero = _mm256_setzero_si256(); + const int max_i = ((1 << bd) - 1) << POWER_DR_INTERP_FILTER; + const __m256i max = _mm256_set1_epi32(max_i); + __m256i t, clamped; + + t = _mm256_max_epi32(u, zero); + clamped = _mm256_min_epi32(t, max); + + return clamped; +} + +static AOM_FORCE_INLINE void highbd_dr_prediction_z1_4xN_internal_idif_avx2( + int N, __m128i *dst, const uint16_t *above, int dx, int mrl_index, int bd) { + const int frac_bits = 6; + const int max_base_x = ((N + 4) - 1 + (mrl_index << 1)); + + assert(dx > 0); + __m256i a0, a1, a2, a3; + __m256i val0, val1; + __m128i a_mbase_x, max_base_x128, base_inc128, mask128; + __m256i f0, f1, f2, f3; + + __m256i rnding = _mm256_set1_epi16(1 << (POWER_DR_INTERP_FILTER - 1)); + + a_mbase_x = _mm_set1_epi16(above[max_base_x]); + max_base_x128 = _mm_set1_epi16(max_base_x); + + int shift_i; + int x = dx * (1 + mrl_index); + for (int r = 0; r < N; r++) { + __m128i res1; + + int base = x >> frac_bits; + if (base >= max_base_x) { + for (int i = r; i < N; ++i) { + dst[i] = a_mbase_x; // save 4 values + } + return; + } + + // load refs + a0 = _mm256_castsi128_si256(_mm_loadu_si128((__m128i *)(above + base - 1))); + a1 = 
_mm256_castsi128_si256(_mm_loadu_si128((__m128i *)(above + base))); + a2 = _mm256_castsi128_si256(_mm_loadu_si128((__m128i *)(above + base + 1))); + a3 = _mm256_castsi128_si256(_mm_loadu_si128((__m128i *)(above + base + 2))); + + // load filter + shift_i = (x & 0x3F) >> 1; + f0 = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][0]); + f1 = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][1]); + f2 = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][2]); + f3 = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][3]); + + // multiply and sum + val0 = _mm256_adds_epi16(_mm256_mullo_epi16(a0, f0), + _mm256_mullo_epi16(a1, f1)); + val1 = _mm256_adds_epi16(_mm256_mullo_epi16(a2, f2), + _mm256_mullo_epi16(a3, f3)); + val0 = _mm256_adds_epi16(val0, val1); + + val0 = highbd_clamp_epi16_avx2(val0, bd); + val0 = _mm256_adds_epi16(val0, rnding); + val0 = _mm256_srli_epi16(val0, POWER_DR_INTERP_FILTER); + + // discard values + res1 = _mm256_castsi256_si128(val0); + base_inc128 = _mm_setr_epi16(base, base + 1, base + 2, base + 3, base + 4, + base + 5, base + 6, base + 7); + mask128 = _mm_cmpgt_epi16(max_base_x128, base_inc128); + dst[r] = _mm_blendv_epi8(a_mbase_x, res1, mask128); + x += dx; + } +} + +static AOM_FORCE_INLINE void +highbd_dr_prediction_32bit_z1_4xN_internal_idif_avx2(int N, __m128i *dst, + const uint16_t *above, + int dx, int mrl_index, + int bd) { + const int frac_bits = 6; + const int max_base_x = ((N + 4) - 1 + (mrl_index << 1)); + + assert(dx > 0); + __m256i a0, a1, a2, a3; + __m256i val0, val1; + __m128i a_mbase_x, max_base_x128, base_inc128, mask128; + __m256i f0, f1, f2, f3; + + __m256i rnding = _mm256_set1_epi32(1 << (POWER_DR_INTERP_FILTER - 1)); + + a_mbase_x = _mm_set1_epi16(above[max_base_x]); + max_base_x128 = _mm_set1_epi32(max_base_x); + + int x = dx * (1 + mrl_index); + int shift_i; + for (int r = 0; r < N; r++) { + __m128i res1; + + int base = x >> frac_bits; + if (base >= max_base_x) { + for (int i = r; i < N; ++i) { + dst[i] = a_mbase_x; // save 4 
values + } + return; + } + + // load refs + a0 = _mm256_cvtepu16_epi32(_mm_loadu_si128((__m128i *)(above + base - 1))); + a1 = _mm256_cvtepu16_epi32(_mm_loadu_si128((__m128i *)(above + base))); + a2 = _mm256_cvtepu16_epi32(_mm_loadu_si128((__m128i *)(above + base + 1))); + a3 = _mm256_cvtepu16_epi32(_mm_loadu_si128((__m128i *)(above + base + 2))); + + // load filter + shift_i = (x & 0x3F) >> 1; + f0 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][0]); + f1 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][1]); + f2 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][2]); + f3 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][3]); + + // multiply and sum + val0 = _mm256_add_epi32(_mm256_mullo_epi32(a0, f0), + _mm256_mullo_epi32(a1, f1)); + val1 = _mm256_add_epi32(_mm256_mullo_epi32(a2, f2), + _mm256_mullo_epi32(a3, f3)); + val0 = _mm256_add_epi32(val0, val1); + + // round shift + val0 = highbd_clamp_epi32_avx2(val0, bd); + val0 = _mm256_add_epi32(val0, rnding); + val0 = _mm256_srli_epi32(val0, POWER_DR_INTERP_FILTER); + + // discard values + res1 = _mm256_castsi256_si128(val0); + res1 = _mm_packus_epi32(res1, res1); + + base_inc128 = _mm_setr_epi32(base, base + 1, base + 2, base + 3); + mask128 = _mm_cmpgt_epi32(max_base_x128, base_inc128); + mask128 = _mm_packs_epi32(mask128, mask128); // goto 16 bit + dst[r] = _mm_blendv_epi8(a_mbase_x, res1, mask128); + x += dx; + } +} + +static void highbd_dr_prediction_z1_4xN_idif_avx2( + uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, + const uint16_t *left, int dx, int dy, int bd, int mrl_index) { + (void)dy; + (void)left; + (void)bw; + assert(bw == 4); + int N = bh; + __m128i dstvec[16]; + + if (bd < 10) { + highbd_dr_prediction_z1_4xN_internal_idif_avx2(N, dstvec, above, dx, + mrl_index, bd); + } else { + highbd_dr_prediction_32bit_z1_4xN_internal_idif_avx2(N, dstvec, above, dx, + mrl_index, bd); + } + for (int i = 0; i < N; i++) { + _mm_storel_epi64((__m128i *)(dst + stride * i), dstvec[i]); + } +} 
+ +static AOM_FORCE_INLINE void highbd_dr_prediction_z1_8xN_internal_idif_avx2( + int N, __m128i *dst, const uint16_t *above, int dx, int mrl_index, int bd) { + const int frac_bits = 6; + const int max_base_x = ((N + 8) - 1 + (mrl_index << 1)); + + assert(dx > 0); + __m256i a0, a1, a2, a3; + __m256i val0, val1; + __m256i a_mbase_x, max_base_x256, base_inc256, mask256; + __m256i f0, f1, f2, f3; + + __m256i rnding = _mm256_set1_epi16(1 << (POWER_DR_INTERP_FILTER - 1)); + + a_mbase_x = _mm256_set1_epi16(above[max_base_x]); + max_base_x256 = _mm256_set1_epi16(max_base_x); + + int shift_i; + int x = dx * (1 + mrl_index); + for (int r = 0; r < N; r++) { + __m256i res1; + + int base = x >> frac_bits; + if (base >= max_base_x) { + for (int i = r; i < N; ++i) { + dst[i] = _mm256_castsi256_si128(a_mbase_x); // save 8 values + } + return; + } + + // load refs + a0 = _mm256_castsi128_si256(_mm_loadu_si128((__m128i *)(above + base - 1))); + a1 = _mm256_castsi128_si256(_mm_loadu_si128((__m128i *)(above + base))); + a2 = _mm256_castsi128_si256(_mm_loadu_si128((__m128i *)(above + base + 1))); + a3 = _mm256_castsi128_si256(_mm_loadu_si128((__m128i *)(above + base + 2))); + + // load filter + shift_i = (x & 0x3F) >> 1; + f0 = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][0]); + f1 = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][1]); + f2 = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][2]); + f3 = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][3]); + + val0 = _mm256_adds_epi16(_mm256_mullo_epi16(a0, f0), + _mm256_mullo_epi16(a1, f1)); + val1 = _mm256_adds_epi16(_mm256_mullo_epi16(a2, f2), + _mm256_mullo_epi16(a3, f3)); + val0 = _mm256_adds_epi16(val0, val1); + + // round-shift + val0 = highbd_clamp_epi16_avx2(val0, bd); + val0 = _mm256_adds_epi16(val0, rnding); + val0 = _mm256_srli_epi16(val0, POWER_DR_INTERP_FILTER); + + base_inc256 = + _mm256_setr_epi16(base, base + 1, base + 2, base + 3, base + 4, + base + 5, base + 6, base + 7, 0, 0, 0, 0, 0, 0, 0, 0); + + mask256 = 
_mm256_cmpgt_epi16(max_base_x256, base_inc256); + res1 = _mm256_blendv_epi8(a_mbase_x, val0, mask256); + dst[r] = _mm256_castsi256_si128(res1); + x += dx; + } +} + +static AOM_FORCE_INLINE void +highbd_dr_prediction_32bit_z1_8xN_internal_idif_avx2(int N, __m128i *dst, + const uint16_t *above, + int dx, int mrl_index, + int bd) { + const int frac_bits = 6; + const int max_base_x = ((N + 8) - 1 + (mrl_index << 1)); + + assert(dx > 0); + __m256i a0, a1, a2, a3; + __m256i val0, val1; + __m256i a_mbase_x, max_base_x256, base_inc256, mask256; + __m256i f0, f1, f2, f3; + + __m256i rnding = _mm256_set1_epi32(1 << (POWER_DR_INTERP_FILTER - 1)); + + a_mbase_x = _mm256_set1_epi16(above[max_base_x]); + max_base_x256 = _mm256_set1_epi32(max_base_x); + + int shift_i; + int x = dx * (1 + mrl_index); + for (int r = 0; r < N; r++) { + __m256i res1; + + int base = x >> frac_bits; + if (base >= max_base_x) { + for (int i = r; i < N; ++i) { + dst[i] = _mm256_castsi256_si128(a_mbase_x); // save 8 values + } + return; + } + + // load refs + a0 = _mm256_cvtepu16_epi32(_mm_loadu_si128((__m128i *)(above + base - 1))); + a1 = _mm256_cvtepu16_epi32(_mm_loadu_si128((__m128i *)(above + base))); + a2 = _mm256_cvtepu16_epi32(_mm_loadu_si128((__m128i *)(above + base + 1))); + a3 = _mm256_cvtepu16_epi32(_mm_loadu_si128((__m128i *)(above + base + 2))); + + // load filter + shift_i = (x & 0x3F) >> 1; + f0 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][0]); + f1 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][1]); + f2 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][2]); + f3 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][3]); + + // multiply and sum + val0 = _mm256_add_epi32(_mm256_mullo_epi32(a0, f0), + _mm256_mullo_epi32(a1, f1)); + val1 = _mm256_add_epi32(_mm256_mullo_epi32(a2, f2), + _mm256_mullo_epi32(a3, f3)); + val0 = _mm256_add_epi32(val0, val1); + + // round shift + val0 = highbd_clamp_epi32_avx2(val0, bd); + val0 = _mm256_add_epi32(val0, rnding); + val0 = _mm256_srli_epi32(val0, 
POWER_DR_INTERP_FILTER); + + res1 = _mm256_packus_epi32( + val0, _mm256_castsi128_si256(_mm256_extracti128_si256(val0, 1))); + + base_inc256 = _mm256_setr_epi32(base, base + 1, base + 2, base + 3, + base + 4, base + 5, base + 6, base + 7); + + mask256 = _mm256_cmpgt_epi32(max_base_x256, base_inc256); + mask256 = _mm256_packs_epi32( + mask256, _mm256_castsi128_si256( + _mm256_extracti128_si256(mask256, 1))); // go to 16 bit + res1 = _mm256_blendv_epi8(a_mbase_x, res1, mask256); + dst[r] = _mm256_castsi256_si128(res1); + x += dx; + } +} + +void highbd_dr_prediction_z1_8xN_idif_avx2(uint16_t *dst, ptrdiff_t stride, + int bw, int bh, + const uint16_t *above, + const uint16_t *left, int dx, int dy, + int bd, int mrl_index) { + (void)left; + (void)dy; + (void)bw; + assert(bw == 8); + int N = bh; + __m128i dstvec[32]; + if (bd < 10) { + highbd_dr_prediction_z1_8xN_internal_idif_avx2(N, dstvec, above, dx, + mrl_index, bd); + } else { + highbd_dr_prediction_32bit_z1_8xN_internal_idif_avx2(N, dstvec, above, dx, + mrl_index, bd); + } + for (int i = 0; i < N; i++) { + _mm_storeu_si128((__m128i *)(dst + stride * i), dstvec[i]); + } +} + +static AOM_FORCE_INLINE void highbd_dr_prediction_z1_16xN_internal_idif_avx2( + int N, __m256i *dstvec, const uint16_t *above, int dx, int mrl_index, + int bd) { + const int frac_bits = 6; + const int max_base_x = ((16 + N) - 1 + (mrl_index << 1)); + + __m256i a_mbase_x, max_base_x256, base_inc256, mask256; + + __m256i a0, a1, a2, a3; + __m256i val0, val1; + __m256i f0, f1, f2, f3; + + __m256i rnding = _mm256_set1_epi16(1 << (POWER_DR_INTERP_FILTER - 1)); + + a_mbase_x = _mm256_set1_epi16(above[max_base_x]); + max_base_x256 = _mm256_set1_epi16(max_base_x); + + int shift_i; + int x = dx * (1 + mrl_index); + for (int r = 0; r < N; r++) { + int base = x >> frac_bits; + if (base >= max_base_x) { + for (int i = r; i < N; ++i) { + dstvec[i] = a_mbase_x; // save 16 values + } + return; + } + + // load refs + a0 = _mm256_loadu_si256((__m256i *)(above + 
base - 1)); + a1 = _mm256_loadu_si256((__m256i *)(above + base)); + a2 = _mm256_loadu_si256((__m256i *)(above + base + 1)); + a3 = _mm256_loadu_si256((__m256i *)(above + base + 2)); + + // load filter + shift_i = (x & 0x3F) >> 1; + f0 = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][0]); + f1 = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][1]); + f2 = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][2]); + f3 = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][3]); + + val0 = _mm256_adds_epi16(_mm256_mullo_epi16(a0, f0), + _mm256_mullo_epi16(a1, f1)); + val1 = _mm256_adds_epi16(_mm256_mullo_epi16(a2, f2), + _mm256_mullo_epi16(a3, f3)); + val0 = _mm256_adds_epi16(val0, val1); + + // clamp and round-shift + val0 = highbd_clamp_epi16_avx2(val0, bd); + val0 = _mm256_adds_epi16(val0, rnding); + val0 = _mm256_srli_epi16(val0, POWER_DR_INTERP_FILTER); + + base_inc256 = _mm256_setr_epi16(base, base + 1, base + 2, base + 3, + base + 4, base + 5, base + 6, base + 7, + base + 8, base + 9, base + 10, base + 11, + base + 12, base + 13, base + 14, base + 15); + + mask256 = _mm256_cmpgt_epi16(max_base_x256, base_inc256); + dstvec[r] = _mm256_blendv_epi8(a_mbase_x, val0, mask256); + x += dx; + } +} + +static AOM_FORCE_INLINE void +highbd_dr_prediction_32bit_z1_16xN_internal_idif_avx2(int N, __m256i *dstvec, + const uint16_t *above, + int dx, int mrl_index, + int bd) { + const int frac_bits = 6; + const int max_base_x = ((16 + N) - 1 + (mrl_index << 1)); + __m256i a0, a1, a2, a3; + __m256i val0, val1; + __m256i f0, f1, f2, f3; + __m256i a_mbase_x, max_base_x256, base_inc256, mask256; + + __m256i rnding = _mm256_set1_epi32(1 << (POWER_DR_INTERP_FILTER - 1)); + + a_mbase_x = _mm256_set1_epi16(above[max_base_x]); + max_base_x256 = _mm256_set1_epi16(max_base_x); + + int shift_i; + int x = dx * (1 + mrl_index); + for (int r = 0; r < N; r++) { + __m256i res[2], res1; + + int base = x >> frac_bits; + if (base >= max_base_x) { + for (int i = r; i < N; ++i) { + dstvec[i] = a_mbase_x; // 
save 16 values + } + return; + } + + a0 = _mm256_cvtepu16_epi32(_mm_loadu_si128((__m128i *)(above + base - 1))); + a1 = _mm256_cvtepu16_epi32(_mm_loadu_si128((__m128i *)(above + base))); + a2 = _mm256_cvtepu16_epi32(_mm_loadu_si128((__m128i *)(above + base + 1))); + a3 = _mm256_cvtepu16_epi32(_mm_loadu_si128((__m128i *)(above + base + 2))); + + // load filter + shift_i = (x & 0x3F) >> 1; + f0 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][0]); + f1 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][1]); + f2 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][2]); + f3 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][3]); + + // multiply and sum + val0 = _mm256_add_epi32(_mm256_mullo_epi32(a0, f0), + _mm256_mullo_epi32(a1, f1)); + val1 = _mm256_add_epi32(_mm256_mullo_epi32(a2, f2), + _mm256_mullo_epi32(a3, f3)); + val0 = _mm256_add_epi32(val0, val1); + + // round shift + val0 = highbd_clamp_epi32_avx2(val0, bd); + val0 = _mm256_add_epi32(val0, rnding); + val0 = _mm256_srli_epi32(val0, POWER_DR_INTERP_FILTER); + + res[0] = _mm256_packus_epi32( + val0, _mm256_castsi128_si256(_mm256_extracti128_si256(val0, 1))); + + int mdif = max_base_x - base; + if (mdif > 8) { + a0 = + _mm256_cvtepu16_epi32(_mm_loadu_si128((__m128i *)(above + base + 7))); + a1 = + _mm256_cvtepu16_epi32(_mm_loadu_si128((__m128i *)(above + base + 8))); + a2 = + _mm256_cvtepu16_epi32(_mm_loadu_si128((__m128i *)(above + base + 9))); + a3 = _mm256_cvtepu16_epi32( + _mm_loadu_si128((__m128i *)(above + base + 10))); + + // multiply and sum + val0 = _mm256_add_epi32(_mm256_mullo_epi32(a0, f0), + _mm256_mullo_epi32(a1, f1)); + val1 = _mm256_add_epi32(_mm256_mullo_epi32(a2, f2), + _mm256_mullo_epi32(a3, f3)); + val0 = _mm256_add_epi32(val0, val1); + + // round shift + val0 = highbd_clamp_epi32_avx2(val0, bd); + val0 = _mm256_add_epi32(val0, rnding); + val0 = _mm256_srli_epi32(val0, POWER_DR_INTERP_FILTER); + + res[1] = _mm256_packus_epi32( + val0, _mm256_castsi128_si256(_mm256_extracti128_si256(val0, 1))); 
+ } else { + res[1] = a_mbase_x; + } + res1 = _mm256_inserti128_si256(res[0], _mm256_castsi256_si128(res[1]), + 1); // 16 16bit values + + base_inc256 = _mm256_setr_epi16(base, base + 1, base + 2, base + 3, + base + 4, base + 5, base + 6, base + 7, + base + 8, base + 9, base + 10, base + 11, + base + 12, base + 13, base + 14, base + 15); + mask256 = _mm256_cmpgt_epi16(max_base_x256, base_inc256); + dstvec[r] = _mm256_blendv_epi8(a_mbase_x, res1, mask256); + x += dx; + } +} + +static void highbd_dr_prediction_z1_16xN_idif_avx2( + uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, + const uint16_t *left, int dx, int dy, int bd, int mrl_index) { + (void)left; + (void)dy; + (void)bw; + assert(bw == 16); + int N = bh; + __m256i dstvec[64]; + if (bd < 10) { + highbd_dr_prediction_z1_16xN_internal_idif_avx2(N, dstvec, above, dx, + mrl_index, bd); + } else { + highbd_dr_prediction_32bit_z1_16xN_internal_idif_avx2(N, dstvec, above, dx, + mrl_index, bd); + } + for (int i = 0; i < N; i++) { + _mm256_storeu_si256((__m256i *)(dst + stride * i), dstvec[i]); + } +} + +static AOM_FORCE_INLINE void highbd_dr_prediction_z1_32xN_internal_idif_avx2( + int N, __m256i *dstvec, const uint16_t *above, int dx, int mrl_index, + int bd) { + const int frac_bits = 6; + const int max_base_x = ((32 + N) - 1 + (mrl_index << 1)); + + __m256i a_mbase_x, max_base_x256, base_inc256, mask256; + + __m256i a0, a1, a2, a3; + __m256i val0, val1; + __m256i f0, f1, f2, f3; + + __m256i rnding = _mm256_set1_epi16(1 << (POWER_DR_INTERP_FILTER - 1)); + + a_mbase_x = _mm256_set1_epi16(above[max_base_x]); + max_base_x256 = _mm256_set1_epi16(max_base_x); + + int shift_i; + int x = dx * (1 + mrl_index); + for (int r = 0; r < N; r++) { + __m256i res; + + int base = x >> frac_bits; + if (base >= max_base_x) { + for (int i = r; i < N; ++i) { + dstvec[i] = a_mbase_x; // save 32 values + dstvec[i + N] = a_mbase_x; + } + return; + } + + // load filter + shift_i = (x & 0x3F) >> 1; + f0 = 
_mm256_set1_epi16(av1_dr_interp_filter[shift_i][0]); + f1 = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][1]); + f2 = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][2]); + f3 = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][3]); + + for (int j = 0; j < 32; j += 16) { + int mdif = max_base_x - (base + j); + if (mdif <= 0) { + res = a_mbase_x; + } else { + // load refs + a0 = _mm256_loadu_si256((__m256i *)(above + base - 1 + j)); + a1 = _mm256_loadu_si256((__m256i *)(above + base + j)); + a2 = _mm256_loadu_si256((__m256i *)(above + base + 1 + j)); + a3 = _mm256_loadu_si256((__m256i *)(above + base + 2 + j)); + + val0 = _mm256_adds_epi16(_mm256_mullo_epi16(a0, f0), + _mm256_mullo_epi16(a1, f1)); + val1 = _mm256_adds_epi16(_mm256_mullo_epi16(a2, f2), + _mm256_mullo_epi16(a3, f3)); + val0 = _mm256_adds_epi16(val0, val1); + + // clamp and round-shift + val0 = highbd_clamp_epi16_avx2(val0, bd); + val0 = _mm256_adds_epi16(val0, rnding); + val0 = _mm256_srli_epi16(val0, POWER_DR_INTERP_FILTER); + + base_inc256 = _mm256_setr_epi16( + base + j, base + j + 1, base + j + 2, base + j + 3, base + j + 4, + base + j + 5, base + j + 6, base + j + 7, base + j + 8, + base + j + 9, base + j + 10, base + j + 11, base + j + 12, + base + j + 13, base + j + 14, base + j + 15); + + mask256 = _mm256_cmpgt_epi16(max_base_x256, base_inc256); + res = _mm256_blendv_epi8(a_mbase_x, val0, mask256); + } + if (!j) { + dstvec[r] = res; + } else { + dstvec[r + N] = res; + } + } + x += dx; + } +} + +static AOM_FORCE_INLINE void +highbd_dr_prediction_32bit_z1_32xN_internal_idif_avx2(int N, __m256i *dstvec, + const uint16_t *above, + int dx, int mrl_index, + int bd) { + const int frac_bits = 6; + const int max_base_x = ((32 + N) - 1 + (mrl_index << 1)); + + __m256i a_mbase_x, max_base_x256, base_inc256, mask256; + + __m256i a0, a1, a2, a3; + __m256i val0, val1; + __m256i f0, f1, f2, f3; + + __m256i rnding = _mm256_set1_epi32(1 << (POWER_DR_INTERP_FILTER - 1)); + + a_mbase_x = 
_mm256_set1_epi16(above[max_base_x]); + max_base_x256 = _mm256_set1_epi16(max_base_x); + + int shift_i; + int x = dx * (1 + mrl_index); + for (int r = 0; r < N; r++) { + __m256i res[2], res1; + + int base = x >> frac_bits; + if (base >= max_base_x) { + for (int i = r; i < N; ++i) { + dstvec[i] = a_mbase_x; // save 32 values + dstvec[i + N] = a_mbase_x; + } + return; + } + + // load filter + shift_i = (x & 0x3F) >> 1; + f0 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][0]); + f1 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][1]); + f2 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][2]); + f3 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][3]); + + for (int j = 0; j < 32; j += 16) { + int mdif = max_base_x - (base + j); + if (mdif <= 0) { + res1 = a_mbase_x; + } else { + a0 = _mm256_cvtepu16_epi32( + _mm_loadu_si128((__m128i *)(above + base - 1 + j))); + a1 = _mm256_cvtepu16_epi32( + _mm_loadu_si128((__m128i *)(above + base + j))); + a2 = _mm256_cvtepu16_epi32( + _mm_loadu_si128((__m128i *)(above + base + 1 + j))); + a3 = _mm256_cvtepu16_epi32( + _mm_loadu_si128((__m128i *)(above + base + 2 + j))); + + // multiply and sum + val0 = _mm256_add_epi32(_mm256_mullo_epi32(a0, f0), + _mm256_mullo_epi32(a1, f1)); + val1 = _mm256_add_epi32(_mm256_mullo_epi32(a2, f2), + _mm256_mullo_epi32(a3, f3)); + val0 = _mm256_add_epi32(val0, val1); + + // round shift + val0 = highbd_clamp_epi32_avx2(val0, bd); + val0 = _mm256_add_epi32(val0, rnding); + val0 = _mm256_srli_epi32(val0, POWER_DR_INTERP_FILTER); + + res[0] = _mm256_packus_epi32( + val0, _mm256_castsi128_si256(_mm256_extracti128_si256(val0, 1))); + + if (mdif > 8) { + a0 = _mm256_cvtepu16_epi32( + _mm_loadu_si128((__m128i *)(above + base + 7 + j))); + a1 = _mm256_cvtepu16_epi32( + _mm_loadu_si128((__m128i *)(above + base + 8 + j))); + a2 = _mm256_cvtepu16_epi32( + _mm_loadu_si128((__m128i *)(above + base + 9 + j))); + a3 = _mm256_cvtepu16_epi32( + _mm_loadu_si128((__m128i *)(above + base + 10 + j))); + + // multiply 
and sum + val0 = _mm256_add_epi32(_mm256_mullo_epi32(a0, f0), + _mm256_mullo_epi32(a1, f1)); + val1 = _mm256_add_epi32(_mm256_mullo_epi32(a2, f2), + _mm256_mullo_epi32(a3, f3)); + val0 = _mm256_add_epi32(val0, val1); + + // round shift + val0 = highbd_clamp_epi32_avx2(val0, bd); + val0 = _mm256_add_epi32(val0, rnding); + val0 = _mm256_srli_epi32(val0, POWER_DR_INTERP_FILTER); + + res[1] = _mm256_packus_epi32( + val0, _mm256_castsi128_si256(_mm256_extracti128_si256(val0, 1))); + } else { + res[1] = a_mbase_x; + } + res1 = _mm256_inserti128_si256(res[0], _mm256_castsi256_si128(res[1]), + 1); // 16 16bit values + base_inc256 = _mm256_setr_epi16( + base + j, base + j + 1, base + j + 2, base + j + 3, base + j + 4, + base + j + 5, base + j + 6, base + j + 7, base + j + 8, + base + j + 9, base + j + 10, base + j + 11, base + j + 12, + base + j + 13, base + j + 14, base + j + 15); + + mask256 = _mm256_cmpgt_epi16(max_base_x256, base_inc256); + res1 = _mm256_blendv_epi8(a_mbase_x, res1, mask256); + } + if (!j) { + dstvec[r] = res1; + } else { + dstvec[r + N] = res1; + } + } + x += dx; + } +} + +static void highbd_dr_prediction_z1_32xN_idif_avx2(int N, uint16_t *dst, + ptrdiff_t stride, + const uint16_t *above, + int dx, int bd, + int mrl_index) { + __m256i dstvec[128]; + if (bd < 10) { + highbd_dr_prediction_z1_32xN_internal_idif_avx2(N, dstvec, above, dx, + mrl_index, bd); + } else { + highbd_dr_prediction_32bit_z1_32xN_internal_idif_avx2(N, dstvec, above, dx, + mrl_index, bd); + } + for (int i = 0; i < N; i++) { + _mm256_storeu_si256((__m256i *)(dst + stride * i), dstvec[i]); + _mm256_storeu_si256((__m256i *)(dst + stride * i + 16), dstvec[i + N]); + } +} + +static void highbd_dr_prediction_z1_64xN_internal_idif_avx2( + int N, uint16_t *dst, ptrdiff_t stride, const uint16_t *above, int dx, + int mrl_index, int bd) { + const int frac_bits = 6; + const int max_base_x = ((64 + N) - 1 + (mrl_index << 1)); + + __m256i a_mbase_x, max_base_x256, base_inc256, mask256; + + __m256i 
a0, a1, a2, a3; + __m256i val0, val1; + __m256i f0, f1, f2, f3; + + __m256i rnding = _mm256_set1_epi16(1 << (POWER_DR_INTERP_FILTER - 1)); + + a_mbase_x = _mm256_set1_epi16(above[max_base_x]); + max_base_x256 = _mm256_set1_epi16(max_base_x); + + int shift_i; + int x = dx * (1 + mrl_index); + for (int r = 0; r < N; r++, dst += stride) { + __m256i res; + + int base = x >> frac_bits; + if (base >= max_base_x) { + for (int i = r; i < N; ++i) { + _mm256_storeu_si256((__m256i *)dst, a_mbase_x); // save 32 values + _mm256_storeu_si256((__m256i *)(dst + 16), a_mbase_x); + _mm256_storeu_si256((__m256i *)(dst + 32), a_mbase_x); + _mm256_storeu_si256((__m256i *)(dst + 48), a_mbase_x); + dst += stride; + } + return; + } + + // load filter + shift_i = (x & 0x3F) >> 1; + f0 = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][0]); + f1 = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][1]); + f2 = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][2]); + f3 = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][3]); + + for (int j = 0; j < 64; j += 16) { + int mdif = max_base_x - (base + j); + if (mdif <= 0) { + _mm256_storeu_si256((__m256i *)(dst + j), a_mbase_x); + } else { + // load refs + a0 = _mm256_loadu_si256((__m256i *)(above + base - 1 + j)); + a1 = _mm256_loadu_si256((__m256i *)(above + base + j)); + a2 = _mm256_loadu_si256((__m256i *)(above + base + 1 + j)); + a3 = _mm256_loadu_si256((__m256i *)(above + base + 2 + j)); + + val0 = _mm256_adds_epi16(_mm256_mullo_epi16(a0, f0), + _mm256_mullo_epi16(a1, f1)); + val1 = _mm256_adds_epi16(_mm256_mullo_epi16(a2, f2), + _mm256_mullo_epi16(a3, f3)); + val0 = _mm256_adds_epi16(val0, val1); + + // clamp and round-shift + val0 = highbd_clamp_epi16_avx2(val0, bd); + val0 = _mm256_adds_epi16(val0, rnding); + val0 = _mm256_srli_epi16(val0, POWER_DR_INTERP_FILTER); + + base_inc256 = _mm256_setr_epi16( + base + j, base + j + 1, base + j + 2, base + j + 3, base + j + 4, + base + j + 5, base + j + 6, base + j + 7, base + j + 8, + base + j + 9, base 
+ j + 10, base + j + 11, base + j + 12, + base + j + 13, base + j + 14, base + j + 15); + + mask256 = _mm256_cmpgt_epi16(max_base_x256, base_inc256); + res = _mm256_blendv_epi8(a_mbase_x, val0, mask256); + _mm256_storeu_si256((__m256i *)(dst + j), res); // 16 16bit values + } + } + x += dx; + } +} + +static void highbd_dr_prediction_32bit_z1_64xN_internal_idif_avx2( + int N, uint16_t *dst, ptrdiff_t stride, const uint16_t *above, int dx, + int mrl_index, int bd) { + const int frac_bits = 6; + const int max_base_x = ((64 + N) - 1 + (mrl_index << 1)); + + __m256i a0, a1, a2, a3; + + __m256i a_mbase_x, max_base_x256, base_inc256, mask256; + + __m256i val0, val1; + __m256i f0, f1, f2, f3; + + __m256i rnding = _mm256_set1_epi32(1 << (POWER_DR_INTERP_FILTER - 1)); + + a_mbase_x = _mm256_set1_epi16(above[max_base_x]); + max_base_x256 = _mm256_set1_epi16(max_base_x); + + int shift_i; + int x = dx * (1 + mrl_index); + for (int r = 0; r < N; r++, dst += stride) { + __m256i res[2], res1; + + int base = x >> frac_bits; + if (base >= max_base_x) { + for (int i = r; i < N; ++i) { + _mm256_storeu_si256((__m256i *)dst, a_mbase_x); // save 32 values + _mm256_storeu_si256((__m256i *)(dst + 16), a_mbase_x); + _mm256_storeu_si256((__m256i *)(dst + 32), a_mbase_x); + _mm256_storeu_si256((__m256i *)(dst + 48), a_mbase_x); + dst += stride; + } + return; + } + + // load filter + shift_i = (x & 0x3F) >> 1; + f0 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][0]); + f1 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][1]); + f2 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][2]); + f3 = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][3]); + + for (int j = 0; j < 64; j += 16) { + int mdif = max_base_x - (base + j); + if (mdif <= 0) { + _mm256_storeu_si256((__m256i *)(dst + j), a_mbase_x); + } else { + a0 = _mm256_cvtepu16_epi32( + _mm_loadu_si128((__m128i *)(above + base - 1 + j))); + a1 = _mm256_cvtepu16_epi32( + _mm_loadu_si128((__m128i *)(above + base + j))); + a2 = 
_mm256_cvtepu16_epi32( + _mm_loadu_si128((__m128i *)(above + base + 1 + j))); + a3 = _mm256_cvtepu16_epi32( + _mm_loadu_si128((__m128i *)(above + base + 2 + j))); + + // multiply and sum + val0 = _mm256_add_epi32(_mm256_mullo_epi32(a0, f0), + _mm256_mullo_epi32(a1, f1)); + val1 = _mm256_add_epi32(_mm256_mullo_epi32(a2, f2), + _mm256_mullo_epi32(a3, f3)); + val0 = _mm256_add_epi32(val0, val1); + + // round shift + val0 = highbd_clamp_epi32_avx2(val0, bd); + val0 = _mm256_add_epi32(val0, rnding); + val0 = _mm256_srli_epi32(val0, POWER_DR_INTERP_FILTER); + + res[0] = _mm256_packus_epi32( + val0, _mm256_castsi128_si256(_mm256_extracti128_si256(val0, 1))); + + if (mdif > 8) { + a0 = _mm256_cvtepu16_epi32( + _mm_loadu_si128((__m128i *)(above + base + 7 + j))); + a1 = _mm256_cvtepu16_epi32( + _mm_loadu_si128((__m128i *)(above + base + 8 + j))); + a2 = _mm256_cvtepu16_epi32( + _mm_loadu_si128((__m128i *)(above + base + 9 + j))); + a3 = _mm256_cvtepu16_epi32( + _mm_loadu_si128((__m128i *)(above + base + 10 + j))); + + // multiply and sum + val0 = _mm256_add_epi32(_mm256_mullo_epi32(a0, f0), + _mm256_mullo_epi32(a1, f1)); + val1 = _mm256_add_epi32(_mm256_mullo_epi32(a2, f2), + _mm256_mullo_epi32(a3, f3)); + val0 = _mm256_add_epi32(val0, val1); + + // round shift + val0 = highbd_clamp_epi32_avx2(val0, bd); + val0 = _mm256_add_epi32(val0, rnding); + val0 = _mm256_srli_epi32(val0, POWER_DR_INTERP_FILTER); + + res[1] = _mm256_packus_epi32( + val0, _mm256_castsi128_si256(_mm256_extracti128_si256(val0, 1))); + } else { + res[1] = a_mbase_x; + } + res1 = _mm256_inserti128_si256(res[0], _mm256_castsi256_si128(res[1]), + 1); // 16 16bit values + base_inc256 = _mm256_setr_epi16( + base + j, base + j + 1, base + j + 2, base + j + 3, base + j + 4, + base + j + 5, base + j + 6, base + j + 7, base + j + 8, + base + j + 9, base + j + 10, base + j + 11, base + j + 12, + base + j + 13, base + j + 14, base + j + 15); + + mask256 = _mm256_cmpgt_epi16(max_base_x256, base_inc256); + res1 = 
_mm256_blendv_epi8(a_mbase_x, res1, mask256); + _mm256_storeu_si256((__m256i *)(dst + j), res1); + } + } + x += dx; + } +} + +static void highbd_dr_prediction_z1_64xN_idif_avx2( + uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, + const uint16_t *left, int dx, int dy, int bd, int mrl_index) { + (void)left; + (void)dy; + (void)bw; + assert(bw == 64); + if (bd < 10) { + highbd_dr_prediction_z1_64xN_internal_idif_avx2(bh, dst, stride, above, dx, + mrl_index, bd); + } else { + highbd_dr_prediction_32bit_z1_64xN_internal_idif_avx2( + bh, dst, stride, above, dx, mrl_index, bd); + } +} + +void av1_highbd_dr_prediction_z1_idif_avx2(uint16_t *dst, ptrdiff_t stride, + int bw, int bh, + const uint16_t *above, + const uint16_t *left, int dx, int dy, + int bd, int mrl_index) { + switch (bw) { + case 4: + highbd_dr_prediction_z1_4xN_idif_avx2(dst, stride, bw, bh, above, left, + dx, dy, bd, mrl_index); + break; + case 8: + highbd_dr_prediction_z1_8xN_idif_avx2(dst, stride, bw, bh, above, left, + dx, dy, bd, mrl_index); + break; + case 16: + highbd_dr_prediction_z1_16xN_idif_avx2(dst, stride, bw, bh, above, left, + dx, dy, bd, mrl_index); + break; + case 32: + highbd_dr_prediction_z1_32xN_idif_avx2(bh, dst, stride, above, dx, bd, + mrl_index); + break; + case 64: + highbd_dr_prediction_z1_64xN_idif_avx2(dst, stride, bw, bh, above, left, + dx, dy, bd, mrl_index); + break; + default: break; + } + return; +} + +static AOM_FORCE_INLINE __m256i highbd_dr_row8_idif_avx2(const uint16_t *above, + const __m256i *filter, + int base_x, + int base_shift, + int bd) { + // load refs + __m128i a0_x128, a1_x128, a2_x128, a3_x128; + a0_x128 = _mm_loadu_si128((__m128i *)(above + base_x + base_shift - 1)); + a1_x128 = _mm_loadu_si128((__m128i *)(above + base_x + base_shift)); + a2_x128 = _mm_loadu_si128((__m128i *)(above + base_x + base_shift + 1)); + a3_x128 = _mm_loadu_si128((__m128i *)(above + base_x + base_shift + 2)); + + // load mask + a0_x128 = _mm_shuffle_epi8(a0_x128, 
*(__m128i *)HighbdLoadMaskx[base_shift]); + a1_x128 = _mm_shuffle_epi8(a1_x128, *(__m128i *)HighbdLoadMaskx[base_shift]); + a2_x128 = _mm_shuffle_epi8(a2_x128, *(__m128i *)HighbdLoadMaskx[base_shift]); + a3_x128 = _mm_shuffle_epi8(a3_x128, *(__m128i *)HighbdLoadMaskx[base_shift]); + + __m256i a0_x, a1_x, a2_x, a3_x; + a0_x = _mm256_castsi128_si256(a0_x128); + a1_x = _mm256_castsi128_si256(a1_x128); + a2_x = _mm256_castsi128_si256(a2_x128); + a3_x = _mm256_castsi128_si256(a3_x128); + + // multiply and sum + __m256i val0, val1; + val0 = _mm256_adds_epi16(_mm256_mullo_epi16(a0_x, filter[0]), + _mm256_mullo_epi16(a1_x, filter[1])); + val1 = _mm256_adds_epi16(_mm256_mullo_epi16(a2_x, filter[2]), + _mm256_mullo_epi16(a3_x, filter[3])); + val0 = _mm256_adds_epi16(val0, val1); + + // round shift + val0 = highbd_clamp_epi16_avx2(val0, bd); + const __m256i rnding = _mm256_set1_epi16(1 << (POWER_DR_INTERP_FILTER - 1)); + val0 = _mm256_adds_epi16(val0, rnding); + val0 = _mm256_srli_epi16(val0, POWER_DR_INTERP_FILTER); + + return val0; +} + +static AOM_FORCE_INLINE __m256i +highbd_dr_row8_32bit_idif_avx2(const uint16_t *above, const __m256i *filter, + int base_x, int base_shift, int bd) { + // load refs + __m128i a0_x128, a1_x128, a2_x128, a3_x128; + a0_x128 = _mm_loadu_si128((__m128i *)(above + base_x + base_shift - 1)); + a1_x128 = _mm_loadu_si128((__m128i *)(above + base_x + base_shift)); + a2_x128 = _mm_loadu_si128((__m128i *)(above + base_x + base_shift + 1)); + a3_x128 = _mm_loadu_si128((__m128i *)(above + base_x + base_shift + 2)); + + // load mask + a0_x128 = _mm_shuffle_epi8(a0_x128, *(__m128i *)HighbdLoadMaskx[base_shift]); + a1_x128 = _mm_shuffle_epi8(a1_x128, *(__m128i *)HighbdLoadMaskx[base_shift]); + a2_x128 = _mm_shuffle_epi8(a2_x128, *(__m128i *)HighbdLoadMaskx[base_shift]); + a3_x128 = _mm_shuffle_epi8(a3_x128, *(__m128i *)HighbdLoadMaskx[base_shift]); + + __m256i a0_x, a1_x, a2_x, a3_x; + a0_x = _mm256_cvtepu16_epi32(a0_x128); + a1_x = 
_mm256_cvtepu16_epi32(a1_x128); + a2_x = _mm256_cvtepu16_epi32(a2_x128); + a3_x = _mm256_cvtepu16_epi32(a3_x128); + + // multiply and sum + __m256i val0, val1; + val0 = _mm256_add_epi32(_mm256_mullo_epi32(a0_x, filter[0]), + _mm256_mullo_epi32(a1_x, filter[1])); + val1 = _mm256_add_epi32(_mm256_mullo_epi32(a2_x, filter[2]), + _mm256_mullo_epi32(a3_x, filter[3])); + val0 = _mm256_add_epi32(val0, val1); + + // round shift + val0 = highbd_clamp_epi32_avx2(val0, bd); + __m256i rnding = _mm256_set1_epi32(1 << (POWER_DR_INTERP_FILTER - 1)); + val0 = _mm256_add_epi32(val0, rnding); + val0 = _mm256_srli_epi32(val0, POWER_DR_INTERP_FILTER); + + __m256i resx = _mm256_packus_epi32( + val0, _mm256_castsi128_si256(_mm256_extracti128_si256(val0, 1))); + return resx; +} + +static INLINE void highbd_dr_z2_8x8_idif_avx2(int H, int W, + const uint16_t *above, + __m128i *dest, int r, int j, + int dx, int mrl_index, int bd) { + const int min_base_x = -((1 + mrl_index)); + const int frac_bits_x = 6; + + __m256i res; + __m128i resx; + int min_h = (H == 4) ? 4 : 8; + int min_w = (W == 4) ? 
4 : 8; + + for (int i = r; i < r + min_h; i++) { + assert(i < H); + assert(j < W); + + int y = i + 1; + int base_x = ((j << 6) - (y + mrl_index) * dx) >> frac_bits_x; + int base_shift = 0; + if (base_x < (min_base_x - 1)) { + base_shift = (min_base_x - base_x - 1); + } + + if (base_shift > min_w - 1) { + resx = _mm_setzero_si128(); + } else { + // load filter + int shift_i = ((-(y + mrl_index) * dx) & 0x3F) >> 1; + __m256i f[4]; + f[0] = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][0]); + f[1] = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][1]); + f[2] = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][2]); + f[3] = _mm256_set1_epi16(av1_dr_interp_filter[shift_i][3]); + + res = highbd_dr_row8_idif_avx2(above, f, base_x, base_shift, bd); + resx = _mm256_castsi256_si128(res); + } + dest[i - r] = resx; + } +} + +static INLINE void highbd_dr_z2_32bit_8x8_idif_avx2(int H, int W, + const uint16_t *above, + __m128i *dest, int r, int j, + int dx, int mrl_index, + int bd) { + const int min_base_x = -((1 + mrl_index)); + const int frac_bits_x = 6; + + __m256i res; + __m128i resx; + // adapt if size is 4 + int min_h = (H == 4) ? 4 : 8; + int min_w = (W == 4) ? 
4 : 8; + + for (int i = r; i < r + min_h; i++) { + assert(i < H); + assert(j < W); + + int y = i + 1; + int base_x = ((j << 6) - (y + mrl_index) * dx) >> frac_bits_x; + int base_shift = 0; + if (base_x < (min_base_x - 1)) { + base_shift = (min_base_x - base_x - 1); + } + + if (base_shift > min_w - 1) { + resx = _mm_setzero_si128(); + } else { + // load filter + int shift_i = ((-(y + mrl_index) * dx) & 0x3F) >> 1; + __m256i f[4]; + f[0] = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][0]); + f[1] = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][1]); + f[2] = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][2]); + f[3] = _mm256_set1_epi32(av1_dr_interp_filter[shift_i][3]); + + res = highbd_dr_row8_32bit_idif_avx2(above, f, base_x, base_shift, bd); + resx = _mm256_castsi256_si128(res); + } + dest[i - r] = resx; + } +} + +static void highbd_dr_32bit_z2_8x8_tiling_idif_avx2( + int H, int W, uint16_t *dst, ptrdiff_t stride, const uint16_t *above, + const uint16_t *left, int dx, int dy, int mrl_index, int bd) { + // Directional prediction in a 8x8 tile. + // Sizes of 4x4, 4x8 and 8x4 are supported as well. + // Step 1. Predict from above. + // Step 2. Predict from left and transpose. + // Step 3. Merge results. + + const int min_base_x = -((1 + mrl_index)); + const int frac_bits_x = 6; + + __m128i x_pred[8]; + __m128i y_pred[8]; + __m128i _y_pred[8]; + + for (int i = 0; i < 8; i++) { + x_pred[i] = _mm_setzero_si128(); + y_pred[i] = _mm_setzero_si128(); + _y_pred[i] = _mm_setzero_si128(); + } + + int min_h = (H == 4) ? 4 : 8; + int min_w = (W == 4) ? 
4 : 8; + + for (int r = 0; r < H; r += 8) { + for (int j = 0; j < W; j += min_w) { + assert((W - j) >= min_w); + assert((H - r) >= min_h); + + if (bd < 10) { + highbd_dr_z2_8x8_idif_avx2(H, W, above, x_pred, r, j, dx, mrl_index, + bd); + highbd_dr_z2_8x8_idif_avx2(W, H, left, _y_pred, j, r, dy, mrl_index, + bd); + } else { + highbd_dr_z2_32bit_8x8_idif_avx2(H, W, above, x_pred, r, j, dx, + mrl_index, bd); + highbd_dr_z2_32bit_8x8_idif_avx2(W, H, left, _y_pred, j, r, dy, + mrl_index, bd); + } + highbd_transpose8x8_sse2(&_y_pred[0], &_y_pred[1], &_y_pred[2], + &_y_pred[3], &_y_pred[4], &_y_pred[5], + &_y_pred[6], &_y_pred[7], &y_pred[0], &y_pred[1], + &y_pred[2], &y_pred[3], &y_pred[4], &y_pred[5], + &y_pred[6], &y_pred[7]); + + for (int k = 0; k < min_h; ++k) { + int y = r + k + 1; + int base_x = ((j << 6) - (y + mrl_index) * dx) >> frac_bits_x; + int base_min_diff = (min_base_x - base_x); + if (base_min_diff > min_w) { + base_min_diff = min_w; + } else { + if (base_min_diff < 0) base_min_diff = 0; + } + + __m128i resx, resy, resxy; + resx = x_pred[k]; + resy = y_pred[k]; + + resxy = _mm_blendv_epi8(resx, resy, + *(__m128i *)HighbdBaseMask[base_min_diff]); + + if (min_w == 8) { + _mm_storeu_si128((__m128i *)(dst + k * stride + j), resxy); + } else { + _mm_storel_epi64((__m128i *)(dst + k * stride + j), resxy); + } + } + } + if (r + 8 < H) dst += 8 * stride; + } +} + +static void highbd_dr_z2_16x16_idif_avx2(int H, int W, const uint16_t *above, + __m256i *dest, int r, int j, int dx, + int mrl_index, int bd) { + (void)H; + (void)W; + + const int min_base_x = -(1 + mrl_index); + const int frac_bits_x = 6; + + __m128i a0_x128, a1_x128, a2_x128, a3_x128; + __m256i a0_x, a1_x, a2_x, a3_x; + __m256i f0_x, f1_x, f2_x, f3_x; + __m256i rnding = _mm256_set1_epi16(1 << (POWER_DR_INTERP_FILTER - 1)); + __m256i val0, val1; + + for (int i = r; i < r + 16; ++i) { + assert(i < H); + assert(j < W); + int y = i + 1; + + int base_x = ((j << 6) - (y + mrl_index) * dx) >> frac_bits_x; + 
int base_shift = 0; + if ((base_x) < (min_base_x - 1)) { + base_shift = (min_base_x - (base_x)-1); + } + + if (base_shift < 8) { + a0_x128 = _mm_loadu_si128((__m128i *)(above + base_x + base_shift - 1)); + a1_x128 = _mm_loadu_si128((__m128i *)(above + base_x + base_shift)); + a2_x128 = _mm_loadu_si128((__m128i *)(above + base_x + base_shift + 1)); + a3_x128 = _mm_loadu_si128((__m128i *)(above + base_x + base_shift + 2)); + + a0_x128 = + _mm_shuffle_epi8(a0_x128, *(__m128i *)HighbdLoadMaskx[base_shift]); + a1_x128 = + _mm_shuffle_epi8(a1_x128, *(__m128i *)HighbdLoadMaskx[base_shift]); + a2_x128 = + _mm_shuffle_epi8(a2_x128, *(__m128i *)HighbdLoadMaskx[base_shift]); + a3_x128 = + _mm_shuffle_epi8(a3_x128, *(__m128i *)HighbdLoadMaskx[base_shift]); + + a0_x = _mm256_castsi128_si256(a0_x128); + a1_x = _mm256_castsi128_si256(a1_x128); + a2_x = _mm256_castsi128_si256(a2_x128); + a3_x = _mm256_castsi128_si256(a3_x128); + } else { + a0_x = _mm256_setzero_si256(); + a1_x = _mm256_setzero_si256(); + a2_x = _mm256_setzero_si256(); + a3_x = _mm256_setzero_si256(); + } + + int base_shift1 = 0; + if (base_shift > 8) { + base_shift1 = base_shift - 8; + } + if (base_shift1 < 8) { + a0_x128 = _mm_loadu_si128((__m128i *)(above + base_x + base_shift1 + 7)); + a1_x128 = _mm_loadu_si128((__m128i *)(above + base_x + base_shift1 + 8)); + a2_x128 = _mm_loadu_si128((__m128i *)(above + base_x + base_shift1 + 9)); + a3_x128 = _mm_loadu_si128((__m128i *)(above + base_x + base_shift1 + 10)); + + a0_x128 = + _mm_shuffle_epi8(a0_x128, *(__m128i *)HighbdLoadMaskx[base_shift1]); + a1_x128 = + _mm_shuffle_epi8(a1_x128, *(__m128i *)HighbdLoadMaskx[base_shift1]); + a2_x128 = + _mm_shuffle_epi8(a2_x128, *(__m128i *)HighbdLoadMaskx[base_shift1]); + a3_x128 = + _mm_shuffle_epi8(a3_x128, *(__m128i *)HighbdLoadMaskx[base_shift1]); + + a0_x = _mm256_inserti128_si256(a0_x, a0_x128, 1); + a1_x = _mm256_inserti128_si256(a1_x, a1_x128, 1); + a2_x = _mm256_inserti128_si256(a2_x, a2_x128, 1); + a3_x = 
_mm256_inserti128_si256(a3_x, a3_x128, 1); + } + if ((base_shift < 8) || base_shift1 < 8) { + // load filter + int shift_x = ((-(i + 1 + mrl_index) * dx) & 0x3F) >> 1; + f0_x = _mm256_set1_epi16(av1_dr_interp_filter[shift_x][0]); + f1_x = _mm256_set1_epi16(av1_dr_interp_filter[shift_x][1]); + f2_x = _mm256_set1_epi16(av1_dr_interp_filter[shift_x][2]); + f3_x = _mm256_set1_epi16(av1_dr_interp_filter[shift_x][3]); + + val0 = _mm256_adds_epi16(_mm256_mullo_epi16(a0_x, f0_x), + _mm256_mullo_epi16(a1_x, f1_x)); + val1 = _mm256_adds_epi16(_mm256_mullo_epi16(a2_x, f2_x), + _mm256_mullo_epi16(a3_x, f3_x)); + val0 = _mm256_adds_epi16(val0, val1); + + val0 = highbd_clamp_epi16_avx2(val0, bd); + val0 = _mm256_adds_epi16(val0, rnding); + dest[i - r] = _mm256_srli_epi16(val0, POWER_DR_INTERP_FILTER); + } else { + dest[i - r] = _mm256_setzero_si256(); + } + } +} + +static void highbd_dr_prediction_z2_HxW_idif_avx2( + int H, int W, uint16_t *dst, ptrdiff_t stride, const uint16_t *above, + const uint16_t *left, int dx, int dy, int mrl_index, int bd) { + // Directional prediction in 16x16 tiles. + // Step 1. Predict from above. + // Step 2. Predict from left and transpose. + // Step 3. Merge results. 
+ + const int min_base_x = -(1 + mrl_index); + const int frac_bits_x = 6; + + __m256i x_pred[16]; + __m256i y_pred[16]; + + for (int r = 0; r < H; r += 16) { + for (int j = 0; j < W; j += 16) { + assert((W - j) >= 16); + assert((H - r) >= 16); + // x calc + highbd_dr_z2_16x16_idif_avx2(H, W, above, x_pred, r, j, dx, mrl_index, + bd); + + // y calc + highbd_dr_z2_16x16_idif_avx2(W, H, left, y_pred, j, r, dy, mrl_index, bd); + highbd_transpose16x16_avx2(y_pred, y_pred); + + // merge results + for (int k = 0; k < 16; ++k) { + int y = k + r + 1; + int base_x = ((j << 6) - (y + mrl_index) * dx) >> frac_bits_x; + int base_min_diff = (min_base_x - base_x); + if (base_min_diff > 16) { + base_min_diff = 16; + } else { + if (base_min_diff < 0) base_min_diff = 0; + } + + __m256i resx, resy, resxy; + resx = x_pred[k]; + resy = y_pred[k]; + + resxy = _mm256_blendv_epi8(resx, resy, + *(__m256i *)HighbdBaseMask[base_min_diff]); + _mm256_storeu_si256((__m256i *)(dst + k * stride + j), resxy); + } + } // for j + if (r + 16 < H) dst += 16 * stride; + } +} + +static void highbd_dr_z2_16x16_32bit_idif_avx2(int H, int W, + const uint16_t *above, + __m256i *dest, int r, int j, + int dx, int mrl_index, int bd) { + (void)H; + (void)W; + const int min_base_x = -(1 + mrl_index); + const int frac_bits_x = 6; + __m256i resx[2]; + + for (int i = r; i < r + 16; ++i) { + assert(i < H); + assert(j < W); + + int y = i + 1; + + int base_x = ((j << 6) - (y + mrl_index) * dx) >> frac_bits_x; + int base_shift = 0; + if ((base_x) < (min_base_x - 1)) { + base_shift = (min_base_x - (base_x)-1); + } + + // load filter + int shift_x = ((-(i + 1 + mrl_index) * dx) & 0x3F) >> 1; + __m256i f[4]; + f[0] = _mm256_set1_epi32(av1_dr_interp_filter[shift_x][0]); + f[1] = _mm256_set1_epi32(av1_dr_interp_filter[shift_x][1]); + f[2] = _mm256_set1_epi32(av1_dr_interp_filter[shift_x][2]); + f[3] = _mm256_set1_epi32(av1_dr_interp_filter[shift_x][3]); + + if (base_shift < 8) { + resx[0] = + 
highbd_dr_row8_32bit_idif_avx2(above, f, base_x, base_shift, bd); + + } else { + resx[0] = _mm256_setzero_si256(); + } + + int base_shift1 = 0; + if (base_shift > 8) { + base_shift1 = base_shift - 8; + } + if (base_shift1 < 8) { + resx[1] = + highbd_dr_row8_32bit_idif_avx2(above, f, base_x + 8, base_shift1, bd); + } + if ((base_shift < 8) || base_shift1 < 8) { + dest[i - r] = + _mm256_inserti128_si256(resx[0], _mm256_castsi256_si128(resx[1]), + 1); // 16 16bit values + } else { + dest[i - r] = _mm256_setzero_si256(); + } + } +} + +static void highbd_dr_prediction_32bit_z2_HxW_idif_avx2( + int H, int W, uint16_t *dst, ptrdiff_t stride, const uint16_t *above, + const uint16_t *left, int dx, int dy, int mrl_index, int bd) { + // Directional prediction in 16x16 tiles. + // Step 1. Predict from above. + // Step 2. Predict from left and transpose. + // Step 3. Merge results. + + const int min_base_x = -(1 + mrl_index); + const int frac_bits_x = 6; + + __m256i x_pred[16]; + __m256i y_pred[16]; + + for (int r = 0; r < H; r += 16) { + for (int j = 0; j < W; j += 16) { + assert((W - j) >= 16); + assert((H - r) >= 16); + + // x calc + highbd_dr_z2_16x16_32bit_idif_avx2(H, W, above, x_pred, r, j, dx, + mrl_index, bd); + + // y calc + highbd_dr_z2_16x16_32bit_idif_avx2(W, H, left, y_pred, j, r, dy, + mrl_index, bd); + highbd_transpose16x16_avx2(y_pred, y_pred); + // merge results + for (int k = 0; k < 16; ++k) { + int y = k + r + 1; + int base_x = ((j << 6) - (y + mrl_index) * dx) >> frac_bits_x; + int base_min_diff = (min_base_x - base_x); + if (base_min_diff > 16) { + base_min_diff = 16; + } else { + if (base_min_diff < 0) base_min_diff = 0; + } + + __m256i resx, resy, resxy; + resx = x_pred[k]; + resy = y_pred[k]; + + resxy = _mm256_blendv_epi8(resx, resy, + *(__m256i *)HighbdBaseMask[base_min_diff]); + _mm256_storeu_si256((__m256i *)(dst + k * stride + j), resxy); + } + } // for j + if (r + 16 < H) dst += 16 * stride; + } +} + +// Directional prediction, zone 2: 90 < angle 
< 180 using IDIF +void av1_highbd_dr_prediction_z2_idif_avx2(uint16_t *dst, ptrdiff_t stride, + int bw, int bh, + const uint16_t *above, + const uint16_t *left, int dx, int dy, + int bd, int mrl_index) { + assert(dx > 0); + assert(dy > 0); + switch (bw) { + case 4: + highbd_dr_32bit_z2_8x8_tiling_idif_avx2(bh, bw, dst, stride, above, left, + dx, dy, mrl_index, bd); + break; + case 8: + highbd_dr_32bit_z2_8x8_tiling_idif_avx2(bh, bw, dst, stride, above, left, + dx, dy, mrl_index, bd); + break; + default: + if (bh < 16) { + highbd_dr_32bit_z2_8x8_tiling_idif_avx2(bh, bw, dst, stride, above, + left, dx, dy, mrl_index, bd); + } else { + if (bd < 10) { + highbd_dr_prediction_z2_HxW_idif_avx2(bh, bw, dst, stride, above, + left, dx, dy, mrl_index, bd); + } else { + highbd_dr_prediction_32bit_z2_HxW_idif_avx2( + bh, bw, dst, stride, above, left, dx, dy, mrl_index, bd); + } + } + break; + } +} + +// Directional prediction, zone 3 functions +static void highbd_dr_prediction_z3_4x4_idif_avx2(uint16_t *dst, + ptrdiff_t stride, + const uint16_t *left, int dy, + int bd, int mrl_index) { + __m128i dstvec[4], d[4]; + if (bd < 10) { + highbd_dr_prediction_z1_4xN_internal_idif_avx2(4, dstvec, left, dy, + mrl_index, bd); + } else { + highbd_dr_prediction_32bit_z1_4xN_internal_idif_avx2(4, dstvec, left, dy, + mrl_index, bd); + } + highbd_transpose4x8_8x4_low_sse2(&dstvec[0], &dstvec[1], &dstvec[2], + &dstvec[3], &d[0], &d[1], &d[2], &d[3]); + _mm_storel_epi64((__m128i *)(dst + 0 * stride), d[0]); + _mm_storel_epi64((__m128i *)(dst + 1 * stride), d[1]); + _mm_storel_epi64((__m128i *)(dst + 2 * stride), d[2]); + _mm_storel_epi64((__m128i *)(dst + 3 * stride), d[3]); + return; +} + +static void highbd_dr_prediction_z3_8x8_idif_avx2(uint16_t *dst, + ptrdiff_t stride, + const uint16_t *left, int dy, + int bd, int mrl_index) { + __m128i dstvec[8], d[8]; + if (bd < 10) { + highbd_dr_prediction_z1_8xN_internal_idif_avx2(8, dstvec, left, dy, + mrl_index, bd); + } else { + 
highbd_dr_prediction_32bit_z1_8xN_internal_idif_avx2(8, dstvec, left, dy, + mrl_index, bd); + } + highbd_transpose8x8_sse2(&dstvec[0], &dstvec[1], &dstvec[2], &dstvec[3], + &dstvec[4], &dstvec[5], &dstvec[6], &dstvec[7], + &d[0], &d[1], &d[2], &d[3], &d[4], &d[5], &d[6], + &d[7]); + for (int i = 0; i < 8; i++) { + _mm_storeu_si128((__m128i *)(dst + i * stride), d[i]); + } +} + +static void highbd_dr_prediction_z3_16x16_idif_avx2(uint16_t *dst, + ptrdiff_t stride, + const uint16_t *left, + int dy, int bd, + int mrl_index) { + __m256i dstvec[16], d[16]; + if (bd < 10) { + highbd_dr_prediction_z1_16xN_internal_idif_avx2(16, dstvec, left, dy, + mrl_index, bd); + } else { + highbd_dr_prediction_32bit_z1_16xN_internal_idif_avx2(16, dstvec, left, dy, + mrl_index, bd); + } + + highbd_transpose16x16_avx2(dstvec, d); + + for (int i = 0; i < 16; i++) { + _mm256_storeu_si256((__m256i *)(dst + i * stride), d[i]); + } +} + +static void highbd_dr_prediction_z3_32x32_idif_avx2(uint16_t *dst, + ptrdiff_t stride, + const uint16_t *left, + int dy, int bd, + int mrl_index) { + __m256i dstvec[64], d[16]; + if (bd < 10) { + highbd_dr_prediction_z1_32xN_internal_idif_avx2(32, dstvec, left, dy, + mrl_index, bd); + } else { + highbd_dr_prediction_32bit_z1_32xN_internal_idif_avx2(32, dstvec, left, dy, + mrl_index, bd); + } + highbd_transpose16x16_avx2(dstvec, d); + for (int j = 0; j < 16; j++) { + _mm256_storeu_si256((__m256i *)(dst + j * stride), d[j]); + } + highbd_transpose16x16_avx2(dstvec + 16, d); + for (int j = 0; j < 16; j++) { + _mm256_storeu_si256((__m256i *)(dst + j * stride + 16), d[j]); + } + highbd_transpose16x16_avx2(dstvec + 32, d); + for (int j = 0; j < 16; j++) { + _mm256_storeu_si256((__m256i *)(dst + (j + 16) * stride), d[j]); + } + highbd_transpose16x16_avx2(dstvec + 48, d); + for (int j = 0; j < 16; j++) { + _mm256_storeu_si256((__m256i *)(dst + (j + 16) * stride + 16), d[j]); + } +} + +static void highbd_dr_prediction_z3_64x64_idif_avx2(uint16_t *dst, + ptrdiff_t 
stride, + const uint16_t *left, + int dy, int bd, + int mrl_index) { + DECLARE_ALIGNED(16, uint16_t, dstT[64 * 64]); + if (bd < 10) { + highbd_dr_prediction_z1_64xN_internal_idif_avx2(64, dstT, 64, left, dy, + mrl_index, bd); + } else { + highbd_dr_prediction_32bit_z1_64xN_internal_idif_avx2(64, dstT, 64, left, + dy, mrl_index, bd); + } + highbd_transpose(dstT, 64, dst, stride, 64, 64); +} + +static void highbd_dr_prediction_z3_4x8_idif_avx2(uint16_t *dst, + ptrdiff_t stride, + const uint16_t *left, int dy, + int bd, int mrl_index) { + __m128i dstvec[4], d[8]; + if (bd < 10) { + highbd_dr_prediction_z1_8xN_internal_idif_avx2(4, dstvec, left, dy, + mrl_index, bd); + } else { + highbd_dr_prediction_32bit_z1_8xN_internal_idif_avx2(4, dstvec, left, dy, + mrl_index, bd); + } + + highbd_transpose4x8_8x4_sse2(&dstvec[0], &dstvec[1], &dstvec[2], &dstvec[3], + &d[0], &d[1], &d[2], &d[3], &d[4], &d[5], &d[6], + &d[7]); + for (int i = 0; i < 8; i++) { + _mm_storel_epi64((__m128i *)(dst + i * stride), d[i]); + } +} + +static void highbd_dr_prediction_z3_8x4_idif_avx2(uint16_t *dst, + ptrdiff_t stride, + const uint16_t *left, int dy, + int bd, int mrl_index) { + __m128i dstvec[8], d[4]; + if (bd < 10) { + highbd_dr_prediction_z1_4xN_internal_idif_avx2(8, dstvec, left, dy, + mrl_index, bd); + } else { + highbd_dr_prediction_32bit_z1_4xN_internal_idif_avx2(8, dstvec, left, dy, + mrl_index, bd); + } + + highbd_transpose8x8_low_sse2(&dstvec[0], &dstvec[1], &dstvec[2], &dstvec[3], + &dstvec[4], &dstvec[5], &dstvec[6], &dstvec[7], + &d[0], &d[1], &d[2], &d[3]); + _mm_storeu_si128((__m128i *)(dst + 0 * stride), d[0]); + _mm_storeu_si128((__m128i *)(dst + 1 * stride), d[1]); + _mm_storeu_si128((__m128i *)(dst + 2 * stride), d[2]); + _mm_storeu_si128((__m128i *)(dst + 3 * stride), d[3]); +} + +static void highbd_dr_prediction_z3_8x16_idif_avx2(uint16_t *dst, + ptrdiff_t stride, + const uint16_t *left, int dy, + int bd, int mrl_index) { + __m256i dstvec[8], d[8]; + if (bd < 10) { + 
highbd_dr_prediction_z1_16xN_internal_idif_avx2(8, dstvec, left, dy, + mrl_index, bd); + } else { + highbd_dr_prediction_32bit_z1_16xN_internal_idif_avx2(8, dstvec, left, dy, + mrl_index, bd); + } + highbd_transpose8x16_16x8_avx2(dstvec, d); + for (int i = 0; i < 8; i++) { + _mm_storeu_si128((__m128i *)(dst + i * stride), + _mm256_castsi256_si128(d[i])); + } + for (int i = 8; i < 16; i++) { + _mm_storeu_si128((__m128i *)(dst + i * stride), + _mm256_extracti128_si256(d[i - 8], 1)); + } +} + +static void highbd_dr_prediction_z3_16x8_idif_avx2(uint16_t *dst, + ptrdiff_t stride, + const uint16_t *left, int dy, + int bd, int mrl_index) { + __m128i dstvec[16], d[16]; + if (bd < 10) { + highbd_dr_prediction_z1_8xN_internal_idif_avx2(16, dstvec, left, dy, + mrl_index, bd); + } else { + highbd_dr_prediction_32bit_z1_8xN_internal_idif_avx2(16, dstvec, left, dy, + mrl_index, bd); + } + for (int i = 0; i < 16; i += 8) { + highbd_transpose8x8_sse2(&dstvec[0 + i], &dstvec[1 + i], &dstvec[2 + i], + &dstvec[3 + i], &dstvec[4 + i], &dstvec[5 + i], + &dstvec[6 + i], &dstvec[7 + i], &d[0 + i], + &d[1 + i], &d[2 + i], &d[3 + i], &d[4 + i], + &d[5 + i], &d[6 + i], &d[7 + i]); + } + for (int i = 0; i < 8; i++) { + _mm_storeu_si128((__m128i *)(dst + i * stride), d[i]); + _mm_storeu_si128((__m128i *)(dst + i * stride + 8), d[i + 8]); + } +} + +static void highbd_dr_prediction_z3_4x16_idif_avx2(uint16_t *dst, + ptrdiff_t stride, + const uint16_t *left, int dy, + int bd, int mrl_index) { + __m256i dstvec[4], d[4], d1; + if (bd < 10) { + highbd_dr_prediction_z1_16xN_internal_idif_avx2(4, dstvec, left, dy, + mrl_index, bd); + } else { + highbd_dr_prediction_32bit_z1_16xN_internal_idif_avx2(4, dstvec, left, dy, + mrl_index, bd); + } + highbd_transpose4x16_avx2(dstvec, d); + for (int i = 0; i < 4; i++) { + _mm_storel_epi64((__m128i *)(dst + i * stride), + _mm256_castsi256_si128(d[i])); + d1 = _mm256_bsrli_epi128(d[i], 8); + _mm_storel_epi64((__m128i *)(dst + (i + 4) * stride), + 
_mm256_castsi256_si128(d1)); + _mm_storel_epi64((__m128i *)(dst + (i + 8) * stride), + _mm256_extracti128_si256(d[i], 1)); + _mm_storel_epi64((__m128i *)(dst + (i + 12) * stride), + _mm256_extracti128_si256(d1, 1)); + } +} + +static void highbd_dr_prediction_z3_16x4_idif_avx2(uint16_t *dst, + ptrdiff_t stride, + const uint16_t *left, int dy, + int bd, int mrl_index) { + __m128i dstvec[16], d[8]; + if (bd < 10) { + highbd_dr_prediction_z1_4xN_internal_idif_avx2(16, dstvec, left, dy, + mrl_index, bd); + } else { + highbd_dr_prediction_32bit_z1_4xN_internal_idif_avx2(16, dstvec, left, dy, + mrl_index, bd); + } + highbd_transpose16x4_8x8_sse2(dstvec, d); + + _mm_storeu_si128((__m128i *)(dst + 0 * stride), d[0]); + _mm_storeu_si128((__m128i *)(dst + 0 * stride + 8), d[1]); + _mm_storeu_si128((__m128i *)(dst + 1 * stride), d[2]); + _mm_storeu_si128((__m128i *)(dst + 1 * stride + 8), d[3]); + _mm_storeu_si128((__m128i *)(dst + 2 * stride), d[4]); + _mm_storeu_si128((__m128i *)(dst + 2 * stride + 8), d[5]); + _mm_storeu_si128((__m128i *)(dst + 3 * stride), d[6]); + _mm_storeu_si128((__m128i *)(dst + 3 * stride + 8), d[7]); +} + +static void highbd_dr_prediction_z3_8x32_idif_avx2(uint16_t *dst, + ptrdiff_t stride, + const uint16_t *left, int dy, + int bd, int mrl_index) { + __m256i dstvec[16], d[16]; + if (bd < 10) { + highbd_dr_prediction_z1_32xN_internal_idif_avx2(8, dstvec, left, dy, + mrl_index, bd); + } else { + highbd_dr_prediction_32bit_z1_32xN_internal_idif_avx2(8, dstvec, left, dy, + mrl_index, bd); + } + + for (int i = 0; i < 16; i += 8) { + highbd_transpose8x16_16x8_avx2(dstvec + i, d + i); + } + + for (int i = 0; i < 8; i++) { + _mm_storeu_si128((__m128i *)(dst + i * stride), + _mm256_castsi256_si128(d[i])); + } + for (int i = 0; i < 8; i++) { + _mm_storeu_si128((__m128i *)(dst + (i + 8) * stride), + _mm256_extracti128_si256(d[i], 1)); + } + for (int i = 8; i < 16; i++) { + _mm_storeu_si128((__m128i *)(dst + (i + 8) * stride), + _mm256_castsi256_si128(d[i])); + 
} + for (int i = 8; i < 16; i++) { + _mm_storeu_si128((__m128i *)(dst + (i + 16) * stride), + _mm256_extracti128_si256(d[i], 1)); + } +} + +static void highbd_dr_prediction_z3_32x8_idif_avx2(uint16_t *dst, + ptrdiff_t stride, + const uint16_t *left, int dy, + int bd, int mrl_index) { + __m128i dstvec[32], d[32]; + if (bd < 10) { + highbd_dr_prediction_z1_8xN_internal_idif_avx2(32, dstvec, left, dy, + mrl_index, bd); + } else { + highbd_dr_prediction_32bit_z1_8xN_internal_idif_avx2(32, dstvec, left, dy, + mrl_index, bd); + } + + for (int i = 0; i < 32; i += 8) { + highbd_transpose8x8_sse2(&dstvec[0 + i], &dstvec[1 + i], &dstvec[2 + i], + &dstvec[3 + i], &dstvec[4 + i], &dstvec[5 + i], + &dstvec[6 + i], &dstvec[7 + i], &d[0 + i], + &d[1 + i], &d[2 + i], &d[3 + i], &d[4 + i], + &d[5 + i], &d[6 + i], &d[7 + i]); + } + for (int i = 0; i < 8; i++) { + _mm_storeu_si128((__m128i *)(dst + i * stride), d[i]); + _mm_storeu_si128((__m128i *)(dst + i * stride + 8), d[i + 8]); + _mm_storeu_si128((__m128i *)(dst + i * stride + 16), d[i + 16]); + _mm_storeu_si128((__m128i *)(dst + i * stride + 24), d[i + 24]); + } +} + +static void highbd_dr_prediction_z3_16x32_idif_avx2(uint16_t *dst, + ptrdiff_t stride, + const uint16_t *left, + int dy, int bd, + int mrl_index) { + __m256i dstvec[32], d[32]; + if (bd < 10) { + highbd_dr_prediction_z1_32xN_internal_idif_avx2(16, dstvec, left, dy, + mrl_index, bd); + } else { + highbd_dr_prediction_32bit_z1_32xN_internal_idif_avx2(16, dstvec, left, dy, + mrl_index, bd); + } + for (int i = 0; i < 32; i += 8) { + highbd_transpose8x16_16x8_avx2(dstvec + i, d + i); + } + // store + for (int j = 0; j < 32; j += 16) { + for (int i = 0; i < 8; i++) { + _mm_storeu_si128((__m128i *)(dst + (i + j) * stride), + _mm256_castsi256_si128(d[(i + j)])); + } + for (int i = 0; i < 8; i++) { + _mm_storeu_si128((__m128i *)(dst + (i + j) * stride + 8), + _mm256_castsi256_si128(d[(i + j) + 8])); + } + for (int i = 8; i < 16; i++) { + _mm256_storeu_si256( + (__m256i 
*)(dst + (i + j) * stride), + _mm256_inserti128_si256( + d[(i + j)], _mm256_extracti128_si256(d[(i + j) - 8], 1), 0)); + } + } +} + +static void highbd_dr_prediction_z3_32x16_idif_avx2(uint16_t *dst, + ptrdiff_t stride, + const uint16_t *left, + int dy, int bd, + int mrl_index) { + __m256i dstvec[32], d[16]; + if (bd < 10) { + highbd_dr_prediction_z1_16xN_internal_idif_avx2(32, dstvec, left, dy, + mrl_index, bd); + } else { + highbd_dr_prediction_32bit_z1_16xN_internal_idif_avx2(32, dstvec, left, dy, + mrl_index, bd); + } + for (int i = 0; i < 32; i += 16) { + highbd_transpose16x16_avx2((dstvec + i), d); + for (int j = 0; j < 16; j++) { + _mm256_storeu_si256((__m256i *)(dst + j * stride + i), d[j]); + } + } +} + +static void highbd_dr_prediction_z3_32x64_idif_avx2(uint16_t *dst, + ptrdiff_t stride, + const uint16_t *left, + int dy, int bd, + int mrl_index) { + uint16_t dstT[64 * 32]; + if (bd < 10) { + highbd_dr_prediction_z1_64xN_internal_idif_avx2(32, dstT, 64, left, dy, + mrl_index, bd); + } else { + highbd_dr_prediction_32bit_z1_64xN_internal_idif_avx2(32, dstT, 64, left, + dy, mrl_index, bd); + } + highbd_transpose(dstT, 64, dst, stride, 32, 64); +} + +static void highbd_dr_prediction_z3_64x32_idif_avx2(uint16_t *dst, + ptrdiff_t stride, + const uint16_t *left, + int dy, int bd, + int mrl_index) { + DECLARE_ALIGNED(16, uint16_t, dstT[32 * 64]); + highbd_dr_prediction_z1_32xN_idif_avx2(64, dstT, 32, left, dy, bd, mrl_index); + highbd_transpose(dstT, 32, dst, stride, 64, 32); + return; +} + +static void highbd_dr_prediction_z3_16x64_idif_avx2(uint16_t *dst, + ptrdiff_t stride, + const uint16_t *left, + int dy, int bd, + int mrl_index) { + DECLARE_ALIGNED(16, uint16_t, dstT[64 * 16]); + if (bd < 10) { + highbd_dr_prediction_z1_64xN_internal_idif_avx2(16, dstT, 64, left, dy, + mrl_index, bd); + } else { + highbd_dr_prediction_32bit_z1_64xN_internal_idif_avx2(16, dstT, 64, left, + dy, mrl_index, bd); + } + highbd_transpose(dstT, 64, dst, stride, 16, 64); +} + 
+static void highbd_dr_prediction_z3_64x16_idif_avx2(uint16_t *dst, + ptrdiff_t stride, + const uint16_t *left, + int dy, int bd, + int mrl_index) { + __m256i dstvec[64], d[16]; + if (bd < 10) { + highbd_dr_prediction_z1_16xN_internal_idif_avx2(64, dstvec, left, dy, + mrl_index, bd); + } else { + highbd_dr_prediction_32bit_z1_16xN_internal_idif_avx2(64, dstvec, left, dy, + mrl_index, bd); + } + for (int i = 0; i < 64; i += 16) { + highbd_transpose16x16_avx2((dstvec + i), d); + for (int j = 0; j < 16; j++) { + _mm256_storeu_si256((__m256i *)(dst + j * stride + i), d[j]); + } + } +} + +void av1_highbd_dr_prediction_z3_idif_avx2(uint16_t *dst, ptrdiff_t stride, + int bw, int bh, + const uint16_t *above, + const uint16_t *left, int dx, int dy, + int bd, int mrl_index) { + (void)above; + (void)dx; + + assert(dx == 1); + assert(dy > 0); + + if (bw == bh) { + switch (bw) { + case 4: + highbd_dr_prediction_z3_4x4_idif_avx2(dst, stride, left, dy, bd, + mrl_index); + break; + case 8: + highbd_dr_prediction_z3_8x8_idif_avx2(dst, stride, left, dy, bd, + mrl_index); + break; + case 16: + highbd_dr_prediction_z3_16x16_idif_avx2(dst, stride, left, dy, bd, + mrl_index); + break; + case 32: + highbd_dr_prediction_z3_32x32_idif_avx2(dst, stride, left, dy, bd, + mrl_index); + break; + case 64: + highbd_dr_prediction_z3_64x64_idif_avx2(dst, stride, left, dy, bd, + mrl_index); + break; + } + } else { + if (bw < bh) { + if (bw + bw == bh) { + switch (bw) { + case 4: + highbd_dr_prediction_z3_4x8_idif_avx2(dst, stride, left, dy, bd, + mrl_index); + break; + case 8: + highbd_dr_prediction_z3_8x16_idif_avx2(dst, stride, left, dy, bd, + mrl_index); + break; + case 16: + highbd_dr_prediction_z3_16x32_idif_avx2(dst, stride, left, dy, bd, + mrl_index); + break; + case 32: + highbd_dr_prediction_z3_32x64_idif_avx2(dst, stride, left, dy, bd, + mrl_index); + break; + } + } else { + switch (bw) { + case 4: + highbd_dr_prediction_z3_4x16_idif_avx2(dst, stride, left, dy, bd, + mrl_index); + break; + 
case 8: + highbd_dr_prediction_z3_8x32_idif_avx2(dst, stride, left, dy, bd, + mrl_index); + break; + case 16: + highbd_dr_prediction_z3_16x64_idif_avx2(dst, stride, left, dy, bd, + mrl_index); + break; + } + } + } else { + if (bh + bh == bw) { + switch (bh) { + case 4: + highbd_dr_prediction_z3_8x4_idif_avx2(dst, stride, left, dy, bd, + mrl_index); + break; + case 8: + highbd_dr_prediction_z3_16x8_idif_avx2(dst, stride, left, dy, bd, + mrl_index); + break; + case 16: + highbd_dr_prediction_z3_32x16_idif_avx2(dst, stride, left, dy, bd, + mrl_index); + break; + case 32: + highbd_dr_prediction_z3_64x32_idif_avx2(dst, stride, left, dy, bd, + mrl_index); + break; + } + } else { + switch (bh) { + case 4: + highbd_dr_prediction_z3_16x4_idif_avx2(dst, stride, left, dy, bd, + mrl_index); + break; + case 8: + highbd_dr_prediction_z3_32x8_idif_avx2(dst, stride, left, dy, bd, + mrl_index); + break; + case 16: + highbd_dr_prediction_z3_64x16_idif_avx2(dst, stride, left, dy, bd, + mrl_index); + break; + } + } + } + } + return; +} +#endif // CONFIG_IDIF
diff --git a/aom_dsp/x86/sad_highbd_avx2.c b/aom_dsp/x86/sad_highbd_avx2.c index 3fc91d8..bae6691 100644 --- a/aom_dsp/x86/sad_highbd_avx2.c +++ b/aom_dsp/x86/sad_highbd_avx2.c
@@ -20,7 +20,8 @@ #include "aom_ports/mem.h" // SAD -static INLINE unsigned int get_sad_from_mm256_epi32(const __m256i *v) { +static AOM_FORCE_INLINE unsigned int get_sad_from_mm256_epi32( + const __m256i *v) { // input 8 32-bit summation __m128i lo128, hi128; __m256i u = _mm256_srli_si256(*v, 8); @@ -38,8 +39,8 @@ return (unsigned int)_mm_cvtsi128_si32(lo128); } -static INLINE void highbd_sad16x4_core_avx2(__m256i *s, __m256i *r, - __m256i *sad_acc) { +static AOM_FORCE_INLINE void highbd_sad16x4_core_avx2(__m256i *s, __m256i *r, + __m256i *sad_acc) { const __m256i zero = _mm256_setzero_si256(); int i; for (i = 0; i < 4; i++) { @@ -59,9 +60,10 @@ } // If sec_ptr = 0, calculate regular SAD. Otherwise, calculate average SAD. -static INLINE void sad16x4(const uint16_t *src_ptr, int src_stride, - const uint16_t *ref_ptr, int ref_stride, - const uint16_t *sec_ptr, __m256i *sad_acc) { +static AOM_FORCE_INLINE void sad16x4(const uint16_t *src_ptr, int src_stride, + const uint16_t *ref_ptr, int ref_stride, + const uint16_t *sec_ptr, + __m256i *sad_acc) { __m256i s[4], r[4]; s[0] = _mm256_loadu_si256((const __m256i *)src_ptr); s[1] = _mm256_loadu_si256((const __m256i *)(src_ptr + src_stride)); @@ -98,9 +100,10 @@ return (unsigned int)get_sad_from_mm256_epi32(&sad); } -static void sad32x4(const uint16_t *src_ptr, int src_stride, - const uint16_t *ref_ptr, int ref_stride, - const uint16_t *sec_ptr, __m256i *sad_acc) { +static AOM_FORCE_INLINE void sad32x4(const uint16_t *src_ptr, int src_stride, + const uint16_t *ref_ptr, int ref_stride, + const uint16_t *sec_ptr, + __m256i *sad_acc) { __m256i s[4], r[4]; int row_sections = 0; @@ -149,9 +152,10 @@ return get_sad_from_mm256_epi32(&sad); } -static void sad64x2(const uint16_t *src_ptr, int src_stride, - const uint16_t *ref_ptr, int ref_stride, - const uint16_t *sec_ptr, __m256i *sad_acc) { +static AOM_FORCE_INLINE void sad64x2(const uint16_t *src_ptr, int src_stride, + const uint16_t *ref_ptr, int ref_stride, + const uint16_t 
*sec_ptr, + __m256i *sad_acc) { __m256i s[4], r[4]; int i; for (i = 0; i < 2; i++) { @@ -589,8 +593,8 @@ // SAD 4D // Combine 4 __m256i input vectors v to uint32_t result[4] -static INLINE void get_4d_sad_from_mm256_epi32(const __m256i *v, - uint32_t *res) { +static AOM_FORCE_INLINE void get_4d_sad_from_mm256_epi32(const __m256i *v, + uint32_t *res) { __m256i u0, u1, u2, u3; const __m256i mask = yy_set1_64_from_32i(UINT32_MAX); __m128i sad; @@ -628,7 +632,7 @@ _mm_storeu_si128((__m128i *)res, sad); } -static void init_sad(__m256i *s) { +static AOM_FORCE_INLINE void init_sad(__m256i *s) { s[0] = _mm256_setzero_si256(); s[1] = _mm256_setzero_si256(); s[2] = _mm256_setzero_si256();
diff --git a/apps/aomdec.c b/apps/aomdec.c index b9f3981..f8de253 100644 --- a/apps/aomdec.c +++ b/apps/aomdec.c
@@ -889,6 +889,11 @@ got_data = 0; while ((img = aom_codec_get_frame(&decoder, &iter))) { ++frame_out; +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + if (frame_in < frame_out) { // No OBUs for show_existing_frame. + frame_in = frame_out; + } +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT got_data = 1; if (AOM_CODEC_CONTROL_TYPECHECKED(&decoder, AOMD_GET_FRAME_CORRUPTED,
diff --git a/apps/aomenc.c b/apps/aomenc.c index a3f88e8..88a8da3 100644 --- a/apps/aomenc.c +++ b/apps/aomenc.c
@@ -221,6 +221,9 @@ #endif AV1E_SET_SUBGOP_CONFIG_STR, AV1E_SET_SUBGOP_CONFIG_PATH, +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + AV1E_SET_FRAME_OUTPUT_ORDER_DERIVATION, +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT 0 }; const arg_def_t *main_args[] = { &g_av1_codec_arg_defs.help, @@ -443,16 +446,28 @@ #if CONFIG_BAWP &g_av1_codec_arg_defs.enable_bawp, #endif // CONFIG_BAWP +#if CONFIG_CWP + &g_av1_codec_arg_defs.enable_cwp, +#endif // CONFIG_CWP +#if CONFIG_D071_IMP_MSK_BLD + &g_av1_codec_arg_defs.enable_imp_msk_bld, +#endif // CONFIG_D071_IMP_MSK_BLD &g_av1_codec_arg_defs.enable_fsc, #if CONFIG_ORIP &g_av1_codec_arg_defs.enable_orip, #endif +#if CONFIG_IDIF + &g_av1_codec_arg_defs.enable_idif, +#endif // CONFIG_IDIF &g_av1_codec_arg_defs.enable_ist, #if CONFIG_CROSS_CHROMA_TX &g_av1_codec_arg_defs.enable_cctx, #endif // CONFIG_CROSS_CHROMA_TX &g_av1_codec_arg_defs.enable_ibp, &g_av1_codec_arg_defs.explicit_ref_frame_map, +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + &g_av1_codec_arg_defs.enable_frame_output_order, +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT &g_av1_codec_arg_defs.max_drl_refmvs, #if CONFIG_REF_MV_BANK &g_av1_codec_arg_defs.enable_refmvbank, @@ -481,6 +496,9 @@ #if CONFIG_JOINT_MVD &g_av1_codec_arg_defs.enable_joint_mvd, #endif // CONFIG_JOINT_MVD +#if CONFIG_REFINEMV + &g_av1_codec_arg_defs.enable_refinemv, +#endif // CONFIG_REFINEMV #if CONFIG_PAR_HIDING &g_av1_codec_arg_defs.enable_parity_hiding, #endif // CONFIG_PAR_HIDING @@ -632,11 +650,7 @@ #if CONFIG_EXT_RECUR_PARTITIONS config->erp_pruning_level = 5; config->use_ml_erp_pruning = 0; -#if CONFIG_H_PARTITION config->enable_ext_partitions = 1; -#else - config->enable_ext_partitions = 0; -#endif // CONFIG_H_PARTITION #endif // CONFIG_EXT_RECUR_PARTITIONS config->enable_sdp = 1; config->enable_mrls = 1; @@ -646,10 +660,19 @@ #if CONFIG_BAWP config->enable_bawp = 1; #endif // CONFIG_BAWP +#if CONFIG_CWP + config->enable_cwp = 1; +#endif // CONFIG_BAWP +#if CONFIG_D071_IMP_MSK_BLD + 
config->enable_imp_msk_bld = 1; +#endif // CONFIG_D071_IMP_MSK_BLD config->enable_fsc = 1; #if CONFIG_ORIP config->enable_orip = 1; #endif +#if CONFIG_IDIF + config->enable_idif = 1; +#endif // CONFIG_IDIF config->enable_ist = 1; #if CONFIG_CROSS_CHROMA_TX config->enable_cctx = 1; @@ -667,6 +690,9 @@ #if CONFIG_JOINT_MVD config->enable_joint_mvd = 1; #endif +#if CONFIG_REFINEMV + config->enable_refinemv = 1; +#endif // CONFIG_REFINEMV config->enable_flip_idtx = 1; config->enable_deblocking = 1; config->enable_cdef = 1; @@ -710,6 +736,9 @@ config->enable_opfl_refine = 1; #endif // CONFIG_OPTFLOW_REFINEMENT config->explicit_ref_frame_map = 0; +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + config->enable_frame_output_order = 1; +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT config->enable_intra_edge_filter = 1; config->enable_tx64 = 1; config->enable_smooth_interintra = 1; @@ -1513,6 +1542,9 @@ #if CONFIG_ORIP ", ORIP(%d)" #endif // CONFIG_CONFIG_ORIP +#if CONFIG_IDIF + ", IDIF(%d)" +#endif // CONFIG_IDIF ", IBP(%d)" "\n", encoder_cfg->enable_intra_edge_filter, @@ -1522,8 +1554,18 @@ , encoder_cfg->enable_orip #endif // CONFIG_ORIP +#if CONFIG_IDIF + , + encoder_cfg->enable_idif +#endif // CONFIG_IDIF , encoder_cfg->enable_ibp); +#if CONFIG_ADAPTIVE_DS_FILTER + fprintf( + stdout, + " : Adaptive Down sample filter: (%d)\n", + encoder_cfg->enable_cfl_ds_filter); +#endif // CONFIG_ADAPTIVE_DS_FILTER fprintf(stdout, "Tool setting (Inter) : InterIntra (%d), OBMC (%d), " @@ -1549,6 +1591,15 @@ fprintf(stdout, " : BAWP (%d)\n", encoder_cfg->enable_bawp); #endif // CONFIG_BAWP +#if CONFIG_CWP + fprintf(stdout, " : CWP (%d)\n", + encoder_cfg->enable_cwp); +#endif // CONFIG_CWP + +#if CONFIG_D071_IMP_MSK_BLD + fprintf(stdout, " : ImpMskBld (%d)\n", + encoder_cfg->enable_imp_msk_bld); +#endif // CONFIG_D071_IMP_MSK_BLD fprintf(stdout, " : GlobalMotion (%d), " @@ -1570,16 +1621,14 @@ " : Flexible MV precisions: (%d)\n", encoder_cfg->enable_flex_mvres); #endif // 
CONFIG_FLEX_MVRES -#if CONFIG_ADAPTIVE_DS_FILTER - fprintf( - stdout, - " : Adaptive Down sample filter: (%d)\n", - encoder_cfg->enable_cfl_ds_filter); -#endif // CONFIG_ADAPTIVE_DS_FILTER #if CONFIG_JOINT_MVD fprintf(stdout, " : Joint MVD coding: (%d)\n", encoder_cfg->enable_joint_mvd); #endif +#if CONFIG_REFINEMV + fprintf(stdout, " : RefineMV mode: (%d)\n", + encoder_cfg->enable_refinemv); +#endif // CONFIG_REFINEMV fprintf(stdout, " : InterInterWedge (%d), " "InterIntraWedge (%d), RefFrameMv (%d)\n", @@ -1926,7 +1975,11 @@ switch (pkt->kind) { case AOM_CODEC_CX_FRAME_PKT: +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + stream->frames_out += pkt->data.frame.frame_count; +#else // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT ++stream->frames_out; +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT update_rate_histogram(stream->rate_hist, cfg, pkt); #if CONFIG_WEBM_IO if (stream->config.write_webm) { @@ -2358,7 +2411,11 @@ } // Keep track of the total number of frames passed to the encoder. +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + unsigned int seen_frames = 0; +#else // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT int seen_frames = 0; +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT // Does the encoder have queued data that needs retrieval? int got_data = 0; // Is there a frame available for processing? @@ -2425,6 +2482,11 @@ } } fflush(stdout); +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + FOREACH_STREAM(stream, streams) { + if (stream->frames_out < seen_frames) got_data = 1; + } +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT } if (stream_cnt > 1) fprintf(stderr, "\n");
diff --git a/av1/arg_defs.c b/av1/arg_defs.c index ca367cb..b34a95b 100644 --- a/av1/arg_defs.c +++ b/av1/arg_defs.c
@@ -410,6 +410,16 @@ "Enable block adaptive weighted prediction (BAWP)" "(0: false, 1: true (default))"), #endif // CONFIG_BAWP +#if CONFIG_CWP + .enable_cwp = ARG_DEF(NULL, "enable-cwp", 1, + "Enable compound weighted prediction (CWP)" + "(0: false, 1: true (default))"), +#endif // CONFIG_CWP +#if CONFIG_D071_IMP_MSK_BLD + .enable_imp_msk_bld = ARG_DEF(NULL, "enable-imp-msk-bld", 1, + "Enable implicit maksed blending" + "(0:false), 1:true (default)"), +#endif // CONFIG_D071_IMP_MSK_BLD .enable_fsc = ARG_DEF(NULL, "enable-fsc", 1, "Enable forward skip coding" "(0: false, 1: true (default))"), @@ -418,6 +428,11 @@ "Enable Offset Based refinement of intra prediction" "(0: false, 1: true (default))"), #endif +#if CONFIG_IDIF + .enable_idif = ARG_DEF(NULL, "enable-idif", 1, + "Enable Intra Directional Interpolation Filter" + "(0: false, 1: true (default))"), +#endif // CONFIG_IDIF .enable_ist = ARG_DEF(NULL, "enable-ist", 1, "Enable intra secondary transform" "(0: false, 1: true (default))"), @@ -452,6 +467,12 @@ "Enable joint MVD coding" "(0: false, 1: true (default))"), #endif // CONFIG_JOINT_MVD + +#if CONFIG_REFINEMV + .enable_refinemv = ARG_DEF(NULL, "enable-refinemv", 1, + "Enable RefineMV mode" + "(0: false, 1: true (default))"), +#endif // CONFIG_REFINEMV .min_partition_size = ARG_DEF(NULL, "min-partition-size", 1, "Set min partition size " @@ -511,7 +532,7 @@ "(0: false, 1: true (default))"), .enable_global_motion = ARG_DEF(NULL, "enable-global-motion", 1, "Enable global motion " - "(0: false, 1: true (default))"), + "(0: false (default), 1: true)"), .enable_warped_motion = ARG_DEF(NULL, "enable-warped-motion", 1, "Enable local warped motion " "(0: false, 1: true (default))"), @@ -679,6 +700,12 @@ ARG_DEF(NULL, "explicit-ref-frame-map", 1, "Explicitly signal the reference frame mapping (0: off " "(default), 1: on)"), +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + .enable_frame_output_order = + ARG_DEF(NULL, "enable-frame-output-order", 1, + "Enable frame 
output order derivation based on order hint" + "(0: off, 1: on (default))"), +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT .target_seq_level_idx = ARG_DEF( NULL, "target-seq-level-idx", 1, "Target sequence level index. "
diff --git a/av1/arg_defs.h b/av1/arg_defs.h index ecc6d6b..0d6389a 100644 --- a/av1/arg_defs.h +++ b/av1/arg_defs.h
@@ -163,10 +163,19 @@ #if CONFIG_BAWP arg_def_t enable_bawp; #endif // CONFIG_BAWP +#if CONFIG_CWP + arg_def_t enable_cwp; +#endif // CONFIG_CWP +#if CONFIG_D071_IMP_MSK_BLD + arg_def_t enable_imp_msk_bld; +#endif // CONFIG_D071_IMP_MSK_BLD arg_def_t enable_fsc; #if CONFIG_ORIP arg_def_t enable_orip; #endif +#if CONFIG_IDIF + arg_def_t enable_idif; +#endif // CONFIG_IDIF arg_def_t enable_ist; #if CONFIG_CROSS_CHROMA_TX arg_def_t enable_cctx; @@ -184,6 +193,9 @@ #if CONFIG_JOINT_MVD arg_def_t enable_joint_mvd; #endif // CONFIG_JOINT_MVD +#if CONFIG_REFINEMV + arg_def_t enable_refinemv; +#endif // CONFIG_REFINEMV arg_def_t min_partition_size; arg_def_t max_partition_size; arg_def_t enable_chroma_deltaq; @@ -261,6 +273,9 @@ arg_def_t max_reference_frames; arg_def_t reduced_reference_set; arg_def_t explicit_ref_frame_map; +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + arg_def_t enable_frame_output_order; +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT arg_def_t target_seq_level_idx; arg_def_t set_min_cr; arg_def_t input_color_primaries;
diff --git a/av1/av1.cmake b/av1/av1.cmake index 87796e5..409084b 100644 --- a/av1/av1.cmake +++ b/av1/av1.cmake
@@ -283,7 +283,11 @@ "${AOM_ROOT}/third_party/vector/vector.c" "${AOM_ROOT}/third_party/vector/vector.h" "${AOM_ROOT}/av1/encoder/dwt.c" - "${AOM_ROOT}/av1/encoder/dwt.h") + "${AOM_ROOT}/av1/encoder/dwt.h" + "${AOM_ROOT}/common/md5_utils.c" + "${AOM_ROOT}/common/md5_utils.h" + "${AOM_ROOT}/common/rawenc.c" + "${AOM_ROOT}/common/rawenc.h") if(CONFIG_TUNE_VMAF) list(APPEND AOM_AV1_ENCODER_SOURCES "${AOM_ROOT}/av1/encoder/tune_vmaf.c"
diff --git a/av1/av1_cx_iface.c b/av1/av1_cx_iface.c index e9a969f..f949c19 100644 --- a/av1/av1_cx_iface.c +++ b/av1/av1_cx_iface.c
@@ -137,10 +137,19 @@ #if CONFIG_BAWP int enable_bawp; // enable block adaptive weighted prediction #endif // CONFIG_BAWP - int enable_fsc; // enable forward skip coding +#if CONFIG_CWP + int enable_cwp; // enable compound weighted prediction +#endif // CONFIG_CWP +#if CONFIG_D071_IMP_MSK_BLD + int enable_imp_msk_bld; +#endif // CONFIG_D071_IMP_MSK_BLD + int enable_fsc; // enable forward skip coding #if CONFIG_ORIP int enable_orip; // enable ORIP #endif // CONFIG_ORIP +#if CONFIG_IDIF + int enable_idif; // enable IDIF +#endif // CONFIG_IDIF int enable_ist; // enable intra secondary transform #if CONFIG_CROSS_CHROMA_TX int enable_cctx; // enable cross-chroma component transform @@ -159,8 +168,11 @@ #endif // CONFIG_ADAPTIVE_DS_FILTER #if CONFIG_JOINT_MVD - int enable_joint_mvd; // enable joint MVD coding -#endif // CONFIG_ADAPTIVE_MVD + int enable_joint_mvd; // enable joint MVD coding +#endif // CONFIG_ADAPTIVE_MVD +#if CONFIG_REFINEMV + int enable_refinemv; // enable refineMV mode +#endif // CONFIG_REFINEMV int min_partition_size; // min partition size [4,8,16,32,64,128] int max_partition_size; // max partition size [4,8,16,32,64,128] int enable_intra_edge_filter; // enable intra-edge filter for sequence @@ -169,18 +181,22 @@ int enable_flip_idtx; // enable flip and identity transform types int max_reference_frames; // maximum number of references per frame int enable_reduced_reference_set; // enable reduced set of references - int explicit_ref_frame_map; // explicitly signal reference frame mapping - int enable_ref_frame_mvs; // sequence level - int allow_ref_frame_mvs; // frame level - int enable_masked_comp; // enable masked compound for sequence - int enable_onesided_comp; // enable one sided compound for sequence - int enable_interintra_comp; // enable interintra compound for sequence - int enable_smooth_interintra; // enable smooth interintra mode usage - int enable_diff_wtd_comp; // enable diff-wtd compound usage - int enable_interinter_wedge; // enable 
interinter-wedge compound usage - int enable_interintra_wedge; // enable interintra-wedge compound usage - int enable_global_motion; // enable global motion usage for sequence - int enable_warped_motion; // enable local warped motion for sequence + int explicit_ref_frame_map; // explicitly signal reference frame mapping +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + int enable_frame_output_order; // enable frame output order derivation based + // on order hint value +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + int enable_ref_frame_mvs; // sequence level + int allow_ref_frame_mvs; // frame level + int enable_masked_comp; // enable masked compound for sequence + int enable_onesided_comp; // enable one sided compound for sequence + int enable_interintra_comp; // enable interintra compound for sequence + int enable_smooth_interintra; // enable smooth interintra mode usage + int enable_diff_wtd_comp; // enable diff-wtd compound usage + int enable_interinter_wedge; // enable interinter-wedge compound usage + int enable_interintra_wedge; // enable interintra-wedge compound usage + int enable_global_motion; // enable global motion usage for sequence + int enable_warped_motion; // enable local warped motion for sequence #if CONFIG_EXTENDED_WARP_PREDICTION int enable_warped_causal; // enable spatial warp prediction for sequence int enable_warp_delta; // enable explicit warp models for sequence @@ -448,12 +464,8 @@ 1, // disable ML based partition speed up features 5, // aggressiveness for erp pruning 0, // use ml model for erp pruning -#if CONFIG_H_PARTITION 1, // enable extended partitions #else - 0, // enable extended partitions -#endif -#else 0, // disable ML based partition speed up features #endif 1, // enable rectangular partitions @@ -468,10 +480,19 @@ #if CONFIG_BAWP 1, // enable block adaptive weighted prediction (BAWP) #endif // CONFIG_BAWP +#if CONFIG_CWP + 1, // enable compound weighted prediction (CWP) +#endif // CONFIG_CWP +#if CONFIG_D071_IMP_MSK_BLD + 
1, // eanble implicit maksed blending +#endif // CONFIG_D071_IMP_MSK_BLD 1, // enable forward skip coding #if CONFIG_ORIP 1, // enable ORIP #endif // CONFIG_ORIP +#if CONFIG_IDIF + 1, // enable IDIF +#endif // CONFIG_IDIF 1, // enable intra secondary transform #if CONFIG_CROSS_CHROMA_TX 1, // enable cross-chroma component transform @@ -489,6 +510,9 @@ #if CONFIG_JOINT_MVD 1, // enable joint mvd coding #endif // CONFIG_JOINT_MVD +#if CONFIG_REFINEMV + 1, // enable refineMV mode +#endif // CONFIG_REFINEMV 4, // min_partition_size #if CONFIG_BLOCK_256 256, // max_partition_size @@ -503,17 +527,20 @@ 7, // max_reference_frames 0, // enable_reduced_reference_set 0, // explicit_ref_frame_map - 1, // enable_ref_frame_mvs sequence level - 1, // allow ref_frame_mvs frame level - 1, // enable masked compound at sequence level - 1, // enable one sided compound at sequence level - 1, // enable interintra compound at sequence level - 1, // enable smooth interintra mode - 1, // enable difference-weighted compound - 1, // enable interinter wedge compound - 1, // enable interintra wedge compound - 1, // enable_global_motion usage - 1, // enable_warped_motion at sequence level +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + 1, // enable frame output order derivation based on order hint value +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + 1, // enable_ref_frame_mvs sequence level + 1, // allow ref_frame_mvs frame level + 1, // enable masked compound at sequence level + 1, // enable one sided compound at sequence level + 1, // enable interintra compound at sequence level + 1, // enable smooth interintra mode + 1, // enable difference-weighted compound + 1, // enable interinter wedge compound + 1, // enable interintra wedge compound + 0, // enable_global_motion usage + 1, // enable_warped_motion at sequence level #if CONFIG_EXTENDED_WARP_PREDICTION 1, // enable_warped_causal at sequence level 1, // enable_warp_delta at sequence level @@ -831,6 +858,9 @@ RANGE_CHECK(extra_cfg, 
max_reference_frames, 3, 7); RANGE_CHECK(extra_cfg, enable_reduced_reference_set, 0, 1); RANGE_CHECK(extra_cfg, explicit_ref_frame_map, 0, 1); +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + RANGE_CHECK(extra_cfg, enable_frame_output_order, 0, 1); +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT RANGE_CHECK_HI(extra_cfg, chroma_subsampling_x, 1); RANGE_CHECK_HI(extra_cfg, chroma_subsampling_y, 1); @@ -974,10 +1004,19 @@ #if CONFIG_BAWP cfg->enable_bawp = extra_cfg->enable_bawp; #endif // CONFIG_BAWP +#if CONFIG_CWP + cfg->enable_cwp = extra_cfg->enable_cwp; +#endif // CONFIG_CWP +#if CONFIG_D071_IMP_MSK_BLD + cfg->enable_imp_msk_bld = extra_cfg->enable_imp_msk_bld; +#endif // CONFIG_D071_IMP_MSK_BLD cfg->enable_fsc = extra_cfg->enable_fsc; #if CONFIG_ORIP cfg->enable_orip = extra_cfg->enable_orip; #endif +#if CONFIG_IDIF + cfg->enable_idif = extra_cfg->enable_idif; +#endif // CONFIG_IDIF cfg->enable_ist = extra_cfg->enable_ist; #if CONFIG_CROSS_CHROMA_TX cfg->enable_cctx = extra_cfg->enable_cctx; @@ -997,6 +1036,9 @@ #if CONFIG_JOINT_MVD cfg->enable_joint_mvd = extra_cfg->enable_joint_mvd; #endif // CONFIG_JOINT_MVD +#if CONFIG_REFINEMV + cfg->enable_refinemv = extra_cfg->enable_refinemv; +#endif // CONFIG_REFINEMV cfg->max_partition_size = extra_cfg->max_partition_size; cfg->min_partition_size = extra_cfg->min_partition_size; cfg->enable_intra_edge_filter = extra_cfg->enable_intra_edge_filter; @@ -1029,6 +1071,9 @@ cfg->enable_onesided_comp = extra_cfg->enable_onesided_comp; cfg->enable_reduced_reference_set = extra_cfg->enable_reduced_reference_set; cfg->explicit_ref_frame_map = extra_cfg->explicit_ref_frame_map; +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + cfg->enable_frame_output_order = extra_cfg->enable_frame_output_order; +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT cfg->reduced_tx_type_set = extra_cfg->reduced_tx_type_set; cfg->max_drl_refmvs = extra_cfg->max_drl_refmvs; #if CONFIG_REF_MV_BANK @@ -1088,10 +1133,19 @@ #if CONFIG_BAWP 
extra_cfg->enable_bawp = cfg->enable_bawp; #endif // CONFIG_BAWP +#if CONFIG_CWP + extra_cfg->enable_cwp = cfg->enable_cwp; +#endif // CONFIG_CWP +#if CONFIG_D071_IMP_MSK_BLD + extra_cfg->enable_imp_msk_bld = cfg->enable_imp_msk_bld; +#endif // CONFIG_D071_IMP_MSK_BLD extra_cfg->enable_fsc = cfg->enable_fsc; #if CONFIG_ORIP extra_cfg->enable_orip = cfg->enable_orip; #endif +#if CONFIG_IDIF + extra_cfg->enable_idif = cfg->enable_idif; +#endif // CONFIG_IDIF extra_cfg->enable_ist = cfg->enable_ist; #if CONFIG_CROSS_CHROMA_TX extra_cfg->enable_cctx = cfg->enable_cctx; @@ -1111,6 +1165,10 @@ #if CONFIG_JOINT_MVD extra_cfg->enable_joint_mvd = cfg->enable_joint_mvd; #endif // CONFIG_JOINT_MVD + +#if CONFIG_REFINEMV + extra_cfg->enable_refinemv = cfg->enable_refinemv; +#endif // CONFIG_REFINEMV extra_cfg->max_partition_size = cfg->max_partition_size; extra_cfg->min_partition_size = cfg->min_partition_size; extra_cfg->enable_intra_edge_filter = cfg->enable_intra_edge_filter; @@ -1142,6 +1200,9 @@ extra_cfg->enable_onesided_comp = cfg->enable_onesided_comp; extra_cfg->enable_reduced_reference_set = cfg->enable_reduced_reference_set; extra_cfg->explicit_ref_frame_map = cfg->explicit_ref_frame_map; +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + extra_cfg->enable_frame_output_order = cfg->enable_frame_output_order; +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT extra_cfg->reduced_tx_type_set = cfg->reduced_tx_type_set; extra_cfg->max_drl_refmvs = cfg->max_drl_refmvs; #if CONFIG_REF_MV_BANK @@ -1390,6 +1451,9 @@ #if CONFIG_JOINT_MVD tool_cfg->enable_joint_mvd = extra_cfg->enable_joint_mvd; #endif // CONFIG_JOINT_MVD +#if CONFIG_REFINEMV + tool_cfg->enable_refinemv = extra_cfg->enable_refinemv; +#endif // CONFIG_REFINEMV #if CONFIG_TIP tool_cfg->enable_tip = extra_cfg->enable_tip; if (tool_cfg->enable_tip) { @@ -1405,6 +1469,12 @@ #if CONFIG_BAWP tool_cfg->enable_bawp = extra_cfg->enable_bawp; #endif // CONFIG_BAWP +#if CONFIG_CWP + tool_cfg->enable_cwp = 
extra_cfg->enable_cwp; +#endif // CONFIG_CWP +#if CONFIG_D071_IMP_MSK_BLD + tool_cfg->enable_imp_msk_bld = extra_cfg->enable_imp_msk_bld; +#endif // CONFIG_D071_IMP_MSK_BLD tool_cfg->force_video_mode = extra_cfg->force_video_mode; tool_cfg->enable_palette = extra_cfg->enable_palette; // FIXME(debargha): Should this be: @@ -1601,6 +1671,16 @@ extra_cfg->enable_reduced_reference_set; oxcf->ref_frm_cfg.enable_onesided_comp = extra_cfg->enable_onesided_comp; oxcf->ref_frm_cfg.explicit_ref_frame_map = extra_cfg->explicit_ref_frame_map; +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + // Disable the implicit derivation of frame output order + // when order_hint is not available, S-frame is used or error resilience mode + // is used. + oxcf->ref_frm_cfg.enable_frame_output_order = + (!tool_cfg->enable_order_hint || kf_cfg->enable_sframe || + tool_cfg->error_resilient_mode) + ? 0 + : extra_cfg->enable_frame_output_order; +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT oxcf->row_mt = extra_cfg->row_mt; @@ -1662,6 +1742,9 @@ #if CONFIG_ORIP intra_mode_cfg->enable_orip = extra_cfg->enable_orip; #endif +#if CONFIG_IDIF + intra_mode_cfg->enable_idif = extra_cfg->enable_idif; +#endif // CONFIG_IDIF intra_mode_cfg->enable_ibp = extra_cfg->enable_ibp; // Set transform size/type configuration. @@ -1673,7 +1756,7 @@ txfm_cfg->use_intra_default_tx_only = extra_cfg->use_intra_default_tx_only; txfm_cfg->disable_ml_transform_speed_features = extra_cfg->disable_ml_transform_speed_features; - txfm_cfg->enable_ist = extra_cfg->enable_ist; + txfm_cfg->enable_ist = extra_cfg->enable_ist && !extra_cfg->lossless; #if CONFIG_CROSS_CHROMA_TX txfm_cfg->enable_cctx = tool_cfg->enable_monochrome ? 
0 : extra_cfg->enable_cctx; @@ -2666,6 +2749,15 @@ return AOM_CODEC_OK; } +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT +static aom_codec_err_t ctrl_set_frame_output_order(aom_codec_alg_priv_t *ctx, + va_list args) { + struct av1_extracfg extra_cfg = ctx->extra_cfg; + extra_cfg.enable_frame_output_order = + CAST(AV1E_SET_FRAME_OUTPUT_ORDER_DERIVATION, args); + return update_extra_cfg(ctx, &extra_cfg); +} +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT static aom_codec_err_t create_stats_buffer(FIRSTPASS_STATS **frame_stats_buffer, STATS_BUFFER_CTX *stats_buf_context, int num_lap_buffers) { @@ -3205,7 +3297,20 @@ index_size = MAG_SIZE * (ctx->pending_frame_count - 1) + 2; +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + if (cpi->oxcf.ref_frm_cfg.enable_frame_output_order) { + if (cpi->common.current_frame.frame_type == KEY_FRAME || + !cpi->common.show_existing_frame) { + is_frame_visible = cpi->common.show_frame; + } else { + is_frame_visible = 0; + } + } else { + is_frame_visible = cpi->common.show_frame; + } +#else // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT is_frame_visible = cpi->common.show_frame; +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT has_no_show_keyframe |= (!is_frame_visible && @@ -3215,6 +3320,13 @@ report_stats(cpi, frame_size, cx_time); } } +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + if (cpi->oxcf.ref_frm_cfg.enable_frame_output_order && + cpi->common.show_frame && cpi->common.show_existing_frame) { + cpi->frames_left = AOMMAX(0, cpi->frames_left - 1); + break; + } +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT } if (is_frame_visible) { // Add the frame packet to the list of returned packets. 
@@ -3244,6 +3356,9 @@ pkt.data.frame.sz = ctx->pending_cx_data_sz; pkt.data.frame.partition_id = -1; pkt.data.frame.vis_frame_size = frame_size; +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + pkt.data.frame.frame_count = ctx->pending_frame_count; +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT pkt.data.frame.pts = ticks_to_timebase_units(timestamp_ratio, dst_time_stamp) + @@ -3818,6 +3933,16 @@ err_string)) { extra_cfg.enable_bawp = arg_parse_int_helper(&arg, err_string); #endif // CONFIG_BAWP +#if CONFIG_CWP + } else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.enable_cwp, argv, + err_string)) { + extra_cfg.enable_cwp = arg_parse_int_helper(&arg, err_string); +#endif // CONFIG_CWP +#if CONFIG_D071_IMP_MSK_BLD + } else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.enable_imp_msk_bld, + argv, err_string)) { + extra_cfg.enable_imp_msk_bld = arg_parse_uint_helper(&arg, err_string); +#endif // CONFIG_D071_IMP_MSK_BLD } else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.enable_fsc, argv, err_string)) { extra_cfg.enable_fsc = arg_parse_int_helper(&arg, err_string); @@ -3826,6 +3951,11 @@ err_string)) { extra_cfg.enable_orip = arg_parse_int_helper(&arg, err_string); #endif +#if CONFIG_IDIF + } else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.enable_idif, argv, + err_string)) { + extra_cfg.enable_idif = arg_parse_int_helper(&arg, err_string); +#endif // CONFIG_IDIF } else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.enable_ist, argv, err_string)) { extra_cfg.enable_ist = arg_parse_int_helper(&arg, err_string); @@ -3857,6 +3987,12 @@ argv, err_string)) { extra_cfg.enable_joint_mvd = arg_parse_int_helper(&arg, err_string); #endif // CONFIG_JOINT_MVD + +#if CONFIG_REFINEMV + } else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.enable_refinemv, argv, + err_string)) { + extra_cfg.enable_refinemv = arg_parse_int_helper(&arg, err_string); +#endif // CONFIG_REFINEMV } else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.min_partition_size, argv, 
err_string)) { extra_cfg.min_partition_size = arg_parse_int_helper(&arg, err_string); @@ -3888,6 +4024,13 @@ &g_av1_codec_arg_defs.explicit_ref_frame_map, argv, err_string)) { extra_cfg.explicit_ref_frame_map = arg_parse_int_helper(&arg, err_string); +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + } else if (arg_match_helper(&arg, + &g_av1_codec_arg_defs.enable_frame_output_order, + argv, err_string)) { + extra_cfg.enable_frame_output_order = + arg_parse_int_helper(&arg, err_string); +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT } else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.enable_ref_frame_mvs, argv, err_string)) { extra_cfg.enable_ref_frame_mvs = arg_parse_int_helper(&arg, err_string); @@ -4198,6 +4341,9 @@ { AV1E_SET_VBR_CORPUS_COMPLEXITY_LAP, ctrl_set_vbr_corpus_complexity_lap }, { AV1E_ENABLE_SB_MULTIPASS_UNIT_TEST, ctrl_enable_sb_multipass_unit_test }, { AV1E_ENABLE_SUBGOP_STATS, ctrl_enable_subgop_stats }, +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + { AV1E_SET_FRAME_OUTPUT_ORDER_DERIVATION, ctrl_set_frame_output_order }, +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT // Getters { AOME_GET_LAST_QUANTIZER, ctrl_get_quantizer }, @@ -4288,14 +4434,10 @@ 1, 5, // aggressiveness for erp pruning 0, // use ml model for erp pruning -#if CONFIG_H_PARTITION 1, // enable extended partitions -#else - 0, // enable extended partitions -#endif -#else // CONFIG_EXT_RECUR_PARTITIONS +#else // CONFIG_EXT_RECUR_PARTITIONS 0, -#endif // CONFIG_EXT_RECUR_PARTITIONS +#endif // CONFIG_EXT_RECUR_PARTITIONS 0, 1, 1, #if CONFIG_TIP 1, @@ -4303,10 +4445,19 @@ #if CONFIG_BAWP 1, #endif // CONFIG_BAWP +#if CONFIG_CWP + 1, +#endif // CONFIG_CWP +#if CONFIG_D071_IMP_MSK_BLD + 1, +#endif // CONFIG_D071_IMP_MSK_BLD 1, #if CONFIG_ORIP 1, #endif +#if CONFIG_IDIF + 1, +#endif // CONFIG_IDIF 1, // IST #if CONFIG_CROSS_CHROMA_TX 1, @@ -4324,6 +4475,9 @@ #if CONFIG_JOINT_MVD 1, #endif // CONFIG_JOINT_MVD +#if CONFIG_REFINEMV + 1, +#endif // CONFIG_REFINEMV 1, 1, 1, 1, 1, 1, 
#if CONFIG_PC_WIENER 1, @@ -4349,7 +4503,11 @@ #if CONFIG_OPTFLOW_REFINEMENT 1, #endif // CONFIG_OPTFLOW_REFINEMENT - 1, 1, 1, 1, 1, 1, 3, 1, 1, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 3, 1, 1, 0, +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + 1, +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + 0, 0, #if CONFIG_REF_MV_BANK 1, #endif // CONFIG_REF_MV_BANK
diff --git a/av1/av1_dx_iface.c b/av1/av1_dx_iface.c index 0609e84..c5f1323 100644 --- a/av1/av1_dx_iface.c +++ b/av1/av1_dx_iface.c
@@ -76,6 +76,7 @@ unsigned int enable_subgop_stats; #if CONFIG_INSPECTION aom_inspect_cb inspect_cb; + aom_inspect_cb inspect_sb_cb; void *inspect_ctx; #endif }; @@ -566,6 +567,7 @@ AV1Decoder *const pbi = frame_worker_data->pbi; AV1_COMMON *const cm = &pbi->common; frame_worker_data->pbi->inspect_cb = ctx->inspect_cb; + frame_worker_data->pbi->inspect_sb_cb = ctx->inspect_sb_cb; frame_worker_data->pbi->inspect_ctx = ctx->inspect_ctx; res = av1_receive_compressed_data(frame_worker_data->pbi, data_sz, &data); check_resync(ctx, frame_worker_data->pbi); @@ -610,9 +612,21 @@ struct AV1Decoder *pbi = frame_worker_data->pbi; if (ctx->enable_subgop_stats) memset(&pbi->subgop_stats, 0, sizeof(pbi->subgop_stats)); - for (size_t j = 0; j < pbi->num_output_frames; j++) { - decrease_ref_count(pbi->output_frames[j], pool); +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + // When multiple layers are enabled, use the mechanism of + // show_existing_frame + if (pbi->common.seq_params.order_hint_info.enable_order_hint && + pbi->common.seq_params.enable_frame_output_order) { + if (!pbi->common.show_existing_frame) + decrease_ref_count(pbi->output_frames[0], pool); + } else { +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + for (size_t j = 0; j < pbi->num_output_frames; j++) { + decrease_ref_count(pbi->output_frames[j], pool); + } +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT } +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT pbi->num_output_frames = 0; unlock_buffer_pool(pool); for (size_t j = 0; j < ctx->num_grain_image_frame_buffers; j++) { @@ -1590,6 +1604,7 @@ #else aom_inspect_init *init = va_arg(args, aom_inspect_init *); ctx->inspect_cb = init->inspect_cb; + ctx->inspect_sb_cb = init->inspect_sb_cb; ctx->inspect_ctx = init->inspect_ctx; return AOM_CODEC_OK; #endif
diff --git a/av1/common/alloccommon.c b/av1/common/alloccommon.c index 1716e87..4256369 100644 --- a/av1/common/alloccommon.c +++ b/av1/common/alloccommon.c
@@ -129,6 +129,9 @@ } aom_free_frame_buffer(&cm->rst_frame); +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + aom_free_frame_buffer(&cm->pre_rst_frame); +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER } void av1_free_above_context_buffers(CommonContexts *above_contexts) {
diff --git a/av1/common/av1_common_int.h b/av1/common/av1_common_int.h index 5cd51ec..978e300 100644 --- a/av1/common/av1_common_int.h +++ b/av1/common/av1_common_int.h
@@ -257,6 +257,12 @@ // Frame's level within the hierarchical structure unsigned int pyramid_level; +#if CONFIG_IMPROVED_GLOBAL_MOTION + // How many ref frames did this frame use? + // This is set to 0 for intra frames + int num_ref_frames; +#endif // CONFIG_IMPROVED_GLOBAL_MOTION + MV_REF *mvs; uint8_t *seg_map; struct segmentation seg; @@ -410,7 +416,11 @@ int mib_size; // Size of the superblock in units of MI blocks int mib_size_log2; // Log 2 of above. int explicit_ref_frame_map; // Explicitly signal the reference frame mapping - int max_reference_frames; // Number of reference frames allowed +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + int enable_frame_output_order; // Enable frame output order derivation based + // on order hint value +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + int max_reference_frames; // Number of reference frames allowed #if CONFIG_ALLOW_SAME_REF_COMPOUND int num_same_ref_compound; // Number of the allowed same reference frames for // the compound mode @@ -435,13 +445,23 @@ #if CONFIG_BAWP uint8_t enable_bawp; // enables/disables block adaptive weighted prediction #endif // CONFIG_BAWP - uint8_t enable_fsc; // enables/disables forward skip coding +#if CONFIG_CWP + uint8_t enable_cwp; // enables/disables compound weighted prediction +#endif // CONFIG_CWP +#if CONFIG_D071_IMP_MSK_BLD + uint8_t enable_imp_msk_bld; // enable implicit maksed blending +#endif // CONFIG_D071_IMP_MSK_BLD + uint8_t enable_fsc; // enables/disables forward skip coding uint8_t enable_filter_intra; // enables/disables filterintra uint8_t enable_intra_edge_filter; // enables/disables edge upsampling #if CONFIG_ORIP uint8_t enable_orip; // To turn on/off sub-block based ORIP #endif +#if CONFIG_IDIF + uint8_t + enable_idif; // enables/disables Intra Directional Interpolation Filter +#endif // CONFIG_IDIF uint8_t enable_ist; // enables/disables intra secondary transform #if CONFIG_CROSS_CHROMA_TX uint8_t enable_cctx; // enables/disables cross-chroma component 
transform @@ -462,6 +482,10 @@ uint8_t enable_joint_mvd; // enables/disables joint MVD coding #endif // CONFIG_JOINT_MVD +#if CONFIG_REFINEMV + uint8_t enable_refinemv; // enables/disables refineMV mode +#endif // CONFIG_REFINEMV + #if CONFIG_EXTENDED_WARP_PREDICTION int seq_enabled_motion_modes; // Bit mask of enabled motion modes for // sequence @@ -509,6 +533,9 @@ #if CONFIG_EXT_RECUR_PARTITIONS uint8_t enable_ext_partitions; // enable extended partitions #endif // CONFIG_EXT_RECUR_PARTITIONS +#if CONFIG_IMPROVED_GLOBAL_MOTION + bool enable_global_motion; +#endif // CONFIG_IMPROVED_GLOBAL_MOTION BITSTREAM_PROFILE profile; // Color config. @@ -604,12 +631,7 @@ */ bool use_pb_mv_precision; #endif // CONFIG_FLEX_MVRES -#if DS_FRAME_LEVEL - /*! - * Dowsample filter type - */ - int ds_filter_type; -#endif // DS_FRAME_LEVEl + /*! * If true, palette tool and/or intra block copy tools may be used. */ @@ -624,6 +646,9 @@ #if !CONFIG_EXTENDED_WARP_PREDICTION bool allow_warped_motion; /*!< If true, frame may use warped motion mode. */ #endif +#if CONFIG_CWG_D067_IMPROVED_WARP + bool allow_warpmv_mode; /*!< If true, frame may use WARPMV mode. */ +#endif // CONFIG_CWG_D067_IMPROVED_WARP /*! * If true, using previous frames' motion vectors for prediction is allowed. */ @@ -715,6 +740,18 @@ */ bool enable_bawp; #endif // CONFIG_BAWP +#if CONFIG_CWP + /*! + * Enables/disables compound weighted prediction + */ + bool enable_cwp; +#endif // CONFIG_CWP +#if CONFIG_D071_IMP_MSK_BLD + /*! + * Enables/disables implicit masked blending. + */ + bool enable_imp_msk_bld; +#endif // CONFIG_D071_IMP_MSK_BLD #if CONFIG_EXTENDED_WARP_PREDICTION /*! 
* Bit mask of enabled motion modes for this frame @@ -1537,6 +1574,11 @@ int32_t *rst_tmpbuf; /*!< Scratch buffer for self-guided restoration */ RestorationLineBuffers *rlbs; /*!< Line buffers needed by loop restoration */ YV12_BUFFER_CONFIG rst_frame; /*!< Stores the output of loop restoration */ +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + YV12_BUFFER_CONFIG pre_rst_frame; /*!< Stores the reconstructed frame before + loop restoration, only used by encoder, + to be moved to encoder buffer */ +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER /**@}*/ /*! @@ -1561,6 +1603,18 @@ */ DeltaQInfo delta_q_info; +#if CONFIG_IMPROVED_GLOBAL_MOTION + /*! + * Base model used for delta-coding global motion parameters + */ + WarpedMotionParams base_global_motion_model; + + /*! + * Temporal length of `base_global_motion_model` + */ + int base_global_motion_distance; +#endif // CONFIG_IMPROVED_GLOBAL_MOTION + /*! * Global motion parameters for each reference frame. */ @@ -1632,12 +1686,12 @@ * TODO(jingning): This can be combined with sign_bias later. */ int8_t ref_frame_side[INTER_REFS_PER_FRAME]; -#if CONFIG_SMVP_IMPROVEMENT || CONFIG_JOINT_MVD +#if CONFIG_MVP_IMPROVEMENT || CONFIG_JOINT_MVD /*! * relative distance between reference 'k' and current frame. */ - int8_t ref_frame_relative_dist[REF_FRAMES]; -#endif // CONFIG_SMVP_IMPROVEMENT || CONFIG_JOINT_MVD + int ref_frame_relative_dist[REF_FRAMES]; +#endif // CONFIG_MVP_IMPROVEMENT || CONFIG_JOINT_MVD /*! * Number of temporal layers: may be > 1 for SVC (scalable vector coding). */ @@ -1733,6 +1787,11 @@ * Log2 of the size of the superblock in units of MI. 
*/ int mib_size_log2; + +#if CONFIG_INSPECTION + YV12_BUFFER_CONFIG predicted_pixels; + YV12_BUFFER_CONFIG prefiltered_pixels; +#endif // CONFIG_INSPECTION } AV1_COMMON; /*!\cond */ @@ -2136,6 +2195,10 @@ xd->mi_col = mi_col; xd->mi[0]->mi_row_start = mi_row; xd->mi[0]->mi_col_start = mi_col; +#if CONFIG_EXT_RECUR_PARTITIONS + xd->mi[0]->chroma_mi_row_start = mi_row; + xd->mi[0]->chroma_mi_col_start = mi_col; +#endif // CONFIG_EXT_RECUR_PARTITIONS #if CONFIG_EXTENDED_WARP_PREDICTION xd->tile.mi_col_start = tile->mi_col_start; @@ -2225,7 +2288,7 @@ if (xd->width > xd->height) if (!(mi_row & (xd->width - 1))) xd->is_first_horizontal_rect = 1; -#if CONFIG_C043_MVP_IMPROVEMENTS +#if CONFIG_MVP_IMPROVEMENT xd->is_last_horizontal_rect = 0; if (xd->width > xd->height) { if (!((mi_row + xd->height) & (xd->width - 1))) { @@ -2236,10 +2299,29 @@ xd->is_first_vertical_rect = 0; if (xd->width < xd->height) if (!(mi_col & (xd->height - 1))) xd->is_first_vertical_rect = 1; -#endif // CONFIG_C043_MVP_IMPROVEMENTS +#endif // CONFIG_MVP_IMPROVEMENT #endif // !CONFIG_EXT_RECUR_PARTITIONS } +#if CONFIG_ATC_DCTX_ALIGNED +// Return the inter TX context based on last position value. 
+static INLINE int get_lp2tx_ctx(TX_SIZE tx_size, int bwl, int eob) { + assert(eob != 0); + const int lim = 2; + const int eoby = (eob - 1) >> bwl; + const int eobx = (eob - 1) - (eoby << bwl); + const int diag = eobx + eoby; + const int max_diag = tx_size_wide[tx_size] + tx_size_high[tx_size] - 2; + int ctx_idx = 0; + if (diag < lim) { + ctx_idx = 1; + } else if (diag > (max_diag - lim)) { + ctx_idx = 2; + } + return ctx_idx; +} +#endif // CONFIG_ATC_DCTX_ALIGNED + static INLINE int get_fsc_mode_ctx(const MACROBLOCKD *xd, const int is_key) { int ctx = 0; if (is_key) { @@ -2281,6 +2363,19 @@ } #endif // !CONFIG_AIMC +#if CONFIG_EXT_DIR +static INLINE int get_mrl_index_ctx(const MB_MODE_INFO *neighbor0, + const MB_MODE_INFO *neighbor1) { + int ctx0 = neighbor0 && !is_inter_block(neighbor0, SHARED_PART) && + !is_intrabc_block(neighbor0, SHARED_PART) && + neighbor0->mrl_index != 0; + int ctx1 = neighbor1 && !is_inter_block(neighbor1, SHARED_PART) && + !is_intrabc_block(neighbor1, SHARED_PART) && + neighbor1->mrl_index != 0; + return ctx0 + ctx1; +} +#endif // CONFIG_EXT_DIR + static INLINE void update_partition_context(MACROBLOCKD *xd, int mi_row, int mi_col, BLOCK_SIZE subsize, BLOCK_SIZE bsize) { @@ -2708,8 +2803,9 @@ if (subsize < BLOCK_SIZES_ALL) { CHROMA_REF_INFO tmp_chroma_ref_info = { 1, 0, mi_row, mi_col, subsize, subsize }; - set_chroma_ref_info(mi_row, mi_col, 0, subsize, &tmp_chroma_ref_info, - parent_chroma_ref_info, bsize, partition, ss_x, ss_y); + set_chroma_ref_info(tree_type, mi_row, mi_col, 0, subsize, + &tmp_chroma_ref_info, parent_chroma_ref_info, bsize, + partition, ss_x, ss_y); is_valid = get_plane_block_size(tmp_chroma_ref_info.bsize_base, ss_x, ss_y) != BLOCK_INVALID; } @@ -2806,6 +2902,38 @@ return is_implied; } +static AOM_INLINE PARTITION_TYPE av1_get_normative_forced_partition_type( + const CommonModeInfoParams *const mi_params, TREE_TYPE tree_type, int ss_x, + int ss_y, int mi_row, int mi_col, BLOCK_SIZE bsize, + const PARTITION_TREE 
*ptree_luma, const CHROMA_REF_INFO *chroma_ref_info) { + // Return NONE if this block size is not splittable + if (!is_partition_point(bsize)) { + return PARTITION_NONE; + } + + // Special case where 8x8 chroma blocks are not splittable. + // TODO(chiyotsai@google.com): This should be moved into `is_partition_point`, + // but this will require too many lines of change to do right now. + if (tree_type == CHROMA_PART && bsize == BLOCK_8X8) { + return PARTITION_NONE; + } + + // Partitions forced by SDP + if (is_luma_chroma_share_same_partition(tree_type, ptree_luma, bsize)) { + assert(ptree_luma); + return sdp_chroma_part_from_luma(bsize, ptree_luma->partition, ss_x, ss_y); + } + + // Partitions forced by boundary + PARTITION_TYPE implied_partition; + const bool is_part_implied = is_partition_implied_at_boundary( + mi_params, tree_type, ss_x, ss_y, mi_row, mi_col, bsize, chroma_ref_info, + &implied_partition); + if (is_part_implied) return implied_partition; + + // No forced partitions + return PARTITION_INVALID; +} #else // Return the number of sub-blocks whose width and height are // less than half of the parent block. @@ -3101,11 +3229,13 @@ // PARTITION_HORZ_B. To distinguish the latter two, check if the lower // half was split. if (sshigh * 4 == bhigh) { -#if CONFIG_EXT_RECUR_PARTITIONS +#if CONFIG_UNEVEN_4WAY + return PARTITION_HORZ_4A; +#elif CONFIG_EXT_RECUR_PARTITIONS return PARTITION_HORZ_3; -#else // CONFIG_EXT_RECUR_PARTITIONS +#else // !CONFIG_UNEVEN_4WAY && !CONFIG_EXT_RECUR_PARTITIONS return PARTITION_HORZ_4; -#endif // CONFIG_EXT_RECUR_PARTITIONS +#endif // CONFIG_UNEVEN_4WAY } #if !CONFIG_EXT_RECUR_PARTITIONS assert(sshigh * 2 == bhigh); @@ -3120,11 +3250,13 @@ // PARTITION_VERT_B. To distinguish the latter two, check if the right // half was split. 
if (sswide * 4 == bwide) { -#if CONFIG_EXT_RECUR_PARTITIONS +#if CONFIG_UNEVEN_4WAY + return PARTITION_VERT_4A; +#elif CONFIG_EXT_RECUR_PARTITIONS return PARTITION_VERT_3; -#else // CONFIG_EXT_RECUR_PARTITIONS +#else // !CONFIG_UNEVEN_4WAY && !CONFIG_EXT_RECUR_PARTITIONS return PARTITION_VERT_4; -#endif // CONFIG_EXT_RECUR_PARTITIONS +#endif // CONFIG_UNEVEN_4WAY } #if !CONFIG_EXT_RECUR_PARTITIONS assert(sswide * 2 == bhigh); @@ -3279,8 +3411,73 @@ seq_level_idx != SEQ_LEVEL_7_2 && seq_level_idx != SEQ_LEVEL_7_3); } -// Intra derivative for second directional predictor of IBP +// Intra derivative for directional predictions. // second_dr_intra_derivative[x] = 64*64/dr_intra_derivative[x] +#if CONFIG_EXT_DIR +static const int16_t dr_intra_derivative[90] = { + // Angle in degrees. + // Starred (*) values are unused. + 0, 4096, 2048, // *, 0.9, 1.8, + 1365, 1024, 819, // 2.7, 3.6, 4.5, + 682, 585, 512, // 5.4, 6.2, 7.1, + 455, 409, 409, 409, 372, // 8.0, 8.9, *, *, 9.8, + 341, 292, 273, // 10.6, 12.4, 13.2, + 256, 227, 215, // 14.0, 15.7, 16.6, + 204, 186, 178, // 17.4, 19.0, 19.8, + 170, 157, 151, // 20.6, 22.2, 23.0, + 146, 136, 132, // 23.7, 25.2, 25.9, + 128, 117, 110, // 26.6, 28.7, 30.2, + 107, 99, 97, 97, // 30.9, 32.9, *, 33.4, + 93, 87, 83, // 34.5, 36.3, 37.6, + 81, 77, 74, // 38.3, 39.7, 40.9, + 73, 69, 66, // 41.2, 42.8, 44.1, + 64, 62, 59, // 45.0, 45.9, 47.3, + 56, 55, 53, // 48.8, 49.3, 50.4, + 50, 49, 47, // 52.0, 52.6, 53.7, + 44, 42, 42, 41, // 55.5, 56.7, *, 57.4, + 38, 37, 35, // 59.3, 60.0, 61.3, + 32, 31, 30, // 63.4, 64.2, 64.9, + 28, 27, 26, // 66.4, 67.1, 67.9, + 24, 23, 22, // 69.4, 70.2, 71.0, + 20, 19, 18, // 72.6, 73.5, 74.3, + 16, 15, 14, // 76.0, 76.8, 77.7, + 12, 11, 10, 10, 10, // 79.4, 80.2, *, *, 81.1, + 9, 8, 7, // 82.0, 82.9, 83.8, + 6, 5, 4, // 84.6, 85.5, 86.4, + 3, 2, 1, // 87.3, 88.2, 89.1, +}; +#elif CONFIG_IMPROVED_ANGULAR_INTRA +static const int16_t second_dr_intra_derivative[90] = { + 0, 0, 0, // + 2, 0, 0, // 3, ... 
+ 4, 0, 0, // 6, ... + 8, 0, 0, 0, 0, // 9, ... + 12, 0, 0, // 14, ... + 16, 0, 0, // 17, ... + 20, 0, 0, // 20, ... + 24, 0, 0, // 23, ... (113 & 203 are base angles) + 28, 0, 0, // 26, ... + 32, 0, 0, // 29, ... + 38, 0, 0, 0, // 32, ... + 44, 0, 0, // 36, ... + 50, 0, 0, // 39, ... + 56, 0, 0, // 42, ... + 64, 0, 0, // 45, ... (45 & 135 are base angles) + 72, 0, 0, // 48, ... + 82, 0, 0, // 51, ... + 92, 0, 0, 0, // 54, ... + 106, 0, 0, // 58, ... + 128, 0, 0, // 61, ... + 146, 0, 0, // 64, ... + 170, 0, 0, // 67, ... (67 & 157 are base angles) + 204, 0, 0, // 70, ... + 256, 0, 0, // 73, ... + 340, 0, 0, 0, 0, // 76, ... + 512, 0, 0, // 81, ... + 1024, 0, 0, // 84, ... + 2048, 0, 0, // 87, ... +}; +#else static const int16_t second_dr_intra_derivative[90] = { 0, 0, 0, // 4, 0, 0, // 3, ... @@ -3311,6 +3508,7 @@ 585, 0, 0, // 84, ... 1365, 0, 0, // 87, ... }; +#endif // CONFIG_EXT_DIR // Generate the weights per pixel position for IBP static void av1_dr_prediction_z1_info(uint8_t *weights, int bw, int bh, @@ -3393,7 +3591,11 @@ int delta, int txw, int txh, int txw_log2, int txh_log2) { const int angle = mode_to_angle_map[mode] + delta * 3; const int mode_idx = angle_to_mode_index[angle]; +#if CONFIG_EXT_DIR + const int dy = dr_intra_derivative[90 - angle]; +#else const int dy = second_dr_intra_derivative[angle]; +#endif // CONFIG_EXT_DIR weights[block_idx][mode_idx] = (uint8_t *)(aom_malloc(txw * txh * sizeof(uint8_t))); av1_dr_prediction_z1_info(weights[block_idx][mode_idx], txw, txh, txw_log2, @@ -3410,7 +3612,11 @@ const int txh = tx_size_high[iblock]; const int txw_log2 = tx_size_wide_log2[iblock]; const int txh_log2 = tx_size_high_log2[iblock]; +#if CONFIG_IMPROVED_ANGULAR_INTRA + for (int delta = -2; delta < 0; delta += 2) { +#else for (int delta = -3; delta < 0; delta++) { +#endif // CONFIG_IMPROVED_ANGULAR_INTRA init_ibp_info_per_mode(weights, iblock, V_PRED, delta, txw, txh, txw_log2, txh_log2); init_ibp_info_per_mode(weights, iblock, D67_PRED, delta, 
txw, txh, @@ -3418,7 +3624,11 @@ init_ibp_info_per_mode(weights, iblock, D45_PRED, delta, txw, txh, txw_log2, txh_log2); } +#if CONFIG_IMPROVED_ANGULAR_INTRA + for (int delta = 0; delta <= 2; delta += 2) { +#else for (int delta = 0; delta <= 3; delta++) { +#endif // CONFIG_IMPROVED_ANGULAR_INTRA init_ibp_info_per_mode(weights, iblock, D67_PRED, delta, txw, txh, txw_log2, txh_log2); init_ibp_info_per_mode(weights, iblock, D45_PRED, delta, txw, txh,
diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl index 4d53661..e0efb4e 100644 --- a/av1/common/av1_rtcd_defs.pl +++ b/av1/common/av1_rtcd_defs.pl
@@ -229,6 +229,15 @@ add_proto qw/void av1_highbd_dr_prediction_z3/, "uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_left, int dx, int dy, int bd, int mrl_index"; specialize qw/av1_highbd_dr_prediction_z3 avx2/; +if (aom_config("CONFIG_IDIF") eq "yes") { + add_proto qw/void av1_highbd_dr_prediction_z1_idif/ , "uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int dx, int dy, int bd, int mrl_index"; + specialize qw/av1_highbd_dr_prediction_z1_idif avx2/; + add_proto qw/void av1_highbd_dr_prediction_z2_idif/ , "uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int dx, int dy, int bd, int mrl_index"; + specialize qw/av1_highbd_dr_prediction_z2_idif avx2/; + add_proto qw/void av1_highbd_dr_prediction_z3_idif/ , "uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int dx, int dy, int bd, int mrl_index"; + specialize qw/av1_highbd_dr_prediction_z3_idif avx2/ +} + add_proto qw / void av1_highbd_ibp_dr_prediction_z1 /, "uint8_t* weights, uint16_t *dst, ptrdiff_t stride, uint16_t* second_pred, ptrdiff_t second_stride, int bw, int bh"; add_proto qw / void av1_highbd_ibp_dr_prediction_z3 /, @@ -334,10 +343,14 @@ # txb add_proto qw/void av1_txb_init_levels_skip/, "const tran_low_t *const coeff, const int width, const int height, uint8_t *const levels"; specialize qw/av1_txb_init_levels_skip sse4_1 avx2/; - add_proto qw/void av1_get_nz_map_contexts_skip/, "const uint8_t *const levels, const int16_t *const scan, const uint16_t eob, const TX_SIZE tx_size, int8_t *const coeff_contexts"; - specialize qw/av1_get_nz_map_contexts_skip sse2/; + if (aom_config("CONFIG_ATC_DCTX_ALIGNED") eq "yes") { + add_proto qw/void av1_get_nz_map_contexts_skip/, "const uint8_t *const levels, const int16_t *const scan, const uint16_t bob, const uint16_t eob, const TX_SIZE tx_size, int8_t *const coeff_contexts"; + } 
else { + add_proto qw/void av1_get_nz_map_contexts_skip/, "const uint8_t *const levels, const int16_t *const scan, const uint16_t eob, const TX_SIZE tx_size, int8_t *const coeff_contexts"; + specialize qw/av1_get_nz_map_contexts_skip sse2/; + } - if (aom_config("CONFIG_ATC_COEFCODING") eq "yes") { + if (aom_config("CONFIG_ATC") eq "yes") { add_proto qw/void av1_get_nz_map_contexts/, "const uint8_t *const levels, const int16_t *const scan, const uint16_t eob, const TX_SIZE tx_size, const TX_CLASS tx_class, int8_t *const coeff_contexts, const int plane"; } else { add_proto qw/void av1_get_nz_map_contexts/, "const uint8_t *const levels, const int16_t *const scan, const uint16_t eob, const TX_SIZE tx_size, const TX_CLASS tx_class, int8_t *const coeff_contexts";
diff --git a/av1/common/blockd.c b/av1/common/blockd.c index c629834..26c05fd 100644 --- a/av1/common/blockd.c +++ b/av1/common/blockd.c
@@ -203,18 +203,27 @@ #endif // CONFIG_WIENER_NONSEP ) { for (int p = plane_start; p < plane_end; ++p) { - av1_reset_wiener_bank(&xd->wiener_info[p]); + av1_reset_wiener_bank(&xd->wiener_info[p], p != AOM_PLANE_Y); av1_reset_sgrproj_bank(&xd->sgrproj_info[p]); #if CONFIG_WIENER_NONSEP av1_reset_wienerns_bank(&xd->wienerns_info[p], xd->current_base_qindex, - num_filter_classes[p], p != AOM_PLANE_Y); + num_filter_classes[p], p != AOM_PLANE_Y +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + , + 0 +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + ); +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + av1_reset_wienerns_bank(&xd->wienerns_cross_info[p], + xd->current_base_qindex, 1, p != AOM_PLANE_Y, 1); +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER #endif // CONFIG_WIENER_NONSEP } } // Initialize bank -void av1_reset_wiener_bank(WienerInfoBank *bank) { - set_default_wiener(&bank->filter[0]); +void av1_reset_wiener_bank(WienerInfoBank *bank, int chroma) { + set_default_wiener(&bank->filter[0], chroma); bank->bank_size = 0; bank->bank_ptr = 0; } @@ -262,19 +271,6 @@ memcpy(av1_ref_from_wiener_bank(bank, ndx), info, sizeof(*info)); } -// Convenience function to fill the provided info structure with -// filter at given index -void av1_get_from_wiener_bank(WienerInfoBank *bank, int ndx, WienerInfo *info) { - if (bank->bank_size == 0) { - set_default_wiener(info); - } else { - assert(ndx < bank->bank_size); - const int ptr = - bank->bank_ptr - ndx + (bank->bank_ptr < ndx ? 
LR_BANK_SIZE : 0); - memcpy(info, &bank->filter[ptr], sizeof(*info)); - } -} - // Initialize bank void av1_reset_sgrproj_bank(SgrprojInfoBank *bank) { set_default_sgrproj(&bank->filter[0]); @@ -325,29 +321,25 @@ memcpy(av1_ref_from_sgrproj_bank(bank, ndx), info, sizeof(*info)); } -// Convenience function to fill the provided info structure with -// filter at given index -void av1_get_from_sgrproj_bank(SgrprojInfoBank *bank, int ndx, - SgrprojInfo *info) { - if (bank->bank_size == 0) { - set_default_sgrproj(info); - } else { - assert(ndx < bank->bank_size); - const int ptr = - bank->bank_ptr - ndx + (bank->bank_ptr < ndx ? LR_BANK_SIZE : 0); - memcpy(info, &bank->filter[ptr], sizeof(*info)); - } -} - #if CONFIG_WIENER_NONSEP // Initialize bank void av1_reset_wienerns_bank(WienerNonsepInfoBank *bank, int qindex, - int num_classes, int chroma) { + int num_classes, int chroma +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + , + int is_cross +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER +) { for (int i = 0; i < LR_BANK_SIZE; ++i) { +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + set_default_wienerns(&bank->filter[i], qindex, num_classes, chroma, + is_cross); +#else set_default_wienerns(&bank->filter[i], qindex, num_classes, chroma); +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER } for (int c_id = 0; c_id < num_classes; ++c_id) { - bank->bank_size_for_class[c_id] = 1; + bank->bank_size_for_class[c_id] = 0; bank->bank_ptr_for_class[c_id] = 0; } } @@ -379,26 +371,30 @@ WienerNonsepInfo *av1_ref_from_wienerns_bank(WienerNonsepInfoBank *bank, int ndx, int wiener_class_id) { assert(wiener_class_id != ALL_WIENERNS_CLASSES); - assert(bank->bank_size_for_class[wiener_class_id] > 0); - - assert(ndx < bank->bank_size_for_class[wiener_class_id]); - const int ptr = - bank->bank_ptr_for_class[wiener_class_id] - ndx + - (bank->bank_ptr_for_class[wiener_class_id] < ndx ? 
LR_BANK_SIZE : 0); - return &bank->filter[ptr]; + if (bank->bank_size_for_class[wiener_class_id] == 0) { + return &bank->filter[0]; + } else { + assert(ndx < bank->bank_size_for_class[wiener_class_id]); + const int ptr = + bank->bank_ptr_for_class[wiener_class_id] - ndx + + (bank->bank_ptr_for_class[wiener_class_id] < ndx ? LR_BANK_SIZE : 0); + return &bank->filter[ptr]; + } } // Get a const reference to a filter given the index const WienerNonsepInfo *av1_constref_from_wienerns_bank( const WienerNonsepInfoBank *bank, int ndx, int wiener_class_id) { assert(wiener_class_id != ALL_WIENERNS_CLASSES); - assert(bank->bank_size_for_class[wiener_class_id] > 0); - - assert(ndx < bank->bank_size_for_class[wiener_class_id]); - const int ptr = - bank->bank_ptr_for_class[wiener_class_id] - ndx + - (bank->bank_ptr_for_class[wiener_class_id] < ndx ? LR_BANK_SIZE : 0); - return &bank->filter[ptr]; + if (bank->bank_size_for_class[wiener_class_id] == 0) { + return &bank->filter[0]; + } else { + assert(ndx < bank->bank_size_for_class[wiener_class_id]); + const int ptr = + bank->bank_ptr_for_class[wiener_class_id] - ndx + + (bank->bank_ptr_for_class[wiener_class_id] < ndx ? LR_BANK_SIZE : 0); + return &bank->filter[ptr]; + } } // Directly replace a filter in the bank at given index
diff --git a/av1/common/blockd.h b/av1/common/blockd.h index d934852..eab428b 100644 --- a/av1/common/blockd.h +++ b/av1/common/blockd.h
@@ -365,6 +365,40 @@ DIFFWTD_MASK_TYPE mask_type; COMPOUND_TYPE type; } INTERINTER_COMPOUND_DATA; +#if CONFIG_D071_IMP_MSK_BLD +// This structure is used for the position check of the implicit masked blending +typedef struct BacpBlockData { + int x0; // top left sample horizontal cood. + int x1; // x0 + bw + int y0; // top left sample vertical cood. + int y1; // y0 + bh +} BacpBlockData; +// This struct contains enable flag and date for implicit masked blending mode +typedef struct { + uint8_t enable_bacp; // enable boundary aware compound prediction + BacpBlockData *bacp_block_data; +} INTERINTER_COMPOUND_BORDER_DATA; +#endif // CONFIG_D071_IMP_MSK_BLD + +#if CONFIG_REFINEMV +#define REF_BUFFER_WIDTH \ + (REFINEMV_SUBBLOCK_WIDTH + (AOM_INTERP_EXTEND - 1) + AOM_INTERP_EXTEND) +#define REF_BUFFER_HEIGHT \ + (REFINEMV_SUBBLOCK_HEIGHT + (AOM_INTERP_EXTEND - 1) + AOM_INTERP_EXTEND) +typedef struct PadBlock { + int x0; + int x1; + int y0; + int y1; +} PadBlock; + +typedef struct PadArea { + PadBlock pad_block; + uint16_t paded_ref_buf[(REF_BUFFER_WIDTH) * (REF_BUFFER_HEIGHT)]; + int paded_ref_buf_stride; +} ReferenceArea; + +#endif // CONFIG_REFINEMV #if CONFIG_OPTFLOW_REFINEMENT // Macros for optical flow experiment where offsets are added in nXn blocks @@ -420,6 +454,12 @@ int mi_row_start; /*! \brief Starting mi_col of current coding block */ int mi_col_start; +#if CONFIG_EXT_RECUR_PARTITIONS + /*! \brief Starting chroma mi_row of current coding block */ + int chroma_mi_row_start; + /*! \brief Starting chroma mi_col of current coding block */ + int chroma_mi_col_start; +#endif // CONFIG_EXT_RECUR_PARTITIONS /*! \brief The partition type of the current coding block. */ PARTITION_TYPE partition; /*! \brief The prediction mode used */ @@ -464,6 +504,11 @@ */ uint8_t mb_precision_set; #endif +#if CONFIG_REFINEMV + /*! \brief The flag to signal if DMVR is used for the inter prediction. */ + uint8_t refinemv_flag; +#endif // CONFIG_REFINEMV + /*! 
\brief The motion mode used by the inter prediction. */ MOTION_MODE motion_mode; /*! \brief Number of samples used by spatial warp prediction */ @@ -491,6 +536,11 @@ /*! \brief The bawp parameters offset*/ int32_t bawp_beta[3][2]; //[yuv][ref0/1], current only [0][0] is used. #endif // CONFIG_BAWP + +#if CONFIG_CWP + //! Index for compound weighted prediction parameters. + int8_t cwp_idx; +#endif // CONFIG_CWP /**@}*/ /***************************************************************************** @@ -561,7 +611,11 @@ /*! \brief Only valid when temporal update if off. */ uint8_t seg_id_predicted : 1; /*! \brief Which ref_mv to use */ +#if CONFIG_SEP_COMP_DRL + int ref_mv_idx[2]; +#else uint8_t ref_mv_idx : 3; +#endif // CONFIG_SEP_COMP_DRL /*! \brief Inter skip mode */ #if CONFIG_SKIP_MODE_ENHANCEMENT uint8_t skip_mode : 2; @@ -570,14 +624,14 @@ #endif // CONFIG_SKIP_MODE_ENHANCEMENT /*! \brief Whether intrabc is used. */ uint8_t use_intrabc[PARTITION_STRUCTURE_NUM]; -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT /*! \brief Intrabc BV prediction mode. */ uint8_t intrabc_mode; /*! \brief Index of ref_bv. */ uint8_t intrabc_drl_idx; /*! \brief Which ref_bv to use. */ int_mv ref_bv; -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT #if CONFIG_WARP_REF_LIST /*! \brief Which index to use for warp base parameter. */ @@ -585,6 +639,10 @@ /*! \brief Maximum number of warp reference indices to use for warp base * parameter. */ uint8_t max_num_warp_candidates; +#if CONFIG_CWG_D067_IMPROVED_WARP + /*! \brief warpmv_with_mvd_flag. */ + uint8_t warpmv_with_mvd_flag; +#endif // CONFIG_CWG_D067_IMPROVED_WARP #endif // CONFIG_WARP_REF_LIST /*! \brief Indicates if masked compound is used(1) or not (0). */ @@ -634,6 +692,15 @@ } SUBMB_INFO; #endif // CONFIG_C071_SUBBLK_WARPMV +#if CONFIG_REFINEMV +/*! \brief Stores the subblock refinemv motion info of the current coding block + */ +typedef struct REFINEMV_SUBMB_INFO { + /*! 
\brief Stored subblock mv for reference. */ + int_mv refinemv[2]; +} REFINEMV_SUBMB_INFO; +#endif // CONFIG_REFINEMV + /*!\cond */ // Get the start plane for semi-decoupled partitioning static INLINE int get_partition_plane_start(int tree_type) { @@ -750,7 +817,7 @@ if (bsize > BLOCK_64X64) { return false; } - // At bsize \leq 8X8, extended partitions will lead to dimension < 2. + // At bsize <= 8X8, extended partitions will lead to dimension < 2. if (bsize <= BLOCK_8X8) { return false; } @@ -771,7 +838,7 @@ return false; } // A splittable wide block has ratio 2:1. If it performs HORZ_3 split, then - // we'll get a block ratio of 2:0.5 == 4:1, which is illegal. So extended + // we'll get a block ratio of 2:0.25 == 8:1, which is illegal. So extended // partition is disabled. The same goes for tall block. if ((is_wide_block(bsize) && rect_type == HORZ) || (is_tall_block(bsize) && rect_type == VERT)) { @@ -780,6 +847,25 @@ return true; } +#if CONFIG_UNEVEN_4WAY +/*!\brief Checks whether uneven 4-way partition is allowed for current bsize and + * rect_type. */ +static AOM_INLINE bool is_uneven_4way_partition_allowed( + BLOCK_SIZE bsize, RECT_PART_TYPE rect_type, TREE_TYPE tree_type) { + assert(is_ext_partition_allowed(bsize, rect_type, tree_type)); + + if (rect_type == HORZ) { + if (bsize == BLOCK_32X64) return true; + if (bsize == BLOCK_16X32 && tree_type != CHROMA_PART) return true; + } else { + assert(rect_type == VERT); + if (bsize == BLOCK_64X32) return true; + if (bsize == BLOCK_32X16 && tree_type != CHROMA_PART) return true; + } + return false; +} +#endif // CONFIG_UNEVEN_4WAY + /*!\brief Returns the rect_type that's implied by the bsize. If the rect_type * cannot be derived from bsize, returns RECT_INVALID. */ static AOM_INLINE RECT_PART_TYPE @@ -807,18 +893,24 @@ /*!\brief Returns whether square split is allowed for current bsize. 
*/ static AOM_INLINE bool is_square_split_eligible(BLOCK_SIZE bsize, BLOCK_SIZE sb_size) { - if (sb_size != BLOCK_256X256) { - return false; - } + (void)sb_size; return bsize == BLOCK_128X128 || bsize == BLOCK_256X256; } -/*!\brief Returns whether the current partition is horizontal type for vertical +/*!\brief Returns whether the current partition is horizontal type or vertical * type. */ static AOM_INLINE RECT_PART_TYPE get_rect_part_type(PARTITION_TYPE partition) { - if (partition == PARTITION_HORZ || partition == PARTITION_HORZ_3) { + if (partition == PARTITION_HORZ || partition == PARTITION_HORZ_3 +#if CONFIG_UNEVEN_4WAY + || partition == PARTITION_HORZ_4A || partition == PARTITION_HORZ_4B +#endif // CONFIG_UNEVEN_4WAY + ) { return HORZ; - } else if (partition == PARTITION_VERT || partition == PARTITION_VERT_3) { + } else if (partition == PARTITION_VERT || partition == PARTITION_VERT_3 +#if CONFIG_UNEVEN_4WAY + || partition == PARTITION_VERT_4A || partition == PARTITION_VERT_4B +#endif // CONFIG_UNEVEN_4WAY + ) { return VERT; } assert(0 && "Rectangular partition expected!"); @@ -830,6 +922,20 @@ return is_inter_ref_frame(mbmi->ref_frame[1]); } +#if CONFIG_SEP_COMP_DRL +/*!\brief Return whether the current coding block has two separate DRLs */ +static INLINE int has_second_drl(const MB_MODE_INFO *mbmi) { + int ret = (mbmi->mode == NEAR_NEARMV || mbmi->mode == NEAR_NEWMV) && + !is_tip_ref_frame(mbmi->ref_frame[0]) && !mbmi->skip_mode; + return ret; +} + +/*!\brief Return the mv_ref_idx of the current coding block based on ref_idx */ +static INLINE int get_ref_mv_idx(const MB_MODE_INFO *mbmi, int ref_idx) { + return has_second_drl(mbmi) ? 
mbmi->ref_mv_idx[ref_idx] : mbmi->ref_mv_idx[0]; +} +#endif // CONFIG_SEP_COMP_DRL + #if CONFIG_AIMC PREDICTION_MODE av1_get_joint_mode(const MB_MODE_INFO *mi); #else @@ -893,7 +999,7 @@ } } -#if CONFIG_H_PARTITION +#if CONFIG_EXT_RECUR_PARTITIONS // Get the block size of the ith sub-block in a block partitioned via an // h-partition mode. static INLINE BLOCK_SIZE get_h_partition_subsize(BLOCK_SIZE bsize, int index, @@ -981,7 +1087,7 @@ } } } -#endif // CONFIG_H_PARTITION +#endif // CONFIG_EXT_RECUR_PARTITIONS static INLINE int is_partition_valid(BLOCK_SIZE bsize, PARTITION_TYPE p) { if (is_partition_point(bsize)) @@ -1009,25 +1115,36 @@ int subsampling_y) { const int bw = block_size_wide[bsize] >> subsampling_x; const int bh = block_size_high[bsize] >> subsampling_y; + // Check if block width/height is less than 4. const int bw_less_than_4 = bw < 4; const int bh_less_than_4 = bh < 4; + // Check if half block width/height is less than 8. const int hbw_less_than_4 = bw < 8; const int hbh_less_than_4 = bh < 8; +#if !CONFIG_UNEVEN_4WAY || CONFIG_EXT_RECUR_PARTITIONS + // Check if quarter block width/height is less than 16. const int qbw_less_than_4 = bw < 16; const int qbh_less_than_4 = bh < 16; +#endif // !CONFIG_UNEVEN_4WAY || CONFIG_EXT_RECUR_PARTITIONS +#if CONFIG_UNEVEN_4WAY + // Check if one-eighth block width/height is less than 32. 
+ const int ebw_less_than_4 = bw < 32; + const int ebh_less_than_4 = bh < 32; +#endif // CONFIG_UNEVEN_4WAY switch (partition) { case PARTITION_NONE: return bw_less_than_4 || bh_less_than_4; case PARTITION_HORZ: return bw_less_than_4 || hbh_less_than_4; case PARTITION_VERT: return hbw_less_than_4 || bh_less_than_4; case PARTITION_SPLIT: return hbw_less_than_4 || hbh_less_than_4; #if CONFIG_EXT_RECUR_PARTITIONS -#if CONFIG_H_PARTITION +#if CONFIG_UNEVEN_4WAY + case PARTITION_HORZ_4A: + case PARTITION_HORZ_4B: return bw_less_than_4 || ebh_less_than_4; + case PARTITION_VERT_4A: + case PARTITION_VERT_4B: return ebw_less_than_4 || bh_less_than_4; +#endif // CONFIG_UNEVEN_4WAY case PARTITION_HORZ_3: return hbw_less_than_4 || qbh_less_than_4; case PARTITION_VERT_3: return qbw_less_than_4 || hbh_less_than_4; -#else - case PARTITION_HORZ_3: return bw_less_than_4 || qbh_less_than_4; - case PARTITION_VERT_3: return qbw_less_than_4 || bh_less_than_4; -#endif // CONFIG_H_PARTITION #else // CONFIG_EXT_RECUR_PARTITIONS case PARTITION_HORZ_A: case PARTITION_HORZ_B: @@ -1077,13 +1194,14 @@ return 1; } #if CONFIG_EXT_RECUR_PARTITIONS -#if CONFIG_H_PARTITION +#if CONFIG_UNEVEN_4WAY + case PARTITION_HORZ_4A: + case PARTITION_HORZ_4B: + case PARTITION_VERT_4A: + case PARTITION_VERT_4B: return index == 3; +#endif // CONFIG_UNEVEN_4WAY case PARTITION_VERT_3: case PARTITION_HORZ_3: return index == 3; -#else - case PARTITION_VERT_3: - case PARTITION_HORZ_3: return index == 2; -#endif // CONFIG_H_PARTITION #else // CONFIG_EXT_RECUR_PARTITIONS case PARTITION_HORZ_A: case PARTITION_HORZ_B: @@ -1162,6 +1280,12 @@ case PARTITION_HORZ: case PARTITION_VERT: #if CONFIG_EXT_RECUR_PARTITIONS +#if CONFIG_UNEVEN_4WAY + case PARTITION_HORZ_4A: + case PARTITION_HORZ_4B: + case PARTITION_VERT_4A: + case PARTITION_VERT_4B: +#endif // CONFIG_UNEVEN_4WAY case PARTITION_VERT_3: case PARTITION_HORZ_3: #endif // CONFIG_EXT_RECUR_PARTITIONS @@ -1252,14 +1376,23 @@ } } -static INLINE void set_chroma_ref_info(int 
mi_row, int mi_col, int index, - BLOCK_SIZE bsize, CHROMA_REF_INFO *info, +static INLINE void set_chroma_ref_info(TREE_TYPE tree_type, int mi_row, + int mi_col, int index, BLOCK_SIZE bsize, + CHROMA_REF_INFO *info, const CHROMA_REF_INFO *parent_info, BLOCK_SIZE parent_bsize, PARTITION_TYPE parent_partition, int ss_x, int ss_y) { assert(bsize < BLOCK_SIZES_ALL); initialize_chroma_ref_info(mi_row, mi_col, bsize, info); + if (tree_type == LUMA_PART) { + info->is_chroma_ref = 0; + return; + } + if (tree_type == CHROMA_PART) { + info->is_chroma_ref = 1; + return; + } if (parent_info == NULL) return; if (parent_info->is_chroma_ref) { if (parent_info->offset_started) { @@ -1290,7 +1423,7 @@ } } -#if CONFIG_MISMATCH_DEBUG +#if CONFIG_MISMATCH_DEBUG || CONFIG_INSPECTION static INLINE void mi_to_pixel_loc(int *pixel_c, int *pixel_r, int mi_col, int mi_row, int tx_blk_col, int tx_blk_row, int subsampling_x, int subsampling_y) { @@ -1318,8 +1451,21 @@ typedef struct { DECLARE_ALIGNED(32, tran_low_t, dqcoeff[MAX_MB_PLANE][MAX_SB_SQUARE]); +#if CONFIG_INSPECTION + // dqcoeff gets clobbered before the inspect callback happens, so keep a + // copy here. 
+ DECLARE_ALIGNED(32, tran_low_t, dqcoeff_copy[MAX_MB_PLANE][MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, tran_low_t, qcoeff[MAX_MB_PLANE][MAX_SB_SQUARE]); + DECLARE_ALIGNED(32, tran_low_t, dequant_values[MAX_MB_PLANE][MAX_SB_SQUARE]); +#endif + // keeps the index that corresponds to end-of-block (eob) eob_info eob_data[MAX_MB_PLANE] [MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)]; +#if CONFIG_ATC_DCTX_ALIGNED + // keeps the index that corresponds to beginning-of-block (bob) + eob_info bob_data[MAX_MB_PLANE] + [MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)]; +#endif // CONFIG_ATC_DCTX_ALIGNED DECLARE_ALIGNED(16, uint8_t, color_index_map[2][MAX_SB_SQUARE]); } CB_BUFFER; @@ -1351,6 +1497,9 @@ qm_val_t *seg_iqmatrix[MAX_SEGMENTS][TX_SIZES_ALL]; qm_val_t *seg_qmatrix[MAX_SEGMENTS][TX_SIZES_ALL]; +#if CONFIG_INSPECTION + DECLARE_ALIGNED(32, int16_t, predicted_pixels[MAX_SB_SQUARE]); +#endif } MACROBLOCKD_PLANE; #define BLOCK_OFFSET(i) ((i) << 4) @@ -1360,7 +1509,6 @@ #else #define LR_BANK_SIZE 1 #endif // CONFIG_LR_MERGE_COEFFS - /*!\endcond */ /*!\brief Parameters related to Wiener Filter */ @@ -1456,7 +1604,12 @@ */ DECLARE_ALIGNED(16, int16_t, allfiltertaps[WIENERNS_MAX_CLASSES * WIENERNS_YUV_MAX]); - +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + /*! + * Whether this is a cross-filter, temporaly used + */ + int is_cross_filter; +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER #if CONFIG_LR_MERGE_COEFFS /*! * Best Reference from dynamic bank for each class. @@ -1599,7 +1752,7 @@ } WARP_PARAM_BANK; #endif // CONFIG_WARP_REF_LIST -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT /*! \brief Variables related to mvp list of skip mode.*/ typedef struct { //! MV list @@ -1617,7 +1770,7 @@ //! Global mvs int_mv global_mvs[2]; } SKIP_MODE_MVP_LIST; -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT /*! \brief Variables related to current coding block. * @@ -1644,7 +1797,7 @@ * \name Reference MV bank info. 
*/ /**@{*/ -#if !CONFIG_C043_MVP_IMPROVEMENTS +#if !CONFIG_MVP_IMPROVEMENT REF_MV_BANK *ref_mv_bank_pt; /*!< Pointer to bank to refer to */ #endif REF_MV_BANK ref_mv_bank; /*!< Ref mv bank to update */ @@ -1902,6 +2055,13 @@ * Nonseparable Wiener filter information for all planes. */ WienerNonsepInfoBank wienerns_info[MAX_MB_PLANE]; +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + /*! + * Nonseparable Wiener cross filter information for all planes, only Cb and Cr + * are applied + */ + WienerNonsepInfoBank wienerns_cross_info[MAX_MB_PLANE]; +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER #endif // CONFIG_WIENER_NONSEP /**@}*/ @@ -1930,9 +2090,9 @@ /*! * skip_mvp_candidate_list is the MVP list for skip mode. */ -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT SKIP_MODE_MVP_LIST skip_mvp_candidate_list; -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT #if CONFIG_WARP_REF_LIST /*! @@ -1958,7 +2118,7 @@ */ bool is_first_horizontal_rect; -#if CONFIG_C043_MVP_IMPROVEMENTS +#if CONFIG_MVP_IMPROVEMENT /*! * True if this is the last horizontal rectangular block in a HORIZONTAL or * HORIZONTAL_4 partition. @@ -1969,7 +2129,7 @@ * VERTICAL_4 partition. */ bool is_first_vertical_rect; -#endif // CONFIG_C043_MVP_IMPROVEMENTS +#endif // CONFIG_MVP_IMPROVEMENT #endif // !CONFIG_EXT_RECUR_PARTITIONS /*! 
@@ -2132,6 +2292,11 @@ /** variable to store eob_u flag */ uint8_t eob_u_flag; #endif // CONFIG_CONTEXT_DERIVATION + +#if CONFIG_REFINEMV + /** block level storage to store luma refined MVs for chroma use */ + REFINEMV_SUBMB_INFO refinemv_subinfo[MAX_MIB_SIZE * MAX_MIB_SIZE]; +#endif // CONFIG_REFINEMV } MACROBLOCKD; /*!\cond */ @@ -2180,14 +2345,14 @@ // Number of transform types in each set type for intra blocks static const int av1_num_ext_tx_set_intra[EXT_TX_SET_TYPES] = { 1, 1, 4, 6, 11, 15, -#if CONFIG_ATC_NEWTXSETS +#if CONFIG_ATC 7 -#endif // CONFIG_ATC_NEWTXSETS +#endif // CONFIG_ATC }; -#if CONFIG_ATC_NEWTXSETS && CONFIG_ATC_REDUCED_TXSET +#if CONFIG_ATC && CONFIG_ATC_REDUCED_TXSET static const int av1_num_reduced_tx_set = 2; -#endif // CONFIG_ATC_NEWTXSETS && CONFIG_ATC_REDUCED_TXSET +#endif // CONFIG_ATC && CONFIG_ATC_REDUCED_TXSET // Number of transform types in each set type static const int av1_num_ext_tx_set[EXT_TX_SET_TYPES] = { @@ -2201,12 +2366,12 @@ { 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0 }, { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 }, { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, -#if CONFIG_ATC_NEWTXSETS +#if CONFIG_ATC { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, -#endif // CONFIG_ATC_NEWTXSETS +#endif // CONFIG_ATC }; -#if CONFIG_ATC_NEWTXSETS +#if CONFIG_ATC static const int av1_mdtx_used_flag[EXT_TX_SIZES][INTRA_MODES][TX_TYPES] = { { { 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0 }, @@ -2269,7 +2434,7 @@ { 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, }, // size_class: 3 }; -#endif // CONFIG_ATC_NEWTXSETS +#endif // CONFIG_ATC static const uint16_t av1_reduced_intra_tx_used_flag[INTRA_MODES] = { 0x080F, // DC_PRED: 0000 1000 0000 1111 @@ -2294,12 +2459,12 @@ 0x0E0F, // 0000 1110 0000 1111 0x0FFF, // 0000 1111 1111 1111 0xFFFF, // 1111 1111 1111 1111 -#if CONFIG_ATC_NEWTXSETS +#if CONFIG_ATC 0xFFFF, -#endif // CONFIG_ATC_NEWTXSETS +#endif // CONFIG_ATC }; -#if CONFIG_ATC_NEWTXSETS +#if CONFIG_ATC static 
const uint16_t av1_md_trfm_used_flag[EXT_TX_SIZES][INTRA_MODES] = { { 0x218F, @@ -2362,7 +2527,7 @@ 0x0000, }, // size_class: 3 }; -#endif // CONFIG_ATC_NEWTXSETS +#endif // CONFIG_ATC static const TxSetType av1_ext_tx_set_lookup[2][2] = { { EXT_TX_SET_DTT4_IDTX_1DDCT, EXT_TX_SET_DTT4_IDTX }, @@ -2374,14 +2539,18 @@ const TX_SIZE tx_size_sqr_up = txsize_sqr_up_map[tx_size]; if (tx_size_sqr_up > TX_32X32) return EXT_TX_SET_DCTONLY; if (tx_size_sqr_up == TX_32X32) +#if CONFIG_ATC_DCTX_ALIGNED + return EXT_TX_SET_DCT_IDTX; +#else return is_inter ? EXT_TX_SET_DCT_IDTX : EXT_TX_SET_DCTONLY; +#endif // CONFIG_ATC_DCTX_ALIGNED #if CONFIG_ATC_REDUCED_TXSET if (use_reduced_set) return is_inter ? EXT_TX_SET_DCT_IDTX : EXT_NEW_TX_SET; #else if (use_reduced_set) return is_inter ? EXT_TX_SET_DCT_IDTX : EXT_TX_SET_DTT4_IDTX; #endif // CONFIG_ATC_REDUCED_TXSET -#if CONFIG_ATC_NEWTXSETS +#if CONFIG_ATC if (is_inter) { const TX_SIZE tx_size_sqr = txsize_sqr_map[tx_size]; return av1_ext_tx_set_lookup[is_inter][tx_size_sqr == TX_16X16]; @@ -2391,17 +2560,17 @@ #else const TX_SIZE tx_size_sqr = txsize_sqr_map[tx_size]; return av1_ext_tx_set_lookup[is_inter][tx_size_sqr == TX_16X16]; -#endif // CONFIG_ATC_NEWTXSETS +#endif // CONFIG_ATC } // Maps tx set types to the indices. 
static const int ext_tx_set_index[2][EXT_TX_SET_TYPES] = { { // Intra -#if CONFIG_ATC_NEWTXSETS +#if CONFIG_ATC 0, -1, -1, -1, -1, -1, 1 }, #else 0, -1, 2, 1, -1, -1 }, -#endif // CONFIG_ATC_NEWTXSETS +#endif // CONFIG_ATC { // Inter 0, 3, -1, -1, 2, 1 }, }; @@ -2466,7 +2635,11 @@ int is_screen_content_type) { const MB_MODE_INFO *const mbmi = xd->mi[0]; if (is_inter_block(mbmi, xd->tree_type) || plane_type != PLANE_TYPE_Y || +#if CONFIG_ATC_DCTX_ALIGNED + xd->lossless[mbmi->segment_id] || tx_size > TX_32X32 || +#else xd->lossless[mbmi->segment_id] || tx_size >= TX_32X32 || +#endif // CONFIG_ATC_DCTX_ALIGNED is_screen_content_type) return DCT_DCT; @@ -2846,6 +3019,9 @@ const int code_stx = (primary_tx_type == DCT_DCT || primary_tx_type == ADST_ADST) && (intra_dir < PAETH_PRED) && +#if CONFIG_ATC_DCTX_ALIGNED + (eob != 1) && +#endif // CONFIG_ATC_DCTX_ALIGNED !(mbmi->filter_intra_mode_info.use_filter_intra) && is_depth0 && ist_eob; return code_stx; } @@ -2990,14 +3166,13 @@ void av1_reset_loop_filter_delta(MACROBLOCKD *xd, int num_planes); -void av1_reset_wiener_bank(WienerInfoBank *bank); +void av1_reset_wiener_bank(WienerInfoBank *bank, int chroma); void av1_add_to_wiener_bank(WienerInfoBank *bank, const WienerInfo *info); WienerInfo *av1_ref_from_wiener_bank(WienerInfoBank *bank, int ndx); const WienerInfo *av1_constref_from_wiener_bank(const WienerInfoBank *bank, int ndx); void av1_upd_to_wiener_bank(WienerInfoBank *bank, int ndx, const WienerInfo *info); -void av1_get_from_wiener_bank(WienerInfoBank *bank, int ndx, WienerInfo *info); void av1_reset_sgrproj_bank(SgrprojInfoBank *bank); void av1_add_to_sgrproj_bank(SgrprojInfoBank *bank, const SgrprojInfo *info); @@ -3006,14 +3181,17 @@ int ndx); void av1_upd_to_sgrproj_bank(SgrprojInfoBank *bank, int ndx, const SgrprojInfo *info); -void av1_get_from_sgrproj_bank(SgrprojInfoBank *bank, int ndx, - SgrprojInfo *info); #if CONFIG_WIENER_NONSEP // Resets the bank data structure holding LR_BANK_SIZE nonseparable Wiener 
// filters. The bank holds a rootating buffer of filters. void av1_reset_wienerns_bank(WienerNonsepInfoBank *bank, int qindex, - int num_classes, int chroma); + int num_classes, int chroma +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + , + int is_cross +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER +); // Adds the nonseparable Wiener filter in info into the bank of rotating // filters. The add is so that once the bank has LR_BANK_SIZE filters the first @@ -3149,6 +3327,7 @@ return 0; } #if CONFIG_EXT_RECUR_PARTITIONS +#if !CONFIG_UNEVEN_4WAY // TODO(urvang): Enable this special case, if we make OBMC work. // TODO(yuec): Enable this case when the alignment issue is fixed. There // will be memory leak in global above_pred_buff and left_pred_buff if @@ -3156,11 +3335,13 @@ if ((mi_row & 0x01) || (mi_col & 0x01)) { return 0; } +#endif // !CONFIG_UNEVEN_4WAY #else assert(!(mi_row & 0x01) && !(mi_col & 0x01)); +#endif // CONFIG_EXT_RECUR_PARTITIONS (void)mi_row; (void)mi_col; -#endif // CONFIG_EXT_RECUR_PARTITIONS + return 1; } @@ -3276,10 +3457,10 @@ [PALETTE_COLORS]; /* clang-format on */ -#if CONFIG_NEW_COLOR_MAP_CODING +#if CONFIG_PALETTE_IMPROVEMENTS typedef aom_cdf_prob (*IdentityRowCdf)[CDF_SIZE(2)]; typedef const int (*IdentityRowCost)[PALETTE_ROW_FLAG_CONTEXTS][2]; -#endif // CONFIG_NEW_COLOR_MAP_CODING +#endif // CONFIG_PALETTE_IMPROVEMENTS typedef struct { int rows; @@ -3290,10 +3471,10 @@ uint8_t *color_map; MapCdf map_cdf; ColorCost color_cost; -#if CONFIG_NEW_COLOR_MAP_CODING +#if CONFIG_PALETTE_IMPROVEMENTS IdentityRowCdf identity_row_cdf; IdentityRowCost identity_row_cost; -#endif // CONFIG_NEW_COLOR_MAP_CODING +#endif // CONFIG_PALETTE_IMPROVEMENTS } Av1ColorMapParam; static INLINE int is_nontrans_global_motion(const MACROBLOCKD *xd, @@ -3337,6 +3518,29 @@ } #endif // CONFIG_EXT_RECUR_PARTITIONS +#if CONFIG_CWP +// check whether compound weighted prediction can be allowed +static INLINE int is_cwp_allowed(const MB_MODE_INFO *mbmi) { +#if CONFIG_REFINEMV + 
if (mbmi->refinemv_flag) return 0; +#endif // CONFIG_REFINEMV + + if (mbmi->skip_mode) return 1; + int use_cwp = has_second_ref(mbmi) && mbmi->mode < NEAR_NEARMV_OPTFLOW && + mbmi->interinter_comp.type == COMPOUND_AVERAGE && + mbmi->motion_mode == SIMPLE_TRANSLATION; + use_cwp &= + (mbmi->mode == NEAR_NEARMV || is_joint_mvd_coding_mode(mbmi->mode)); + use_cwp &= (mbmi->jmvd_scale_mode == 0); + return use_cwp; +} +// Return the index for compound weighted prediction +static INLINE int8_t get_cwp_idx(const MB_MODE_INFO *mbmi) { + assert(mbmi->cwp_idx <= CWP_MAX && mbmi->cwp_idx >= CWP_MIN); + return mbmi->cwp_idx; +} +#endif + /*!\endcond */ #ifdef __cplusplus
diff --git a/av1/common/cfl.c b/av1/common/cfl.c index b244669..7f1f66f 100644 --- a/av1/common/cfl.c +++ b/av1/common/cfl.c
@@ -218,17 +218,19 @@ for (int i = 0; i < width; i += 2) { const int bot = i + input_stride; #if CONFIG_ADAPTIVE_DS_FILTER -#if DS_FRAME_LEVEL - const int filter_type = cm->features.ds_filter_type; -#else const int filter_type = cm->seq_params.enable_cfl_ds_filter; -#endif // DS_FRAME_LEVEL if (filter_type == 1) { output_q3[i >> 1] = input[AOMMAX(0, i - 1)] + 2 * input[i] + input[i + 1] + input[bot + AOMMAX(-1, -i)] + 2 * input[bot] + input[bot + 1]; } else if (filter_type == 2) { +#if CONFIG_CFL_IMPROVEMENTS + const int top = i - input_stride; + output_q3[i >> 1] = input[AOMMAX(0, i - 1)] + 4 * input[i] + + input[i + 1] + input[top] + input[bot]; +#else output_q3[i >> 1] = input[i] * 8; +#endif // CONFIG_CFL_IMPROVEMENTS } else { output_q3[i >> 1] = (input[i] + input[i + 1] + input[bot] + input[bot + 1] + 2) << 1; @@ -244,6 +246,25 @@ #endif #endif // CONFIG_ADAPTIVE_DS_FILTER } +#if CONFIG_ADPTIVE_DS_422 + } else if (sub_x) { + uint16_t *input = dst - input_stride; + for (int i = 0; i < width; i += 2) { +#if CONFIG_ADAPTIVE_DS_FILTER + const int filter_type = cm->seq_params.enable_cfl_ds_filter; + if (filter_type == 1) { + output_q3[i >> 1] = + (input[AOMMAX(0, i - 1)] + 2 * input[i] + input[i + 1]) << 1; + } else if (filter_type == 2) { + output_q3[i >> 1] = input[i] << 3; + } else { + output_q3[i >> 1] = (input[i] + input[i + 1]) << 2; + } +#else + output_q3[i >> 1] = input[i] << 3; +#endif // CONFIG_ADAPTIVE_DS_FILTER + } +#endif // CONFIG_ADPTIVE_DS_422 } else if (sub_y) { uint16_t *input = dst - 2 * input_stride; for (int i = 0; i < width; ++i) { @@ -277,16 +298,18 @@ for (int j = 0; j < height; j += 2) { const int bot = input_stride; #if CONFIG_ADAPTIVE_DS_FILTER -#if DS_FRAME_LEVEL - const int filter_type = cm->features.ds_filter_type; -#else const int filter_type = cm->seq_params.enable_cfl_ds_filter; -#endif // DS_FRAME_LEVEL if (filter_type == 1) { output_q3[j >> 1] = input[-1] + 2 * input[0] + input[1] + input[bot - 1] + 2 * input[bot] + input[bot + 
1]; } else if (filter_type == 2) { +#if CONFIG_CFL_IMPROVEMENTS + const int top = (j == 0) ? 0 : (0 - input_stride); + output_q3[j >> 1] = + input[-1] + 4 * input[0] + input[1] + input[top] + input[bot]; +#else output_q3[j >> 1] = input[0] * 8; +#endif // CONFIG_CFL_IMPROVEMENTS } else { output_q3[j >> 1] = (input[0] + input[1] + input[bot] + input[bot + 1]) << 1; @@ -302,6 +325,25 @@ #endif // CONFIG_ADAPTIVE_DS_FILTER input += input_stride * 2; } +#if CONFIG_ADPTIVE_DS_422 + } else if (sub_x) { + uint16_t *input = dst - 2; + for (int j = 0; j < height; ++j) { +#if CONFIG_ADAPTIVE_DS_FILTER + const int filter_type = cm->seq_params.enable_cfl_ds_filter; + if (filter_type == 1) { + output_q3[j] = (input[-1] + 2 * input[0] + input[1]) << 1; + } else if (filter_type == 2) { + output_q3[j] = input[0] << 3; + } else { + output_q3[j] = (input[0] + input[1]) << 2; + } +#else + output_q3[j] = input[0] << 3; +#endif // CONFIG_ADAPTIVE_DS_FILTER + input += input_stride; + } +#endif // CONFIG_ADPTIVE_DS_422 } else if (sub_y) { uint16_t *input = dst - 1; for (int j = 0; j < height; ++j) { @@ -609,7 +651,14 @@ int height) { for (int j = 0; j < height; j += 2) { for (int i = 0; i < width; i += 2) { +#if CONFIG_CFL_IMPROVEMENTS + const int top = (j == 0) ? 
i : (i - input_stride); + const int bot = i + input_stride; + output_q3[i >> 1] = input[AOMMAX(0, i - 1)] + 4 * input[i] + + input[i + 1] + input[top] + input[bot]; +#else output_q3[i >> 1] = input[i] * 8; +#endif // CONFIG_CFL_IMPROVEMENTS } input += input_stride << 1; output_q3 += CFL_BUF_LINE; @@ -631,6 +680,45 @@ } } +#if CONFIG_ADPTIVE_DS_422 +#if CONFIG_ADAPTIVE_DS_FILTER +void cfl_adaptive_luma_subsampling_422_hbd_c(const uint16_t *input, + int input_stride, + uint16_t *output_q3, int width, + int height, int filter_type) { + assert((height - 1) * CFL_BUF_LINE + width <= CFL_BUF_SQUARE); + for (int j = 0; j < height; j++) { + for (int i = 0; i < width; i += 2) { + if (filter_type == 1) { + output_q3[i >> 1] = + (input[AOMMAX(0, i - 1)] + 2 * input[i] + input[i + 1]) << 1; + } else if (filter_type == 2) { + output_q3[i >> 1] = (input[i]) << 3; + } else { + output_q3[i >> 1] = (input[i] + input[i + 1]) << 2; + } + } + input += input_stride; + output_q3 += CFL_BUF_LINE; + } +} +#else +void cfl_luma_subsampling_422_hbd_colocated(const uint16_t *input, + int input_stride, + uint16_t *output_q3, int width, + int height) { + assert((height - 1) * CFL_BUF_LINE + width <= CFL_BUF_SQUARE); + for (int j = 0; j < height; j++) { + for (int i = 0; i < width; i += 2) { + output_q3[i >> 1] = (input[i]) << 3; + } + input += input_stride; + output_q3 += CFL_BUF_LINE; + } +} +#endif // CONFIG_ADAPTIVE_DS_FILTER +#endif // CONFIG_ADPTIVE_DS_422 + static void cfl_luma_subsampling_444_hbd_c(const uint16_t *input, int input_stride, uint16_t *output_q3, int width, @@ -706,7 +794,14 @@ uint16_t *recon_buf_q3 = cfl->recon_buf_q3 + (store_row * CFL_BUF_LINE + store_col); #if CONFIG_ADAPTIVE_DS_FILTER +#if CONFIG_ADPTIVE_DS_422 + if (sub_x == 1 && sub_y == 0) { + cfl_adaptive_luma_subsampling_422_hbd_c(input, input_stride, recon_buf_q3, + width, height, filter_type); + } else if (filter_type == 1) { +#else if (filter_type == 1) { +#endif // CONFIG_ADPTIVE_DS_422 if (sub_x && sub_y) 
cfl_luma_subsampling_420_hbd_121_c(input, input_stride, recon_buf_q3, width, height); @@ -729,6 +824,11 @@ if (sub_x && sub_y) cfl_luma_subsampling_420_hbd_121_c(input, input_stride, recon_buf_q3, width, height); +#if CONFIG_ADPTIVE_DS_422 + else if (sub_x == 1 && sub_y == 0) + cfl_luma_subsampling_422_hbd_colocated(input, input_stride, recon_buf_q3, + width, height); +#endif // CONFIG_ADPTIVE_DS_422 else #endif cfl_subsampling_hbd(tx_size, sub_x, sub_y)(input, input_stride,
diff --git a/av1/common/cfl.h b/av1/common/cfl.h index 53cfc7c..2115a19 100644 --- a/av1/common/cfl.h +++ b/av1/common/cfl.h
@@ -96,13 +96,27 @@ int input_stride, uint16_t *output_q3, int width, int height); +#if CONFIG_ADPTIVE_DS_422 +void cfl_adaptive_luma_subsampling_422_hbd_c(const uint16_t *input, + int input_stride, + uint16_t *output_q3, int width, + int height, int filter_type); +#endif // CONFIG_ADPTIVE_DS_422 #endif // CONFIG_ADAPTIVE_DS_FILTER +#if CONFIG_ADPTIVE_DS_422 && !CONFIG_ADAPTIVE_DS_FILTER +void cfl_luma_subsampling_422_hbd_colocated(const uint16_t *input, + int input_stride, + uint16_t *output_q3, int width, + int height); +#endif // CONFIG_ADPTIVE_DS_422 && !CONFIG_ADAPTIVE_DS_FILTER + #if CONFIG_IMPROVED_CFL // 121 subsample filter void cfl_luma_subsampling_420_hbd_121_c(const uint16_t *input, int input_stride, uint16_t *output_q3, int width, int height); + // Get neighbor luma reconstruction pixels void cfl_implicit_fetch_neighbor_luma(const AV1_COMMON *cm, MACROBLOCKD *const xd, int row, int col,
diff --git a/av1/common/common_data.h b/av1/common/common_data.h index ada0d89..6b08776 100644 --- a/av1/common/common_data.h +++ b/av1/common/common_data.h
@@ -68,7 +68,19 @@ }; static const uint8_t fsc_bsize_groups[BLOCK_SIZES_ALL] = { - 0, 1, 1, 2, 3, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 5, 5, 5, 5 +#if CONFIG_ATC_DCTX_ALIGNED + 0, 1, 1, 2, 3, 3, 4, 5, 5, 5, 6, 6, 6, 6, 6, 6, +#if CONFIG_BLOCK_256 + 6, 6, 6, +#endif // CONFIG_BLOCK_256 + 3, 3, 4, 4, 6, 6 +#else + 0, 1, 1, 2, 3, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, +#if CONFIG_BLOCK_256 + 5, 5, 5, +#endif // CONFIG_BLOCK_256 + 3, 3, 5, 5, 5, 5 +#endif // CONFIG_ATC_DCTX_ALIGNED }; static const uint8_t num_pels_log2_lookup[BLOCK_SIZES_ALL] = { @@ -76,11 +88,19 @@ 13, 13, 14, 15, 15, 16, 6, 6, 8, 8, 10, 10 }; +#if CONFIG_CWP +// Supported weighting factor for compound weighted prediction +static const int8_t cwp_weighting_factor[2][MAX_CWP_NUM] = { + { 8, 12, 4, 10, 6 }, + { 8, 12, 4, 20, -4 }, +}; +#endif // CONFIG_CWP + #if CONFIG_EXT_RECUR_PARTITIONS /* clang-format off */ // This table covers all square blocks and 1:2/2:1 rectangular blocks static const BLOCK_SIZE - subsize_lookup[EXT_PARTITION_TYPES + 1][BLOCK_SIZES_ALL] = { { + subsize_lookup[ALL_PARTITION_TYPES][BLOCK_SIZES_ALL] = { { // PARTITION_NONE BLOCK_4X4, // 4 BLOCK_4X8, BLOCK_8X4, BLOCK_8X8, // 8 @@ -140,6 +160,52 @@ BLOCK_INVALID, BLOCK_4X4, // 4,16 BLOCK_INVALID, BLOCK_8X8, // 8,32 BLOCK_INVALID, BLOCK_16X16, // 32,64 +#if CONFIG_UNEVEN_4WAY + }, { // PARTITION_HORZ_4A + BLOCK_INVALID, // 4 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, // 8 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, // 16 + BLOCK_16X4, BLOCK_INVALID, BLOCK_INVALID, // 32 + BLOCK_32X8, BLOCK_INVALID, BLOCK_INVALID, // 64 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, // 128 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, // 256 + BLOCK_INVALID, BLOCK_INVALID, // 4,16 + BLOCK_INVALID, BLOCK_INVALID, // 8,32 + BLOCK_INVALID, BLOCK_INVALID, // 32,64 + }, { // PARTITION_HORZ_4B + BLOCK_INVALID, // 4 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, // 8 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, // 16 + BLOCK_16X4, 
BLOCK_INVALID, BLOCK_INVALID, // 32 + BLOCK_32X8, BLOCK_INVALID, BLOCK_INVALID, // 64 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, // 128 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, // 256 + BLOCK_INVALID, BLOCK_INVALID, // 4,16 + BLOCK_INVALID, BLOCK_INVALID, // 8,32 + BLOCK_INVALID, BLOCK_INVALID, // 32,64 + }, { // PARTITION_VERT_4A + BLOCK_INVALID, // 4 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, // 8 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, // 16 + BLOCK_INVALID, BLOCK_4X16, BLOCK_INVALID, // 32 + BLOCK_INVALID, BLOCK_8X32, BLOCK_INVALID, // 64 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, // 128 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, // 256 + BLOCK_INVALID, BLOCK_INVALID, // 4,16 + BLOCK_INVALID, BLOCK_INVALID, // 8,32 + BLOCK_INVALID, BLOCK_INVALID, // 32,64 + }, { // PARTITION_VERT_4B + BLOCK_INVALID, // 4 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, // 8 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, // 16 + BLOCK_INVALID, BLOCK_4X16, BLOCK_INVALID, // 32 + BLOCK_INVALID, BLOCK_8X32, BLOCK_INVALID, // 64 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, // 128 + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, // 256 + BLOCK_INVALID, BLOCK_INVALID, // 4,16 + BLOCK_INVALID, BLOCK_INVALID, // 8,32 + BLOCK_INVALID, BLOCK_INVALID, // 32,64 +#endif // CONFIG_UNEVEN_4WAY }, { // PARTITION_SPLIT BLOCK_INVALID, // 4 @@ -160,16 +226,52 @@ BLOCK_SIZE bsize, PARTITION_TYPE luma_part, int ssx, int ssy) { const int bh_chr = block_size_high[bsize] >> ssy; const int bw_chr = block_size_wide[bsize] >> ssx; + assert(bh_chr >= 16 && bw_chr >= 16 && + "Current implementation cannot handle SDP for sub 16x16 blocks!"); switch (luma_part) { case PARTITION_NONE: return PARTITION_NONE; case PARTITION_HORZ: return (bh_chr < 8) ? PARTITION_NONE : PARTITION_HORZ; + case PARTITION_VERT: return (bw_chr < 8) ? 
PARTITION_NONE : PARTITION_VERT; +#if CONFIG_UNEVEN_4WAY + case PARTITION_HORZ_4A: + if (bh_chr >= 32) { + return PARTITION_HORZ_4A; + } else if (bh_chr >= 8) { + return PARTITION_HORZ; + } else { + return PARTITION_NONE; + } + case PARTITION_HORZ_4B: + if (bh_chr >= 32) { + return PARTITION_HORZ_4B; + } else if (bh_chr >= 8) { + return PARTITION_HORZ; + } else { + return PARTITION_NONE; + } + case PARTITION_VERT_4A: + if (bw_chr >= 32) { + return PARTITION_VERT_4A; + } else if (bw_chr >= 8) { + return PARTITION_VERT; + } else { + return PARTITION_NONE; + } + case PARTITION_VERT_4B: + if (bw_chr >= 32) { + return PARTITION_VERT_4B; + } else if (bw_chr >= 8) { + return PARTITION_VERT; + } else { + return PARTITION_NONE; + } +#endif // CONFIG_UNEVEN_4WAY case PARTITION_HORZ_3: if (bh_chr >= 16) return PARTITION_HORZ_3; else return (bh_chr < 8) ? PARTITION_NONE : PARTITION_HORZ; - case PARTITION_VERT: return (bw_chr < 8) ? PARTITION_NONE : PARTITION_VERT; case PARTITION_VERT_3: if (bw_chr >= 16) return PARTITION_VERT_3; @@ -632,17 +734,17 @@ { 13, 3 }, }; -#if CONFIG_ATC_NEWTXSETS +#if CONFIG_ATC // Mapping of mode dependent TX based on intra modes. static const int av1_md_class[INTRA_MODES] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, }; // Mapping between mode dependent TX size groups based on allowed TX sizes. -static const int av1_size_class[MODE_DEPTX_TXSIZES] = { - 0, 1, 2, 3, 3, 0, 0, 1, 1, 2, 2, 3, 3, 0, 0, 1, 1, 2, 2, +static const int av1_size_class[TX_SIZES_ALL] = { + 0, 1, 2, 3, 3, 0, 0, 1, 1, 3, 3, 3, 3, 0, 0, 3, 3, 3, 3, }; -#endif // CONFIG_ATC_NEWTXSETS +#endif // CONFIG_ATC static AOM_INLINE bool is_bsize_geq(BLOCK_SIZE bsize1, BLOCK_SIZE bsize2) { if (bsize1 == BLOCK_INVALID || bsize2 == BLOCK_INVALID) {
diff --git a/av1/common/entropy.c b/av1/common/entropy.c index 2016bc1..bf459eb 100644 --- a/av1/common/entropy.c +++ b/av1/common/entropy.c
@@ -45,13 +45,13 @@ av1_copy(cm->fc->v_dc_sign_cdf, av1_default_v_dc_sign_cdfs[index]); av1_copy(cm->fc->v_ac_sign_cdf, av1_default_v_ac_sign_cdfs[index]); #endif // CONFIG_CONTEXT_DERIVATION -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC av1_copy(cm->fc->coeff_base_lf_cdf, av1_default_coeff_base_lf_multi_cdfs[index]); av1_copy(cm->fc->coeff_base_lf_eob_cdf, av1_default_coeff_base_lf_eob_multi_cdfs[index]); av1_copy(cm->fc->coeff_br_lf_cdf, av1_default_coeff_lps_lf_multi_cdfs[index]); -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC av1_copy(cm->fc->coeff_br_cdf, av1_default_coeff_lps_multi_cdfs[index]); av1_copy(cm->fc->coeff_base_cdf, av1_default_coeff_base_multi_cdfs[index]); av1_copy(cm->fc->idtx_sign_cdf, av1_default_idtx_sign_cdfs[index]); @@ -72,6 +72,10 @@ av1_copy(cm->fc->coeff_base_ph_cdf, av1_default_coeff_base_ph_cdfs[index]); av1_copy(cm->fc->coeff_br_ph_cdf, av1_default_coeff_br_ph_cdfs[index]); #endif // CONFIG_PAR_HIDING +#if CONFIG_ATC_DCTX_ALIGNED + av1_copy(cm->fc->coeff_base_bob_cdf, + av1_default_coeff_base_bob_multi_cdfs[index]); +#endif // CONFIG_ATC_DCTX_ALIGNED } static AOM_INLINE void reset_cdf_symbol_counter(aom_cdf_prob *cdf_ptr, @@ -129,19 +133,22 @@ RESET_CDF_COUNTER(fc->v_dc_sign_cdf, 2); RESET_CDF_COUNTER(fc->v_ac_sign_cdf, 2); #endif // CONFIG_CONTEXT_DERIVATION - RESET_CDF_COUNTER(fc->eob_flag_cdf16, 5); - RESET_CDF_COUNTER(fc->eob_flag_cdf32, 6); - RESET_CDF_COUNTER(fc->eob_flag_cdf64, 7); - RESET_CDF_COUNTER(fc->eob_flag_cdf128, 8); - RESET_CDF_COUNTER(fc->eob_flag_cdf256, 9); - RESET_CDF_COUNTER(fc->eob_flag_cdf512, 10); - RESET_CDF_COUNTER(fc->eob_flag_cdf1024, 11); + RESET_CDF_COUNTER(fc->eob_flag_cdf16, EOB_MAX_SYMS - 6); + RESET_CDF_COUNTER(fc->eob_flag_cdf32, EOB_MAX_SYMS - 5); + RESET_CDF_COUNTER(fc->eob_flag_cdf64, EOB_MAX_SYMS - 4); + RESET_CDF_COUNTER(fc->eob_flag_cdf128, EOB_MAX_SYMS - 3); + RESET_CDF_COUNTER(fc->eob_flag_cdf256, EOB_MAX_SYMS - 2); + RESET_CDF_COUNTER(fc->eob_flag_cdf512, EOB_MAX_SYMS - 1); + 
RESET_CDF_COUNTER(fc->eob_flag_cdf1024, EOB_MAX_SYMS); RESET_CDF_COUNTER(fc->coeff_base_eob_cdf, 3); -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC_DCTX_ALIGNED + RESET_CDF_COUNTER(fc->coeff_base_bob_cdf, 3); +#endif // CONFIG_ATC_DCTX_ALIGNED +#if CONFIG_ATC RESET_CDF_COUNTER(fc->coeff_base_lf_cdf, LF_BASE_SYMBOLS); RESET_CDF_COUNTER(fc->coeff_base_lf_eob_cdf, LF_BASE_SYMBOLS - 1); RESET_CDF_COUNTER(fc->coeff_br_lf_cdf, BR_CDF_SIZE); -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC RESET_CDF_COUNTER(fc->coeff_base_cdf, 4); RESET_CDF_COUNTER(fc->idtx_sign_cdf, 2); RESET_CDF_COUNTER(fc->coeff_base_cdf_idtx, 4); @@ -154,15 +161,19 @@ RESET_CDF_COUNTER(fc->drl_cdf[0], 2); RESET_CDF_COUNTER(fc->drl_cdf[1], 2); RESET_CDF_COUNTER(fc->drl_cdf[2], 2); -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT RESET_CDF_COUNTER(fc->skip_drl_cdf, 2); -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT #if CONFIG_OPTFLOW_REFINEMENT RESET_CDF_COUNTER(fc->use_optflow_cdf, 2); RESET_CDF_COUNTER(fc->inter_compound_mode_cdf, INTER_COMPOUND_REF_TYPES); #else RESET_CDF_COUNTER(fc->inter_compound_mode_cdf, INTER_COMPOUND_MODES); #endif // CONFIG_OPTFLOW_REFINEMENT + +#if CONFIG_CWP + RESET_CDF_COUNTER(fc->cwp_idx_cdf, 2); +#endif #if CONFIG_IMPROVED_JMVD RESET_CDF_COUNTER(fc->jmvd_scale_mode_cdf, JOINT_NEWMV_SCALE_FACTOR_CNT); RESET_CDF_COUNTER(fc->jmvd_amvd_scale_mode_cdf, JOINT_AMVD_SCALE_FACTOR_CNT); @@ -180,6 +191,11 @@ RESET_CDF_COUNTER(fc->interintra_cdf, 2); RESET_CDF_COUNTER(fc->wedge_interintra_cdf, 2); RESET_CDF_COUNTER(fc->interintra_mode_cdf, INTERINTRA_MODES); + +#if CONFIG_REFINEMV + RESET_CDF_COUNTER(fc->refinemv_flag_cdf, REFINEMV_NUM_MODES); +#endif // CONFIG_REFINEMV + #if CONFIG_EXTENDED_WARP_PREDICTION RESET_CDF_COUNTER(fc->obmc_cdf, 2); RESET_CDF_COUNTER(fc->warped_causal_cdf, 2); @@ -191,6 +207,9 @@ RESET_CDF_COUNTER(fc->warp_ref_idx_cdf[0], 2); RESET_CDF_COUNTER(fc->warp_ref_idx_cdf[1], 2); 
RESET_CDF_COUNTER(fc->warp_ref_idx_cdf[2], 2); +#if CONFIG_CWG_D067_IMPROVED_WARP + RESET_CDF_COUNTER(fc->warpmv_with_mvd_flag_cdf, 2); +#endif // CONFIG_CWG_D067_IMPROVED_WARP #endif // CONFIG_WARP_REF_LIST RESET_CDF_COUNTER(fc->warp_delta_param_cdf, WARP_DELTA_NUM_SYMBOLS); RESET_CDF_COUNTER(fc->warp_extend_cdf, 2); @@ -206,10 +225,10 @@ #endif // CONFIG_TIP RESET_CDF_COUNTER(fc->palette_y_size_cdf, PALETTE_SIZES); RESET_CDF_COUNTER(fc->palette_uv_size_cdf, PALETTE_SIZES); -#if CONFIG_NEW_COLOR_MAP_CODING +#if CONFIG_PALETTE_IMPROVEMENTS RESET_CDF_COUNTER(fc->identity_row_cdf_y, 2); RESET_CDF_COUNTER(fc->identity_row_cdf_uv, 2); -#endif // CONFIG_NEW_COLOR_MAP_CODING +#endif // CONFIG_PALETTE_IMPROVEMENTS for (int j = 0; j < PALETTE_SIZES; j++) { int nsymbs = j + PALETTE_MIN_SIZE; RESET_CDF_COUNTER_STRIDE(fc->palette_y_color_index_cdf[j], nsymbs, @@ -234,20 +253,20 @@ #endif // CONFIG_NEW_TX_PARTITION RESET_CDF_COUNTER(fc->comp_group_idx_cdf, 2); RESET_CDF_COUNTER(fc->skip_mode_cdfs, 2); -#if CONFIG_CONTEXT_DERIVATION +#if CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT RESET_CDF_COUNTER(fc->intra_inter_cdf[0], 2); RESET_CDF_COUNTER(fc->intra_inter_cdf[1], 2); #else RESET_CDF_COUNTER(fc->intra_inter_cdf, 2); -#endif // CONFIG_CONTEXT_DERIVATION +#endif // CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT RESET_CDF_COUNTER(fc->skip_txfm_cdfs, 2); reset_nmv_counter(&fc->nmvc); reset_nmv_counter(&fc->ndvc); RESET_CDF_COUNTER(fc->intrabc_cdf, 2); -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT RESET_CDF_COUNTER(fc->intrabc_mode_cdf, 2); RESET_CDF_COUNTER(fc->intrabc_drl_idx_cdf, 2); -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT RESET_CDF_COUNTER(fc->seg.tree_cdf, MAX_SEGMENTS); RESET_CDF_COUNTER(fc->seg.pred_cdf, 2); RESET_CDF_COUNTER(fc->seg.spatial_pred_seg_cdf, MAX_SEGMENTS); @@ -294,6 +313,7 @@ RESET_CDF_COUNTER_STRIDE(fc->uv_mode_cdf[0], UV_INTRA_MODES - 1, CDF_SIZE(UV_INTRA_MODES)); RESET_CDF_COUNTER(fc->uv_mode_cdf[1], 
UV_INTRA_MODES); + #if CONFIG_EXT_RECUR_PARTITIONS for (int plane_index = 0; plane_index < PARTITION_STRUCTURE_NUM; plane_index++) { @@ -318,6 +338,13 @@ for (RECT_PART_TYPE rect = 0; rect < NUM_RECT_PARTS; rect++) { for (int i = 0; i < PARTITION_CONTEXTS; i++) { RESET_CDF_COUNTER(fc->do_ext_partition_cdf[plane_index][rect][i], 2); +#if CONFIG_UNEVEN_4WAY + RESET_CDF_COUNTER( + fc->do_uneven_4way_partition_cdf[plane_index][rect][i], 2); + RESET_CDF_COUNTER( + fc->uneven_4way_partition_type_cdf[plane_index][rect][i], + NUM_UNEVEN_4WAY_PARTS); +#endif // CONFIG_UNEVEN_4WAY } } } @@ -361,10 +388,10 @@ } RESET_CDF_COUNTER_STRIDE(fc->intra_ext_tx_cdf[1], INTRA_TX_SET1, CDF_SIZE(TX_TYPES)); -#if !(CONFIG_ATC_NEWTXSETS && !CONFIG_ATC_REDUCED_TXSET) +#if !(CONFIG_ATC && !CONFIG_ATC_REDUCED_TXSET) RESET_CDF_COUNTER_STRIDE(fc->intra_ext_tx_cdf[2], INTRA_TX_SET2, CDF_SIZE(TX_TYPES)); -#endif // !(CONFIG_ATC_NEWTXSETS && !CONFIG_ATC_REDUCED_TXSET) +#endif // !(CONFIG_ATC && !CONFIG_ATC_REDUCED_TXSET) RESET_CDF_COUNTER_STRIDE(fc->inter_ext_tx_cdf[1], 16, CDF_SIZE(TX_TYPES)); RESET_CDF_COUNTER_STRIDE(fc->inter_ext_tx_cdf[2], 12, CDF_SIZE(TX_TYPES)); RESET_CDF_COUNTER_STRIDE(fc->inter_ext_tx_cdf[3], 2, CDF_SIZE(TX_TYPES));
diff --git a/av1/common/entropy.h b/av1/common/entropy.h index e6319aa..8efd218 100644 --- a/av1/common/entropy.h +++ b/av1/common/entropy.h
@@ -41,13 +41,18 @@ #define IDTX_LEVEL_CONTEXTS 14 #define EOB_COEF_CONTEXTS 9 +#if CONFIG_ATC_DCTX_ALIGNED +#define SIG_COEF_CONTEXTS_BOB 3 +#endif // CONFIG_ATC_DCTX_ALIGNED + +#define EOB_MAX_SYMS 11 #if CONFIG_PAR_HIDING #define COEFF_BASE_PH_CONTEXTS 5 #define COEFF_BR_PH_CONTEXTS 7 #endif // CONFIG_PAR_HIDING -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC // Number of coefficient coding contexts for the low-frequency region // for 2D and 1D transforms #define LF_SIG_COEF_CONTEXTS_2D 21 @@ -79,7 +84,7 @@ #define SIG_COEF_CONTEXTS_1D 16 #define SIG_COEF_CONTEXTS_EOB 4 #define SIG_COEF_CONTEXTS (SIG_COEF_CONTEXTS_2D + SIG_COEF_CONTEXTS_1D) -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC #define COEFF_BASE_CONTEXTS (SIG_COEF_CONTEXTS) #define DC_SIGN_CONTEXTS 3 @@ -132,23 +137,28 @@ switch (size) { case 4: return ctx[0] != 0; case 8: -#if CONFIG_H_PARTITION +#if CONFIG_EXT_RECUR_PARTITIONS return ctx[0] != 0 || ctx[1] != 0; #else return !!*(const uint16_t *)ctx; -#endif // CONFIG_H_PARTITION +#endif // CONFIG_EXT_RECUR_PARTITIONS case 16: -#if CONFIG_H_PARTITION +#if CONFIG_UNEVEN_4WAY + return ctx[0] != 0 || ctx[1] != 0 || ctx[2] != 0 || ctx[3] != 0; +#elif CONFIG_EXT_RECUR_PARTITIONS return !!(*(const uint16_t *)ctx | *(const uint16_t *)(ctx + 2)); #else return !!*(const uint32_t *)ctx; -#endif // CONFIG_H_PARTITION +#endif // CONFIG_UNEVEN_4WAY case 32: -#if CONFIG_H_PARTITION +#if CONFIG_UNEVEN_4WAY + return !!(*(const uint16_t *)ctx | *(const uint16_t *)(ctx + 2) | + *(const uint16_t *)(ctx + 4) | *(const uint16_t *)(ctx + 6)); +#elif CONFIG_EXT_RECUR_PARTITIONS return !!(*(const uint32_t *)ctx | *(const uint32_t *)(ctx + 4)); #else - return !*(const uint64_t *)ctx; -#endif // CONFIG_H_PARTITION + return !!*(const uint64_t *)ctx; +#endif // CONFIG_UNEVEN_4WAY case 64: return !!(*(const uint64_t *)ctx | *(const uint64_t *)(ctx + 8)); default: assert(0 && "Invalid transform 1d size."); break; }
diff --git a/av1/common/entropymode.c b/av1/common/entropymode.c index abfc9de..693ed11 100644 --- a/av1/common/entropymode.c +++ b/av1/common/entropymode.c
@@ -77,11 +77,48 @@ }; #endif +#if CONFIG_EXT_DIR +static const aom_cdf_prob + default_mrl_index_cdf[MRL_INDEX_CONTEXTS][CDF_SIZE(MRL_LINE_NUMBER)] = { + { AOM_CDF4(27852, 29491, 31129) }, + { AOM_CDF4(23920, 27852, 30474) }, + { AOM_CDF4(20316, 26542, 29818) }, + }; +#else static const aom_cdf_prob default_mrl_index_cdf[CDF_SIZE(MRL_LINE_NUMBER)] = { AOM_CDF4(24756, 29049, 31092) }; +#endif // CONFIG_EXT_DIR #if CONFIG_NEW_CONTEXT_MODELING +#if CONFIG_ATC_DCTX_ALIGNED +static const aom_cdf_prob + default_fsc_mode_cdf[FSC_MODE_CONTEXTS][FSC_BSIZE_CONTEXTS] + [CDF_SIZE(FSC_MODES)] = { { { AOM_CDF2(29360) }, + { AOM_CDF2(31501) }, + { AOM_CDF2(32278) }, + { AOM_CDF2(32371) }, + { AOM_CDF2(32560) }, + { AOM_CDF2(32531) } }, + { { AOM_CDF2(24973) }, + { AOM_CDF2(24385) }, + { AOM_CDF2(24145) }, + { AOM_CDF2(26258) }, + { AOM_CDF2(21038) }, + { AOM_CDF2(15313) } }, + { { AOM_CDF2(20868) }, + { AOM_CDF2(16117) }, + { AOM_CDF2(12254) }, + { AOM_CDF2(14424) }, + { AOM_CDF2(5350) }, + { AOM_CDF2(2348) } }, + { { AOM_CDF2(31265) }, + { AOM_CDF2(31284) }, + { AOM_CDF2(32247) }, + { AOM_CDF2(32253) }, + { AOM_CDF2(32560) }, + { AOM_CDF2(32533) } } }; +#else static const aom_cdf_prob default_fsc_mode_cdf[FSC_MODE_CONTEXTS][FSC_BSIZE_CONTEXTS] [CDF_SIZE(FSC_MODES)] = { { { AOM_CDF2(29802) }, @@ -104,6 +141,7 @@ { AOM_CDF2(32027) }, { AOM_CDF2(32272) }, { AOM_CDF2(32317) } } }; +#endif // CONFIG_ATC_DCTX_ALIGNED #else static const aom_cdf_prob default_fsc_mode_cdf[FSC_MODE_CONTEXTS][FSC_BSIZE_CONTEXTS] @@ -349,7 +387,6 @@ } }; - static aom_cdf_prob default_rect_type_cdf[PARTITION_STRUCTURE_NUM][PARTITION_CONTEXTS][CDF_SIZE(2)] = { // Luma @@ -440,6 +477,9 @@ } }; +// Note: For the partition CDFs below, most entries are unused. An optimized +// implementation could create smaller arrays with only used values + some +// mapping tables. 
static aom_cdf_prob default_do_ext_partition_cdf [PARTITION_STRUCTURE_NUM][NUM_RECT_PARTS][PARTITION_CONTEXTS] [CDF_SIZE(2)] = { @@ -623,7 +663,6 @@ } }; - static aom_cdf_prob default_do_square_split_cdf[PARTITION_STRUCTURE_NUM][SQUARE_SPLIT_CONTEXTS][CDF_SIZE(2)] = { // Luma @@ -642,6 +681,373 @@ }, }; +#if CONFIG_UNEVEN_4WAY +static aom_cdf_prob default_do_uneven_4way_partition_cdf + [PARTITION_STRUCTURE_NUM][NUM_RECT_PARTS][PARTITION_CONTEXTS] + [CDF_SIZE(2)] = { + // Luma + { + // HORZ + { + // BLOCK_4X4, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_4X8, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_8X4, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_8X8, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_8X16, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_16X8, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_16X16, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_16X32, + { AOM_CDF2(23888) }, { AOM_CDF2(26675) }, { AOM_CDF2(18213) }, { AOM_CDF2(21839) }, + // BLOCK_32X16, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_32X32, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_32X64, + { AOM_CDF2(18156) }, { AOM_CDF2(22434) }, { AOM_CDF2(17065) }, { AOM_CDF2(23048) }, + // BLOCK_64X32, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_64X64, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_64X128, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { 
AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_128X64, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_128X128, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, +#if CONFIG_BLOCK_256 + // BLOCK_128X256, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_256X128, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_256X256, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, +#endif // CONFIG_BLOCK_256 + }, + // VERT + { + // BLOCK_4X4, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_4X8, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_8X4, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_8X8, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_8X16, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_16X8, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_16X16, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_16X32, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_32X16, + { AOM_CDF2(18858) }, { AOM_CDF2(14975) }, { AOM_CDF2(21057) }, { AOM_CDF2(19369) }, + // BLOCK_32X32, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_32X64, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_64X32, + { AOM_CDF2(12384) }, { AOM_CDF2(11622) }, { AOM_CDF2(17504) }, { AOM_CDF2(17608) }, + // BLOCK_64X64, 
unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_64X128, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_128X64, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_128X128, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, +#if CONFIG_BLOCK_256 + // BLOCK_128X256, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_256X128, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_256X256, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, +#endif // CONFIG_BLOCK_256 + } + }, + // Chroma + { + // HORZ + { + // BLOCK_4X4, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_4X8, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_8X4, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_8X8, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_8X16, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_16X8, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_16X16, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_16X32, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_32X16, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_32X32, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_32X64, + { AOM_CDF2(16705) }, 
{ AOM_CDF2(20904) }, { AOM_CDF2(18601) }, { AOM_CDF2(22088) }, + // BLOCK_64X32, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_64X64, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_64X128, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_128X64, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_128X128, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, +#if CONFIG_BLOCK_256 + // BLOCK_128X256, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_256X128, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_256X256, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, +#endif // CONFIG_BLOCK_256 + }, + // VERT + { + // BLOCK_4X4, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_4X8, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_8X4, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_8X8, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_8X16, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_16X8, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_16X16, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_16X32, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_32X16, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { 
AOM_CDF2(16384) }, + // BLOCK_32X32, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_32X64, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_64X32, + { AOM_CDF2(15452) }, { AOM_CDF2(15654) }, { AOM_CDF2(20986) }, { AOM_CDF2(20924) }, + // BLOCK_64X64, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_64X128, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_128X64, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_128X128, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, +#if CONFIG_BLOCK_256 + // BLOCK_128X256, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_256X128, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_256X256, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, +#endif // CONFIG_BLOCK_256 + } + }, + }; + +static aom_cdf_prob default_uneven_4way_partition_type_cdf + [PARTITION_STRUCTURE_NUM][NUM_RECT_PARTS][PARTITION_CONTEXTS] + [CDF_SIZE(NUM_UNEVEN_4WAY_PARTS)] = { + // Luma + { + // HORZ + { + // BLOCK_4X4, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_4X8, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_8X4, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_8X8, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_8X16, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_16X8, unused + { AOM_CDF2(16384) }, { 
AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_16X16, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_16X32, + { AOM_CDF2(20372) }, { AOM_CDF2(19885) }, { AOM_CDF2(20532) }, { AOM_CDF2(18382) }, + // BLOCK_32X16, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_32X32, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_32X64, + { AOM_CDF2(20219) }, { AOM_CDF2(19289) }, { AOM_CDF2(18815) }, { AOM_CDF2(21548) }, + // BLOCK_64X32, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_64X64, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_64X128, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_128X64, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_128X128, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, +#if CONFIG_BLOCK_256 + // BLOCK_128X256, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_256X128, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_256X256, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, +#endif // CONFIG_BLOCK_256 + }, + // VERT + { + // BLOCK_4X4, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_4X8, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_8X4, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_8X8, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { 
AOM_CDF2(16384) }, + // BLOCK_8X16, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_16X8, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_16X16, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_16X32, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_32X16, + { AOM_CDF2(18025) }, { AOM_CDF2(18978) }, { AOM_CDF2(18146) }, { AOM_CDF2(20127) }, + // BLOCK_32X32, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_32X64, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_64X32, + { AOM_CDF2(17700) }, { AOM_CDF2(17721) }, { AOM_CDF2(18585) }, { AOM_CDF2(17912) }, + // BLOCK_64X64, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_64X128, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_128X64, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_128X128, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, +#if CONFIG_BLOCK_256 + // BLOCK_128X256, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_256X128, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_256X256, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, +#endif // CONFIG_BLOCK_256 + } + }, + // Chroma + { + // HORZ + { + // BLOCK_4X4, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_4X8, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // 
BLOCK_8X4, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_8X8, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_8X16, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_16X8, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_16X16, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_16X32, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_32X16, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_32X32, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_32X64, + { AOM_CDF2(17990) }, { AOM_CDF2(23831) }, { AOM_CDF2(17318) }, { AOM_CDF2(18155) }, + // BLOCK_64X32, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_64X64, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_64X128, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_128X64, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_128X128, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, +#if CONFIG_BLOCK_256 + // BLOCK_128X256, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_256X128, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_256X256, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, +#endif // CONFIG_BLOCK_256 + }, + // VERT + { + // BLOCK_4X4, unused + { AOM_CDF2(16384) }, { 
AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_4X8, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_8X4, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_8X8, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_8X16, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_16X8, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_16X16, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_16X32, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_32X16, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_32X32, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_32X64, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_64X32, + { AOM_CDF2(15888) }, { AOM_CDF2(18079) }, { AOM_CDF2(21845) }, { AOM_CDF2(18507) }, + // BLOCK_64X64, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_64X128, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_128X64, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_128X128, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, +#if CONFIG_BLOCK_256 + // BLOCK_128X256, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_256X128, unused + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + // BLOCK_256X256, unused + { 
AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, +#endif // CONFIG_BLOCK_256 + } + }, + }; +#endif // CONFIG_UNEVEN_4WAY // clang-format on #else static const aom_cdf_prob @@ -782,7 +1188,7 @@ { 0 }, }, }, -#if CONFIG_ATC_NEWTXSETS +#if CONFIG_ATC { { { AOM_CDF7(3368, 14670, 18533, 22660, 26441, 30407) }, @@ -1034,9 +1440,121 @@ { AOM_CDF4(8192, 16384, 24576) }, }, } -#endif // CONFIG_ATC_NEWTXSETS +#endif // CONFIG_ATC }; +#if CONFIG_ATC_DCTX_ALIGNED +static const aom_cdf_prob default_inter_ext_tx_cdf + [EXT_TX_SETS_INTER][EOB_TX_CTXS][EXT_TX_SIZES][CDF_SIZE(TX_TYPES)] = { + { + { + { 0 }, + { 0 }, + { 0 }, + { 0 }, + }, + { + { 0 }, + { 0 }, + { 0 }, + { 0 }, + }, + { + { 0 }, + { 0 }, + { 0 }, + { 0 }, + }, + }, + { + { + { AOM_CDF16(10569, 11484, 12610, 14058, 15880, 17184, 18929, + 19803, 20702, 21995, 22642, 23795, 26269, 28128, + 30321) }, + { AOM_CDF16(2184, 3028, 4033, 5127, 6410, 7400, 8605, 13222, + 15760, 18377, 20510, 22737, 25720, 27841, 30221) }, + { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, + 18432, 20480, 22528, 24576, 26624, 28672, 30720) }, + { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, + 18432, 20480, 22528, 24576, 26624, 28672, 30720) }, + }, + { + { AOM_CDF16(3919, 4527, 5261, 6289, 7251, 8118, 9179, 12234, + 12471, 12730, 12785, 13079, 18477, 21441, 26844) }, + { AOM_CDF16(307, 498, 725, 1194, 1577, 1962, 2378, 26001, 26439, + 26880, 27109, 27393, 29418, 30271, 31374) }, + { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, + 18432, 20480, 22528, 24576, 26624, 28672, 30720) }, + { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, + 18432, 20480, 22528, 24576, 26624, 28672, 30720) }, + }, + { + { AOM_CDF16(18553, 19114, 19866, 21300, 23396, 24613, 26561, + 26686, 26933, 27441, 27579, 27906, 29437, 30176, + 31237) }, + { AOM_CDF16(14114, 15409, 17116, 18125, 19579, 20544, 21927, + 24115, 25337, 26585, 27781, 28994, 29938, 30846, + 31760) }, + { 
AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, + 18432, 20480, 22528, 24576, 26624, 28672, 30720) }, + { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384, + 18432, 20480, 22528, 24576, 26624, 28672, 30720) }, + }, + }, + { + { + { AOM_CDF12(2731, 5461, 8192, 10923, 13653, 16384, 19115, 21845, + 24576, 27307, 30037) }, + { AOM_CDF12(2731, 5461, 8192, 10923, 13653, 16384, 19115, 21845, + 24576, 27307, 30037) }, + { AOM_CDF12(847, 1837, 2897, 8379, 12029, 15839, 18755, 21734, + 25244, 27430, 30001) }, + { AOM_CDF12(2731, 5461, 8192, 10923, 13653, 16384, 19115, 21845, + 24576, 27307, 30037) }, + }, + { + { AOM_CDF12(2731, 5461, 8192, 10923, 13653, 16384, 19115, 21845, + 24576, 27307, 30037) }, + { AOM_CDF12(2731, 5461, 8192, 10923, 13653, 16384, 19115, 21845, + 24576, 27307, 30037) }, + { AOM_CDF12(56, 370, 765, 27899, 28744, 29465, 30060, 30562, + 31471, 31806, 32229) }, + { AOM_CDF12(2731, 5461, 8192, 10923, 13653, 16384, 19115, 21845, + 24576, 27307, 30037) }, + }, + { + { AOM_CDF12(2731, 5461, 8192, 10923, 13653, 16384, 19115, 21845, + 24576, 27307, 30037) }, + { AOM_CDF12(2731, 5461, 8192, 10923, 13653, 16384, 19115, 21845, + 24576, 27307, 30037) }, + { AOM_CDF12(25781, 26621, 27994, 28993, 29530, 30097, 30597, + 31182, 31622, 32019, 32396) }, + { AOM_CDF12(2731, 5461, 8192, 10923, 13653, 16384, 19115, 21845, + 24576, 27307, 30037) }, + }, + }, + { + { + { AOM_CDF2(16384) }, + { AOM_CDF2(2100) }, + { AOM_CDF2(1066) }, + { AOM_CDF2(938) }, + }, + { + { AOM_CDF2(16384) }, + { AOM_CDF2(37) }, + { AOM_CDF2(15) }, + { AOM_CDF2(12) }, + }, + { + { AOM_CDF2(16384) }, + { AOM_CDF2(29478) }, + { AOM_CDF2(29184) }, + { AOM_CDF2(27781) }, + }, + }, + }; +#else static const aom_cdf_prob default_inter_ext_tx_cdf[EXT_TX_SETS_INTER][EXT_TX_SIZES][CDF_SIZE( TX_TYPES)] = { @@ -1073,6 +1591,7 @@ { AOM_CDF2(748) }, }, }; +#endif // CONFIG_ATC_DCTX_ALIGNED #if CONFIG_CROSS_CHROMA_TX static const aom_cdf_prob @@ -1095,7 +1614,17 @@ static const aom_cdf_prob 
default_cfl_sign_cdf[CDF_SIZE(CFL_JOINT_SIGNS)] = { AOM_CDF8(1418, 2123, 13340, 18405, 26972, 28343, 32294) }; - +#if CONFIG_CFL_IMPROVEMENTS +static const aom_cdf_prob + default_cfl_alpha_cdf[CFL_ALPHA_CONTEXTS][CDF_SIZE(CFL_ALPHABET_SIZE)] = { + { AOM_CDF8(7650, 20740, 31430, 32520, 32700, 32730, 32740) }, + { AOM_CDF8(14400, 23680, 28230, 31270, 32290, 32530, 32640) }, + { AOM_CDF8(11560, 22430, 28510, 31430, 32430, 32610, 32680) }, + { AOM_CDF8(27000, 31430, 32310, 32610, 32730, 32740, 32750) }, + { AOM_CDF8(17320, 26210, 29100, 30820, 31550, 32150, 32430) }, + { AOM_CDF8(14990, 22180, 26430, 28600, 29820, 31200, 31980) } + }; +#else static const aom_cdf_prob default_cfl_alpha_cdf[CFL_ALPHA_CONTEXTS][CDF_SIZE(CFL_ALPHABET_SIZE)] = { { AOM_CDF16(7637, 20719, 31401, 32481, 32657, 32688, 32692, 32696, 32700, @@ -1111,7 +1640,7 @@ { AOM_CDF16(14738, 21678, 25779, 27901, 29024, 30302, 30980, 31843, 32144, 32413, 32520, 32594, 32622, 32656, 32660) } }; - +#endif // CONFIG_CFL_IMPROVEMENTS static const aom_cdf_prob default_switchable_interp_cdf[SWITCHABLE_FILTER_CONTEXTS][CDF_SIZE( SWITCHABLE_FILTERS)] = { @@ -1281,6 +1810,21 @@ { AOM_CDF2(16618) }, { AOM_CDF2(14980) }, { AOM_CDF2(15963) } }; #endif // CONFIG_C076_INTER_MOD_CTX + +#if CONFIG_CWP +static const aom_cdf_prob default_cwp_idx_cdf[MAX_CWP_CONTEXTS][MAX_CWP_NUM - 1] + [CDF_SIZE(2)] = { + { { AOM_CDF2(16384) }, + { AOM_CDF2(16384) }, + { AOM_CDF2(16384) }, + { AOM_CDF2(16384) } }, + { { AOM_CDF2(16384) }, + { AOM_CDF2(16384) }, + { AOM_CDF2(16384) }, + { AOM_CDF2(16384) } }, + }; +#endif // CONFIG_CWP + #if CONFIG_IMPROVED_JMVD static const aom_cdf_prob default_jmvd_scale_mode_cdf[CDF_SIZE(JOINT_NEWMV_SCALE_FACTOR_CNT)] = { @@ -1292,13 +1836,13 @@ }; #endif // CONFIG_IMPROVED_JMVD -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT static const aom_cdf_prob default_skip_drl_cdf[3][CDF_SIZE(2)] = { { AOM_CDF2(24394) }, { AOM_CDF2(22637) }, { AOM_CDF2(21474) }, }; -#endif // 
CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT #if CONFIG_C076_INTER_MOD_CTX #if CONFIG_OPTFLOW_REFINEMENT @@ -1634,6 +2178,20 @@ }; #endif // CONFIG_WARPMV +#if CONFIG_REFINEMV +static const aom_cdf_prob default_refinemv_flag_cdf[NUM_REFINEMV_CTX][CDF_SIZE( + REFINEMV_NUM_MODES)] = { + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) } +}; +#endif // CONFIG_REFINEMV + static const aom_cdf_prob default_warp_delta_cdf[BLOCK_SIZES_ALL][CDF_SIZE( 2)] = { { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(4015) }, { AOM_CDF2(5407) }, { AOM_CDF2(4988) }, @@ -1652,6 +2210,19 @@ 2)] = { { AOM_CDF2(15903) } }; static const aom_cdf_prob default_warp_ref_idx2_cdf[WARP_REF_CONTEXTS][CDF_SIZE( 2)] = { { AOM_CDF2(18242) } }; +#if CONFIG_CWG_D067_IMPROVED_WARP +static const aom_cdf_prob + default_warpmv_with_mvd_flag_cdf[BLOCK_SIZES_ALL][CDF_SIZE(2)] = { + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, + { AOM_CDF2(16384) } + }; +#endif // CONFIG_CWG_D067_IMPROVED_WARP #endif // CONFIG_WARP_REF_LIST static const aom_cdf_prob 
default_warp_delta_param_cdf[2][CDF_SIZE(WARP_DELTA_NUM_SYMBOLS)] = { @@ -1721,7 +2292,7 @@ static const aom_cdf_prob default_bawp_cdf[CDF_SIZE(2)] = { AOM_CDF2(23664) }; #endif // CONFIG_BAWP -#if CONFIG_CONTEXT_DERIVATION +#if CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT #if CONFIG_NEW_CONTEXT_MODELING static const aom_cdf_prob default_intra_inter_cdf [INTRA_INTER_SKIP_TXFM_CONTEXTS][INTRA_INTER_CONTEXTS][CDF_SIZE(2)] = { @@ -1753,7 +2324,7 @@ { AOM_CDF2(20186) }, { AOM_CDF2(26538) } }; -#endif // CONFIG_CONTEXT_DERIVATION +#endif // CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT #if CONFIG_TIP static const aom_cdf_prob default_tip_cdf[TIP_CONTEXTS][CDF_SIZE(2)] = { @@ -2047,7 +2618,7 @@ { AOM_CDF2(32461) }, { AOM_CDF2(21488) } }; -#if CONFIG_NEW_COLOR_MAP_CODING +#if CONFIG_PALETTE_IMPROVEMENTS static const aom_cdf_prob default_identity_row_cdf_y[PALETTE_ROW_FLAG_CONTEXTS][CDF_SIZE(2)] = { { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) } @@ -2280,7 +2851,7 @@ { AOM_CDF8(31190, 31329, 31516, 31679, 31825, 32026, 32322) }, }, }; -#endif // CONFIG_NEW_COLOR_MAP_CODING +#endif // CONFIG_PALETTE_IMPROVEMENTS #if CONFIG_NEW_TX_PARTITION static const aom_cdf_prob default_inter_4way_txfm_partition_cdf @@ -2392,14 +2963,14 @@ 30531) }; #endif // CONFIG_NEW_CONTEXT_MODELING -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT static const aom_cdf_prob default_intrabc_mode_cdf[CDF_SIZE(2)] = { AOM_CDF2( 16384) }; static const aom_cdf_prob default_intrabc_drl_idx_cdf[MAX_REF_BV_STACK_SIZE - 1][CDF_SIZE(2)] = { { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) } }; -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT static const aom_cdf_prob default_filter_intra_mode_cdf[CDF_SIZE( FILTER_INTRA_MODES)] = { AOM_CDF5(8949, 12776, 17211, 29558) }; @@ -2625,10 +3196,10 @@ int av1_get_palette_color_index_context(const uint8_t *color_map, int stride, int r, int c, int palette_size, uint8_t *color_order, int *color_idx -#if 
CONFIG_NEW_COLOR_MAP_CODING +#if CONFIG_PALETTE_IMPROVEMENTS , int row_flag, int prev_row_flag -#endif // CONFIG_NEW_COLOR_MAP_CODING +#endif // CONFIG_PALETTE_IMPROVEMENTS ) { assert(palette_size <= PALETTE_MAX_SIZE); assert(r > 0 || c > 0); @@ -2687,12 +3258,12 @@ if (color_idx != NULL) *color_idx = inverse_color_order[color_map[r * stride + c]]; -#if CONFIG_NEW_COLOR_MAP_CODING +#if CONFIG_PALETTE_IMPROVEMENTS // Special context value for the first (and only) index of an identity row and // when the previous row is also an identity row. if (c == 0 && row_flag && prev_row_flag) return PALETTE_COLOR_INDEX_CONTEXTS - 1; -#endif // CONFIG_NEW_COLOR_MAP_CODING +#endif // CONFIG_PALETTE_IMPROVEMENTS // Get hash value of context. int color_index_ctx_hash = 0; @@ -2713,10 +3284,10 @@ int av1_fast_palette_color_index_context(const uint8_t *color_map, int stride, int r, int c, int *color_idx -#if CONFIG_NEW_COLOR_MAP_CODING +#if CONFIG_PALETTE_IMPROVEMENTS , int row_flag, int prev_row_flag -#endif // CONFIG_NEW_COLOR_MAP_CODING +#endif // CONFIG_PALETTE_IMPROVEMENTS ) { assert(r > 0 || c > 0); @@ -2806,12 +3377,12 @@ } } -#if CONFIG_NEW_COLOR_MAP_CODING +#if CONFIG_PALETTE_IMPROVEMENTS // Special context value for the first (and only) index of an identity row and // when the previous row is also an identity row. if (c == 0 && row_flag && prev_row_flag) return PALETTE_COLOR_INDEX_CONTEXTS - 1; -#endif // CONFIG_NEW_COLOR_MAP_CODING +#endif // CONFIG_PALETTE_IMPROVEMENTS // Get hash value of context. 
int color_index_ctx_hash = 0; @@ -2837,10 +3408,10 @@ (void)seq_params; av1_copy(fc->palette_y_size_cdf, default_palette_y_size_cdf); av1_copy(fc->palette_uv_size_cdf, default_palette_uv_size_cdf); -#if CONFIG_NEW_COLOR_MAP_CODING +#if CONFIG_PALETTE_IMPROVEMENTS av1_copy(fc->identity_row_cdf_y, default_identity_row_cdf_y); av1_copy(fc->identity_row_cdf_uv, default_identity_row_cdf_uv); -#endif // CONFIG_NEW_COLOR_MAP_CODING +#endif // CONFIG_PALETTE_IMPROVEMENTS av1_copy(fc->palette_y_color_index_cdf, default_palette_y_color_index_cdf); av1_copy(fc->palette_uv_color_index_cdf, default_palette_uv_color_index_cdf); #if !CONFIG_AIMC @@ -2886,6 +3457,10 @@ av1_copy(fc->drl_cdf[1], default_drl1_cdf); av1_copy(fc->drl_cdf[2], default_drl2_cdf); #endif // CONFIG_REF_MV_BANK +#if CONFIG_REFINEMV + av1_copy(fc->refinemv_flag_cdf, default_refinemv_flag_cdf); +#endif // CONFIG_REFINEMV + #if CONFIG_EXTENDED_WARP_PREDICTION av1_copy(fc->obmc_cdf, default_obmc_cdf); av1_copy(fc->warped_causal_cdf, default_warped_causal_cdf); @@ -2897,6 +3472,9 @@ av1_copy(fc->warp_ref_idx_cdf[0], default_warp_ref_idx0_cdf); av1_copy(fc->warp_ref_idx_cdf[1], default_warp_ref_idx1_cdf); av1_copy(fc->warp_ref_idx_cdf[2], default_warp_ref_idx2_cdf); +#if CONFIG_CWG_D067_IMPROVED_WARP + av1_copy(fc->warpmv_with_mvd_flag_cdf, default_warpmv_with_mvd_flag_cdf); +#endif // CONFIG_CWG_D067_IMPROVED_WARP #endif // CONFIG_WARP_REF_LIST av1_copy(fc->warp_delta_param_cdf, default_warp_delta_param_cdf); av1_copy(fc->warp_extend_cdf, default_warp_extend_cdf); @@ -2904,15 +3482,19 @@ av1_copy(fc->motion_mode_cdf, default_motion_mode_cdf); av1_copy(fc->obmc_cdf, default_obmc_cdf); #endif // CONFIG_EXTENDED_WARP_PREDICTION -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT av1_copy(fc->skip_drl_cdf, default_skip_drl_cdf); -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT #if CONFIG_BAWP av1_copy(fc->bawp_cdf, default_bawp_cdf); #endif // CONFIG_BAWP #if 
CONFIG_OPTFLOW_REFINEMENT av1_copy(fc->use_optflow_cdf, default_use_optflow_cdf); #endif // CONFIG_OPTFLOW_REFINEMENT + +#if CONFIG_CWP + av1_copy(fc->cwp_idx_cdf, default_cwp_idx_cdf); +#endif // CONFIG_CWP #if CONFIG_IMPROVED_JMVD av1_copy(fc->jmvd_scale_mode_cdf, default_jmvd_scale_mode_cdf); av1_copy(fc->jmvd_amvd_scale_mode_cdf, default_jmvd_amvd_scale_mode_cdf); @@ -2980,6 +3562,12 @@ av1_copy(fc->do_square_split_cdf, default_do_square_split_cdf); av1_copy(fc->rect_type_cdf, default_rect_type_cdf); av1_copy(fc->do_ext_partition_cdf, default_do_ext_partition_cdf); +#if CONFIG_UNEVEN_4WAY + av1_copy(fc->do_uneven_4way_partition_cdf, + default_do_uneven_4way_partition_cdf); + av1_copy(fc->uneven_4way_partition_type_cdf, + default_uneven_4way_partition_type_cdf); +#endif // CONFIG_UNEVEN_4WAY #else av1_copy(fc->partition_cdf, default_partition_cdf); #endif // CONFIG_EXT_RECUR_PARTITIONS @@ -2987,12 +3575,12 @@ av1_copy(fc->inter_ext_tx_cdf, default_inter_ext_tx_cdf); av1_copy(fc->skip_mode_cdfs, default_skip_mode_cdfs); av1_copy(fc->skip_txfm_cdfs, default_skip_txfm_cdfs); -#if CONFIG_CONTEXT_DERIVATION +#if CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT av1_copy(fc->intra_inter_cdf[0], default_intra_inter_cdf[0]); av1_copy(fc->intra_inter_cdf[1], default_intra_inter_cdf[1]); #else av1_copy(fc->intra_inter_cdf, default_intra_inter_cdf); -#endif // CONFIG_CONTEXT_DERIVATION +#endif // CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT for (int i = 0; i < SPATIAL_PREDICTION_PROBS; i++) av1_copy(fc->seg.spatial_pred_seg_cdf[i], default_spatial_pred_seg_tree_cdf[i]); @@ -3010,10 +3598,10 @@ av1_copy(fc->cfl_sign_cdf, default_cfl_sign_cdf); av1_copy(fc->cfl_alpha_cdf, default_cfl_alpha_cdf); av1_copy(fc->intrabc_cdf, default_intrabc_cdf); -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT av1_copy(fc->intrabc_mode_cdf, default_intrabc_mode_cdf); av1_copy(fc->intrabc_drl_idx_cdf, default_intrabc_drl_idx_cdf); -#endif // CONFIG_BVP_IMPROVEMENT +#endif // 
CONFIG_IBC_BV_IMPROVEMENT av1_copy(fc->stx_cdf, default_stx_cdf); #if CONFIG_FLEX_MVRES av1_copy(fc->pb_mv_precision_cdf, default_pb_mv_precision_cdf);
diff --git a/av1/common/entropymode.h b/av1/common/entropymode.h index 7875226..15fb5d9 100644 --- a/av1/common/entropymode.h +++ b/av1/common/entropymode.h
@@ -31,7 +31,7 @@ #define INTER_OFFSET(mode) ((mode)-NEARMV) #define INTER_COMPOUND_OFFSET(mode) (uint8_t)((mode)-NEAR_NEARMV) // Number of possible contexts for a color index. -#if CONFIG_NEW_COLOR_MAP_CODING +#if CONFIG_PALETTE_IMPROVEMENTS // As can be seen from av1_get_palette_color_index_context(), the possible // contexts are (2,0,0), (2,2,1), (3,2,0), (4,1,0), (5,0,0) pluss one // extra case for the first element of an identity row. These are mapped to @@ -43,7 +43,7 @@ // contexts are (2,0,0), (2,2,1), (3,2,0), (4,1,0), (5,0,0). These are mapped to // a value from 0 to 4 using 'palette_color_index_context_lookup' table. #define PALETTE_COLOR_INDEX_CONTEXTS 5 -#endif // CONFIG_NEW_COLOR_MAP_CODING +#endif // CONFIG_PALETTE_IMPROVEMENTS // Palette Y mode context for a block is determined by number of neighboring // blocks (top and/or left) using a palette for Y plane. So, possible Y mode' @@ -69,7 +69,15 @@ #define KF_MODE_CONTEXTS 5 #define FSC_MODE_CONTEXTS 4 +#if CONFIG_ATC_DCTX_ALIGNED +#define FSC_BSIZE_CONTEXTS 6 +#else #define FSC_BSIZE_CONTEXTS 5 +#endif // CONFIG_ATC_DCTX_ALIGNED + +#if CONFIG_EXT_DIR +#define MRL_INDEX_CONTEXTS 3 +#endif // CONFIG_EXT_DIR #define COMPREF_BIT_TYPES 2 #define RANKED_REF0_TO_PRUNE 3 @@ -114,6 +122,11 @@ #define WARP_EXTEND_CTXS2 5 #endif // CONFIG_EXTENDED_WARP_PREDICTION +#if CONFIG_REFINEMV +#define NUM_REFINEMV_CTX 24 +#define REFINEMV_NUM_MODES 2 +#endif // CONFIG_REFINEMV + struct AV1Common; typedef struct { @@ -134,6 +147,16 @@ [CDF_SIZE(2)]; aom_cdf_prob v_ac_sign_cdf[CROSS_COMPONENT_CONTEXTS][CDF_SIZE(2)]; #endif // CONFIG_CONTEXT_DERIVATION +#if CONFIG_ATC_DCTX_ALIGNED + aom_cdf_prob coeff_base_bob_cdf[SIG_COEF_CONTEXTS_BOB][CDF_SIZE(3)]; + aom_cdf_prob eob_flag_cdf16[PLANE_TYPES][CDF_SIZE(EOB_MAX_SYMS - 6)]; + aom_cdf_prob eob_flag_cdf32[PLANE_TYPES][CDF_SIZE(EOB_MAX_SYMS - 5)]; + aom_cdf_prob eob_flag_cdf64[PLANE_TYPES][CDF_SIZE(EOB_MAX_SYMS - 4)]; + aom_cdf_prob 
eob_flag_cdf128[PLANE_TYPES][CDF_SIZE(EOB_MAX_SYMS - 3)]; + aom_cdf_prob eob_flag_cdf256[PLANE_TYPES][CDF_SIZE(EOB_MAX_SYMS - 2)]; + aom_cdf_prob eob_flag_cdf512[PLANE_TYPES][CDF_SIZE(EOB_MAX_SYMS - 1)]; + aom_cdf_prob eob_flag_cdf1024[PLANE_TYPES][CDF_SIZE(EOB_MAX_SYMS)]; +#else aom_cdf_prob eob_flag_cdf16[PLANE_TYPES][2][CDF_SIZE(5)]; aom_cdf_prob eob_flag_cdf32[PLANE_TYPES][2][CDF_SIZE(6)]; aom_cdf_prob eob_flag_cdf64[PLANE_TYPES][2][CDF_SIZE(7)]; @@ -141,6 +164,7 @@ aom_cdf_prob eob_flag_cdf256[PLANE_TYPES][2][CDF_SIZE(9)]; aom_cdf_prob eob_flag_cdf512[PLANE_TYPES][2][CDF_SIZE(10)]; aom_cdf_prob eob_flag_cdf1024[PLANE_TYPES][2][CDF_SIZE(11)]; +#endif // CONFIG_ATC_DCTX_ALIGNED aom_cdf_prob coeff_base_eob_cdf[TX_SIZES][PLANE_TYPES][SIG_COEF_CONTEXTS_EOB] [CDF_SIZE(3)]; aom_cdf_prob coeff_base_cdf[TX_SIZES][PLANE_TYPES][SIG_COEF_CONTEXTS] @@ -148,7 +172,7 @@ aom_cdf_prob idtx_sign_cdf[IDTX_SIGN_CONTEXTS][CDF_SIZE(2)]; aom_cdf_prob coeff_base_cdf_idtx[IDTX_SIG_COEF_CONTEXTS][CDF_SIZE(4)]; aom_cdf_prob coeff_br_cdf_idtx[IDTX_LEVEL_CONTEXTS][CDF_SIZE(BR_CDF_SIZE)]; -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC aom_cdf_prob coeff_base_lf_cdf[TX_SIZES][PLANE_TYPES][LF_SIG_COEF_CONTEXTS] [CDF_SIZE(LF_BASE_SYMBOLS)]; aom_cdf_prob coeff_base_lf_eob_cdf[TX_SIZES][PLANE_TYPES] @@ -160,7 +184,7 @@ #else aom_cdf_prob coeff_br_cdf[TX_SIZES][PLANE_TYPES][LEVEL_CONTEXTS] [CDF_SIZE(BR_CDF_SIZE)]; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC #if CONFIG_PAR_HIDING aom_cdf_prob coeff_base_ph_cdf[COEFF_BASE_PH_CONTEXTS] [CDF_SIZE(NUM_BASE_LEVELS + 2)]; @@ -174,9 +198,14 @@ #endif // CONFIG_WARPMV aom_cdf_prob drl_cdf[3][DRL_MODE_CONTEXTS][CDF_SIZE(2)]; -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT aom_cdf_prob skip_drl_cdf[3][CDF_SIZE(2)]; -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT + +#if CONFIG_REFINEMV + aom_cdf_prob refinemv_flag_cdf[NUM_REFINEMV_CTX] + [CDF_SIZE(REFINEMV_NUM_MODES)]; +#endif // 
CONFIG_REFINEMV #if CONFIG_OPTFLOW_REFINEMENT aom_cdf_prob use_optflow_cdf[INTER_COMPOUND_MODE_CONTEXTS][CDF_SIZE(2)]; @@ -186,6 +215,10 @@ aom_cdf_prob inter_compound_mode_cdf[INTER_COMPOUND_MODE_CONTEXTS] [CDF_SIZE(INTER_COMPOUND_MODES)]; #endif // CONFIG_OPTFLOW_REFINEMENT + +#if CONFIG_CWP + aom_cdf_prob cwp_idx_cdf[MAX_CWP_CONTEXTS][MAX_CWP_NUM - 1][CDF_SIZE(2)]; +#endif // CONFIG_CWP #if CONFIG_IMPROVED_JMVD aom_cdf_prob jmvd_scale_mode_cdf[CDF_SIZE(JOINT_NEWMV_SCALE_FACTOR_CNT)]; aom_cdf_prob jmvd_amvd_scale_mode_cdf[CDF_SIZE(JOINT_AMVD_SCALE_FACTOR_CNT)]; @@ -214,6 +247,9 @@ #endif // CONFIG_WARPMV #if CONFIG_WARP_REF_LIST aom_cdf_prob warp_ref_idx_cdf[3][WARP_REF_CONTEXTS][CDF_SIZE(2)]; +#if CONFIG_CWG_D067_IMPROVED_WARP + aom_cdf_prob warpmv_with_mvd_flag_cdf[BLOCK_SIZES_ALL][CDF_SIZE(2)]; +#endif // CONFIG_CWG_D067_IMPROVED_WARP #endif // CONFIG_WARP_REF_LIST aom_cdf_prob warp_delta_param_cdf[2][CDF_SIZE(WARP_DELTA_NUM_SYMBOLS)]; @@ -231,10 +267,10 @@ #endif // CONFIG_TIP aom_cdf_prob palette_y_size_cdf[PALATTE_BSIZE_CTXS][CDF_SIZE(PALETTE_SIZES)]; aom_cdf_prob palette_uv_size_cdf[PALATTE_BSIZE_CTXS][CDF_SIZE(PALETTE_SIZES)]; -#if CONFIG_NEW_COLOR_MAP_CODING +#if CONFIG_PALETTE_IMPROVEMENTS aom_cdf_prob identity_row_cdf_y[PALETTE_ROW_FLAG_CONTEXTS][CDF_SIZE(2)]; aom_cdf_prob identity_row_cdf_uv[PALETTE_ROW_FLAG_CONTEXTS][CDF_SIZE(2)]; -#endif // CONFIG_NEW_COLOR_MAP_CODING +#endif // CONFIG_PALETTE_IMPROVEMENTS aom_cdf_prob palette_y_color_index_cdf[PALETTE_SIZES] [PALETTE_COLOR_INDEX_CONTEXTS] [CDF_SIZE(PALETTE_COLORS)]; @@ -268,12 +304,12 @@ aom_cdf_prob comp_group_idx_cdf[COMP_GROUP_IDX_CONTEXTS][CDF_SIZE(2)]; aom_cdf_prob skip_mode_cdfs[SKIP_MODE_CONTEXTS][CDF_SIZE(2)]; aom_cdf_prob skip_txfm_cdfs[SKIP_CONTEXTS][CDF_SIZE(2)]; -#if CONFIG_CONTEXT_DERIVATION +#if CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT aom_cdf_prob intra_inter_cdf[INTRA_INTER_SKIP_TXFM_CONTEXTS] [INTRA_INTER_CONTEXTS][CDF_SIZE(2)]; #else aom_cdf_prob 
intra_inter_cdf[INTRA_INTER_CONTEXTS][CDF_SIZE(2)]; -#endif // CONFIG_CONTEXT_DERIVATION +#endif // CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT nmv_context nmvc; nmv_context ndvc; #if CONFIG_NEW_CONTEXT_MODELING @@ -281,10 +317,10 @@ #else aom_cdf_prob intrabc_cdf[CDF_SIZE(2)]; #endif // CONFIG_NEW_CONTEXT_MODELING -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT aom_cdf_prob intrabc_mode_cdf[CDF_SIZE(2)]; aom_cdf_prob intrabc_drl_idx_cdf[MAX_REF_BV_STACK_SIZE - 1][CDF_SIZE(2)]; -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT struct segmentation_probs seg; aom_cdf_prob filter_intra_cdfs[BLOCK_SIZES_ALL][CDF_SIZE(2)]; aom_cdf_prob filter_intra_mode_cdf[CDF_SIZE(FILTER_INTRA_MODES)]; @@ -324,7 +360,11 @@ aom_cdf_prob uv_mode_cdf[CFL_ALLOWED_TYPES][INTRA_MODES] [CDF_SIZE(UV_INTRA_MODES)]; #endif // !CONFIG_AIMC +#if CONFIG_EXT_DIR + aom_cdf_prob mrl_index_cdf[MRL_INDEX_CONTEXTS][CDF_SIZE(MRL_LINE_NUMBER)]; +#else aom_cdf_prob mrl_index_cdf[CDF_SIZE(MRL_LINE_NUMBER)]; +#endif // CONFIG_EXT_DIR aom_cdf_prob fsc_mode_cdf[FSC_MODE_CONTEXTS][FSC_BSIZE_CONTEXTS] [CDF_SIZE(FSC_MODES)]; #if CONFIG_IMPROVED_CFL @@ -348,7 +388,17 @@ [CDF_SIZE(2)]; aom_cdf_prob do_ext_partition_cdf[PARTITION_STRUCTURE_NUM][NUM_RECT_PARTS] [PARTITION_CONTEXTS][CDF_SIZE(2)]; +#if CONFIG_UNEVEN_4WAY + aom_cdf_prob do_uneven_4way_partition_cdf[PARTITION_STRUCTURE_NUM] + [NUM_RECT_PARTS][PARTITION_CONTEXTS] + [CDF_SIZE(2)]; + aom_cdf_prob uneven_4way_partition_type_cdf[PARTITION_STRUCTURE_NUM] + [NUM_RECT_PARTS] + [PARTITION_CONTEXTS] + [CDF_SIZE(NUM_UNEVEN_4WAY_PARTS)]; +#endif // CONFIG_UNEVEN_4WAY #else + // Partition type for a square block, without limitations. 
aom_cdf_prob partition_cdf[PARTITION_STRUCTURE_NUM][PARTITION_CONTEXTS] [CDF_SIZE(EXT_PARTITION_TYPES)]; #endif // CONFIG_EXT_RECUR_PARTITIONS @@ -378,8 +428,13 @@ aom_cdf_prob delta_lf_cdf[CDF_SIZE(DELTA_LF_PROBS + 1)]; aom_cdf_prob intra_ext_tx_cdf[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES] [CDF_SIZE(TX_TYPES)]; +#if CONFIG_ATC_DCTX_ALIGNED + aom_cdf_prob inter_ext_tx_cdf[EXT_TX_SETS_INTER][EOB_TX_CTXS][EXT_TX_SIZES] + [CDF_SIZE(TX_TYPES)]; +#else aom_cdf_prob inter_ext_tx_cdf[EXT_TX_SETS_INTER][EXT_TX_SIZES] [CDF_SIZE(TX_TYPES)]; +#endif // CONFIG_ATC_DCTX_ALIGNED aom_cdf_prob cfl_sign_cdf[CDF_SIZE(CFL_JOINT_SIGNS)]; aom_cdf_prob cfl_alpha_cdf[CFL_ALPHA_CONTEXTS][CDF_SIZE(CFL_ALPHABET_SIZE)]; aom_cdf_prob stx_cdf[TX_SIZES][CDF_SIZE(STX_TYPES)]; @@ -432,7 +487,7 @@ { 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 4, 5, 3, 6, 7, 8 }, }; -#if CONFIG_ATC_NEWTXSETS +#if CONFIG_ATC static const int av1_md_type2idx[EXT_TX_SIZES][INTRA_MODES][TX_TYPES] = { { { 0, 2, 3, 1, 0, 0, 0, 4, 5, 0, 0, 0, 0, 6, 0, 0 }, // mode_class: 0 @@ -572,7 +627,7 @@ ? av1_md_idx2type[size_idx][av1_md_class[intra_mode]][tx_idx] : av1_ext_tx_inv[tx_set_type][tx_idx]; } -#endif // CONFIG_ATC_NEWTXSETS +#endif // CONFIG_ATC void av1_set_default_ref_deltas(int8_t *ref_deltas); void av1_set_default_mode_deltas(int8_t *mode_deltas); @@ -674,7 +729,7 @@ int av1_get_palette_color_index_context(const uint8_t *color_map, int stride, int r, int c, int palette_size, uint8_t *color_order, int *color_idx -#if CONFIG_NEW_COLOR_MAP_CODING +#if CONFIG_PALETTE_IMPROVEMENTS , int row_flag, int prev_row_flag #endif @@ -683,7 +738,7 @@ // exploiting the fact that the encoder does not need to maintain a color order. int av1_fast_palette_color_index_context(const uint8_t *color_map, int stride, int r, int c, int *color_idx -#if CONFIG_NEW_COLOR_MAP_CODING +#if CONFIG_PALETTE_IMPROVEMENTS , int row_flag, int prev_row_flag #endif
diff --git a/av1/common/enums.h b/av1/common/enums.h index 7876d29..a86f03e 100644 --- a/av1/common/enums.h +++ b/av1/common/enums.h
@@ -28,6 +28,15 @@ /*!\cond */ #undef MAX_SB_SIZE +#define BAWP_BUGFIX 1 + +#if CONFIG_REFINEMV +#define SINGLE_STEP_SEARCH 0 +#endif // CONFIG_REFINEMV + +#if CONFIG_D071_IMP_MSK_BLD +#define DEFAULT_IMP_MSK_WT 0 // default implict masked blending weight +#endif // CONFIG_D071_IMP_MSK_BLD #if CONFIG_WEDGE_MOD_EXT /*WEDGE_0 is defined in the three o'clock direciton, the angles are defined in @@ -59,17 +68,20 @@ #define H_WEDGE_ANGLES 10 #define NUM_WEDGE_DIST 4 #define MAX_WEDGE_TYPES 68 +#define WEDGE_BLD_SIG 1 // 0 for linear blending, 1 for sigmoid blending +#define WEDGE_BLD_LUT_SIZE 128 #endif // CONFIG_WEDGE_MOD_EXT -#if CONFIG_ADAPTIVE_DS_FILTER -#define DS_FRAME_LEVEL 1 // Signal at key frame -#endif - -#if CONFIG_WARP_REF_LIST && CONFIG_C043_MVP_IMPROVEMENTS +#if CONFIG_WARP_REF_LIST && CONFIG_MVP_IMPROVEMENT #define WARP_CU_BANK 1 #else #define WARP_CU_BANK 0 -#endif // CONFIG_WARP_REF_LIST && CONFIG_C043_MVP_IMPROVEMENTS +#endif // CONFIG_WARP_REF_LIST && CONFIG_MVP_IMPROVEMENT + +#if CONFIG_REFINEMV +#define REFINEMV_SUBBLOCK_WIDTH 16 +#define REFINEMV_SUBBLOCK_HEIGHT 16 +#endif // CONFIG_REFINEMV // Cross-Component Sample Offset (CCSO) #if CONFIG_CCSO @@ -179,14 +191,14 @@ #define IST_8x8_WIDTH 64 #define IST_8x8_HEIGHT 32 -#if CONFIG_ATC_NEWTXSETS -// TX sizes used for mode dependent TX sets -#define MODE_DEPTX_TXSIZES 19 -#endif // CONFIG_ATC_NEWTXSETS - #define FSC_MODES 2 +#if CONFIG_ATC_DCTX_ALIGNED +#define FSC_MAXWIDTH 32 +#define FSC_MAXHEIGHT 32 +#else #define FSC_MAXWIDTH 16 #define FSC_MAXHEIGHT 16 +#endif // CONFIG_ATC_DCTX_ALIGNED #define FSC_MINWIDTH 4 #define FSC_MINHEIGHT 4 @@ -303,14 +315,37 @@ // #if CONFIG_EXT_RECUR_PARTITIONS // HORZ_3 VERT_3 -// +--------------+ +---+------+---+ -// | | | | | | -// +--------------+ | | | | -// | | | | | | -// | | | | | | -// +--------------+ | | | | -// | | | | | | -// +--------------+ +---+------+---+ +// +---------------+ +---+------+---+ +// | | | | | | +// +---------------+ | | | | +// | | | | 
|______| | +// | | | | | | | +// +---------------+ | | | | +// | | | | | | +// +---------------+ +---+------+---+ +#if CONFIG_UNEVEN_4WAY +// HORZ_4A HORZ_4B +// +---------------+ +---------------+ +// | | | | +// +---------------+ +---------------+ +// | | | | +// | | | | +// +---------------+ | | +// | | | | +// | | +---------------+ +// | | | | +// | | | | +// +---------------+ +---------------+ +// | | | | +// +---------------+ +---------------+ +// +// VERT_4A VERT_4B +// +-------------------------+ +-------------------------+ +// | | | | | | | | | | +// | | | | | | | | | | +// | | | | | | | | | | +// +-------------------------+ +-------------------------+ +#endif // CONFIG_UNEVEN_4WAY #else // HORZ_A HORZ_B VERT_A VERT_B // +---+---+ +-------+ +---+---+ +---+---+ @@ -332,10 +367,20 @@ PARTITION_VERT, PARTITION_HORZ_3, // 3 horizontal sub-partitions with ratios 4:1, 2:1 and 4:1 PARTITION_VERT_3, // 3 vertical sub-partitions with ratios 4:1, 2:1 and 4:1 +#if CONFIG_UNEVEN_4WAY + PARTITION_HORZ_4A, // 4 horizontal uneven sub-partitions (1:2:4:1). + PARTITION_HORZ_4B, // 4 horizontal uneven sub-partitions (1:4:2:1). + PARTITION_VERT_4A, // 4 vertical uneven sub-partitions (1:2:4:1). + PARTITION_VERT_4B, // 4 vertical uneven sub-partitions (1:4:2:1). +#endif // CONFIG_UNEVEN_4WAY PARTITION_SPLIT, + EXT_PARTITION_TYPES = PARTITION_SPLIT, + ALL_PARTITION_TYPES = EXT_PARTITION_TYPES + 1, PARTITION_TYPES = PARTITION_VERT + 1, - EXT_PARTITION_TYPES = PARTITION_VERT_3 + 1, - ALL_PARTITION_TYPES = PARTITION_SPLIT + 1, +#if !CONFIG_UNEVEN_4WAY + LIMITED_PARTITION_TYPES = PARTITION_TYPES - 1, + LIMITED_EXT_PARTITION_TYPES = EXT_PARTITION_TYPES - 1, +#endif // !CONFIG_UNEVEN_4WAY PARTITION_INVALID = 255 } UENUM1BYTE(PARTITION_TYPE); #else // CONFIG_EXT_RECUR_PARTITIONS @@ -355,6 +400,7 @@ PARTITION_INVALID = 255 } UENUM1BYTE(PARTITION_TYPE); #endif // CONFIG_EXT_RECUR_PARTITIONS + // Rectangular partition types. 
enum { HORZ = 0, @@ -362,6 +408,16 @@ NUM_RECT_PARTS, RECT_INVALID = NUM_RECT_PARTS } UENUM1BYTE(RECT_PART_TYPE); + +#if CONFIG_UNEVEN_4WAY +// Uneven 4-way partition types. +enum { + UNEVEN_4A = 0, + UNEVEN_4B, + NUM_UNEVEN_4WAY_PARTS, +} UENUM1BYTE(UNEVEN_4WAY_PART_TYPE); +#endif // CONFIG_UNEVEN_4WAY + typedef char PARTITION_CONTEXT; #define PARTITION_PLOFFSET 4 // number of probability models per block size @@ -568,21 +624,24 @@ EXT_TX_SET_DTT9_IDTX_1DDCT, // Discrete Trig transforms w/ flip (9) + Identity (1) + 1D Hor/Ver (6) EXT_TX_SET_ALL16, -#if CONFIG_ATC_NEWTXSETS +#if CONFIG_ATC EXT_NEW_TX_SET, -#endif // CONFIG_ATC_NEWTXSETS +#endif // CONFIG_ATC EXT_TX_SET_TYPES } UENUM1BYTE(TxSetType); +#if CONFIG_ATC_DCTX_ALIGNED +#define EOB_TX_CTXS 3 +#endif // CONFIG_ATC_DCTX_ALIGNED #define EXT_TX_SIZES 4 // number of sizes that use extended transforms #define EXT_TX_SETS_INTER 4 // Sets of transform selections for INTER -#if CONFIG_ATC_NEWTXSETS && !CONFIG_ATC_REDUCED_TXSET +#if CONFIG_ATC && !CONFIG_ATC_REDUCED_TXSET #define EXT_TX_SETS_INTRA 2 // Sets of transform selections for INTRA #else #define EXT_TX_SETS_INTRA 3 // Sets of transform selections for INTRA -#endif // CONFIG_ATC_NEWTXSETS && !CONFIG_ATC_REDUCED_TXSET +#endif // CONFIG_ATC && !CONFIG_ATC_REDUCED_TXSET -#if CONFIG_ATC_NEWTXSETS +#if CONFIG_ATC #define INTRA_TX_SET1 7 #if CONFIG_ATC_REDUCED_TXSET #define INTRA_TX_SET2 2 @@ -590,7 +649,7 @@ #else #define INTRA_TX_SET1 6 #define INTRA_TX_SET2 4 -#endif // CONFIG_ATC_NEWTXSETS +#endif // CONFIG_ATC enum { UNIDIR_COMP_REFERENCE, @@ -600,7 +659,11 @@ enum { PLANE_TYPE_Y, PLANE_TYPE_UV, PLANE_TYPES } UENUM1BYTE(PLANE_TYPE); +#if CONFIG_CFL_IMPROVEMENTS +#define CFL_ALPHABET_SIZE_LOG2 3 +#else #define CFL_ALPHABET_SIZE_LOG2 4 +#endif // CONFIG_CFL_IMPROVEMENTS #define CFL_ALPHABET_SIZE (1 << CFL_ALPHABET_SIZE_LOG2) #define CFL_MAGS_SIZE ((2 << CFL_ALPHABET_SIZE_LOG2) + 1) #define CFL_IDX_U(idx) (idx >> CFL_ALPHABET_SIZE_LOG2) @@ -636,6 +699,10 @@ 
#define CFL_CONTEXT_V(js) \ (CFL_SIGN_V(js) * CFL_SIGNS + CFL_SIGN_U(js) - CFL_SIGNS) +#if CONFIG_SEP_COMP_DRL +#define SEP_COMP_DRL_SIZE 3 +#endif // CONFIG_SEP_COMP_DRL + enum { PALETTE_MAP, COLOR_MAP_TYPES, @@ -897,9 +964,9 @@ #define WARPMV_MODE_CONTEXT 10 #endif // CONFIG_WARPMV -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT #define MAX_REF_BV_STACK_SIZE 4 -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT #define GLOBALMV_OFFSET 3 #define REFMV_OFFSET 4 @@ -915,6 +982,19 @@ #define INTER_COMPOUND_MODE_CONTEXTS 8 #endif // CONFIG_C076_INTER_MOD_CTX +#if CONFIG_CWP +// Number of supported factors for compound weighted prediction +#define MAX_CWP_NUM 5 +// maximum value for the supported factors +#define CWP_MAX 20 +// minimum value for the supported factors +#define CWP_MIN -4 +// Weighting factor for simple averge prediction +#define CWP_EQUAL 8 +#define CWP_WEIGHT_BITS 4 +#define MAX_CWP_CONTEXTS 2 +#endif + #define DELTA_Q_SMALL 3 #define DELTA_Q_PROBS (DELTA_Q_SMALL) #define DEFAULT_DELTA_Q_RES_PERCEPTUAL 4 @@ -1089,7 +1169,7 @@ PROJ_SPATIAL, /**< Project from spatial neighborhood */ PROJ_PARAM_BANK, /**< Project from circular buffer */ PROJ_DEFAULT, /**< Default values */ - WARP_PROJ_TYPES = 5, /**< Num projection types */ + WARP_PROJ_TYPES = 4, /**< Num projection types */ } WarpProjectionType; #endif // CONFIG_WARP_REF_LIST
diff --git a/av1/common/idct.c b/av1/common/idct.c index a9a3f1f..4462ef6 100644 --- a/av1/common/idct.c +++ b/av1/common/idct.c
@@ -318,8 +318,9 @@ } #endif // CONFIG_CROSS_CHROMA_TX -void av1_inverse_transform_block(const MACROBLOCKD *xd, tran_low_t *dqcoeff, - int plane, TX_TYPE tx_type, TX_SIZE tx_size, +void av1_inverse_transform_block(const MACROBLOCKD *xd, + const tran_low_t *dqcoeff, int plane, + TX_TYPE tx_type, TX_SIZE tx_size, uint16_t *dst, int stride, int eob, int reduced_tx_set) { if (!eob) return; @@ -338,9 +339,14 @@ assert(((intra_mode >= PAETH_PRED || filter) && txfm_param.sec_tx_type) == 0); (void)intra_mode; (void)filter; - av1_inv_stxfm(dqcoeff, &txfm_param); - av1_highbd_inv_txfm_add(dqcoeff, dst, stride, &txfm_param); + // Work buffer for secondary transform + DECLARE_ALIGNED(32, tran_low_t, temp_dqcoeff[MAX_SB_SQUARE]); + memcpy(temp_dqcoeff, dqcoeff, sizeof(tran_low_t) * tx_size_2d[tx_size]); + + av1_inv_stxfm(temp_dqcoeff, &txfm_param); + + av1_highbd_inv_txfm_add(temp_dqcoeff, dst, stride, &txfm_param); } // Inverse secondary transform
diff --git a/av1/common/idct.h b/av1/common/idct.h index b50d972..2652f7c 100644 --- a/av1/common/idct.h +++ b/av1/common/idct.h
@@ -39,8 +39,9 @@ CctxType cctx_type); #endif // CONFIG_CROSS_CHROMA_TX -void av1_inverse_transform_block(const MACROBLOCKD *xd, tran_low_t *dqcoeff, - int plane, TX_TYPE tx_type, TX_SIZE tx_size, +void av1_inverse_transform_block(const MACROBLOCKD *xd, + const tran_low_t *dqcoeff, int plane, + TX_TYPE tx_type, TX_SIZE tx_size, uint16_t *dst, int stride, int eob, int reduced_tx_set); void av1_highbd_iwht4x4_add(const tran_low_t *input, uint16_t *dest, int stride,
diff --git a/av1/common/mv.h b/av1/common/mv.h index 42cac87..472c9bd 100644 --- a/av1/common/mv.h +++ b/av1/common/mv.h
@@ -391,7 +391,7 @@ #define SUBEXPFIN_K 3 -#if CONFIG_EXTENDED_WARP_PREDICTION +#if CONFIG_EXTENDED_WARP_PREDICTION || CONFIG_IMPROVED_GLOBAL_MOTION #define GM_TRANS_PREC_BITS 3 #define GM_ABS_TRANS_BITS 14 #define GM_ABS_TRANS_ONLY_BITS (GM_ABS_TRANS_BITS - GM_TRANS_PREC_BITS + 3) @@ -401,7 +401,11 @@ #define GM_TRANS_ONLY_DECODE_FACTOR (1 << GM_TRANS_ONLY_PREC_DIFF) #define GM_ALPHA_PREC_BITS 10 +#if CONFIG_IMPROVED_GLOBAL_MOTION +#define GM_ABS_ALPHA_BITS 8 +#else #define GM_ABS_ALPHA_BITS 7 +#endif // CONFIG_IMPROVED_GLOBAL_MOTION #define GM_ALPHA_PREC_DIFF (WARPEDMODEL_PREC_BITS - GM_ALPHA_PREC_BITS) #define GM_ALPHA_DECODE_FACTOR (1 << GM_ALPHA_PREC_DIFF) #else @@ -417,7 +421,7 @@ #define GM_ABS_ALPHA_BITS 12 #define GM_ALPHA_PREC_DIFF (WARPEDMODEL_PREC_BITS - GM_ALPHA_PREC_BITS) #define GM_ALPHA_DECODE_FACTOR (1 << GM_ALPHA_PREC_DIFF) -#endif // CONFIG_EXTENDED_WARP_PREDICTION +#endif // CONFIG_EXTENDED_WARP_PREDICTION || CONFIG_IMPROVED_GLOBAL_MOTION #define GM_ROW3HOMO_PREC_BITS 16 #define GM_ABS_ROW3HOMO_BITS 11 @@ -425,7 +429,11 @@ (WARPEDMODEL_ROW3HOMO_PREC_BITS - GM_ROW3HOMO_PREC_BITS) #define GM_ROW3HOMO_DECODE_FACTOR (1 << GM_ROW3HOMO_PREC_DIFF) +#if CONFIG_IMPROVED_GLOBAL_MOTION +#define GM_TRANS_MAX ((1 << GM_ABS_TRANS_BITS) - 1) +#else #define GM_TRANS_MAX (1 << GM_ABS_TRANS_BITS) +#endif // CONFIG_IMPROVED_GLOBAL_MOTION #define GM_ALPHA_MAX (1 << GM_ABS_ALPHA_BITS) #define GM_ROW3HOMO_MAX (1 << GM_ABS_ROW3HOMO_BITS) @@ -533,6 +541,10 @@ // candidate, and so does not allow WARP_EXTEND int row_offset; int col_offset; +#if CONFIG_CWP + // Record the cwp index of the neighboring blocks + int8_t cwp_idx; +#endif // CONFIG_CWP #endif // CONFIG_EXTENDED_WARP_PREDICTION } CANDIDATE_MV;
diff --git a/av1/common/mvref_common.c b/av1/common/mvref_common.c index 64ca39e..f22423d 100644 --- a/av1/common/mvref_common.c +++ b/av1/common/mvref_common.c
@@ -20,12 +20,12 @@ #endif // CONFIG_TIP #include "av1/common/warped_motion.h" -#if CONFIG_SMVP_IMPROVEMENT +#if CONFIG_MVP_IMPROVEMENT typedef struct single_mv_candidate { int_mv mv; MV_REFERENCE_FRAME ref_frame; } SINGLE_MV_CANDIDATE; -#endif // CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT #define MFMV_STACK_SIZE 3 @@ -145,7 +145,7 @@ mv->mv.as_int = 0; #endif // CONFIG_TIP -#if CONFIG_TMVP_IMPROVEMENT +#if CONFIG_MVP_IMPROVEMENT if (is_inter_ref_frame(mi->ref_frame[0]) && mi->ref_frame[1] == NONE_FRAME) { if ((abs(mi->mv[0].as_mv.row) <= REFMVS_LIMIT) && @@ -159,7 +159,7 @@ #endif // CONFIG_TIP } } else { -#endif // CONFIG_TMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT for (int idx = 0; idx < 2; ++idx) { MV_REFERENCE_FRAME ref_frame = mi->ref_frame[idx]; if (is_inter_ref_frame(ref_frame)) { @@ -177,9 +177,9 @@ #endif // CONFIG_TIP } } -#if CONFIG_TMVP_IMPROVEMENT +#if CONFIG_MVP_IMPROVEMENT } -#endif // CONFIG_TMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT mv++; } @@ -187,25 +187,25 @@ } } -#if CONFIG_SMVP_IMPROVEMENT +#if CONFIG_MVP_IMPROVEMENT // Fetch MVP candidates from derived SMVP into MVP candidate list // when there is no enough MVP candidates. 
static AOM_INLINE void fill_mvp_from_derived_smvp( const MV_REFERENCE_FRAME rf[2], CANDIDATE_MV *ref_mv_stack, uint16_t *ref_mv_weight, uint8_t *refmv_count, CANDIDATE_MV *derived_mv_stack, uint8_t derived_mv_count, -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT const MB_MODE_INFO *mbmi, MV_REFERENCE_FRAME *ref_frame_idx0, MV_REFERENCE_FRAME *ref_frame_idx1, -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT const int max_ref_mv_count) { int index = 0; int derived_idx = 0; if (rf[1] == NONE_FRAME) { -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT assert(!mbmi->skip_mode); -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT for (derived_idx = 0; derived_idx < derived_mv_count; ++derived_idx) { for (index = 0; index < *refmv_count; ++index) { @@ -222,6 +222,9 @@ ref_mv_stack[index].row_offset = OFFSET_NONSPATIAL; ref_mv_stack[index].col_offset = OFFSET_NONSPATIAL; #endif // CONFIG_EXTENDED_WARP_PREDICTION +#if CONFIG_CWP + ref_mv_stack[index].cwp_idx = derived_mv_stack[derived_idx].cwp_idx; +#endif // CONFIG_CWP ref_mv_weight[index] = REF_CAT_LEVEL; ++(*refmv_count); } @@ -233,10 +236,10 @@ derived_mv_stack[derived_idx].this_mv.as_int) && (ref_mv_stack[index].comp_mv.as_int == derived_mv_stack[derived_idx].comp_mv.as_int)) { -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT if (!mbmi->skip_mode || (ref_frame_idx0[index] == rf[0] && ref_frame_idx1[index] == rf[1])) -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT break; } } @@ -249,19 +252,22 @@ ref_mv_stack[index].row_offset = OFFSET_NONSPATIAL; ref_mv_stack[index].col_offset = OFFSET_NONSPATIAL; #endif // CONFIG_EXTENDED_WARP_PREDICTION -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_CWP + ref_mv_stack[index].cwp_idx = derived_mv_stack[derived_idx].cwp_idx; +#endif // CONFIG_CWP +#if CONFIG_SKIP_MODE_ENHANCEMENT if (mbmi->skip_mode) { 
ref_frame_idx0[index] = rf[0]; ref_frame_idx1[index] = rf[1]; } -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT ref_mv_weight[index] = REF_CAT_LEVEL; ++(*refmv_count); } } } } -#endif // CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT #if CONFIG_TIP static AOM_INLINE void derive_ref_mv_candidate_from_tip_mode( @@ -323,6 +329,9 @@ ref_mv_stack[index].row_offset = OFFSET_NONSPATIAL; ref_mv_stack[index].col_offset = OFFSET_NONSPATIAL; #endif // CONFIG_EXTENDED_WARP_PREDICTION +#if CONFIG_CWP + ref_mv_stack[index].cwp_idx = candidate->cwp_idx; +#endif // CONFIG_CWP ++(*refmv_count); } if (have_newmv_in_inter_mode(candidate->mode)) ++*newmv_count; @@ -374,9 +383,9 @@ static AOM_INLINE void add_ref_mv_candidate( #if CONFIG_TIP -#if !CONFIG_SMVP_IMPROVEMENT +#if !CONFIG_MVP_IMPROVEMENT const AV1_COMMON *cm, -#endif // !CONFIG_SMVP_IMPROVEMENT +#endif // !CONFIG_MVP_IMPROVEMENT int mi_row, int mi_col, int mi_row_cand, int mi_col_cand, #endif // CONFIG_TIP const MB_MODE_INFO *const candidate, @@ -387,16 +396,16 @@ uint8_t *ref_match_count, uint8_t *newmv_count, CANDIDATE_MV *ref_mv_stack, uint16_t *ref_mv_weight, int_mv *gm_mv_candidates, const WarpedMotionParams *gm_params, -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT const MB_MODE_INFO *mbmi, MV_REFERENCE_FRAME ref_frame_idx0[MAX_REF_MV_STACK_SIZE], MV_REFERENCE_FRAME ref_frame_idx1[MAX_REF_MV_STACK_SIZE], -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX -#if CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_SKIP_MODE_ENHANCEMENT +#if CONFIG_MVP_IMPROVEMENT const AV1_COMMON *cm, int add_more_mvs, SINGLE_MV_CANDIDATE *single_mv, uint8_t *single_mv_count, CANDIDATE_MV *derived_mv_stack, uint16_t *derived_mv_weight, uint8_t *derived_mv_count, -#endif // CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT #if CONFIG_IBC_SR_EXT uint8_t is_intrabc, #endif // CONFIG_IBC_SR_EXT @@ -425,7 +434,7 @@ const TIP *tip_ref = &cm->tip_ref; #endif // CONFIG_TIP -#if 
CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT if (mbmi->skip_mode) { #if CONFIG_TIP if (!is_tip_ref_frame(candidate->ref_frame[0]) && @@ -464,18 +473,21 @@ ref_mv_stack[index].comp_mv = this_refmv[1]; ref_frame_idx0[index] = candidate->ref_frame[0]; ref_frame_idx1[index] = candidate->ref_frame[1]; +#if CONFIG_CWP + ref_mv_stack[index].cwp_idx = candidate->cwp_idx; +#endif // CONFIG_CWP ref_mv_weight[index] = weight; ++(*refmv_count); } } return; } -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT if (rf[1] == NONE_FRAME) { -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT assert(!mbmi->skip_mode); -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT // single reference frame for (ref = 0; ref < 2; ++ref) { @@ -523,13 +535,16 @@ ref_mv_stack[index].row_offset = row_offset; ref_mv_stack[index].col_offset = col_offset; #endif // CONFIG_EXTENDED_WARP_PREDICTION +#if CONFIG_CWP + ref_mv_stack[index].cwp_idx = candidate->cwp_idx; +#endif // CONFIG_CWP ref_mv_weight[index] = weight; ++(*refmv_count); } if (have_newmv_in_inter_mode(candidate->mode)) ++*newmv_count; ++*ref_match_count; } -#if CONFIG_SMVP_IMPROVEMENT +#if CONFIG_MVP_IMPROVEMENT else if (add_more_mvs && is_inter_ref_frame(candidate->ref_frame[ref]) && #if CONFIG_IBC_SR_EXT rf[0] != INTRA_FRAME && @@ -586,11 +601,14 @@ *derived_mv_count < MAX_REF_MV_STACK_SIZE) { derived_mv_stack[index].this_mv = this_refmv; derived_mv_weight[index] = weight; +#if CONFIG_CWP + derived_mv_stack[index].cwp_idx = candidate->cwp_idx; +#endif // CONFIG_CWP ++(*derived_mv_count); } } } -#endif // CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT } } else { #if CONFIG_TIP @@ -606,9 +624,9 @@ // compound reference frame if (candidate->ref_frame[0] == rf[0] && candidate->ref_frame[1] == rf[1]) { -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT if (mbmi->skip_mode) return; -#endif // 
CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT int_mv this_refmv[2]; @@ -640,25 +658,31 @@ ref_mv_stack[index].row_offset = OFFSET_NONSPATIAL; ref_mv_stack[index].col_offset = OFFSET_NONSPATIAL; #endif // CONFIG_EXTENDED_WARP_PREDICTION +#if CONFIG_CWP + ref_mv_stack[index].cwp_idx = candidate->cwp_idx; +#endif // CONFIG_CWP ++(*refmv_count); } if (have_newmv_in_inter_mode(candidate->mode)) ++*newmv_count; ++*ref_match_count; } -#if CONFIG_SMVP_IMPROVEMENT +#if CONFIG_MVP_IMPROVEMENT else if (add_more_mvs) { // Compound reference frame, but only have one reference frame // is the same as the reference frame of the neighboring block int candidate_ref_idx0 = -1; int candidate_ref_idx1 = -1; + int which_cand_ref = -1; if (candidate->ref_frame[0] == rf[0] || candidate->ref_frame[1] == rf[0]) { candidate_ref_idx0 = 0; candidate_ref_idx1 = 1; + which_cand_ref = (candidate->ref_frame[0] == rf[0]) ? 0 : 1; } else if (candidate->ref_frame[0] == rf[1] || candidate->ref_frame[1] == rf[1]) { candidate_ref_idx0 = 1; candidate_ref_idx1 = 0; + which_cand_ref = (candidate->ref_frame[0] == rf[1]) ? 
0 : 1; } if (candidate_ref_idx0 != -1 && candidate_ref_idx1 != -1) { @@ -671,7 +695,7 @@ #if CONFIG_C071_SUBBLK_WARPMV submi, #endif // CONFIG_C071_SUBBLK_WARPMV - candidate_ref_idx0); + which_cand_ref); int cand_idx = 0; for (cand_idx = 0; cand_idx < *single_mv_count; ++cand_idx) { @@ -700,6 +724,9 @@ derived_mv_stack[index].this_mv = this_refmv[0]; derived_mv_stack[index].comp_mv = this_refmv[1]; derived_mv_weight[index] = weight; +#if CONFIG_CWP + derived_mv_stack[index].cwp_idx = candidate->cwp_idx; +#endif // CONFIG_CWP ++(*derived_mv_count); } } @@ -722,7 +749,7 @@ } } } -#endif // CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT } #if CONFIG_TIP } @@ -805,9 +832,8 @@ } } #endif // CONFIG_WARP_REF_LIST -// both CONFIG_SMVP_IMPROVEMENT and CONFIG_C043_MVP_IMPROVEMENTS are ture case, -// scan_row_mbmi does not called -#if !(CONFIG_SMVP_IMPROVEMENT && CONFIG_C043_MVP_IMPROVEMENTS) +// when CONFIG_MVP_IMPROVEMENT is ture, scan_row_mbmi does not called +#if !CONFIG_MVP_IMPROVEMENT static AOM_INLINE void scan_row_mbmi( const AV1_COMMON *cm, const MACROBLOCKD *xd, #if CONFIG_TIP || CONFIG_EXT_RECUR_PARTITIONS @@ -817,14 +843,14 @@ CANDIDATE_MV *ref_mv_stack, uint16_t *ref_mv_weight, uint8_t *refmv_count, uint8_t *ref_match_count, uint8_t *newmv_count, int_mv *gm_mv_candidates, int max_row_offset, -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT MV_REFERENCE_FRAME *ref_frame_idx0, MV_REFERENCE_FRAME *ref_frame_idx1, -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX -#if CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_SKIP_MODE_ENHANCEMENT +#if CONFIG_MVP_IMPROVEMENT int add_more_mvs, SINGLE_MV_CANDIDATE *single_mv, uint8_t *single_mv_count, CANDIDATE_MV *derived_mv_stack, uint16_t *derived_mv_weight, uint8_t *derived_mv_count, -#endif // CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT #if CONFIG_WARP_REF_LIST WARP_CANDIDATE warp_param_stack[MAX_WARP_REF_CANDIDATES], int max_num_of_warp_candidates, uint8_t *valid_num_warp_candidates, @@ 
-853,6 +879,7 @@ const int plane_type = (xd->tree_type == CHROMA_PART); for (int i = 0; i < end_mi;) { #if CONFIG_EXT_RECUR_PARTITIONS + if (xd->mi_col + col_offset + i >= cm->mi_params.mi_cols) break; const int sb_mi_size = mi_size_wide[cm->sb_size]; const int mask_row = mi_row & (sb_mi_size - 1); const int mask_col = mi_col & (sb_mi_size - 1); @@ -867,6 +894,7 @@ } #endif // CONFIG_EXT_RECUR_PARTITIONS const MB_MODE_INFO *const candidate = candidate_mi0[col_offset + i]; + assert(candidate != NULL); #if CONFIG_C071_SUBBLK_WARPMV const SUBMB_INFO *const submi = submi_mi0[col_offset + i]; #endif // CONFIG_C071_SUBBLK_WARPMV @@ -878,7 +906,7 @@ else if (abs(row_offset) > 1) len = AOMMAX(len, width_8x8); -#if CONFIG_COMPLEXITY_SCALABLE_MVP +#if CONFIG_MVP_IMPROVEMENT // Don't add weight to row_offset < -1 which is in the outer area uint16_t weight = row_offset < -1 ? 0 : 2; #else @@ -887,7 +915,7 @@ if (xd->width >= width_8x8 && xd->width <= n4_w) { uint16_t inc = AOMMIN(-max_row_offset + row_offset + 1, mi_size_high[candidate_bsize]); -#if !CONFIG_COMPLEXITY_SCALABLE_MVP +#if !CONFIG_MVP_IMPROVEMENT // Obtain range used in weight calculation. 
weight = AOMMAX(weight, inc); #endif @@ -911,9 +939,9 @@ add_ref_mv_candidate( #if CONFIG_TIP -#if !CONFIG_SMVP_IMPROVEMENT +#if !CONFIG_MVP_IMPROVEMENT cm, -#endif // !CONFIG_SMVP_IMPROVEMENT +#endif // !CONFIG_MVP_IMPROVEMENT mi_row, mi_col, cand_mi_row, cand_mi_col, #endif // CONFIG_TIP candidate, @@ -922,13 +950,13 @@ #endif // CONFIG_C071_SUBBLK_WARPMV rf, refmv_count, ref_match_count, newmv_count, ref_mv_stack, ref_mv_weight, gm_mv_candidates, cm->global_motion, -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT xd->mi[0], ref_frame_idx0, ref_frame_idx1, -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX -#if CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_SKIP_MODE_ENHANCEMENT +#if CONFIG_MVP_IMPROVEMENT cm, add_more_mvs, single_mv, single_mv_count, derived_mv_stack, derived_mv_weight, derived_mv_count, -#endif // CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT #if CONFIG_IBC_SR_EXT xd->mi[0]->use_intrabc[xd->tree_type == CHROMA_PART], #endif // CONFIG_IBC_SR_EXT @@ -945,9 +973,9 @@ i += len; } } -#endif // !(CONFIG_SMVP_IMPROVEMENT && CONFIG_C043_MVP_IMPROVEMENTS) +#endif // !CONFIG_MVP_IMPROVEMENT -#if CONFIG_C043_MVP_IMPROVEMENTS +#if CONFIG_MVP_IMPROVEMENT // update processed_cols variable, when scan_col_mbmi() is not used for adjacent // neigbhors static AOM_INLINE void update_processed_cols(const MACROBLOCKD *xd, int mi_row, @@ -971,7 +999,7 @@ } } } -#endif // CONFIG_C043_MVP_IMPROVEMENTS +#endif // CONFIG_MVP_IMPROVEMENT static AOM_INLINE void scan_col_mbmi( const AV1_COMMON *cm, const MACROBLOCKD *xd, int mi_row, @@ -981,14 +1009,14 @@ const MV_REFERENCE_FRAME rf[2], int col_offset, CANDIDATE_MV *ref_mv_stack, uint16_t *ref_mv_weight, uint8_t *refmv_count, uint8_t *ref_match_count, uint8_t *newmv_count, int_mv *gm_mv_candidates, int max_col_offset, -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT MV_REFERENCE_FRAME *ref_frame_idx0, MV_REFERENCE_FRAME *ref_frame_idx1, -#endif // 
CONFIG_SKIP_MODE_DRL_WITH_REF_IDX -#if CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_SKIP_MODE_ENHANCEMENT +#if CONFIG_MVP_IMPROVEMENT int add_more_mvs, SINGLE_MV_CANDIDATE *single_mv, uint8_t *single_mv_count, CANDIDATE_MV *derived_mv_stack, uint16_t *derived_mv_weight, uint8_t *derived_mv_count, -#endif // CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT #if CONFIG_WARP_REF_LIST WARP_CANDIDATE warp_param_stack[MAX_WARP_REF_CANDIDATES], int max_num_of_warp_candidates, uint8_t *valid_num_warp_candidates, @@ -1012,6 +1040,7 @@ for (i = 0; i < end_mi;) { #if CONFIG_EXT_RECUR_PARTITIONS + if (xd->mi_row + row_offset + i >= cm->mi_params.mi_rows) break; const int sb_mi_size = mi_size_wide[cm->sb_size]; const int mask_row = mi_row & (sb_mi_size - 1); const int mask_col = mi_col & (sb_mi_size - 1); @@ -1026,6 +1055,7 @@ #endif // CONFIG_EXT_RECUR_PARTITIONS const MB_MODE_INFO *const candidate = xd->mi[(row_offset + i) * xd->mi_stride + col_offset]; + assert(candidate != NULL); #if CONFIG_C071_SUBBLK_WARPMV const SUBMB_INFO *const submi = xd->submi[(row_offset + i) * xd->mi_stride + col_offset]; @@ -1039,7 +1069,7 @@ else if (abs(col_offset) > 1) len = AOMMAX(len, n8_h_8); -#if CONFIG_COMPLEXITY_SCALABLE_MVP +#if CONFIG_MVP_IMPROVEMENT // Don't add weight to col_offset < -1 which is in the outer area uint16_t weight = col_offset < -1 ? 0 : 2; #else @@ -1048,7 +1078,7 @@ if (xd->height >= n8_h_8 && xd->height <= n4_h) { int inc = AOMMIN(-max_col_offset + col_offset + 1, mi_size_wide[candidate_bsize]); -#if !CONFIG_COMPLEXITY_SCALABLE_MVP +#if !CONFIG_MVP_IMPROVEMENT // Obtain range used in weight calculation. 
weight = AOMMAX(weight, inc); #endif @@ -1072,9 +1102,9 @@ add_ref_mv_candidate( #if CONFIG_TIP -#if !CONFIG_SMVP_IMPROVEMENT +#if !CONFIG_MVP_IMPROVEMENT cm, -#endif // !CONFIG_SMVP_IMPROVEMENT +#endif // !CONFIG_MVP_IMPROVEMENT mi_row, mi_col, cand_mi_row, cand_mi_col, #endif // CONFIG_TIP candidate, @@ -1083,13 +1113,13 @@ #endif // CONFIG_C071_SUBBLK_WARPMV rf, refmv_count, ref_match_count, newmv_count, ref_mv_stack, ref_mv_weight, gm_mv_candidates, cm->global_motion, -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT xd->mi[0], ref_frame_idx0, ref_frame_idx1, -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX -#if CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_SKIP_MODE_ENHANCEMENT +#if CONFIG_MVP_IMPROVEMENT cm, add_more_mvs, single_mv, single_mv_count, derived_mv_stack, derived_mv_weight, derived_mv_count, -#endif // CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT #if CONFIG_IBC_SR_EXT xd->mi[0]->use_intrabc[xd->tree_type == CHROMA_PART], #endif // CONFIG_IBC_SR_EXT @@ -1130,14 +1160,14 @@ const int mi_col, const MV_REFERENCE_FRAME rf[2], int row_offset, int col_offset, CANDIDATE_MV *ref_mv_stack, uint16_t *ref_mv_weight, uint8_t *ref_match_count, uint8_t *newmv_count, int_mv *gm_mv_candidates, -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT MV_REFERENCE_FRAME *ref_frame_idx0, MV_REFERENCE_FRAME *ref_frame_idx1, -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX -#if CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_SKIP_MODE_ENHANCEMENT +#if CONFIG_MVP_IMPROVEMENT int add_more_mvs, SINGLE_MV_CANDIDATE *single_mv, uint8_t *single_mv_count, CANDIDATE_MV *derived_mv_stack, uint16_t *derived_mv_weight, uint8_t *derived_mv_count, -#endif // CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT #if CONFIG_WARP_REF_LIST WARP_CANDIDATE warp_param_stack[MAX_WARP_REF_CANDIDATES], int max_num_of_warp_candidates, uint8_t *valid_num_warp_candidates, @@ -1162,7 +1192,7 @@ #endif // CONFIG_C071_SUBBLK_WARPMV const int len = 
mi_size_wide[BLOCK_8X8]; -#if CONFIG_COMPLEXITY_SCALABLE_MVP +#if CONFIG_MVP_IMPROVEMENT // Don't add weight to (-1,-1) which is in the outer area uint16_t weight = row_offset == -1 && col_offset == -1 ? 0 : 2; #endif @@ -1183,9 +1213,9 @@ add_ref_mv_candidate( #if CONFIG_TIP -#if !CONFIG_SMVP_IMPROVEMENT +#if !CONFIG_MVP_IMPROVEMENT cm, -#endif // !CONFIG_SMVP_IMPROVEMENT +#endif // !CONFIG_MVP_IMPROVEMENT mi_row, mi_col, cand_mi_row, cand_mi_col, #endif // CONFIG_TIP candidate, @@ -1194,20 +1224,20 @@ #endif // CONFIG_C071_SUBBLK_WARPMV rf, refmv_count, ref_match_count, newmv_count, ref_mv_stack, ref_mv_weight, gm_mv_candidates, cm->global_motion, -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT xd->mi[0], ref_frame_idx0, ref_frame_idx1, -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX -#if CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_SKIP_MODE_ENHANCEMENT +#if CONFIG_MVP_IMPROVEMENT cm, add_more_mvs, single_mv, single_mv_count, derived_mv_stack, derived_mv_weight, derived_mv_count, -#endif // CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT #if CONFIG_IBC_SR_EXT xd->mi[0]->use_intrabc[xd->tree_type == CHROMA_PART], #endif // CONFIG_IBC_SR_EXT #if CONFIG_EXTENDED_WARP_PREDICTION row_offset, col_offset, #endif // CONFIG_EXTENDED_WARP_PREDICTION -#if CONFIG_COMPLEXITY_SCALABLE_MVP +#if CONFIG_MVP_IMPROVEMENT weight * len #else 2 * len @@ -1254,7 +1284,7 @@ return has_tr; } -#if CONFIG_C043_MVP_IMPROVEMENTS || CONFIG_EXTENDED_WARP_PREDICTION +#if CONFIG_MVP_IMPROVEMENT || CONFIG_EXTENDED_WARP_PREDICTION static int has_bottom_left(const AV1_COMMON *cm, const MACROBLOCKD *xd, int mi_row, int mi_col, int n4_h) { const int sb_mi_size = mi_size_wide[cm->sb_size]; @@ -1284,7 +1314,7 @@ return xd->is_mi_coded[av1_get_sdp_idx(xd->tree_type)][bl_offset]; } } -#endif // CONFIG_C043_MVP_IMPROVEMENTS || CONFIG_EXTENDED_WARP_PREDICTION +#endif // CONFIG_MVP_IMPROVEMENT || CONFIG_EXTENDED_WARP_PREDICTION #else static int has_top_right(const AV1_COMMON 
*cm, const MACROBLOCKD *xd, int mi_row, int mi_col, int bs) { @@ -1292,9 +1322,9 @@ const int mask_row = mi_row & (sb_mi_size - 1); const int mask_col = mi_col & (sb_mi_size - 1); -#if !CONFIG_C043_MVP_IMPROVEMENTS +#if !CONFIG_MVP_IMPROVEMENT if (bs > mi_size_wide[BLOCK_64X64]) return 0; -#endif // !CONFIG_C043_MVP_IMPROVEMENTS +#endif // !CONFIG_MVP_IMPROVEMENT // In a split partition all apart from the bottom right has a top right int has_tr = !((mask_row & bs) && (mask_col & bs)); @@ -1340,7 +1370,7 @@ return has_tr; } -#if CONFIG_C043_MVP_IMPROVEMENTS +#if CONFIG_MVP_IMPROVEMENT static int has_bottom_left(const AV1_COMMON *cm, const MACROBLOCKD *xd, int mi_row, int mi_col, int bs) { const int sb_mi_size = mi_size_wide[cm->sb_size]; @@ -1396,10 +1426,10 @@ return has_bl; } -#endif // CONFIG_C043_MVP_IMPROVEMENTS +#endif // CONFIG_MVP_IMPROVEMENT #endif // CONFIG_EXT_RECUR_PARTITIONS -#if !CONFIG_C063_TMVP_IMPROVEMENT +#if !CONFIG_MVP_IMPROVEMENT static int check_sb_border(const int mi_row, const int mi_col, const int row_offset, const int col_offset) { const int sb_mi_size = mi_size_wide[BLOCK_64X64]; @@ -1412,7 +1442,7 @@ return 1; } -#endif // !CONFIG_C063_TMVP_IMPROVEMENT +#endif // !CONFIG_MVP_IMPROVEMENT static int add_tpl_ref_mv(const AV1_COMMON *cm, const MACROBLOCKD *xd, int mi_row, int mi_col, MV_REFERENCE_FRAME ref_frame, @@ -1423,16 +1453,16 @@ #endif // !CONFIG_C076_INTER_MOD_CTX , uint8_t *const refmv_count, -#if CONFIG_C063_TMVP_IMPROVEMENT +#if CONFIG_MVP_IMPROVEMENT int *added_tmvp_cnt, -#endif // CONFIG_C063_TMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT CANDIDATE_MV ref_mv_stack[MAX_REF_MV_STACK_SIZE], uint16_t ref_mv_weight[MAX_REF_MV_STACK_SIZE] -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT , MV_REFERENCE_FRAME *ref_frame_idx0, MV_REFERENCE_FRAME *ref_frame_idx1 -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT #if !CONFIG_C076_INTER_MOD_CTX , int16_t *mode_context @@ -1469,9 
+1499,17 @@ #endif // CONFIG_TIP const uint16_t weight_unit = 1; // mi_size_wide[BLOCK_8X8]; +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + const int cur_frame_index = cm->cur_frame->display_order_hint; +#else const int cur_frame_index = cm->cur_frame->order_hint; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC const RefCntBuffer *const buf_0 = get_ref_frame_buf(cm, rf[0]); +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + const int frame0_index = buf_0->display_order_hint; +#else const int frame0_index = buf_0->order_hint; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC const int cur_offset_0 = get_relative_dist(&cm->seq_params.order_hint_info, cur_frame_index, frame0_index); int idx; @@ -1505,9 +1543,9 @@ } #endif // !CONFIG_C076_INTER_MOD_CTX -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT assert(!xd->mi[0]->skip_mode); -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT for (idx = 0; idx < *refmv_count; ++idx) if (this_refmv.as_int == ref_mv_stack[idx].this_mv.as_int) break; @@ -1520,16 +1558,23 @@ ref_mv_stack[idx].row_offset = OFFSET_NONSPATIAL; ref_mv_stack[idx].col_offset = OFFSET_NONSPATIAL; #endif // CONFIG_EXTENDED_WARP_PREDICTION +#if CONFIG_CWP + ref_mv_stack[idx].cwp_idx = CWP_EQUAL; +#endif // CONFIG_CWP ref_mv_weight[idx] = 2 * weight_unit; ++(*refmv_count); -#if CONFIG_C063_TMVP_IMPROVEMENT +#if CONFIG_MVP_IMPROVEMENT ++(*added_tmvp_cnt); -#endif // CONFIG_C063_TMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT } } else { // Process compound inter mode const RefCntBuffer *const buf_1 = get_ref_frame_buf(cm, rf[1]); +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + const int frame1_index = buf_1->display_order_hint; +#else const int frame1_index = buf_1->order_hint; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC const int cur_offset_1 = get_relative_dist(&cm->seq_params.order_hint_info, cur_frame_index, frame1_index); int_mv comp_refmv; @@ -1554,7 +1599,7 @@ } #endif // !CONFIG_C076_INTER_MOD_CTX -#if 
CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT if (xd->mi[0]->skip_mode) { for (idx = 0; idx < *refmv_count; ++idx) { if (this_refmv.as_int == ref_mv_stack[idx].this_mv.as_int && @@ -1568,16 +1613,19 @@ if (idx == *refmv_count && *refmv_count < MAX_REF_MV_STACK_SIZE) { ref_mv_stack[idx].this_mv.as_int = this_refmv.as_int; ref_mv_stack[idx].comp_mv.as_int = comp_refmv.as_int; +#if CONFIG_CWP + ref_mv_stack[idx].cwp_idx = CWP_EQUAL; +#endif // CONFIG_CWP ref_frame_idx0[idx] = rf[0]; ref_frame_idx1[idx] = rf[1]; ref_mv_weight[idx] = 2 * weight_unit; ++(*refmv_count); -#if CONFIG_C063_TMVP_IMPROVEMENT +#if CONFIG_MVP_IMPROVEMENT ++(*added_tmvp_cnt); -#endif // CONFIG_C063_TMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT } } else { -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT for (idx = 0; idx < *refmv_count; ++idx) { if (this_refmv.as_int == ref_mv_stack[idx].this_mv.as_int && comp_refmv.as_int == ref_mv_stack[idx].comp_mv.as_int) @@ -1594,15 +1642,18 @@ ref_mv_stack[idx].row_offset = OFFSET_NONSPATIAL; ref_mv_stack[idx].col_offset = OFFSET_NONSPATIAL; #endif // CONFIG_EXTENDED_WARP_PREDICTION +#if CONFIG_CWP + ref_mv_stack[idx].cwp_idx = CWP_EQUAL; +#endif // CONFIG_CWP ref_mv_weight[idx] = 2 * weight_unit; ++(*refmv_count); -#if CONFIG_C063_TMVP_IMPROVEMENT +#if CONFIG_MVP_IMPROVEMENT ++(*added_tmvp_cnt); -#endif // CONFIG_C063_TMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT } -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT } -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT } return 1; @@ -1691,6 +1742,9 @@ ref_mv_stack[stack_idx].row_offset = OFFSET_NONSPATIAL; ref_mv_stack[stack_idx].col_offset = OFFSET_NONSPATIAL; #endif // CONFIG_EXTENDED_WARP_PREDICTION +#if CONFIG_CWP + ref_mv_stack[stack_idx].cwp_idx = candidate->cwp_idx; +#endif // CONFIG_CWP // TODO(jingning): Set an arbitrary small number here. 
The weight // doesn't matter as long as it is properly initialized. @@ -1736,13 +1790,16 @@ ref_mv_stack[*refmv_count].row_offset = OFFSET_NONSPATIAL; ref_mv_stack[*refmv_count].col_offset = OFFSET_NONSPATIAL; #endif // CONFIG_EXTENDED_WARP_PREDICTION +#if CONFIG_CWP + ref_mv_stack[*refmv_count].cwp_idx = cand_mv.cwp_idx; +#endif // CONFIG_CWP ++*refmv_count; return true; } #endif // CONFIG_REF_MV_BANK -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT // Add a BV candidate to ref MV stack without duplicate check static AOM_INLINE bool add_to_ref_bv_list(CANDIDATE_MV cand_mv, CANDIDATE_MV *ref_mv_stack, @@ -1750,20 +1807,23 @@ uint8_t *refmv_count) { ref_mv_stack[*refmv_count] = cand_mv; ref_mv_weight[*refmv_count] = REF_CAT_LEVEL; +#if CONFIG_CWP + ref_mv_stack[*refmv_count].cwp_idx = cand_mv.cwp_idx; +#endif // CONFIG_CWP ++*refmv_count; return true; } -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT static AOM_INLINE void setup_ref_mv_list( const AV1_COMMON *cm, const MACROBLOCKD *xd, MV_REFERENCE_FRAME ref_frame, uint8_t *const refmv_count, CANDIDATE_MV ref_mv_stack[MAX_REF_MV_STACK_SIZE], uint16_t ref_mv_weight[MAX_REF_MV_STACK_SIZE], -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT MV_REFERENCE_FRAME *ref_frame_idx0, MV_REFERENCE_FRAME *ref_frame_idx1, -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT int_mv mv_ref_list[MAX_MV_REF_CANDIDATES], int_mv *gm_mv_candidates, int mi_row, int mi_col #if !CONFIG_C076_INTER_MOD_CTX @@ -1779,15 +1839,15 @@ ) { #if CONFIG_EXT_RECUR_PARTITIONS const int has_tr = has_top_right(cm, xd, mi_row, mi_col, xd->width); -#if CONFIG_C043_MVP_IMPROVEMENTS +#if CONFIG_MVP_IMPROVEMENT const int has_bl = has_bottom_left(cm, xd, mi_row, mi_col, xd->height); -#endif // CONFIG_C043_MVP_IMPROVEMENTS +#endif // CONFIG_MVP_IMPROVEMENT #else const int bs = AOMMAX(xd->width, xd->height); const int has_tr = has_top_right(cm, xd, mi_row, mi_col, bs); -#if 
CONFIG_C043_MVP_IMPROVEMENTS +#if CONFIG_MVP_IMPROVEMENT const int has_bl = has_bottom_left(cm, xd, mi_row, mi_col, bs); -#endif // CONFIG_C043_MVP_IMPROVEMENTS +#endif // CONFIG_MVP_IMPROVEMENT #endif // CONFIG_EXT_RECUR_PARTITIONS MV_REFERENCE_FRAME rf[2]; @@ -1795,11 +1855,10 @@ int max_row_offset = 0, max_col_offset = 0; const int row_adj = (xd->height < mi_size_high[BLOCK_8X8]) && (mi_row & 0x01); const int col_adj = (xd->width < mi_size_wide[BLOCK_8X8]) && (mi_col & 0x01); - // both CONFIG_SMVP_IMPROVEMENT and CONFIG_C043_MVP_IMPROVEMENTS are ture - // case, processed_rows does not needed -#if !(CONFIG_SMVP_IMPROVEMENT && CONFIG_C043_MVP_IMPROVEMENTS) + // when CONFIG_MVP_IMPROVEMENT is true, processed_rows does not needed +#if !CONFIG_MVP_IMPROVEMENT int processed_rows = 0; -#endif // !(CONFIG_SMVP_IMPROVEMENT && CONFIG_C043_MVP_IMPROVEMENTS) +#endif // !CONFIG_MVP_IMPROVEMENT int processed_cols = 0; av1_set_ref_frame(rf, ref_frame); @@ -1812,16 +1871,42 @@ for (int k = 0; k < MAX_REF_MV_STACK_SIZE; k++) { ref_mv_stack[k].row_offset = OFFSET_NONSPATIAL; ref_mv_stack[k].col_offset = OFFSET_NONSPATIAL; +#if CONFIG_CWP + ref_mv_stack[k].cwp_idx = CWP_EQUAL; +#endif // CONFIG_CWP } #endif +#if CONFIG_WARP_REF_LIST && CONFIG_CWG_D067_IMPROVED_WARP + // derive a warp model from the 3 corner MVs + if (warp_param_stack && valid_num_warp_candidates && + *valid_num_warp_candidates < max_num_of_warp_candidates) { + int mvs_32[2 * 3]; + int pts[2 * 3]; + int np = 0; + WarpedMotionParams cand_warp_param = default_warp_params; + const int valid_points = + generate_points_from_corners(xd, pts, mvs_32, &np, ref_frame); + const int valid_model = + get_model_from_corner_mvs(&cand_warp_param, pts, valid_points, mvs_32, + xd->mi[0]->sb_type[PLANE_TYPE_Y]); + if (valid_model && !cand_warp_param.invalid && + !is_this_param_already_in_list(*valid_num_warp_candidates, + warp_param_stack, cand_warp_param)) { + insert_neighbor_warp_candidate(warp_param_stack, &cand_warp_param, + 
*valid_num_warp_candidates, PROJ_SPATIAL); + (*valid_num_warp_candidates)++; + } + } +#endif // CONFIG_WARP_REF_LIST && CONFIG_CWG_D067_IMPROVED_WARP + // Find valid maximum row/col offset. if (xd->up_available) { -#if CONFIG_SMVP_IMPROVEMENT +#if CONFIG_MVP_IMPROVEMENT max_row_offset = -(MVREF_ROWS << 1) + row_adj; #else max_row_offset = -(MVREF_ROW_COLS << 1) + row_adj; -#endif // CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT if (xd->height < mi_size_high[BLOCK_8X8]) max_row_offset = -(2 << 1) + row_adj; @@ -1830,11 +1915,11 @@ } if (xd->left_available) { -#if CONFIG_SMVP_IMPROVEMENT +#if CONFIG_MVP_IMPROVEMENT max_col_offset = -(MVREF_COLS << 1) + col_adj; #else max_col_offset = -(MVREF_ROW_COLS << 1) + col_adj; -#endif // CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT if (xd->width < mi_size_wide[BLOCK_8X8]) max_col_offset = -(2 << 1) + col_adj; @@ -1846,26 +1931,26 @@ uint8_t row_match_count = 0; uint8_t newmv_count = 0; -#if CONFIG_SMVP_IMPROVEMENT +#if CONFIG_MVP_IMPROVEMENT SINGLE_MV_CANDIDATE single_mv[MAX_REF_MV_STACK_SIZE]; uint8_t single_mv_count = 0; CANDIDATE_MV derived_mv_stack[MAX_REF_MV_STACK_SIZE]; uint16_t derived_mv_weight[MAX_REF_MV_STACK_SIZE]; uint8_t derived_mv_count = 0; -#endif // CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT -#if CONFIG_C043_MVP_IMPROVEMENTS +#if CONFIG_MVP_IMPROVEMENT if (xd->left_available) { scan_blk_mbmi(cm, xd, mi_row, mi_col, rf, (xd->height - 1), -1, ref_mv_stack, ref_mv_weight, &col_match_count, &newmv_count, gm_mv_candidates, -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT ref_frame_idx0, ref_frame_idx1, -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX -#if CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_SKIP_MODE_ENHANCEMENT +#if CONFIG_MVP_IMPROVEMENT 1, single_mv, &single_mv_count, derived_mv_stack, derived_mv_weight, &derived_mv_count, -#endif // CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT #if CONFIG_WARP_REF_LIST warp_param_stack, 
max_num_of_warp_candidates, valid_num_warp_candidates, ref_frame, @@ -1878,13 +1963,13 @@ scan_blk_mbmi(cm, xd, mi_row, mi_col, rf, -1, (xd->width - 1), ref_mv_stack, ref_mv_weight, &row_match_count, &newmv_count, gm_mv_candidates, -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT ref_frame_idx0, ref_frame_idx1, -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX -#if CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_SKIP_MODE_ENHANCEMENT +#if CONFIG_MVP_IMPROVEMENT 1, single_mv, &single_mv_count, derived_mv_stack, derived_mv_weight, &derived_mv_count, -#endif // CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT #if CONFIG_WARP_REF_LIST warp_param_stack, max_num_of_warp_candidates, valid_num_warp_candidates, ref_frame, @@ -1895,13 +1980,13 @@ scan_blk_mbmi(cm, xd, mi_row, mi_col, rf, 0, -1, ref_mv_stack, ref_mv_weight, &col_match_count, &newmv_count, gm_mv_candidates, -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT ref_frame_idx0, ref_frame_idx1, -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX -#if CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_SKIP_MODE_ENHANCEMENT +#if CONFIG_MVP_IMPROVEMENT 1, single_mv, &single_mv_count, derived_mv_stack, derived_mv_weight, &derived_mv_count, -#endif // CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT #if CONFIG_WARP_REF_LIST warp_param_stack, max_num_of_warp_candidates, valid_num_warp_candidates, ref_frame, @@ -1914,13 +1999,13 @@ scan_blk_mbmi(cm, xd, mi_row, mi_col, rf, -1, 0, ref_mv_stack, ref_mv_weight, &row_match_count, &newmv_count, gm_mv_candidates, -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT ref_frame_idx0, ref_frame_idx1, -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX -#if CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_SKIP_MODE_ENHANCEMENT +#if CONFIG_MVP_IMPROVEMENT 1, single_mv, &single_mv_count, derived_mv_stack, derived_mv_weight, &derived_mv_count, -#endif // CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT #if CONFIG_WARP_REF_LIST 
warp_param_stack, max_num_of_warp_candidates, valid_num_warp_candidates, ref_frame, @@ -1931,13 +2016,13 @@ scan_blk_mbmi(cm, xd, mi_row, mi_col, rf, xd->height, -1, ref_mv_stack, ref_mv_weight, &col_match_count, &newmv_count, gm_mv_candidates, -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT ref_frame_idx0, ref_frame_idx1, -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX -#if CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_SKIP_MODE_ENHANCEMENT +#if CONFIG_MVP_IMPROVEMENT 1, single_mv, &single_mv_count, derived_mv_stack, derived_mv_weight, &derived_mv_count, -#endif // CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT #if CONFIG_WARP_REF_LIST warp_param_stack, max_num_of_warp_candidates, valid_num_warp_candidates, ref_frame, @@ -1948,13 +2033,13 @@ scan_blk_mbmi(cm, xd, mi_row, mi_col, rf, -1, xd->width, ref_mv_stack, ref_mv_weight, &row_match_count, &newmv_count, gm_mv_candidates, -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT ref_frame_idx0, ref_frame_idx1, -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX -#if CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_SKIP_MODE_ENHANCEMENT +#if CONFIG_MVP_IMPROVEMENT 1, single_mv, &single_mv_count, derived_mv_stack, derived_mv_weight, &derived_mv_count, -#endif // CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT #if CONFIG_WARP_REF_LIST warp_param_stack, max_num_of_warp_candidates, valid_num_warp_candidates, ref_frame, @@ -1967,13 +2052,13 @@ scan_blk_mbmi(cm, xd, mi_row, mi_col, rf, -1, -1, ref_mv_stack, ref_mv_weight, &dummy_ref_match_count, &dummy_new_mv_count, gm_mv_candidates, -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT ref_frame_idx0, ref_frame_idx1, -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX -#if CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_SKIP_MODE_ENHANCEMENT +#if CONFIG_MVP_IMPROVEMENT 1, single_mv, &single_mv_count, derived_mv_stack, derived_mv_weight, &derived_mv_count, -#endif // CONFIG_SMVP_IMPROVEMENT +#endif // 
CONFIG_MVP_IMPROVEMENT #if CONFIG_WARP_REF_LIST warp_param_stack, max_num_of_warp_candidates, valid_num_warp_candidates, ref_frame, @@ -1984,13 +2069,13 @@ scan_blk_mbmi(cm, xd, mi_row, mi_col, rf, (xd->height >> 1), -1, ref_mv_stack, ref_mv_weight, &col_match_count, &newmv_count, gm_mv_candidates, -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT ref_frame_idx0, ref_frame_idx1, -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX -#if CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_SKIP_MODE_ENHANCEMENT +#if CONFIG_MVP_IMPROVEMENT 1, single_mv, &single_mv_count, derived_mv_stack, derived_mv_weight, &derived_mv_count, -#endif // CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT #if CONFIG_WARP_REF_LIST warp_param_stack, max_num_of_warp_candidates, valid_num_warp_candidates, ref_frame, @@ -2003,13 +2088,13 @@ scan_blk_mbmi(cm, xd, mi_row, mi_col, rf, -1, (xd->width >> 1), ref_mv_stack, ref_mv_weight, &row_match_count, &newmv_count, gm_mv_candidates, -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT ref_frame_idx0, ref_frame_idx1, -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX -#if CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_SKIP_MODE_ENHANCEMENT +#if CONFIG_MVP_IMPROVEMENT 1, single_mv, &single_mv_count, derived_mv_stack, derived_mv_weight, &derived_mv_count, -#endif // CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT #if CONFIG_WARP_REF_LIST warp_param_stack, max_num_of_warp_candidates, valid_num_warp_candidates, ref_frame, @@ -2026,13 +2111,13 @@ mi_col, rf, -1, ref_mv_stack, ref_mv_weight, refmv_count, &row_match_count, &newmv_count, gm_mv_candidates, max_row_offset, -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT ref_frame_idx0, ref_frame_idx1, -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX -#if CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_SKIP_MODE_ENHANCEMENT +#if CONFIG_MVP_IMPROVEMENT 1, single_mv, &single_mv_count, derived_mv_stack, derived_mv_weight, &derived_mv_count, -#endif // 
CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT #if CONFIG_WARP_REF_LIST warp_param_stack, max_num_of_warp_candidates, valid_num_warp_candidates, ref_frame, @@ -2048,13 +2133,13 @@ rf, -1, ref_mv_stack, ref_mv_weight, refmv_count, &col_match_count, &newmv_count, gm_mv_candidates, max_col_offset, -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT ref_frame_idx0, ref_frame_idx1, -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX -#if CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_SKIP_MODE_ENHANCEMENT +#if CONFIG_MVP_IMPROVEMENT 1, single_mv, &single_mv_count, derived_mv_stack, derived_mv_weight, &derived_mv_count, -#endif // CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT #if CONFIG_WARP_REF_LIST warp_param_stack, max_num_of_warp_candidates, valid_num_warp_candidates, ref_frame, @@ -2066,19 +2151,19 @@ scan_blk_mbmi(cm, xd, mi_row, mi_col, rf, -1, xd->width, ref_mv_stack, ref_mv_weight, &row_match_count, &newmv_count, gm_mv_candidates, -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT ref_frame_idx0, ref_frame_idx1, -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX -#if CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_SKIP_MODE_ENHANCEMENT +#if CONFIG_MVP_IMPROVEMENT 1, single_mv, &single_mv_count, derived_mv_stack, derived_mv_weight, &derived_mv_count, -#endif // CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT #if CONFIG_WARP_REF_LIST warp_param_stack, max_num_of_warp_candidates, valid_num_warp_candidates, ref_frame, #endif // CONFIG_WARP_REF_LIST refmv_count); -#endif // CONFIG_C043_MVP_IMPROVEMENTS +#endif // CONFIG_MVP_IMPROVEMENT #if !CONFIG_C076_INTER_MOD_CTX const uint8_t nearest_match = (row_match_count > 0) + (col_match_count > 0); @@ -2098,13 +2183,13 @@ #if !CONFIG_C076_INTER_MOD_CTX int is_available = 0; #endif //! 
CONFIG_C076_INTER_MOD_CTX -#if !CONFIG_C063_TMVP_IMPROVEMENT +#if !CONFIG_MVP_IMPROVEMENT const int voffset = AOMMAX(mi_size_high[BLOCK_8X8], xd->height); const int hoffset = AOMMAX(mi_size_wide[BLOCK_8X8], xd->width); -#endif // !CONFIG_C063_TMVP_IMPROVEMENT +#endif // !CONFIG_MVP_IMPROVEMENT const int blk_row_end = AOMMIN(xd->height, mi_size_high[BLOCK_64X64]); const int blk_col_end = AOMMIN(xd->width, mi_size_wide[BLOCK_64X64]); -#if !CONFIG_C063_TMVP_IMPROVEMENT +#if !CONFIG_MVP_IMPROVEMENT const int tpl_sample_pos[3][2] = { { voffset, -2 }, { voffset, hoffset }, @@ -2114,7 +2199,7 @@ (xd->height < mi_size_high[BLOCK_64X64]) && (xd->width >= mi_size_wide[BLOCK_8X8]) && (xd->width < mi_size_wide[BLOCK_64X64]); -#endif // !CONFIG_C063_TMVP_IMPROVEMENT +#endif // !CONFIG_MVP_IMPROVEMENT const int step_h = (xd->height >= mi_size_high[BLOCK_64X64]) ? mi_size_high[BLOCK_16X16] @@ -2123,11 +2208,11 @@ ? mi_size_wide[BLOCK_16X16] : mi_size_wide[BLOCK_8X8]; -#if CONFIG_C063_TMVP_IMPROVEMENT +#if CONFIG_MVP_IMPROVEMENT int added_tmvp_cnt = 0; -#endif // CONFIG_C063_TMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT -#if CONFIG_C063_TMVP_IMPROVEMENT +#if CONFIG_MVP_IMPROVEMENT // Use reversed horizontal scan order to check TMVP candidates for (int blk_row = blk_row_end - step_h; blk_row >= 0; blk_row -= step_h) { for (int blk_col = blk_col_end - step_w; blk_col >= 0; @@ -2136,7 +2221,7 @@ #else for (int blk_row = 0; blk_row < blk_row_end; blk_row += step_h) { for (int blk_col = 0; blk_col < blk_col_end; blk_col += step_w) { -#endif // CONFIG_C063_TMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT #if !CONFIG_C076_INTER_MOD_CTX int ret = #endif //! CONFIG_C076_INTER_MOD_CTX @@ -2147,25 +2232,25 @@ #endif //! 
CONFIG_C076_INTER_MOD_CTX , refmv_count, -#if CONFIG_C063_TMVP_IMPROVEMENT +#if CONFIG_MVP_IMPROVEMENT &added_tmvp_cnt, -#endif // CONFIG_C063_TMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT ref_mv_stack, ref_mv_weight -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT , ref_frame_idx0, ref_frame_idx1 -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT #if !CONFIG_C076_INTER_MOD_CTX , mode_context #endif // !CONFIG_C076_INTER_MOD_CTX ); #if !CONFIG_C076_INTER_MOD_CTX -#if CONFIG_C063_TMVP_IMPROVEMENT +#if CONFIG_MVP_IMPROVEMENT if (added_tmvp_cnt) is_available = ret; #else if (blk_row == 0 && blk_col == 0) is_available = ret; -#endif // CONFIG_C063_TMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT #endif //! CONFIG_C076_INTER_MOD_CTX } } @@ -2173,7 +2258,7 @@ #if !CONFIG_C076_INTER_MOD_CTX if (is_available == 0) mode_context[ref_frame] |= (1 << GLOBALMV_OFFSET); #endif //! CONFIG_C076_INTER_MOD_CTX -#if !CONFIG_C063_TMVP_IMPROVEMENT +#if !CONFIG_MVP_IMPROVEMENT for (int i = 0; i < 3 && allow_extension; ++i) { const int blk_row = tpl_sample_pos[i][0]; const int blk_col = tpl_sample_pos[i][1]; @@ -2184,39 +2269,39 @@ gm_mv_candidates, #endif //! CONFIG_C076_INTER_MOD_CTX refmv_count, ref_mv_stack, ref_mv_weight, -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT ref_frame_idx0, ref_frame_idx1 -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT #if !CONFIG_C076_INTER_MOD_CTX mode_context #endif //! CONFIG_C076_INTER_MOD_CTX ); } -#endif // !CONFIG_C063_TMVP_IMPROVEMENT +#endif // !CONFIG_MVP_IMPROVEMENT } uint8_t dummy_newmv_count = 0; -#if !CONFIG_C043_MVP_IMPROVEMENTS +#if !CONFIG_MVP_IMPROVEMENT // Scan the second outer area. 
scan_blk_mbmi(cm, xd, mi_row, mi_col, rf, -1, -1, ref_mv_stack, ref_mv_weight, &row_match_count, &dummy_newmv_count, gm_mv_candidates, -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT ref_frame_idx0, ref_frame_idx1, -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX -#if CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_SKIP_MODE_ENHANCEMENT +#if CONFIG_MVP_IMPROVEMENT 0, single_mv, &single_mv_count, derived_mv_stack, derived_mv_weight, &derived_mv_count, -#endif // CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT #if CONFIG_WARP_REF_LIST warp_param_stack, max_num_of_warp_candidates, valid_num_warp_candidates, ref_frame, #endif // CONFIG_WARP_REF_LIST refmv_count); -#endif // !CONFIG_C043_MVP_IMPROVEMENTS +#endif // !CONFIG_MVP_IMPROVEMENT -#if CONFIG_SMVP_IMPROVEMENT +#if CONFIG_MVP_IMPROVEMENT for (int idx = 2; idx <= MVREF_COLS; ++idx) { const int col_offset = -(idx << 1) + 1 + col_adj; if (abs(col_offset) <= abs(max_col_offset) && @@ -2228,9 +2313,9 @@ rf, col_offset, ref_mv_stack, ref_mv_weight, refmv_count, &col_match_count, &dummy_newmv_count, gm_mv_candidates, max_col_offset, -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT ref_frame_idx0, ref_frame_idx1, -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT 0, single_mv, &single_mv_count, derived_mv_stack, derived_mv_weight, &derived_mv_count, #if CONFIG_WARP_REF_LIST @@ -2254,9 +2339,9 @@ mi_col, rf, row_offset, ref_mv_stack, ref_mv_weight, refmv_count, &row_match_count, &dummy_newmv_count, gm_mv_candidates, max_row_offset, -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT ref_frame_idx0, ref_frame_idx1, -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT #if CONFIG_WARP_REF_LIST warp_param_stack, max_num_of_warp_candidates, valid_num_warp_candidates, ref_frame, @@ -2273,9 +2358,9 @@ rf, col_offset, ref_mv_stack, ref_mv_weight, refmv_count, &col_match_count, &dummy_newmv_count, 
gm_mv_candidates, max_col_offset, -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT ref_frame_idx0, ref_frame_idx1, -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT #if CONFIG_WARP_REF_LIST warp_param_stack, max_num_of_warp_candidates, valid_num_warp_candidates, ref_frame, @@ -2283,10 +2368,10 @@ &processed_cols); } -#endif // CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT #if !CONFIG_C076_INTER_MOD_CTX -#if CONFIG_COMPLEXITY_SCALABLE_MVP +#if CONFIG_MVP_IMPROVEMENT // These contexts are independent of the outer area search int new_ctx = 2 * nearest_match + (newmv_count > 0); int ref_ctx = 2 * nearest_match + (newmv_count < 3); @@ -2335,7 +2420,7 @@ ref_mv_stack[idx] = tmp_mv; ref_mv_weight[idx - 1] = ref_mv_weight[idx]; ref_mv_weight[idx] = tmp_ref_mv_weight; -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT if (xd->mi[0]->skip_mode) { const MV_REFERENCE_FRAME temp_ref0 = ref_frame_idx0[idx - 1]; const MV_REFERENCE_FRAME temp_ref1 = ref_frame_idx1[idx - 1]; @@ -2345,14 +2430,14 @@ ref_frame_idx1[idx - 1] = ref_frame_idx1[idx]; ref_frame_idx1[idx] = temp_ref1; } -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT nr_len = idx; } } len = nr_len; } -#if !CONFIG_COMPLEXITY_SCALABLE_MVP +#if !CONFIG_MVP_IMPROVEMENT len = *refmv_count; while (len > nearest_refmv_count) { int nr_len = nearest_refmv_count; @@ -2371,15 +2456,16 @@ } #endif -#if (CONFIG_REF_MV_BANK && CONFIG_C043_MVP_IMPROVEMENTS) +#if (CONFIG_REF_MV_BANK && CONFIG_MVP_IMPROVEMENT) if (cm->seq_params.enable_refmvbank) { const int ref_mv_limit = AOMMIN(cm->features.max_drl_bits + 1, MAX_REF_MV_STACK_SIZE); + // If open slots are available, fetch reference MVs from the ref mv banks. 
if (*refmv_count < ref_mv_limit -#if !CONFIG_BVP_IMPROVEMENT +#if !CONFIG_IBC_BV_IMPROVEMENT && ref_frame != INTRA_FRAME -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT ) { const REF_MV_BANK *ref_mv_bank = &xd->ref_mv_bank; const CANDIDATE_MV *queue = ref_mv_bank->rmb_buffer[ref_frame]; @@ -2393,40 +2479,40 @@ ++idx_bank) { const int idx = (start_idx + count - 1 - idx_bank) % REF_MV_BANK_SIZE; const CANDIDATE_MV cand_mv = queue[idx]; -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT bool rmb_candi_exist = -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT check_rmb_cand(cand_mv, ref_mv_stack, ref_mv_weight, refmv_count, is_comp, xd->mi_row, xd->mi_col, block_width, block_height, cm->width, cm->height); -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT if (xd->mi[0]->skip_mode && rmb_candi_exist) { ref_frame_idx0[*refmv_count - 1] = rf[0]; ref_frame_idx1[*refmv_count - 1] = rf[1]; } -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT } } } -#endif // (CONFIG_REF_MV_BANK && CONFIG_C043_MVP_IMPROVEMENTS) +#endif // (CONFIG_REF_MV_BANK && CONFIG_MVP_IMPROVEMENT) -#if CONFIG_SMVP_IMPROVEMENT +#if CONFIG_MVP_IMPROVEMENT const int max_ref_mv_count = AOMMIN(cm->features.max_drl_bits + 1, MAX_REF_MV_STACK_SIZE); -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT if (xd->mi[0]->skip_mode) derived_mv_count = 0; -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT if (*refmv_count < max_ref_mv_count && derived_mv_count > 0) { fill_mvp_from_derived_smvp(rf, ref_mv_stack, ref_mv_weight, refmv_count, derived_mv_stack, derived_mv_count, -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT xd->mi[0], ref_frame_idx0, ref_frame_idx1, -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT max_ref_mv_count); } -#endif // CONFIG_SMVP_IMPROVEMENT 
+#endif // CONFIG_MVP_IMPROVEMENT int mi_width = AOMMIN(mi_size_wide[BLOCK_64X64], xd->width); mi_width = AOMMIN(mi_width, cm->mi_params.mi_cols - mi_col); @@ -2495,6 +2581,9 @@ ref_mv_stack[*refmv_count].row_offset = OFFSET_NONSPATIAL; ref_mv_stack[*refmv_count].col_offset = OFFSET_NONSPATIAL; #endif // CONFIG_EXTENDED_WARP_PREDICTION +#if CONFIG_CWP + ref_mv_stack[*refmv_count].cwp_idx = CWP_EQUAL; +#endif // CONFIG_CWP } else { ref_mv_stack[*refmv_count].this_mv = comp_list[0][0]; ref_mv_stack[*refmv_count].comp_mv = comp_list[0][1]; @@ -2502,13 +2591,16 @@ ref_mv_stack[*refmv_count].row_offset = OFFSET_NONSPATIAL; ref_mv_stack[*refmv_count].col_offset = OFFSET_NONSPATIAL; #endif // CONFIG_EXTENDED_WARP_PREDICTION +#if CONFIG_CWP + ref_mv_stack[*refmv_count].cwp_idx = CWP_EQUAL; +#endif // CONFIG_CWP } -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT if (xd->mi[0]->skip_mode) { ref_frame_idx0[*refmv_count] = rf[0]; ref_frame_idx1[*refmv_count] = rf[1]; } -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT ref_mv_weight[*refmv_count] = 2; ++*refmv_count; } else { @@ -2519,12 +2611,15 @@ ref_mv_stack[*refmv_count].row_offset = OFFSET_NONSPATIAL; ref_mv_stack[*refmv_count].col_offset = OFFSET_NONSPATIAL; #endif // CONFIG_EXTENDED_WARP_PREDICTION -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_CWP + ref_mv_stack[*refmv_count].cwp_idx = CWP_EQUAL; +#endif // CONFIG_CWP +#if CONFIG_SKIP_MODE_ENHANCEMENT if (xd->mi[0]->skip_mode) { ref_frame_idx0[*refmv_count] = rf[0]; ref_frame_idx1[*refmv_count] = rf[1]; } -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT ref_mv_weight[*refmv_count] = 2; ++*refmv_count; } @@ -2541,9 +2636,9 @@ } } else { // Handle single reference frame extension -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT assert(!xd->mi[0]->skip_mode); -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT #if 
CONFIG_IBC_SR_EXT if (!xd->mi[0]->use_intrabc[xd->tree_type == CHROMA_PART]) { #endif // CONFIG_IBC_SR_EXT @@ -2597,9 +2692,9 @@ // If there is extra space in the stack, copy the GLOBALMV vector into it. // This also guarantees the existence of at least one vector to search. if (*refmv_count < MAX_REF_MV_STACK_SIZE -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT && !xd->mi[0]->use_intrabc[xd->tree_type == CHROMA_PART] -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT ) { int stack_idx; for (stack_idx = 0; stack_idx < *refmv_count; ++stack_idx) { @@ -2613,20 +2708,23 @@ ref_mv_stack[*refmv_count].row_offset = OFFSET_NONSPATIAL; ref_mv_stack[*refmv_count].col_offset = OFFSET_NONSPATIAL; #endif // CONFIG_EXTENDED_WARP_PREDICTION +#if CONFIG_CWP + ref_mv_stack[*refmv_count].cwp_idx = CWP_EQUAL; +#endif // CONFIG_CWP ref_mv_weight[*refmv_count] = REF_CAT_LEVEL; (*refmv_count)++; } } } -#if CONFIG_REF_MV_BANK && !CONFIG_C043_MVP_IMPROVEMENTS +#if CONFIG_REF_MV_BANK && !CONFIG_MVP_IMPROVEMENT if (!cm->seq_params.enable_refmvbank) return; const int ref_mv_limit = AOMMIN(cm->features.max_drl_bits + 1, MAX_REF_MV_STACK_SIZE); // If open slots are available, fetch reference MVs from the ref mv banks. 
if (*refmv_count < ref_mv_limit -#if !CONFIG_BVP_IMPROVEMENT +#if !CONFIG_IBC_BV_IMPROVEMENT && ref_frame != INTRA_FRAME -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT ) { const REF_MV_BANK *ref_mv_bank = xd->ref_mv_bank_pt; const CANDIDATE_MV *queue = ref_mv_bank->rmb_buffer[ref_frame]; @@ -2640,21 +2738,21 @@ ++idx_bank) { const int idx = (start_idx + count - 1 - idx_bank) % REF_MV_BANK_SIZE; const CANDIDATE_MV cand_mv = queue[idx]; -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT bool rmb_candi_exist = -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT check_rmb_cand(cand_mv, ref_mv_stack, ref_mv_weight, refmv_count, is_comp, xd->mi_row, xd->mi_col, block_width, block_height, cm->width, cm->height); -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT if (xd->mi[0]->skip_mode && rmb_candi_exist) { ref_frame_idx0[*refmv_count - 1] = rf[0]; ref_frame_idx1[*refmv_count - 1] = rf[1]; } -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT } } -#endif // CONFIG_REF_MV_BANK && !CONFIG_C043_MVP_IMPROVEMENTS +#endif // CONFIG_REF_MV_BANK && !CONFIG_MVP_IMPROVEMENT #if CONFIG_WARP_REF_LIST if (warp_param_stack && valid_num_warp_candidates && @@ -2715,7 +2813,7 @@ #endif // CONFIG_WARP_REF_LIST -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT // If there are open slots in reference BV candidate list // fetch reference BVs from the default BVPs if (xd->mi[0]->use_intrabc[xd->tree_type == CHROMA_PART]) { @@ -2740,7 +2838,7 @@ add_to_ref_bv_list(tmp_mv, ref_mv_stack, ref_mv_weight, refmv_count); } } -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT } #if CONFIG_WARP_REF_LIST @@ -2870,6 +2968,10 @@ bool derive_wrl = (warp_param_stack && valid_num_warp_candidates && max_num_of_warp_candidates); derive_wrl &= (ref_frame < INTER_REFS_PER_FRAME); +#if CONFIG_SEP_COMP_DRL + if (has_second_drl(mi)) derive_wrl = 0; 
+#endif // CONFIG_SEP_COMP_DRL + derive_wrl &= is_motion_variation_allowed_bsize(mi->sb_type[PLANE_TYPE_Y], mi_row, mi_col); if (derive_wrl && valid_num_warp_candidates) { @@ -2878,7 +2980,7 @@ } #endif // CONFIG_WARP_REF_LIST -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT if (mi->skip_mode) { SKIP_MODE_MVP_LIST *skip_list = (SKIP_MODE_MVP_LIST *)&(xd->skip_mvp_candidate_list); @@ -2896,6 +2998,73 @@ #endif // CONFIG_WARP_REF_LIST ); } else { +#if CONFIG_SEP_COMP_DRL + MV_REFERENCE_FRAME rf[2]; + av1_set_ref_frame(rf, ref_frame); + if (!has_second_drl(mi)) + rf[0] = ref_frame; + else { + const BLOCK_SIZE bsize = mi->sb_type[PLANE_TYPE_Y]; +#if CONFIG_FLEX_MVRES + const int fr_mv_precision = cm->features.fr_mv_precision; + gm_mv[0] = get_warp_motion_vector(xd, &cm->global_motion[rf[0]], + fr_mv_precision, bsize, mi_col, mi_row); +#else + gm_mv[0] = get_warp_motion_vector(xd, &cm->global_motion[ref_frame], + allow_high_precision_mv, bsize, mi_col, + mi_row, force_integer_mv); +#endif + gm_mv[1].as_int = 0; + } + setup_ref_mv_list(cm, xd, rf[0], &ref_mv_count[rf[0]], ref_mv_stack[rf[0]], + ref_mv_weight[rf[0]], NULL, NULL, + mv_ref_list ? mv_ref_list[rf[0]] : NULL, gm_mv, mi_row, + mi_col +#if !CONFIG_C076_INTER_MOD_CTX + , + mode_context +#endif //! CONFIG_C076_INTER_MOD_CTX +#if CONFIG_WARP_REF_LIST + , + derive_wrl ? warp_param_stack[rf[0]] : NULL, + derive_wrl ? max_num_of_warp_candidates : 0, + derive_wrl ? 
&valid_num_warp_candidates[rf[0]] : NULL +#endif // CONFIG_WARP_REF_LIST + ); + + if (has_second_drl(mi)) { + assert(rf[0] == mi->ref_frame[0]); + assert(rf[1] == mi->ref_frame[1]); + const BLOCK_SIZE bsize = mi->sb_type[PLANE_TYPE_Y]; +#if CONFIG_FLEX_MVRES + const int fr_mv_precision = cm->features.fr_mv_precision; + gm_mv[0] = get_warp_motion_vector(xd, &cm->global_motion[rf[1]], + fr_mv_precision, bsize, mi_col, mi_row); +#else + gm_mv[0] = get_warp_motion_vector(xd, &cm->global_motion[ref_frame], + allow_high_precision_mv, bsize, mi_col, + mi_row, force_integer_mv); +#endif + gm_mv[1].as_int = 0; + + setup_ref_mv_list(cm, xd, rf[1], &ref_mv_count[rf[1]], + ref_mv_stack[rf[1]], ref_mv_weight[rf[1]], NULL, NULL, + mv_ref_list ? mv_ref_list[rf[1]] : NULL, gm_mv, mi_row, + mi_col +#if !CONFIG_C076_INTER_MOD_CTX + , + mode_context +#endif //! CONFIG_C076_INTER_MOD_CTX +#if CONFIG_WARP_REF_LIST + , + derive_wrl ? warp_param_stack[rf[1]] : NULL, + derive_wrl ? max_num_of_warp_candidates : 0, + derive_wrl ? &valid_num_warp_candidates[rf[1]] : NULL +#endif // CONFIG_WARP_REF_LIST + ); + } + if (derive_wrl) assert(rf[0] == ref_frame); +#else setup_ref_mv_list(cm, xd, ref_frame, &ref_mv_count[ref_frame], ref_mv_stack[ref_frame], ref_mv_weight[ref_frame], NULL, NULL, mv_ref_list ? mv_ref_list[ref_frame] : NULL, gm_mv, @@ -2912,6 +3081,7 @@ #endif // CONFIG_WARP_REF_LIST ); +#endif // CONFIG_SEP_COMP_DRL } #else setup_ref_mv_list(cm, xd, ref_frame, &ref_mv_count[ref_frame], @@ -2929,7 +3099,7 @@ derive_wrl ? &valid_num_warp_candidates[ref_frame] : NULL #endif // CONFIG_WARP_REF_LIST ); -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT } #if CONFIG_FLEX_MVRES @@ -3018,6 +3188,57 @@ } #endif // !CONFIG_TIP +#if CONFIG_MF_IMPROVEMENT +// Get the temporal distance of start_frame to its closest ref frame +// that has interpolation property relative to current frame. 
Interpolation +// means start_frame and its ref frame are on two sides of current frame +static INLINE int get_dist_to_closest_interp_ref(const AV1_COMMON *const cm, + MV_REFERENCE_FRAME start_frame, + const int find_forward_ref) { + if (start_frame == -1) return INT_MAX; + const OrderHintInfo *const order_hint_info = &cm->seq_params.order_hint_info; + + const RefCntBuffer *const start_frame_buf = + get_ref_frame_buf(cm, start_frame); + + if (!is_ref_motion_field_eligible(cm, start_frame_buf)) return INT_MAX; + +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + const int start_frame_order_hint = start_frame_buf->display_order_hint; + const int cur_order_hint = cm->cur_frame->display_order_hint; +#else + const int start_frame_order_hint = start_frame_buf->order_hint; + const int cur_order_hint = cm->cur_frame->order_hint; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + int abs_closest_ref_offset = INT_MAX; +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + const int *const ref_order_hints = + &start_frame_buf->ref_display_order_hint[0]; +#else + const int *const ref_order_hints = &start_frame_buf->ref_order_hints[0]; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + for (MV_REFERENCE_FRAME ref = 0; ref < INTER_REFS_PER_FRAME; ++ref) { + if (ref_order_hints[ref] != -1) { + const int start_to_ref_offset = get_relative_dist( + order_hint_info, start_frame_order_hint, ref_order_hints[ref]); + const int cur_to_ref_offset = get_relative_dist( + order_hint_info, cur_order_hint, ref_order_hints[ref]); + const int abs_start_to_ref_offset = abs(start_to_ref_offset); + const int is_two_sides = + (start_to_ref_offset > 0 && cur_to_ref_offset > 0 && + find_forward_ref == 1) || + (start_to_ref_offset < 0 && cur_to_ref_offset < 0 && + find_forward_ref == 0); + if (is_two_sides && abs_start_to_ref_offset < abs_closest_ref_offset) { + abs_closest_ref_offset = abs_start_to_ref_offset; + } + } + } + + return abs_closest_ref_offset; +} +#endif // CONFIG_MF_IMPROVEMENT + #if CONFIG_TIP // Note: 
motion_filed_projection finds motion vectors of current frame's // reference frame, and projects them to current frame. To make it clear, @@ -3035,8 +3256,13 @@ get_ref_frame_buf(cm, start_frame); if (!is_ref_motion_field_eligible(cm, start_frame_buf)) return 0; +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + const int start_frame_order_hint = start_frame_buf->display_order_hint; + const int cur_order_hint = cm->cur_frame->display_order_hint; +#else const int start_frame_order_hint = start_frame_buf->order_hint; const int cur_order_hint = cm->cur_frame->order_hint; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC int start_to_current_frame_offset = get_relative_dist( &cm->seq_params.order_hint_info, start_frame_order_hint, cur_order_hint); @@ -3053,7 +3279,12 @@ assert(start_frame_buf->width == cm->width && start_frame_buf->height == cm->height); +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + const int *const ref_order_hints = + &start_frame_buf->ref_display_order_hint[0]; +#else const int *const ref_order_hints = &start_frame_buf->ref_order_hints[0]; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC for (MV_REFERENCE_FRAME rf = 0; rf < INTER_REFS_PER_FRAME; ++rf) { if (ref_order_hints[rf] != -1) { ref_offset[rf] = @@ -3126,8 +3357,13 @@ get_ref_frame_buf(cm, start_frame); if (!is_ref_motion_field_eligible(cm, start_frame_buf)) return 0; +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + const int start_frame_order_hint = start_frame_buf->display_order_hint; + const int cur_order_hint = cm->cur_frame->display_order_hint; +#else const int start_frame_order_hint = start_frame_buf->order_hint; const int cur_order_hint = cm->cur_frame->order_hint; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC int start_to_current_frame_offset = get_relative_dist( &cm->seq_params.order_hint_info, start_frame_order_hint, cur_order_hint); @@ -3143,7 +3379,12 @@ assert(start_frame_buf->width == cm->width && start_frame_buf->height == cm->height); +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + const int *const 
ref_order_hints = + &start_frame_buf->ref_display_order_hint[0]; +#else const int *const ref_order_hints = &start_frame_buf->ref_order_hints[0]; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC for (MV_REFERENCE_FRAME rf = 0; rf < INTER_REFS_PER_FRAME; ++rf) { if (ref_order_hints[rf] != -1) { ref_offset[rf] = @@ -3206,7 +3447,7 @@ // Call Start frame's reference frames as reference frames. // Call ref_offset as frame distances between start frame and its reference // frames. -#if CONFIG_TMVP_IMPROVEMENT +#if CONFIG_MVP_IMPROVEMENT static int motion_field_projection_bwd(AV1_COMMON *cm, MV_REFERENCE_FRAME start_frame, int dir, int overwrite_mv) { @@ -3217,15 +3458,25 @@ get_ref_frame_buf(cm, start_frame); if (!is_ref_motion_field_eligible(cm, start_frame_buf)) return 0; +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + const int start_frame_order_hint = start_frame_buf->display_order_hint; + const int cur_order_hint = cm->cur_frame->display_order_hint; +#else const int start_frame_order_hint = start_frame_buf->order_hint; const int cur_order_hint = cm->cur_frame->order_hint; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC int start_to_current_frame_offset = get_relative_dist( &cm->seq_params.order_hint_info, start_frame_order_hint, cur_order_hint); assert(start_frame_buf->width == cm->width && start_frame_buf->height == cm->height); +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + const int *const ref_order_hints = + &start_frame_buf->ref_display_order_hint[0]; +#else const int *const ref_order_hints = &start_frame_buf->ref_order_hints[0]; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC for (MV_REFERENCE_FRAME rf = 0; rf < INTER_REFS_PER_FRAME; ++rf) { if (ref_order_hints[rf] != -1) ref_offset[rf] = @@ -3280,7 +3531,7 @@ return 1; } -#endif // CONFIG_TMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT static int motion_field_projection(AV1_COMMON *cm, MV_REFERENCE_FRAME start_frame, int dir, @@ -3292,15 +3543,25 @@ get_ref_frame_buf(cm, start_frame); if (!is_ref_motion_field_eligible(cm, 
start_frame_buf)) return 0; +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + const int start_frame_order_hint = start_frame_buf->display_order_hint; + const int cur_order_hint = cm->cur_frame->display_order_hint; +#else const int start_frame_order_hint = start_frame_buf->order_hint; const int cur_order_hint = cm->cur_frame->order_hint; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC int start_to_current_frame_offset = get_relative_dist( &cm->seq_params.order_hint_info, start_frame_order_hint, cur_order_hint); assert(start_frame_buf->width == cm->width && start_frame_buf->height == cm->height); +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + const int *const ref_order_hints = + &start_frame_buf->ref_display_order_hint[0]; +#else const int *const ref_order_hints = &start_frame_buf->ref_order_hints[0]; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC for (MV_REFERENCE_FRAME rf = 0; rf < INTER_REFS_PER_FRAME; ++rf) { if (ref_order_hints[rf] != -1) ref_offset[rf] = @@ -3360,6 +3621,16 @@ if (!order_hint_info->enable_order_hint) return -1; const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref); if (buf == NULL) return -1; +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + const int ref_order_hint = buf->display_order_hint; + for (int r = 0; r < INTER_REFS_PER_FRAME; ++r) { + if (buf->ref_display_order_hint[r] == -1) continue; + const int ref_ref_order_hint = buf->ref_display_order_hint[r]; + if (get_relative_dist(order_hint_info, ref_order_hint, + ref_ref_order_hint) == 0) + return 1; + } +#else const int ref_order_hint = buf->order_hint; for (int r = 0; r < INTER_REFS_PER_FRAME; ++r) { if (buf->ref_order_hints[r] == -1) continue; @@ -3368,6 +3639,7 @@ ref_ref_order_hint) == 0) return 1; } +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC return 0; } @@ -3405,33 +3677,51 @@ cm->ref_frame_side[ref_frame] = 0; const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref_frame); ref_buf[ref_frame] = buf; -#if CONFIG_SMVP_IMPROVEMENT || CONFIG_JOINT_MVD +#if CONFIG_MVP_IMPROVEMENT || CONFIG_JOINT_MVD 
+#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + const int relative_dist = + get_relative_dist(order_hint_info, buf->display_order_hint, + cm->cur_frame->display_order_hint); +#else const int relative_dist = get_relative_dist( order_hint_info, buf->order_hint, cm->cur_frame->order_hint); +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC cm->ref_frame_relative_dist[ref_frame] = abs(relative_dist); -#endif // CONFIG_SMVP_IMPROVEMENT || CONFIG_JOINT_MVD +#endif // CONFIG_MVP_IMPROVEMENT || CONFIG_JOINT_MVD } for (int index = 0; index < cm->ref_frames_info.num_future_refs; index++) { const int ref_frame = cm->ref_frames_info.future_refs[index]; cm->ref_frame_side[ref_frame] = 1; const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref_frame); ref_buf[ref_frame] = buf; -#if CONFIG_SMVP_IMPROVEMENT || CONFIG_JOINT_MVD +#if CONFIG_MVP_IMPROVEMENT || CONFIG_JOINT_MVD +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + const int relative_dist = + get_relative_dist(order_hint_info, buf->display_order_hint, + cm->cur_frame->display_order_hint); +#else const int relative_dist = get_relative_dist( order_hint_info, buf->order_hint, cm->cur_frame->order_hint); +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC cm->ref_frame_relative_dist[ref_frame] = abs(relative_dist); -#endif // CONFIG_SMVP_IMPROVEMENT || CONFIG_JOINT_MVD +#endif // CONFIG_MVP_IMPROVEMENT || CONFIG_JOINT_MVD } for (int index = 0; index < cm->ref_frames_info.num_cur_refs; index++) { const int ref_frame = cm->ref_frames_info.cur_refs[index]; cm->ref_frame_side[ref_frame] = -1; const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref_frame); ref_buf[ref_frame] = buf; -#if CONFIG_SMVP_IMPROVEMENT || CONFIG_JOINT_MVD +#if CONFIG_MVP_IMPROVEMENT || CONFIG_JOINT_MVD +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + const int relative_dist = + get_relative_dist(order_hint_info, buf->display_order_hint, + cm->cur_frame->display_order_hint); +#else const int relative_dist = get_relative_dist( order_hint_info, buf->order_hint, 
cm->cur_frame->order_hint); +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC cm->ref_frame_relative_dist[ref_frame] = abs(relative_dist); -#endif // CONFIG_SMVP_IMPROVEMENT || CONFIG_JOINT_MVD +#endif // CONFIG_MVP_IMPROVEMENT || CONFIG_JOINT_MVD } #if CONFIG_TIP @@ -3468,7 +3758,51 @@ closest_ref[dir][1] = ref_frame; } } -#if CONFIG_TMVP_IMPROVEMENT || CONFIG_TIP +#if CONFIG_MVP_IMPROVEMENT || CONFIG_TIP +#if CONFIG_MF_IMPROVEMENT + // Do projection on group 0 (closest past (backward MV), closest future), + // group 1(second closest future, second closest past (backward MV)), + // closest past (forward MV), and then second closest past (forward MVs), + // without overwriting the MVs. + // The projection order of the ref frames in group 0 and group 1 depends + // on the ref frame to its own first ref frame that has interpolation + // property relative to current frame. Interpolation means two frames are on + // two sides of current frame + for (int group_idx = 0; group_idx < 2; ++group_idx) { + const int past_ref_to_its_ref_dist = + get_dist_to_closest_interp_ref(cm, closest_ref[0][group_idx], 0); + const int future_ref_to_its_ref_dist = + get_dist_to_closest_interp_ref(cm, closest_ref[1][group_idx], 1); + if (future_ref_to_its_ref_dist < past_ref_to_its_ref_dist) { + if (closest_ref[1][group_idx] != -1 && n_refs_used < MFMV_STACK_SIZE) { + n_refs_used += + motion_field_projection(cm, closest_ref[1][group_idx], 0, 0); + } + + if (closest_ref[0][group_idx] != -1 && n_refs_used < MFMV_STACK_SIZE) { + n_refs_used += + motion_field_projection_bwd(cm, closest_ref[0][group_idx], 2, 0); + } + } else { + if (closest_ref[0][group_idx] != -1 && n_refs_used < MFMV_STACK_SIZE) { + n_refs_used += + motion_field_projection_bwd(cm, closest_ref[0][group_idx], 2, 0); + } + if (closest_ref[1][group_idx] != -1 && n_refs_used < MFMV_STACK_SIZE) { + n_refs_used += + motion_field_projection(cm, closest_ref[1][group_idx], 0, 0); + } + } + } + + if (closest_ref[0][0] != -1 && n_refs_used < 
MFMV_STACK_SIZE) { + n_refs_used += motion_field_projection(cm, closest_ref[0][0], 2, 0); + } + + if (closest_ref[0][1] != -1 && n_refs_used < MFMV_STACK_SIZE) { + motion_field_projection(cm, closest_ref[0][1], 2, 0); + } +#else // Do projection on closest past (backward MV), closest future, second // closest future, second closest past (backward MV), closest path (forward // MV), and then second closest past (forward MVs), without overwriting @@ -3496,6 +3830,7 @@ if (closest_ref[0][1] != -1 && n_refs_used < MFMV_STACK_SIZE) { motion_field_projection(cm, closest_ref[0][1], 2, 0); } +#endif // CONFIG_MF_IMPROVEMENT #else // Do projection on closest past and future refs if they exist if (closest_ref[0][0] != -1) { @@ -3516,24 +3851,32 @@ const int ret = motion_field_projection(cm, closest_ref[0][1], 2, 1); n_refs_used += ret; } -#endif // CONFIG_TMVP_IMPROVEMENT || CONFIG_TIP +#endif // CONFIG_MVP_IMPROVEMENT || CONFIG_TIP } -#if CONFIG_SMVP_IMPROVEMENT || CONFIG_JOINT_MVD +#if CONFIG_MVP_IMPROVEMENT || CONFIG_JOINT_MVD void av1_setup_ref_frame_sides(AV1_COMMON *cm) { const OrderHintInfo *const order_hint_info = &cm->seq_params.order_hint_info; memset(cm->ref_frame_side, 0, sizeof(cm->ref_frame_side)); if (!order_hint_info->enable_order_hint) return; +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + const int cur_order_hint = cm->cur_frame->display_order_hint; +#else const int cur_order_hint = cm->cur_frame->order_hint; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC for (int ref_frame = 0; ref_frame < cm->ref_frames_info.num_total_refs; ref_frame++) { const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref_frame); int order_hint = 0; +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + if (buf != NULL) order_hint = buf->display_order_hint; +#else if (buf != NULL) order_hint = buf->order_hint; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC const int relative_dist = get_relative_dist(order_hint_info, order_hint, cur_order_hint); if (relative_dist > 0) { @@ -3544,7 +3887,7 @@ 
cm->ref_frame_relative_dist[ref_frame] = abs(relative_dist); } } -#endif // CONFIG_SMVP_IMPROVEMENT || CONFIG_JOINT_MVD +#endif // CONFIG_MVP_IMPROVEMENT || CONFIG_JOINT_MVD static INLINE void record_samples(const MB_MODE_INFO *mbmi, #if CONFIG_COMPOUND_WARP_SAMPLES @@ -3847,7 +4190,11 @@ skip_mode_info->ref_frame_idx_0 = 0; } #else +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + const int cur_order_hint = cm->current_frame.display_order_hint; +#else const int cur_order_hint = cm->current_frame.order_hint; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC int ref_order_hints[2] = { -1, INT_MAX }; int ref_idx[2] = { INVALID_IDX, INVALID_IDX }; @@ -3856,7 +4203,11 @@ const RefCntBuffer *const buf = get_ref_frame_buf(cm, i); if (buf == NULL) continue; +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + const int ref_order_hint = buf->display_order_hint; +#else const int ref_order_hint = buf->order_hint; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC if (get_relative_dist(order_hint_info, ref_order_hint, cur_order_hint) < 0) { // Forward reference @@ -3896,7 +4247,11 @@ const RefCntBuffer *const buf = get_ref_frame_buf(cm, i); if (buf == NULL) continue; +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + const int ref_order_hint = buf->display_order_hint; +#else const int ref_order_hint = buf->order_hint; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC if ((ref_order_hints[0] != -1 && get_relative_dist(order_hint_info, ref_order_hint, ref_order_hints[0]) < 0) && @@ -3966,6 +4321,9 @@ const int idx = (start_idx + count) % REF_MV_BANK_SIZE; queue[idx].this_mv = mbmi->mv[0]; if (is_comp) queue[idx].comp_mv = mbmi->mv[1]; +#if CONFIG_CWP + queue[idx].cwp_idx = mbmi->cwp_idx; +#endif // CONFIG_CWP if (count < REF_MV_BANK_SIZE) { ++ref_mv_bank->rmb_count[ref_frame]; } else { @@ -4011,8 +4369,8 @@ clamp(mv_row, MV_LOW + 1, MV_UPP - 1); submi[mi_y * mi_stride + mi_x]->mv[0].as_mv.col = clamp(mv_col, MV_LOW + 1, MV_UPP - 1); - span_submv(cm, (submi + mi_y * mi_stride + mi_x), mi_row, mi_col, - BLOCK_8X8); + 
span_submv(cm, (submi + mi_y * mi_stride + mi_x), mi_row + mi_y, + mi_col + mi_x, BLOCK_8X8); } } } @@ -4136,7 +4494,14 @@ num_wrl_cand * sizeof(wrl_list[0])); if (p_valid_num_candidates) { // for NEARMV mode, the maximum number of candidates is 1 - *p_valid_num_candidates = (mbmi->mode == NEARMV) ? 1 : num_wrl_cand; + *p_valid_num_candidates = (mbmi->mode == NEARMV +#if CONFIG_CWG_D067_IMPROVED_WARP + || mbmi->mode == AMVDNEWMV +#endif // CONFIG_CWG_D067_IMPROVED_WARP + + ) + ? 1 + : num_wrl_cand; } } @@ -4472,3 +4837,98 @@ return ctx; } #endif // CONFIG_WARPMV + +#if CONFIG_CWG_D067_IMPROVED_WARP +// return 1 if valid point is found +// return 0 if the point is not valid +static int fill_warp_corner_projected_point(const MB_MODE_INFO *neighbor_mi, + MV_REFERENCE_FRAME this_ref, + const int pos_col, + const int pos_row, int *pts, + int *mvs, int *n_points) { + // return if the source point is invalid + if (pos_col < 0 || pos_row < 0) return 0; + + if (!is_inter_ref_frame(neighbor_mi->ref_frame[0])) return 0; + if (neighbor_mi->ref_frame[0] != this_ref) return 0; + int mv_row; + int mv_col; + if (is_warp_mode(neighbor_mi->motion_mode)) { + int_mv warp_mv = + get_warp_motion_vector_xy_pos(&neighbor_mi->wm_params[0], pos_col, + pos_row, MV_PRECISION_ONE_EIGHTH_PEL); + mv_row = warp_mv.as_mv.row; + mv_col = warp_mv.as_mv.col; + } else { + mv_row = neighbor_mi->mv[0].as_mv.row; + mv_col = neighbor_mi->mv[0].as_mv.col; + } + pts[2 * (*n_points)] = pos_col; + pts[2 * (*n_points) + 1] = pos_row; + mvs[2 * (*n_points)] = mv_col; + mvs[2 * (*n_points) + 1] = mv_row; + ++(*n_points); + return 1; +} +// Check all 3 neighbors to generate projected points +int generate_points_from_corners(const MACROBLOCKD *xd, int *pts, int *mvs, + int *np, MV_REFERENCE_FRAME ref_frame) { + const TileInfo *const tile = &xd->tile; + POSITION mi_pos; + int valid_points = 0; + MV_REFERENCE_FRAME rf[2]; + av1_set_ref_frame(rf, ref_frame); + MV_REFERENCE_FRAME this_ref = rf[0]; + const int bw = 
xd->width * MI_SIZE; + const int bh = xd->height * MI_SIZE; + + // top-left + mi_pos.row = -1; + mi_pos.col = -1; + if (is_inside(tile, xd->mi_col, xd->mi_row, &mi_pos) && xd->up_available && + xd->left_available) { + const MB_MODE_INFO *neighbor_mi = + xd->mi[mi_pos.row * xd->mi_stride + mi_pos.col]; + int pos_row = xd->mi_row * MI_SIZE; + int pos_col = xd->mi_col * MI_SIZE; + int valid = fill_warp_corner_projected_point(neighbor_mi, this_ref, pos_col, + pos_row, pts, mvs, np); + if (valid) { + valid_points++; + } + } + + // top-right + mi_pos.row = -1; + mi_pos.col = xd->width - 1; + if (is_inside(tile, xd->mi_col, xd->mi_row, &mi_pos) && xd->up_available) { + const MB_MODE_INFO *neighbor_mi = + xd->mi[mi_pos.row * xd->mi_stride + mi_pos.col]; + int pos_row = xd->mi_row * MI_SIZE; + int pos_col = xd->mi_col * MI_SIZE + bw; + int valid = fill_warp_corner_projected_point(neighbor_mi, this_ref, pos_col, + pos_row, pts, mvs, np); + if (valid) { + valid_points++; + } + } + + // bottom-left + mi_pos.row = xd->height - 1; + mi_pos.col = -1; + if (is_inside(tile, xd->mi_col, xd->mi_row, &mi_pos) && xd->left_available) { + const MB_MODE_INFO *neighbor_mi = + xd->mi[mi_pos.row * xd->mi_stride + mi_pos.col]; + int pos_row = xd->mi_row * MI_SIZE + bh; + int pos_col = xd->mi_col * MI_SIZE; + int valid = fill_warp_corner_projected_point(neighbor_mi, this_ref, pos_col, + pos_row, pts, mvs, np); + if (valid) { + valid_points++; + } + } + + assert(valid_points <= 3); + return valid_points; +} +#endif // CONFIG_CWG_D067_IMPROVED_WARP
diff --git a/av1/common/mvref_common.h b/av1/common/mvref_common.h index 831869a..41f3452 100644 --- a/av1/common/mvref_common.h +++ b/av1/common/mvref_common.h
@@ -22,12 +22,12 @@ extern "C" { #endif -#if CONFIG_SMVP_IMPROVEMENT +#if CONFIG_MVP_IMPROVEMENT #define MVREF_ROWS 1 #define MVREF_COLS 3 #else #define MVREF_ROW_COLS 3 -#endif // CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT // Set the upper limit of the motion vector component magnitude. // This would make a motion vector fit in 26 bits. Plus 3 bits for the @@ -48,14 +48,26 @@ static AOM_INLINE int get_block_position(AV1_COMMON *cm, int *mi_r, int *mi_c, int blk_row, int blk_col, MV mv, int sign_bias) { +#if CONFIG_MF_IMPROVEMENT + const SequenceHeader *const seq_params = &cm->seq_params; + const int sb_size = block_size_high[seq_params->sb_size]; + const int mf_sb_size_log2 = (sb_size <= 64 ? mi_size_high_log2[BLOCK_64X64] + : seq_params->mib_size_log2) + + MI_SIZE_LOG2; + const int mf_sb_size = (1 << mf_sb_size_log2); + const int sb_tmvp_size = (mf_sb_size >> TMVP_MI_SZ_LOG2); + const int sb_tmvp_size_log2 = mf_sb_size_log2 - TMVP_MI_SZ_LOG2; + const int base_blk_row = (blk_row >> sb_tmvp_size_log2) << sb_tmvp_size_log2; + const int base_blk_col = (blk_col >> sb_tmvp_size_log2) << sb_tmvp_size_log2; +#else const int base_blk_row = (blk_row >> TMVP_MI_SZ_LOG2) << TMVP_MI_SZ_LOG2; const int base_blk_col = (blk_col >> TMVP_MI_SZ_LOG2) << TMVP_MI_SZ_LOG2; +#endif // CONFIG_MF_IMPROVEMENT // The motion vector in units of 1/8-pel const int shift = (3 + TMVP_MI_SZ_LOG2); const int row_offset = (mv.row >= 0) ? (mv.row >> shift) : -((-mv.row) >> shift); - const int col_offset = (mv.col >= 0) ? 
(mv.col >> shift) : -((-mv.col) >> shift); @@ -68,10 +80,17 @@ col >= (cm->mi_params.mi_cols >> TMVP_SHIFT_BITS)) return 0; +#if CONFIG_MF_IMPROVEMENT + if (row < base_blk_row - MAX_OFFSET_HEIGHT_LOG2 || + row >= base_blk_row + sb_tmvp_size + MAX_OFFSET_HEIGHT_LOG2 || + col < base_blk_col - sb_tmvp_size || + col >= base_blk_col + (sb_tmvp_size << 1)) +#else if (row < base_blk_row - MAX_OFFSET_HEIGHT_LOG2 || row >= base_blk_row + TMVP_MI_SIZE + MAX_OFFSET_HEIGHT_LOG2 || col < base_blk_col - MAX_OFFSET_WIDTH_LOG2 || col >= base_blk_col + TMVP_MI_SIZE + MAX_OFFSET_WIDTH_LOG2) +#endif // CONFIG_MF_IMPROVEMENT return 0; *mi_r = row; @@ -84,15 +103,24 @@ // clamp_mv_ref #define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC +#define DISPLAY_ORDER_HINT_BITS 31 +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + static INLINE int get_relative_dist(const OrderHintInfo *oh, int a, int b) { if (!oh->enable_order_hint) return 0; +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + assert(a >= 0); + assert(b >= 0); + const int bits = DISPLAY_ORDER_HINT_BITS; +#else const int bits = oh->order_hint_bits_minus_1 + 1; assert(bits >= 1); assert(a >= 0 && a < (1 << bits)); assert(b >= 0 && b < (1 << bits)); - +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC int diff = a - b; const int m = 1 << (bits - 1); diff = (diff & (m - 1)) - (diff & m); @@ -399,6 +427,31 @@ } } +#if CONFIG_SEP_COMP_DRL +/*!\brief Return ref_mv_idx_type of the current coding block + * conversion of two ref_mv_idx(s) into one value when there are two DRLs */ +static INLINE int av1_ref_mv_idx_type(const MB_MODE_INFO *mbmi, + const int *ref_mv_idx) { + assert(ref_mv_idx[0] < MAX_REF_MV_STACK_SIZE); + assert(ref_mv_idx[1] < MAX_REF_MV_STACK_SIZE); + if (has_second_drl(mbmi)) { + return ref_mv_idx[1] * MAX_REF_MV_STACK_SIZE + ref_mv_idx[0]; + } else { + assert(0 == ref_mv_idx[1]); + return ref_mv_idx[0]; + } +} + +/*!\brief Reset ref_mv_idx(s) based on the ref_mv_idx_type value */ 
+static INLINE void av1_set_ref_mv_idx(int *ref_mv_idx, int ref_mv_idx_type) { + assert(ref_mv_idx_type >= 0 && + ref_mv_idx_type < MAX_REF_MV_STACK_SIZE * MAX_REF_MV_STACK_SIZE); + ref_mv_idx[1] = ref_mv_idx_type / MAX_REF_MV_STACK_SIZE; + ref_mv_idx[0] = ref_mv_idx_type - ref_mv_idx[1] * MAX_REF_MV_STACK_SIZE; + return; +} +#endif // CONFIG_SEP_COMP_DRL + static INLINE void av1_set_ref_frame(MV_REFERENCE_FRAME *rf, MV_REFERENCE_FRAME ref_frame_type) { if (ref_frame_type == INTRA_FRAME || @@ -496,9 +549,9 @@ void av1_setup_frame_sign_bias(AV1_COMMON *cm); void av1_setup_skip_mode_allowed(AV1_COMMON *cm); void av1_setup_motion_field(AV1_COMMON *cm); -#if CONFIG_SMVP_IMPROVEMENT +#if CONFIG_MVP_IMPROVEMENT void av1_setup_ref_frame_sides(AV1_COMMON *cm); -#endif // CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT static INLINE void av1_collect_neighbors_ref_counts(MACROBLOCKD *const xd) { av1_zero(xd->neighbors_ref_counts); @@ -760,12 +813,32 @@ // Special case for sub 8x8 chroma cases, to prevent referring to chroma // pixels outside current tile. 
- if (xd->is_chroma_ref && av1_num_planes(cm) > 1) { - const struct macroblockd_plane *const pd = &xd->plane[1]; - if (bw < 8 && pd->subsampling_x) - if (src_left_edge < tile_left_edge + 4 * SCALE_PX_TO_MV) return 0; - if (bh < 8 && pd->subsampling_y) - if (src_top_edge < tile_top_edge + 4 * SCALE_PX_TO_MV) return 0; + if (!cm->seq_params.enable_sdp || !frame_is_intra_only(cm)) { + if (xd->is_chroma_ref && av1_num_planes(cm) > 1) { + const struct macroblockd_plane *const pd = &xd->plane[1]; +#if CONFIG_EXT_RECUR_PARTITIONS + if (xd->mi && xd->mi[0]) { + const CHROMA_REF_INFO *chroma_ref_info = &xd->mi[0]->chroma_ref_info; + const int src_left_edge_chroma = + chroma_ref_info->mi_col_chroma_base * MI_SIZE * SCALE_PX_TO_MV + + dv.col; + const int src_top_edge_chroma = + chroma_ref_info->mi_row_chroma_base * MI_SIZE * SCALE_PX_TO_MV + + dv.row; + if (bw < 8 && pd->subsampling_x) + if (src_left_edge_chroma < tile_left_edge) return 0; + if (bh < 8 && pd->subsampling_y) + if (src_top_edge_chroma < tile_top_edge) return 0; + } else { +#endif + if (bw < 8 && pd->subsampling_x) + if (src_left_edge < tile_left_edge + 4 * SCALE_PX_TO_MV) return 0; + if (bh < 8 && pd->subsampling_y) + if (src_top_edge < tile_top_edge + 4 * SCALE_PX_TO_MV) return 0; +#if CONFIG_EXT_RECUR_PARTITIONS + } +#endif + } } #if CONFIG_IBC_SR_EXT @@ -1134,6 +1207,13 @@ return 1; } +#if CONFIG_CWG_D067_IMPROVED_WARP +// Check all 3 neighbors to generate projected points +int generate_points_from_corners(const MACROBLOCKD *xd, int *pts, int *mvs, + int *np, MV_REFERENCE_FRAME ref_frame); + +#endif // CONFIG_CWG_D067_IMPROVED_WARP + #ifdef __cplusplus } // extern "C" #endif
diff --git a/av1/common/obmc.h b/av1/common/obmc.h index a51820b..dfa5357 100644 --- a/av1/common/obmc.h +++ b/av1/common/obmc.h
@@ -21,12 +21,14 @@ static INLINE void foreach_overlappable_nb_above(const AV1_COMMON *cm, MACROBLOCKD *xd, int nb_max, overlappable_nb_visitor_t fun, - void *fun_ctxt) { + void *fun_ctxt, + bool count_only) { if (!xd->up_available) return; const int num_planes = av1_num_planes(cm); int nb_count = 0; const int mi_col = xd->mi_col; + // prev_row_mi points into the mi array, starting at the beginning of the // previous row. MB_MODE_INFO **prev_row_mi = xd->mi - mi_col - 1 * xd->mi_stride; @@ -35,8 +37,41 @@ for (int above_mi_col = mi_col; above_mi_col < end_col && nb_count < nb_max; above_mi_col += mi_step) { MB_MODE_INFO **above_mi = prev_row_mi + above_mi_col; - mi_step = AOMMIN(mi_size_wide[above_mi[0]->sb_type[PLANE_TYPE_Y]], - mi_size_wide[BLOCK_64X64]); + mi_step = mi_size_wide[above_mi[0]->sb_type[PLANE_TYPE_Y]]; +#if CONFIG_UNEVEN_4WAY + if (count_only) { + // In this case, we may only be parsing without decoding (e.g. in case of + // row-baed multi-threading). Hence, we do not have access to variables + // `above_mi[0]->chroma_ref_info` and `above_mi[0]->mi_col_start`. + // Also, if mi_step = 1, it must be a non-chroma ref block. So, we use + // mi_step = 2. + if (mi_step == 1) { + mi_step = 2; + } + } else { + // If we're considering a block that is NOT a chroma ref: + // - Move above_mi_col back to the base mi col, + // - Set above_mbmi to point at the block with chroma information, and + // - Set mi_step to step over all blocks that the chroma block covers. 
+ const CHROMA_REF_INFO *chroma_ref_info = &above_mi[0]->chroma_ref_info; + if (!chroma_ref_info->is_chroma_ref) { + above_mi_col = chroma_ref_info->mi_col_chroma_base; + mi_step = mi_size_wide[chroma_ref_info->bsize_base]; + if (above_mi_col < mi_col) continue; + above_mi = prev_row_mi + above_mi_col; + assert(above_mi[0]->chroma_ref_info.bsize_base == + chroma_ref_info->bsize_base); + } + // If above block's left boundary is to the left of current block's left + // boundary, we need to find the common overlap. + if (above_mi[0]->mi_col_start < above_mi_col) { + const int extra_cols = above_mi_col - above_mi[0]->mi_col_start; + mi_step -= extra_cols; + assert(mi_step > 0); + } + } +#else + (void)count_only; // If we're considering a block with width 4, it should be treated as // half of a pair of blocks with chroma information in the second. Move // above_mi_col back to the start of the pair if needed, set above_mbmi @@ -47,11 +82,25 @@ above_mi = prev_row_mi + above_mi_col + 1; mi_step = 2; } +#endif // CONFIG_UNEVEN_4WAY + mi_step = AOMMIN(mi_step, mi_size_wide[BLOCK_64X64]); + int overlapped_mi_width = AOMMIN(xd->width, mi_step); +#if CONFIG_UNEVEN_4WAY + if (!IS_POWER_OF_TWO(overlapped_mi_width)) { + assert(!IS_POWER_OF_TWO(mi_step)); + const int mi_step_pow2 = 1 << get_msb(mi_step); + above_mi_col += (mi_step - mi_step_pow2); + mi_step = mi_step_pow2; + overlapped_mi_width = AOMMIN(xd->width, mi_step); + } +#endif // CONFIG_UNEVEN_4WAY + assert(IS_POWER_OF_TWO(overlapped_mi_width)); if (is_neighbor_overlappable(*above_mi, xd->tree_type)) { ++nb_count; - fun(xd, 0, above_mi_col - mi_col, AOMMIN(xd->width, mi_step), 0, - *above_mi, fun_ctxt, num_planes); + assert(above_mi_col >= mi_col); + fun(xd, 0, above_mi_col - mi_col, overlapped_mi_width, 0, *above_mi, + fun_ctxt, num_planes); } } } @@ -73,17 +122,52 @@ for (int left_mi_row = mi_row; left_mi_row < end_row && nb_count < nb_max; left_mi_row += mi_step) { MB_MODE_INFO **left_mi = prev_col_mi + left_mi_row * 
xd->mi_stride; - mi_step = AOMMIN(mi_size_high[left_mi[0]->sb_type[PLANE_TYPE_Y]], - mi_size_high[BLOCK_64X64]); + mi_step = mi_size_high[left_mi[0]->sb_type[PLANE_TYPE_Y]]; +#if CONFIG_UNEVEN_4WAY + // If we're considering a block that is NOT a chroma ref: + // - Move left_mi_col back to the base mi col, + // - Set left_mbmi to point at the block with chroma information, and + // - Set mi_step to step over all blocks that the chroma block covers. + const CHROMA_REF_INFO *chroma_ref_info = &left_mi[0]->chroma_ref_info; + if (!chroma_ref_info->is_chroma_ref) { + left_mi_row = chroma_ref_info->mi_row_chroma_base; + mi_step = mi_size_high[chroma_ref_info->bsize_base]; + if (left_mi_row < mi_row) continue; + left_mi = prev_col_mi + left_mi_row * xd->mi_stride; + assert(left_mi[0]->chroma_ref_info.bsize_base == + chroma_ref_info->bsize_base); + } + // If left block's top boundary is above current block's top boundary, we + // need to find the common overlap. + if (left_mi[0]->mi_row_start < left_mi_row) { + const int extra_cols = left_mi_row - left_mi[0]->mi_row_start; + mi_step -= extra_cols; + assert(mi_step > 0); + } +#else if (mi_step == 1) { left_mi_row &= ~1; left_mi = prev_col_mi + (left_mi_row + 1) * xd->mi_stride; mi_step = 2; } +#endif // CONFIG_UNEVEN_4WAY + mi_step = AOMMIN(mi_step, mi_size_high[BLOCK_64X64]); + int overlapped_mi_height = AOMMIN(xd->height, mi_step); +#if CONFIG_UNEVEN_4WAY + if (!IS_POWER_OF_TWO(overlapped_mi_height)) { + assert(!IS_POWER_OF_TWO(mi_step)); + const int mi_step_pow2 = 1 << get_msb(mi_step); + left_mi_row += (mi_step - mi_step_pow2); + mi_step = mi_step_pow2; + overlapped_mi_height = AOMMIN(xd->height, mi_step); + } +#endif // CONFIG_UNEVEN_4WAY + assert(IS_POWER_OF_TWO(overlapped_mi_height)); if (is_neighbor_overlappable(*left_mi, xd->tree_type)) { ++nb_count; - fun(xd, left_mi_row - mi_row, 0, AOMMIN(xd->height, mi_step), 1, *left_mi, + assert(left_mi_row >= mi_row); + fun(xd, left_mi_row - mi_row, 0, overlapped_mi_height, 
1, *left_mi, fun_ctxt, num_planes); } }
diff --git a/av1/common/pef.c b/av1/common/pef.c index b84222c..c5587dd 100644 --- a/av1/common/pef.c +++ b/av1/common/pef.c
@@ -169,6 +169,9 @@ // setup PEF input structure void setup_pef_input(MACROBLOCKD *xd, int pef_mode, int plane, uint16_t *dst, int dst_stride, int bw, int bh, int_mv *mv_refined, +#if CONFIG_REFINEMV + REFINEMV_SUBMB_INFO *refinemv_subinfo, +#endif // CONFIG_REFINEMV PefFuncInput *pef_input) { pef_input->pef_mode = pef_mode; pef_input->plane = plane; @@ -180,6 +183,9 @@ pef_input->dst = dst; pef_input->dst_stride = dst_stride; pef_input->mv_refined = mv_refined; +#if CONFIG_REFINEMV + pef_input->refinemv_subinfo = refinemv_subinfo; +#endif // CONFIG_REFINEMV } #if CONFIG_OPTFLOW_REFINEMENT @@ -191,8 +197,17 @@ // check if the neighboring mvs are the same void check_mv(bool *diff_mv, int pef_mode, int mv_rows, int mv_cols, int mvs_stride, const TPL_MV_REF *tpl_mvs, int tip_step, - int n_blocks, int_mv *mv_refined, int opfl_step) { + int n_blocks, int_mv *mv_refined, int opfl_step +#if CONFIG_REFINEMV + , + REFINEMV_SUBMB_INFO *refinemv_subinfo, int refinemv_step +#endif // CONFIG_REFINEMV +) { +#if CONFIG_REFINEMV + if (pef_mode < 0 || pef_mode > 3) return; +#else if (pef_mode < 0 || pef_mode > 2) return; +#endif // CONFIG_REFINEMV if (pef_mode == 0) { // opfl mv const int_mv *cur_mv_refined_ref0 = &mv_refined[n_blocks * 2 + 0]; const int_mv *cur_mv_refined_ref1 = &mv_refined[n_blocks * 2 + 1]; @@ -200,6 +215,17 @@ cur_mv_refined_ref0[0].as_int != cur_mv_refined_ref0[-opfl_step].as_int; *diff_mv |= cur_mv_refined_ref1[0].as_int != cur_mv_refined_ref1[-opfl_step].as_int; +#if CONFIG_REFINEMV + } else if (pef_mode == 3) { // refinemv mv + const int_mv *cur_mv_refined_ref0 = &refinemv_subinfo->refinemv[0]; + const int_mv *cur_mv_refined_ref1 = &refinemv_subinfo->refinemv[1]; + const int_mv *prev_mv_refined_ref0 = + &refinemv_subinfo[-refinemv_step].refinemv[0]; + const int_mv *prev_mv_refined_ref1 = + &refinemv_subinfo[-refinemv_step].refinemv[1]; + *diff_mv = cur_mv_refined_ref0[0].as_int != prev_mv_refined_ref0[0].as_int; + *diff_mv |= cur_mv_refined_ref1[0].as_int 
!= prev_mv_refined_ref1[0].as_int; +#endif // CONFIG_REFINEMV } else { // tip mv const TPL_MV_REF *cur_tpl_mv = tpl_mvs + mv_rows * mvs_stride + mv_cols; const TPL_MV_REF *prev_tpl_mv = cur_tpl_mv - tip_step; @@ -344,7 +370,16 @@ AOMMIN(prev_x_step, x_step) >= filt_len) { bool diff_mv = 0; check_mv(&diff_mv, pef_mode, mv_rows, mv_cols, mvs_stride, tpl_mvs, 1, - n_blocks, pef_input->mv_refined, 2); + n_blocks, pef_input->mv_refined, 2 +#if CONFIG_REFINEMV + , + (pef_mode == 3) ? (pef_input->refinemv_subinfo + + (j >> MI_SIZE_LOG2) * MAX_MIB_SIZE + + (i >> MI_SIZE_LOG2)) + : NULL, + 1 +#endif // CONFIG_REFINEMV + ); if (diff_mv) { filt_func filt_vert_func = (y_step == PEF_MCU_SZ && x_step == PEF_MCU_SZ) @@ -359,7 +394,16 @@ AOMMIN(prev_y_step, y_step) >= filt_len) { bool diff_mv = 0; check_mv(&diff_mv, pef_mode, mv_rows, mv_cols, mvs_stride, tpl_mvs, - mvs_stride, n_blocks, pef_input->mv_refined, wn); + mvs_stride, n_blocks, pef_input->mv_refined, wn +#if CONFIG_REFINEMV + , + (pef_mode == 3) ? (pef_input->refinemv_subinfo + + (j >> MI_SIZE_LOG2) * MAX_MIB_SIZE + + (i >> MI_SIZE_LOG2)) + : NULL, + MAX_MIB_SIZE +#endif // CONFIG_REFINEMV + ); if (diff_mv) { filt_func filt_horz_func = x_step == PEF_MCU_SZ ? 
highbd_filt_horz_pred @@ -425,7 +469,12 @@ const int dst_stride = dst_buf->stride; PefFuncInput pef_input; setup_pef_input(xd, 2, plane, dst, dst_stride, dst_buf->width, - dst_buf->height, NULL, &pef_input); + dst_buf->height, NULL, +#if CONFIG_REFINEMV + + NULL, +#endif // CONFIG_REFINEMV + &pef_input); enhance_sub_prediction_blocks(cm, xd, &pef_input); } } @@ -437,6 +486,10 @@ , int_mv *const mv_refined, int use_opfl #endif // CONFIG_OPTFLOW_REFINEMENT +#if CONFIG_REFINEMV + , + int use_refinemv, REFINEMV_SUBMB_INFO *refinemv_subinfo +#endif // CONFIG_REFINEMV ) { if (!cm->seq_params.enable_pef) return; if (!cm->features.allow_pef) return; @@ -445,7 +498,11 @@ const int use_tip = is_tip_ref_frame(mbmi->ref_frame[0]); if (use_tip) { PefFuncInput pef_input; - setup_pef_input(xd, 1, plane, dst, dst_stride, bw, bh, NULL, &pef_input); + setup_pef_input(xd, 1, plane, dst, dst_stride, bw, bh, NULL, +#if CONFIG_REFINEMV + NULL, +#endif // CONFIG_REFINEMV + &pef_input); enhance_sub_prediction_blocks(cm, xd, &pef_input); return; } @@ -455,10 +512,22 @@ if (use_opfl) { PefFuncInput pef_input; setup_pef_input(xd, 0, plane, dst, dst_stride, bw, bh, mv_refined, +#if CONFIG_REFINEMV + NULL, +#endif // CONFIG_REFINEMV &pef_input); enhance_sub_prediction_blocks(cm, xd, &pef_input); return; } #endif // CONFIG_OPTFLOW_REFINEMENT +#if CONFIG_REFINEMV + if (use_refinemv) { + PefFuncInput pef_input; + setup_pef_input(xd, 3, plane, dst, dst_stride, bw, bh, mv_refined, + refinemv_subinfo, &pef_input); + enhance_sub_prediction_blocks(cm, xd, &pef_input); + return; + } +#endif // CONFIG_REFINEMV return; }
diff --git a/av1/common/pef.h b/av1/common/pef.h index 1363c0f..1258970 100644 --- a/av1/common/pef.h +++ b/av1/common/pef.h
@@ -53,7 +53,8 @@ // Structure for PEF function input typedef struct { - // 0 for OPFL prediciton, 1 for TIP prediciton, 2 for TIP frame + // 0 for OPFL prediciton, 1 for TIP prediciton, 2 for TIP frame, 3 for + // refinemv prediction int pef_mode; int plane; int bw; @@ -64,6 +65,9 @@ uint16_t *dst; int dst_stride; int_mv *mv_refined; +#if CONFIG_REFINEMV + REFINEMV_SUBMB_INFO *refinemv_subinfo; +#endif // CONFIG_REFINEMV } PefFuncInput; typedef void (*filt_func)(uint16_t *s, int stride, int bd, uint16_t q_thresh, @@ -84,6 +88,10 @@ , int_mv *const mv_refined, int use_opfl #endif // CONFIG_OPTFLOW_REFINEMENT +#if CONFIG_REFINEMV + , + int use_refinemv, REFINEMV_SUBMB_INFO *refinemv_subinfo +#endif // CONFIG_REFINEMV ); #ifdef __cplusplus
diff --git a/av1/common/pred_common.c b/av1/common/pred_common.c index 39f5548..f90946b 100644 --- a/av1/common/pred_common.c +++ b/av1/common/pred_common.c
@@ -256,9 +256,9 @@ static void palette_add_to_cache(uint16_t *cache, int *n, uint16_t val) { // Do not add an already existing value -#if !CONFIG_INDEP_PALETTE_PARSING +#if !CONFIG_PALETTE_IMPROVEMENTS if (*n > 0 && val == cache[*n - 1]) return; -#endif //! CONFIG_INDEP_PALETTE_PARSING +#endif //! CONFIG_PALETTE_IMPROVEMENTS cache[(*n)++] = val; } @@ -286,7 +286,7 @@ while (above_n > 0 && left_n > 0) { uint16_t v_above = above_colors[above_idx]; uint16_t v_left = left_colors[left_idx]; -#if CONFIG_INDEP_PALETTE_PARSING +#if CONFIG_PALETTE_IMPROVEMENTS palette_add_to_cache(cache, &n, v_above); ++above_idx, --above_n; palette_add_to_cache(cache, &n, v_left); @@ -300,7 +300,7 @@ ++above_idx, --above_n; if (v_left == v_above) ++left_idx, --left_n; } -#endif // CONFIG_INDEP_PALETTE_PARSING +#endif // CONFIG_PALETTE_IMPROVEMENTS } while (above_n-- > 0) { uint16_t val = above_colors[above_idx++];
diff --git a/av1/common/pred_common.h b/av1/common/pred_common.h index dc4eebb..3917992 100644 --- a/av1/common/pred_common.h +++ b/av1/common/pred_common.h
@@ -283,11 +283,17 @@ const RefCntBuffer *const bck_buf = get_ref_frame_buf(cm, mbmi->ref_frame[0]); const RefCntBuffer *const fwd_buf = get_ref_frame_buf(cm, mbmi->ref_frame[1]); int bck_frame_index = 0, fwd_frame_index = 0; +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + int cur_frame_index = cm->cur_frame->display_order_hint; + + if (bck_buf != NULL) bck_frame_index = bck_buf->display_order_hint; + if (fwd_buf != NULL) fwd_frame_index = fwd_buf->display_order_hint; +#else int cur_frame_index = cm->cur_frame->order_hint; if (bck_buf != NULL) bck_frame_index = bck_buf->order_hint; if (fwd_buf != NULL) fwd_frame_index = fwd_buf->order_hint; - +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC int fwd = abs(get_relative_dist(&cm->seq_params.order_hint_info, fwd_frame_index, cur_frame_index)); int bck = abs(get_relative_dist(&cm->seq_params.order_hint_info, @@ -517,31 +523,34 @@ // The prediction flags in these dummy entries are initialized to 0. static INLINE int get_tx_size_context(const MACROBLOCKD *xd) { const MB_MODE_INFO *mbmi = xd->mi[0]; + const MB_MODE_INFO *const above_mbmi = xd->above_mbmi; + const MB_MODE_INFO *const left_mbmi = xd->left_mbmi; const TX_SIZE max_tx_size = max_txsize_rect_lookup[mbmi->sb_type[PLANE_TYPE_Y]]; const int max_tx_wide = tx_size_wide[max_tx_size]; const int max_tx_high = tx_size_high[max_tx_size]; - const int default_ctx[MAX_NUM_NEIGHBORS] = { - xd->above_txfm_context[0] >= max_tx_wide, - xd->left_txfm_context[0] >= max_tx_high - }; + const int has_above = xd->up_available; + const int has_left = xd->left_available; - const int max_tx_threshold[MAX_NUM_NEIGHBORS] = { max_tx_wide, max_tx_high }; + int above = xd->above_txfm_context[0] >= max_tx_wide; + int left = xd->left_txfm_context[0] >= max_tx_high; - int ctx = 0; - for (int i = 0; i < MAX_NUM_NEIGHBORS; ++i) { - const MB_MODE_INFO *const neighbor = xd->neighbors[i]; - if (neighbor != NULL) { - if (is_inter_block(neighbor, xd->tree_type)) { - const int block_size = 
neighbor->sb_type[PLANE_TYPE_Y]; - ctx += (block_size_wide[block_size] >= max_tx_threshold[i]); - } else { - ctx += default_ctx[i]; - } - } - } + if (has_above) + if (is_inter_block(above_mbmi, xd->tree_type)) + above = block_size_wide[above_mbmi->sb_type[PLANE_TYPE_Y]] >= max_tx_wide; - return ctx; + if (has_left) + if (is_inter_block(left_mbmi, xd->tree_type)) + left = block_size_high[left_mbmi->sb_type[PLANE_TYPE_Y]] >= max_tx_high; + + if (has_above && has_left) + return (above + left); + else if (has_above) + return above; + else if (has_left) + return left; + else + return 0; } #ifdef __cplusplus
diff --git a/av1/common/reconinter.c b/av1/common/reconinter.c index 2825186..99759b2 100644 --- a/av1/common/reconinter.c +++ b/av1/common/reconinter.c
@@ -81,6 +81,12 @@ inter_pred_params->orig_block_width = block_width; inter_pred_params->orig_block_height = block_height; #endif // CONFIG_OPTFLOW_REFINEMENT + +#if CONFIG_REFINEMV + inter_pred_params->original_pu_width = block_width; + inter_pred_params->original_pu_height = block_height; +#endif // CONFIG_REFINEMV + inter_pred_params->pix_row = pix_row; inter_pred_params->pix_col = pix_col; inter_pred_params->subsampling_x = subsampling_x; @@ -92,6 +98,16 @@ inter_pred_params->mode = TRANSLATION_PRED; inter_pred_params->comp_mode = UNIFORM_SINGLE; +#if CONFIG_REFINEMV + inter_pred_params->use_ref_padding = 0; + inter_pred_params->ref_area = NULL; +#endif // CONFIG_REFINEMV + +#if CONFIG_D071_IMP_MSK_BLD + inter_pred_params->border_data.enable_bacp = 0; + inter_pred_params->border_data.bacp_block_data = NULL; +#endif // CONFIG_D071_IMP_MSK_BLD + if (is_intrabc) { inter_pred_params->interp_filter_params[0] = &av1_intrabc_filter_params; inter_pred_params->interp_filter_params[1] = &av1_intrabc_filter_params; @@ -119,6 +135,12 @@ if (is_tip_ref_frame(mi->ref_frame[ref])) return; #endif // CONFIG_TIP +#if CONFIG_REFINEMV + // We do not do refineMV for warp blocks + // We may need to return from here. 
+  if (mi->refinemv_flag) return;
+#endif  // CONFIG_REFINEMV
+
   if (xd->cur_frame_force_integer_mv) return;
 
   if (av1_allow_warp(mi, warp_types, &xd->global_motion[mi->ref_frame[ref]],
@@ -179,6 +201,68 @@
 };
 #else
 /* clang-format off */
+#if WEDGE_BLD_SIG
+// rounded cosine and sine look-up tables given by round(32*cos(i))
+static const int8_t wedge_cos_lut[WEDGE_ANGLES] = {
+  // 0, 1, 2, 4, 6,
+  32, 31, 29, 23, 14,
+  // 8, 10, 12, 14, 15,
+  0,-14,-23,-29,-31,
+  // 16, 17, 18, 20, 22,
+  -32,-31,-29,-23,-14,
+  // 24, 26, 28, 30, 31
+  0, 14, 23, 29, 31
+};
+static const int8_t wedge_sin_lut[WEDGE_ANGLES] = {
+  // 0, 1, 2, 4, 6,
+  0, -8,-14,-23,-29,
+  // 8, 10, 12, 14, 15,
+  -32,-29,-23,-14, -8,
+  // 16, 17, 18, 20, 22,
+  0, 8, 14, 23, 29,
+  // 24, 26, 28, 30, 31
+  32, 29, 23, 14, 8
+};
+
+// rounded sigmoid function look-up table of blend weights, approximately
+// round(64/(1+exp(-x/32)))
+static const int8_t pos_dist_2_bld_weight[WEDGE_BLD_LUT_SIZE]={
+  32, 32, 33, 33, 34, 34, 35, 35,
+  36, 36, 37, 37, 38, 38, 39, 39,
+  40, 40, 41, 41, 42, 42, 43, 43,
+  43, 44, 44, 45, 45, 46, 46, 46,
+  47, 47, 48, 48, 48, 49, 49, 49,
+  50, 50, 50, 51, 51, 51, 52, 52,
+  52, 53, 53, 53, 53, 54, 54, 54,
+  55, 55, 55, 55, 55, 56, 56, 56,
+  56, 57, 57, 57, 57, 57, 58, 58,
+  58, 58, 58, 58, 59, 59, 59, 59,
+  59, 59, 59, 60, 60, 60, 60, 60,
+  60, 60, 60, 60, 61, 61, 61, 61,
+  61, 61, 61, 61, 61, 61, 61, 62,
+  62, 62, 62, 62, 62, 62, 62, 62,
+  62, 62, 62, 62, 62, 62, 62, 62,
+  63, 63, 63, 63, 63, 63, 63, 64
+};
+
+static const int8_t neg_dist_2_bld_weight[WEDGE_BLD_LUT_SIZE]={
+  32, 32, 31, 31, 30, 30, 29, 29,
+  28, 28, 27, 27, 26, 26, 25, 25,
+  24, 24, 23, 23, 22, 22, 21, 21,
+  21, 20, 20, 19, 19, 18, 18, 18,
+  17, 17, 16, 16, 16, 15, 15, 15,
+  14, 14, 14, 13, 13, 13, 12, 12,
+  12, 11, 11, 11, 11, 10, 10, 10,
+  9, 9, 9, 9, 9, 8, 8, 8,
+  8, 7, 7, 7, 7, 7, 6, 6,
+  6, 6, 6, 6, 5, 5, 5, 5,
+  5, 5, 5, 4, 4, 4, 4, 4,
+  4, 4, 4, 4, 3, 3, 3, 3,
+  3, 3, 3, 3, 3, 3, 3, 2,
+  2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2,
+  1, 1,
1, 1, 1, 1, 1, 0 +}; +#else static const int8_t wedge_cos_lut[WEDGE_ANGLES] = { // 0, 1, 2, 4, 6, 8, 8, 8, 4, 4, @@ -199,6 +283,7 @@ // 24, 26, 28, 30, 31 8, 8, 4, 4, 2 }; +#endif /* clang-format on */ #endif // !CONFIG_WEDGE_MOD_EXT @@ -273,6 +358,10 @@ smooth_interintra_mask_buf[INTERINTRA_MODES][BLOCK_SIZES_ALL] [MAX_WEDGE_SQUARE]); +#if CONFIG_CWP +DECLARE_ALIGNED(16, static int8_t, cwp_mask[2][MAX_CWP_NUM][MAX_SB_SQUARE]); +#endif // CONFIG_CWP + static wedge_masks_type wedge_masks[BLOCK_SIZES_ALL][2]; #if CONFIG_WEDGE_MOD_EXT @@ -403,6 +492,34 @@ }; #endif +#if CONFIG_CWP +// Init the cwp masks, called by init_cwp_masks +static AOM_INLINE void build_cwp_mask(int8_t *mask, int stride, + BLOCK_SIZE plane_bsize, int8_t w) { + const int bw = block_size_wide[plane_bsize]; + const int bh = block_size_high[plane_bsize]; + for (int i = 0; i < bh; ++i) { + for (int j = 0; j < bw; ++j) mask[j] = w; + mask += stride; + } +} +// Init the cwp masks +void init_cwp_masks() { + const int bs = BLOCK_128X128; + const int bw = block_size_wide[bs]; + for (int list_idx = 0; list_idx < 2; ++list_idx) { + for (int idx = 0; idx < MAX_CWP_NUM; ++idx) { + int8_t weight = cwp_weighting_factor[list_idx][idx] * 4; + build_cwp_mask(cwp_mask[list_idx][idx], bw, bs, weight); + } + } +} +// Return the associated cwp mask +const int8_t *av1_get_cwp_mask(int list_idx, int idx) { + return cwp_mask[list_idx][idx]; +} +#endif // CONFIG_CWP + static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg, BLOCK_SIZE sb_type) { const uint8_t *master; @@ -433,12 +550,17 @@ const uint8_t *av1_get_compound_type_mask( const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type) { +#if !CONFIG_D071_IMP_MSK_BLD assert(is_masked_compound_type(comp_data->type)); +#endif // !CONFIG_D071_IMP_MSK_BLD (void)sb_type; switch (comp_data->type) { case COMPOUND_WEDGE: return av1_get_contiguous_soft_mask(comp_data->wedge_index, comp_data->wedge_sign, sb_type); +#if CONFIG_D071_IMP_MSK_BLD + case 
COMPOUND_AVERAGE: +#endif // CONFIG_D071_IMP_MSK_BLD case COMPOUND_DIFFWTD: return comp_data->seg_mask; default: assert(0); return NULL; } @@ -569,7 +691,14 @@ int y = ((n << 1) - h + 1) * wedge_sin_lut[angle]; for (int m = 0; m < w; m++, idx++) { int d = ((m << 1) - w + 1) * wedge_cos_lut[angle] + y; +#if WEDGE_BLD_SIG + const int clamp_d = clamp(d, -127, 127); + wedge_master_mask[0][angle][idx] = + clamp_d >= 0 ? pos_dist_2_bld_weight[clamp_d] + : neg_dist_2_bld_weight[-clamp_d]; +#else wedge_master_mask[0][angle][idx] = clamp((d + 32), 0, 64); +#endif wedge_master_mask[1][angle][idx] = 64 - wedge_master_mask[0][angle][idx]; } @@ -722,6 +851,14 @@ } } +#if CONFIG_REFINEMV +// Compute the SAD values for refineMV modes +int get_refinemv_sad(uint16_t *src1, uint16_t *src2, int width, int height, + int bd) { + return get_highbd_sad(src1, width, src2, width, bd, width, height); +} +#endif // CONFIG_REFINEMV + #if CONFIG_OPTFLOW_REFINEMENT // Restrict MV delta to 1 or 2 pixels. This restriction would reduce complexity // in hardware. @@ -745,7 +882,12 @@ const AV1_COMMON *cm, MACROBLOCKD *xd, int plane, const MB_MODE_INFO *mi, int bw, int bh, int mi_x, int mi_y, uint16_t **mc_buf, InterPredParams *inter_pred_params, - CalcSubpelParamsFunc calc_subpel_params_func, int ref, uint16_t *pred_dst) { + CalcSubpelParamsFunc calc_subpel_params_func, int ref, uint16_t *pred_dst +#if CONFIG_REFINEMV + , + const MV *const src_mv, int pu_width, int pu_height +#endif // CONFIG_REFINEMV +) { assert(cm->seq_params.order_hint_info.enable_order_hint); const int is_intrabc = is_intrabc_block(mi, xd->tree_type); #if CONFIG_OPTFLOW_ON_TIP @@ -769,11 +911,18 @@ const struct scale_factors *const sf = is_intrabc ? &cm->sf_identity : xd->block_ref_scale_factors[ref]; #endif // CONFIG_OPTFLOW_ON_TIP - const BLOCK_SIZE bsize = mi->sb_type[PLANE_TYPE_Y]; + const int ss_x = pd->subsampling_x; const int ss_y = pd->subsampling_y; +#if CONFIG_REFINEMV + const int row_start = (bw == 4) && ss_y ? 
-1 : 0; + const int col_start = (bh == 4) && ss_x ? -1 : 0; +#else + const BLOCK_SIZE bsize = mi->sb_type[PLANE_TYPE_Y]; const int row_start = (block_size_high[bsize] == 4) && ss_y ? -1 : 0; const int col_start = (block_size_wide[bsize] == 4) && ss_x ? -1 : 0; +#endif // CONFIG_REFINEMV + const int pre_x = (mi_x + MI_SIZE * col_start) >> ss_x; const int pre_y = (mi_y + MI_SIZE * row_start) >> ss_y; @@ -788,6 +937,10 @@ av1_init_inter_params(inter_pred_params, bw, bh, pre_y, pre_x, pd->subsampling_x, pd->subsampling_y, xd->bd, mi->use_intrabc[0], sf, pre_buf, mi->interp_fltr); +#if CONFIG_REFINEMV + inter_pred_params->original_pu_width = pu_width; + inter_pred_params->original_pu_height = pu_height; +#endif // CONFIG_REFINEMV #if CONFIG_TIP const int width = (cm->mi_params.mi_cols << MI_SIZE_LOG2); @@ -805,7 +958,13 @@ if (inter_pred_params->mode == WARP_PRED) return; assert(mi->interinter_comp.type == COMPOUND_AVERAGE); - av1_build_one_inter_predictor(pred_dst, bw, &mi->mv[ref].as_mv, + + av1_build_one_inter_predictor(pred_dst, bw, +#if CONFIG_REFINEMV + src_mv, +#else + &mi->mv[ref].as_mv, +#endif // CONFIG_REFINEMV inter_pred_params, xd, mi_x, mi_y, ref, mc_buf, calc_subpel_params_func); } @@ -1222,6 +1381,10 @@ , int do_pred, int use_4x4 #endif // CONFIG_OPTFLOW_ON_TIP +#if CONFIG_REFINEMV + , + MV *best_mv_ref, int pu_width, int pu_height +#endif // CONFIG_REFINEMV ) { const int target_prec = MV_REFINE_PREC_BITS; const int n = opfl_get_subblock_size(bw, bh, plane @@ -1252,10 +1415,19 @@ get_ref_frame_buf(cm, mbmi->ref_frame[0]); const RefCntBuffer *const r1_buf = get_ref_frame_buf(cm, mbmi->ref_frame[1]); +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC d0 = get_relative_dist(&cm->seq_params.order_hint_info, - cm->cur_frame->order_hint, r0_buf->order_hint); + cm->cur_frame->display_order_hint, + r0_buf->display_order_hint); d1 = get_relative_dist(&cm->seq_params.order_hint_info, - cm->cur_frame->order_hint, r1_buf->order_hint); + cm->cur_frame->display_order_hint, + 
r1_buf->display_order_hint); +#else + d0 = get_relative_dist(&cm->seq_params.order_hint_info, + cm->cur_frame->order_hint, r0_buf->order_hint); + d1 = get_relative_dist(&cm->seq_params.order_hint_info, + cm->cur_frame->order_hint, r1_buf->order_hint); +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC #if CONFIG_OPTFLOW_ON_TIP } #endif // CONFIG_OPTFLOW_ON_TIP @@ -1268,10 +1440,20 @@ InterPredParams params0, params1; av1_opfl_build_inter_predictor(cm, xd, plane, mbmi, bw, bh, mi_x, mi_y, mc_buf, ¶ms0, calc_subpel_params_func, 0, - dst0); + dst0 +#if CONFIG_REFINEMV + , + &best_mv_ref[0], pu_width, pu_height +#endif // CONFIG_REFINEMV + ); av1_opfl_build_inter_predictor(cm, xd, plane, mbmi, bw, bh, mi_x, mi_y, mc_buf, ¶ms1, calc_subpel_params_func, 1, - dst1); + dst1 +#if CONFIG_REFINEMV + , + &best_mv_ref[1], pu_width, pu_height +#endif // CONFIG_REFINEMV + ); #if CONFIG_OPTFLOW_ON_TIP } #endif // CONFIG_OPTFLOW_ON_TIP @@ -1356,7 +1538,26 @@ return target_prec; } +#if CONFIG_D071_IMP_MSK_BLD +int is_out_of_frame_block(const InterPredParams *inter_pred_params, + int frame_width, int frame_height, int sub_block_id) { + for (int ref = 0; ref < 2; ref++) { + const BacpBlockData *const b_data = + &inter_pred_params->border_data.bacp_block_data[2 * sub_block_id + ref]; + if (b_data->x0 < 0 || b_data->x0 > frame_width - 1 || b_data->x1 < 0 || + b_data->x1 > frame_width + || b_data->y0 < 0 || b_data->y0 > frame_height - 1 || b_data->y1 < 0 || + b_data->y1 > frame_height) { + return 1; + } + } + return 0; +} + +#endif // CONFIG_D071_IMP_MSK_BLD + +#if !CONFIG_D071_IMP_MSK_BLD // Makes the interpredictor for the region by dividing it up into nxn blocks // and running the interpredictor code on each one. 
void make_inter_pred_of_nxn(uint16_t *dst, int dst_stride, @@ -1383,6 +1584,7 @@ calc_subpel_params_func(&(mv_refined[n_blocks * 2 + ref].as_mv), inter_pred_params, xd, mi_x + i, mi_y + j, ref, 1, mc_buf, &pre, subpel_params, &src_stride); + av1_make_inter_predictor(pre, src_stride, dst, dst_stride, inter_pred_params, subpel_params); n_blocks++; @@ -1427,6 +1629,7 @@ &subpel_params); } #endif // CONFIG_OPTFLOW_REFINEMENT +#endif // !CONFIG_D071_IMP_MSK_BLD // Equation of line: f(x, y) = a[0]*(x - a[2]*w/8) + a[1]*(y - a[3]*h/8) = 0 void av1_init_wedge_masks() { @@ -1440,8 +1643,19 @@ const CONV_BUF_TYPE *src1, int src1_stride, const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h, int w, InterPredParams *inter_pred_params) { +#if CONFIG_D071_IMP_MSK_BLD + const int ssy = (inter_pred_params->conv_params.plane && + comp_data->type == COMPOUND_AVERAGE) + ? 0 + : inter_pred_params->subsampling_y; + const int ssx = (inter_pred_params->conv_params.plane && + comp_data->type == COMPOUND_AVERAGE) + ? 
0 + : inter_pred_params->subsampling_x; +#else const int ssy = inter_pred_params->subsampling_y; const int ssx = inter_pred_params->subsampling_x; +#endif // CONFIG_D071_IMP_MSK_BLD const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type); const int mask_stride = block_size_wide[sb_type]; aom_highbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1, @@ -1449,11 +1663,19 @@ &inter_pred_params->conv_params, inter_pred_params->bit_depth); } - -static void make_masked_inter_predictor(const uint16_t *pre, int pre_stride, - uint16_t *dst, int dst_stride, - InterPredParams *inter_pred_params, - const SubpelParams *subpel_params) { +#if !CONFIG_D071_IMP_MSK_BLD +static +#endif + void + make_masked_inter_predictor(const uint16_t *pre, int pre_stride, + uint16_t *dst, int dst_stride, + InterPredParams *inter_pred_params, + const SubpelParams *subpel_params +#if CONFIG_D071_IMP_MSK_BLD + , + int use_bacp, int sub_block_id +#endif // CONFIG_D071_IMP_MSK_BLD + ) { const INTERINTER_COMPOUND_DATA *comp_data = &inter_pred_params->mask_comp; BLOCK_SIZE sb_type = inter_pred_params->sb_type; @@ -1482,12 +1704,183 @@ inter_pred_params->block_width, &inter_pred_params->conv_params, inter_pred_params->bit_depth); } + +#if CONFIG_D071_IMP_MSK_BLD + // Mask is generated from luma and reuse for chroma + const int generate_mask_for_this_plane = + (!inter_pred_params->conv_params.plane || + comp_data->type == COMPOUND_AVERAGE); + if (use_bacp && generate_mask_for_this_plane) { + uint8_t *mask = comp_data->seg_mask; + int mask_stride = block_size_wide[sb_type]; + BacpBlockData *b_data_0 = + &inter_pred_params->border_data.bacp_block_data[2 * sub_block_id + 0]; + BacpBlockData *b_data_1 = + &inter_pred_params->border_data.bacp_block_data[2 * sub_block_id + 1]; + + for (int i = 0; i < inter_pred_params->block_height; ++i) { + for (int j = 0; j < inter_pred_params->block_width; ++j) { + int x = b_data_0->x0 + j; + int y = b_data_0->y0 + i; + + int p0_available = + (x >= 0 
&& x < inter_pred_params->ref_frame_buf.width && y >= 0 && + y < inter_pred_params->ref_frame_buf.height); + + x = b_data_1->x0 + j; + y = b_data_1->y0 + i; + int p1_available = + (x >= 0 && x < inter_pred_params->ref_frame_buf.width && y >= 0 && + y < inter_pred_params->ref_frame_buf.height); + + if (p0_available && !p1_available) { + mask[j] = AOM_BLEND_A64_MAX_ALPHA - DEFAULT_IMP_MSK_WT; + } else if (!p0_available && p1_available) { + mask[j] = DEFAULT_IMP_MSK_WT; + } else if (comp_data->type == COMPOUND_AVERAGE) { + mask[j] = AOM_BLEND_A64_MAX_ALPHA >> 1; + } + } + mask += mask_stride; + } + } +#endif // CONFIG_D071_IMP_MSK_BLD + build_masked_compound_no_round( dst, dst_stride, org_dst, org_dst_stride, tmp_buf16, tmp_buf_stride, comp_data, sb_type, inter_pred_params->block_height, inter_pred_params->block_width, inter_pred_params); + +#if CONFIG_D071_IMP_MSK_BLD + // restore to previous state + inter_pred_params->conv_params.dst = org_dst; + inter_pred_params->conv_params.dst_stride = org_dst_stride; +#endif // CONFIG_D071_IMP_MSK_BLD } +#if CONFIG_D071_IMP_MSK_BLD && CONFIG_OPTFLOW_REFINEMENT +// Makes the interpredictor for the region by dividing it up into nxn blocks +// and running the interpredictor code on each one. +void make_inter_pred_of_nxn(uint16_t *dst, int dst_stride, + int_mv *const mv_refined, + InterPredParams *inter_pred_params, MACROBLOCKD *xd, + int mi_x, int mi_y, int ref, uint16_t **mc_buf, + CalcSubpelParamsFunc calc_subpel_params_func, int n, + SubpelParams *subpel_params) { + int n_blocks = 0; + int w = inter_pred_params->orig_block_width; + int h = inter_pred_params->orig_block_height; + assert(w % n == 0); + assert(h % n == 0); + CONV_BUF_TYPE *orig_conv_dst = inter_pred_params->conv_params.dst; + inter_pred_params->block_width = n; + inter_pred_params->block_height = n; + + uint16_t *pre; + int src_stride = 0; + + // Process whole nxn blocks. 
+ for (int j = 0; j <= h - n; j += n) { + for (int i = 0; i <= w - n; i += n) { + calc_subpel_params_func(&(mv_refined[n_blocks * 2 + ref].as_mv), + inter_pred_params, xd, mi_x + i, mi_y + j, ref, 1, + mc_buf, &pre, subpel_params, &src_stride); + +#if CONFIG_D071_IMP_MSK_BLD + int use_bacp = 0; + assert(inter_pred_params->mask_comp.type == COMPOUND_AVERAGE); + assert(inter_pred_params->comp_mode == UNIFORM_COMP); + int stored_do_average = inter_pred_params->conv_params.do_average; + InterCompMode stored_comp_mode = inter_pred_params->comp_mode; + uint8_t *stored_seg_mask = inter_pred_params->mask_comp.seg_mask; + + if (inter_pred_params->border_data.enable_bacp) { + inter_pred_params->border_data.bacp_block_data[n_blocks * 2 + ref].x0 = + subpel_params->x0; + inter_pred_params->border_data.bacp_block_data[n_blocks * 2 + ref].x1 = + subpel_params->x1; + inter_pred_params->border_data.bacp_block_data[n_blocks * 2 + ref].y0 = + subpel_params->y0; + inter_pred_params->border_data.bacp_block_data[n_blocks * 2 + ref].y1 = + subpel_params->y1; + if (ref == 1) { + use_bacp = is_out_of_frame_block( + inter_pred_params, inter_pred_params->ref_frame_buf.width, + inter_pred_params->ref_frame_buf.height, n_blocks); + + if (use_bacp && + inter_pred_params->mask_comp.type == COMPOUND_AVERAGE) { + inter_pred_params->conv_params.do_average = 0; + inter_pred_params->comp_mode = MASK_COMP; + inter_pred_params->mask_comp.seg_mask = xd->seg_mask; + } + } + } + + assert(IMPLIES(ref == 0, !use_bacp)); + if (use_bacp) { + assert(inter_pred_params->comp_mode == MASK_COMP); + make_masked_inter_predictor(pre, src_stride, dst, dst_stride, + inter_pred_params, subpel_params, use_bacp, + n_blocks); + + } else { +#endif + + av1_make_inter_predictor(pre, src_stride, dst, dst_stride, + inter_pred_params, subpel_params); +#if CONFIG_D071_IMP_MSK_BLD + } + + // Restored to original inter_pred_params + if (use_bacp && inter_pred_params->mask_comp.type == COMPOUND_AVERAGE) { + 
inter_pred_params->conv_params.do_average = stored_do_average; + inter_pred_params->comp_mode = stored_comp_mode; + inter_pred_params->mask_comp.seg_mask = stored_seg_mask; + } +#endif // CONFIG_D071_IMP_MSK_BLD + n_blocks++; + dst += n; + inter_pred_params->conv_params.dst += n; + inter_pred_params->pix_col += n; + } + dst -= w; + inter_pred_params->conv_params.dst -= w; + inter_pred_params->pix_col -= w; + + dst += n * dst_stride; + inter_pred_params->conv_params.dst += + n * inter_pred_params->conv_params.dst_stride; + inter_pred_params->pix_row += n; + } + + inter_pred_params->conv_params.dst = orig_conv_dst; +} +// Use a second pass of motion compensation to rebuild inter predictor +void av1_opfl_rebuild_inter_predictor( + uint16_t *dst, int dst_stride, int plane, int_mv *const mv_refined, + InterPredParams *inter_pred_params, MACROBLOCKD *xd, int mi_x, int mi_y, + int ref, uint16_t **mc_buf, CalcSubpelParamsFunc calc_subpel_params_func +#if CONFIG_OPTFLOW_ON_TIP + , + int use_4x4 +#endif // CONFIG_OPTFLOW_ON_TIP +) { + SubpelParams subpel_params; + int w = inter_pred_params->block_width; + int h = inter_pred_params->block_height; + int n = opfl_get_subblock_size(w, h, plane +#if CONFIG_OPTFLOW_ON_TIP + , + use_4x4 +#endif // CONFIG_OPTFLOW_ON_TIP + ); + make_inter_pred_of_nxn(dst, dst_stride, mv_refined, inter_pred_params, xd, + mi_x, mi_y, ref, mc_buf, calc_subpel_params_func, n, + &subpel_params); +} +#endif // CONFIG_D071_IMP_MSK_BLD && CONFIG_OPTFLOW_REFINEMENT + void av1_build_one_inter_predictor( uint16_t *dst, int dst_stride, const MV *const src_mv, InterPredParams *inter_pred_params, MACROBLOCKD *xd, int mi_x, int mi_y, @@ -1501,13 +1894,52 @@ #endif // CONFIG_OPTFLOW_REFINEMENT mc_buf, &src, &subpel_params, &src_stride); +#if CONFIG_D071_IMP_MSK_BLD + int use_bacp = 0; + int sub_block_id = 0; + if (inter_pred_params->border_data.enable_bacp) { + inter_pred_params->border_data.bacp_block_data[2 * sub_block_id + ref].x0 = + subpel_params.x0; + 
inter_pred_params->border_data.bacp_block_data[2 * sub_block_id + ref].x1 = + subpel_params.x1; + inter_pred_params->border_data.bacp_block_data[2 * sub_block_id + ref].y0 = + subpel_params.y0; + inter_pred_params->border_data.bacp_block_data[2 * sub_block_id + ref].y1 = + subpel_params.y1; + if (ref == 1) { + use_bacp = is_out_of_frame_block( + inter_pred_params, inter_pred_params->ref_frame_buf.width, + inter_pred_params->ref_frame_buf.height, sub_block_id); + if (use_bacp && inter_pred_params->mask_comp.type == COMPOUND_AVERAGE) { + inter_pred_params->conv_params.do_average = 0; + inter_pred_params->comp_mode = MASK_COMP; + inter_pred_params->mask_comp.seg_mask = xd->seg_mask; + } + } + } + + assert(IMPLIES(ref == 0, !use_bacp)); +#endif // CONFIG_D071_IMP_MSK_BLD + if (inter_pred_params->comp_mode == UNIFORM_SINGLE || inter_pred_params->comp_mode == UNIFORM_COMP) { av1_make_inter_predictor(src, src_stride, dst, dst_stride, inter_pred_params, &subpel_params); +#if CONFIG_D071_IMP_MSK_BLD + assert(IMPLIES(use_bacp, ref == 0)); + assert(use_bacp == 0); +#endif // CONFIG_D071_IMP_MSK_BLD } else { make_masked_inter_predictor(src, src_stride, dst, dst_stride, - inter_pred_params, &subpel_params); + inter_pred_params, &subpel_params +#if CONFIG_D071_IMP_MSK_BLD + , + use_bacp, 0 +#endif // CONFIG_D071_IMP_MSK_BLD + ); +#if CONFIG_D071_IMP_MSK_BLD + assert(IMPLIES(inter_pred_params->border_data.enable_bacp, ref == 1)); +#endif // CONFIG_D071_IMP_MSK_BLD } } @@ -1595,7 +2027,12 @@ inter_pred_params, &subpel_params); } else { make_masked_inter_predictor(src, src_stride, dst, dst_stride, - inter_pred_params, &subpel_params); + inter_pred_params, &subpel_params +#if CONFIG_D071_IMP_MSK_BLD + , + 0, 0 +#endif // CONFIG_D071_IMP_MSK_BLD + ); } int shift = 8; @@ -1754,13 +2191,1088 @@ } } +#if CONFIG_REFINEMV +// Padding if the pixel position falls outside of the defined reference area +static void refinemv_highbd_pad_mc_border(const uint16_t *src, int src_stride, + uint16_t 
*dst, int dst_stride, int x0,
+                                          int y0, int b_w, int b_h,
+                                          const ReferenceArea *ref_area) {
+  // Get a pointer to the start of the real data for this row.
+  const uint16_t *ref_row = src - x0 - y0 * src_stride;
+
+  if (y0 >= ref_area->pad_block.y1)
+    ref_row += (ref_area->pad_block.y1 - 1) * src_stride;
+  else if (y0 >= ref_area->pad_block.y0)
+    ref_row += y0 * src_stride;
+  else
+    ref_row += ref_area->pad_block.y0 * src_stride;
+
+  do {
+    int right = 0, copy;
+    int left = x0 < ref_area->pad_block.x0 ? ref_area->pad_block.x0 - x0 : 0;
+
+    if (left > b_w) left = b_w;
+
+    if (x0 + b_w > ref_area->pad_block.x1)
+      right = x0 + b_w - ref_area->pad_block.x1;
+
+    if (right > b_w) right = b_w;
+
+    copy = b_w - left - right;
+
+    if (left) aom_memset16(dst, ref_row[0], left);
+
+    if (copy) memcpy(dst + left, ref_row + x0 + left, copy * sizeof(uint16_t));
+
+    if (right)
+      aom_memset16(dst + left + copy, ref_row[ref_area->pad_block.x1 - 1],
+                   right);
+
+    dst += dst_stride;
+    ++y0;
+
+    if (y0 > ref_area->pad_block.y0 && y0 < ref_area->pad_block.y1)
+      ref_row += src_stride;
+  } while (--b_h);
+}
+// Check if padding is required during motion compensation.
+// Returns 1 if the reference pixel is outside of the reference range and
+// padding is required; returns 0 if no padding is needed.
+int update_extend_mc_border_params(const struct scale_factors *const sf,
+                                   struct buf_2d *const pre_buf, MV32 scaled_mv,
+                                   PadBlock *block, int subpel_x_mv,
+                                   int subpel_y_mv, int do_warp, int is_intrabc,
+                                   int *x_pad, int *y_pad,
+                                   const ReferenceArea *ref_area) {
+  // Get reference width and height.
+  int frame_width = pre_buf->width;
+  int frame_height = pre_buf->height;
+
+  // Do border extension if there is motion or
+  // width/height is not a multiple of 8 pixels.
+#if CONFIG_OPTFLOW_REFINEMENT || CONFIG_TIP
+  // Extension is needed in optical flow refinement to obtain MV offsets
+  (void)scaled_mv;
+  if (!is_intrabc && !do_warp) {
+#else
+  const int is_scaled = av1_is_scaled(sf);
+  if ((!is_intrabc) && (!do_warp) &&
+      (is_scaled || scaled_mv.col || scaled_mv.row || (frame_width & 0x7) ||
+       (frame_height & 0x7))) {
+#endif  // CONFIG_OPTFLOW_REFINEMENT || CONFIG_TIP
+    if (subpel_x_mv || (sf->x_step_q4 != SUBPEL_SHIFTS)) {
+      block->x0 -= AOM_INTERP_EXTEND - 1;
+      block->x1 += AOM_INTERP_EXTEND;
+      *x_pad = 1;
+    }
+
+    if (subpel_y_mv || (sf->y_step_q4 != SUBPEL_SHIFTS)) {
+      block->y0 -= AOM_INTERP_EXTEND - 1;
+      block->y1 += AOM_INTERP_EXTEND;
+      *y_pad = 1;
+    }
+
+    // Skip border extension if block is inside the frame.
+    if (block->x0 < 0 || block->x1 > frame_width - 1 || block->y0 < 0 ||
+        block->y1 > frame_height - 1) {
+      return 1;
+    }
+
+    if (ref_area) {
+      // Skip border extension if block is in the reference area.
+      if (block->x0 < ref_area->pad_block.x0 ||
+          block->x1 > ref_area->pad_block.x1 ||
+          block->y0 < ref_area->pad_block.y0 ||
+          block->y1 > ref_area->pad_block.y1) {
+        return 1;
+      }
+    }
+  }
+  return 0;
+};
+
+// Perform padding of the motion compensated block if required.
+// Padding is performed if the motion compensated block is partially out of the
+// reference area.
+static void refinemv_extend_mc_border(
+    const struct scale_factors *const sf, struct buf_2d *const pre_buf,
+    MV32 scaled_mv, PadBlock block, int subpel_x_mv, int subpel_y_mv,
+    int do_warp, int is_intrabc, uint16_t *paded_ref_buf,
+    int paded_ref_buf_stride, uint16_t **pre, int *src_stride,
+    const ReferenceArea *ref_area) {
+  int x_pad = 0, y_pad = 0;
+  if (update_extend_mc_border_params(sf, pre_buf, scaled_mv, &block,
+                                     subpel_x_mv, subpel_y_mv, do_warp,
+                                     is_intrabc, &x_pad, &y_pad, ref_area)) {
+    // printf(" Out of border \n");
+    // Get reference block pointer.
+ const uint16_t *const buf_ptr = + pre_buf->buf0 + block.y0 * pre_buf->stride + block.x0; + int buf_stride = pre_buf->stride; + const int b_w = block.x1 - block.x0; + const int b_h = block.y1 - block.y0; + + refinemv_highbd_pad_mc_border(buf_ptr, buf_stride, paded_ref_buf, + paded_ref_buf_stride, block.x0, block.y0, b_w, + b_h, ref_area); + *src_stride = paded_ref_buf_stride; + *pre = paded_ref_buf + + y_pad * (AOM_INTERP_EXTEND - 1) * paded_ref_buf_stride + + x_pad * (AOM_INTERP_EXTEND - 1); + } +} + +#if CONFIG_TIP +// Derive the sub-pixel related parameters of TIP blocks +// Sub-pel related parameters are stored in the structures pointed by +// "subpel_params" and "block" +void tip_dec_calc_subpel_params(const MV *const src_mv, + InterPredParams *const inter_pred_params, + int mi_x, int mi_y, uint16_t **pre, + SubpelParams *subpel_params, int *src_stride, + PadBlock *block, +#if CONFIG_OPTFLOW_REFINEMENT + int use_optflow_refinement, +#endif // CONFIG_OPTFLOW_REFINEMENT + MV32 *scaled_mv, int *subpel_x_mv, + int *subpel_y_mv) { + const struct scale_factors *sf = inter_pred_params->scale_factors; + struct buf_2d *pre_buf = &inter_pred_params->ref_frame_buf; + +#if CONFIG_REFINEMV + const int bw = inter_pred_params->original_pu_width; + const int bh = inter_pred_params->original_pu_height; +#else +#if CONFIG_OPTFLOW_REFINEMENT + // Use original block size to clamp MV and to extend block boundary + const int bw = use_optflow_refinement ? inter_pred_params->orig_block_width + : inter_pred_params->block_width; + const int bh = use_optflow_refinement ? 
inter_pred_params->orig_block_height + : inter_pred_params->block_height; +#else + const int bw = inter_pred_params->block_width; + const int bh = inter_pred_params->block_height; +#endif // CONFIG_OPTFLOW_REFINEMENT +#endif // CONFIG_REFINEMV + + const int is_scaled = av1_is_scaled(sf); + if (is_scaled) { + const int ssx = inter_pred_params->subsampling_x; + const int ssy = inter_pred_params->subsampling_y; + int orig_pos_y = inter_pred_params->pix_row << SUBPEL_BITS; + int orig_pos_x = inter_pred_params->pix_col << SUBPEL_BITS; +#if CONFIG_OPTFLOW_REFINEMENT + if (use_optflow_refinement) { + orig_pos_y += ROUND_POWER_OF_TWO_SIGNED(src_mv->row * (1 << SUBPEL_BITS), + MV_REFINE_PREC_BITS + ssy); + orig_pos_x += ROUND_POWER_OF_TWO_SIGNED(src_mv->col * (1 << SUBPEL_BITS), + MV_REFINE_PREC_BITS + ssx); + } else { + orig_pos_y += src_mv->row * (1 << (1 - ssy)); + orig_pos_x += src_mv->col * (1 << (1 - ssx)); + } +#else + orig_pos_y += src_mv->row * (1 << (1 - ssy)); + orig_pos_x += src_mv->col * (1 << (1 - ssx)); +#endif // CONFIG_OPTFLOW_REFINEMENT + int pos_y = sf->scale_value_y(orig_pos_y, sf); + int pos_x = sf->scale_value_x(orig_pos_x, sf); + pos_x += SCALE_EXTRA_OFF; + pos_y += SCALE_EXTRA_OFF; + + const int top = -AOM_LEFT_TOP_MARGIN_SCALED(ssy); + const int left = -AOM_LEFT_TOP_MARGIN_SCALED(ssx); + const int bottom = (pre_buf->height + AOM_INTERP_EXTEND) + << SCALE_SUBPEL_BITS; + const int right = (pre_buf->width + AOM_INTERP_EXTEND) << SCALE_SUBPEL_BITS; + pos_y = clamp(pos_y, top, bottom); + pos_x = clamp(pos_x, left, right); + + subpel_params->subpel_x = pos_x & SCALE_SUBPEL_MASK; + subpel_params->subpel_y = pos_y & SCALE_SUBPEL_MASK; + subpel_params->xs = sf->x_step_q4; + subpel_params->ys = sf->y_step_q4; + + // Get reference block top left coordinate. 
+ block->x0 = pos_x >> SCALE_SUBPEL_BITS; + block->y0 = pos_y >> SCALE_SUBPEL_BITS; + +#if CONFIG_D071_IMP_MSK_BLD + block->x1 = + ((pos_x + (inter_pred_params->block_width - 1) * subpel_params->xs) >> + SCALE_SUBPEL_BITS) + + 1; + block->y1 = + ((pos_y + (inter_pred_params->block_height - 1) * subpel_params->ys) >> + SCALE_SUBPEL_BITS) + + 1; +#else + // Get reference block bottom right coordinate. + block->x1 = + ((pos_x + (bw - 1) * subpel_params->xs) >> SCALE_SUBPEL_BITS) + 1; + block->y1 = + ((pos_y + (bh - 1) * subpel_params->ys) >> SCALE_SUBPEL_BITS) + 1; +#endif // CONFIG_D071_IMP_MSK_BLD + + MV temp_mv; + temp_mv = tip_clamp_mv_to_umv_border_sb(inter_pred_params, src_mv, bw, bh, +#if CONFIG_OPTFLOW_REFINEMENT + use_optflow_refinement, +#endif // CONFIG_OPTFLOW_REFINEMENT + inter_pred_params->subsampling_x, + inter_pred_params->subsampling_y); + *scaled_mv = av1_scale_mv(&temp_mv, mi_x, mi_y, sf); + scaled_mv->row += SCALE_EXTRA_OFF; + scaled_mv->col += SCALE_EXTRA_OFF; + + *subpel_x_mv = scaled_mv->col & SCALE_SUBPEL_MASK; + *subpel_y_mv = scaled_mv->row & SCALE_SUBPEL_MASK; + } else { + // Get block position in current frame. + int pos_x = inter_pred_params->pix_col << SUBPEL_BITS; + int pos_y = inter_pred_params->pix_row << SUBPEL_BITS; + + const MV mv_q4 = tip_clamp_mv_to_umv_border_sb( + inter_pred_params, src_mv, bw, bh, +#if CONFIG_OPTFLOW_REFINEMENT + use_optflow_refinement, +#endif // CONFIG_OPTFLOW_REFINEMENT + inter_pred_params->subsampling_x, inter_pred_params->subsampling_y); + subpel_params->xs = subpel_params->ys = SCALE_SUBPEL_SHIFTS; + subpel_params->subpel_x = (mv_q4.col & SUBPEL_MASK) << SCALE_EXTRA_BITS; + subpel_params->subpel_y = (mv_q4.row & SUBPEL_MASK) << SCALE_EXTRA_BITS; + + // Get reference block top left coordinate. + pos_x += mv_q4.col; + pos_y += mv_q4.row; + pos_x = (pos_x >> SUBPEL_BITS); + pos_y = (pos_y >> SUBPEL_BITS); + block->x0 = pos_x; + block->y0 = pos_y; + + // Get reference block bottom right coordinate. 
+#if CONFIG_D071_IMP_MSK_BLD + block->x1 = pos_x + inter_pred_params->block_width; + block->y1 = pos_y + inter_pred_params->block_height; +#else + block->x1 = pos_x + bw; + block->y1 = pos_y + bh; +#endif // CONFIG_D071_IMP_MSK_BLD + + scaled_mv->row = mv_q4.row; + scaled_mv->col = mv_q4.col; + *subpel_x_mv = scaled_mv->col & SUBPEL_MASK; + *subpel_y_mv = scaled_mv->row & SUBPEL_MASK; + } + *pre = pre_buf->buf0 + block->y0 * pre_buf->stride + block->x0; + *src_stride = pre_buf->stride; +#if CONFIG_D071_IMP_MSK_BLD + if (inter_pred_params->border_data.enable_bacp) { + subpel_params->x0 = block->x0; + subpel_params->x1 = block->x1; + subpel_params->y0 = block->y0; + subpel_params->y1 = block->y1; + } +#endif // CONFIG_D071_IMP_MSK_BLD +} + +void tip_common_calc_subpel_params_and_extend( + const MV *const src_mv, InterPredParams *const inter_pred_params, + MACROBLOCKD *const xd, int mi_x, int mi_y, int ref, +#if CONFIG_OPTFLOW_REFINEMENT + int use_optflow_refinement, +#endif // CONFIG_OPTFLOW_REFINEMENT + uint16_t **mc_buf, uint16_t **pre, SubpelParams *subpel_params, + int *src_stride) { + (void)ref; + (void)mc_buf; + (void)xd; + + PadBlock block; + MV32 scaled_mv; + int subpel_x_mv, subpel_y_mv; + assert(inter_pred_params->use_ref_padding); + + tip_dec_calc_subpel_params(src_mv, inter_pred_params, mi_x, mi_y, pre, + subpel_params, src_stride, &block, +#if CONFIG_OPTFLOW_REFINEMENT + use_optflow_refinement, +#endif // CONFIG_OPTFLOW_REFINEMENT + &scaled_mv, &subpel_x_mv, &subpel_y_mv); + + const int paded_ref_buf_stride = + inter_pred_params->ref_area->paded_ref_buf_stride; + refinemv_extend_mc_border( + inter_pred_params->scale_factors, &inter_pred_params->ref_frame_buf, + scaled_mv, block, subpel_x_mv, subpel_y_mv, + inter_pred_params->mode == WARP_PRED, inter_pred_params->is_intrabc, + &inter_pred_params->ref_area->paded_ref_buf[0], paded_ref_buf_stride, pre, + src_stride, inter_pred_params->ref_area); +} +#endif + +void dec_calc_subpel_params(const MV *const 
src_mv, + InterPredParams *const inter_pred_params, + const MACROBLOCKD *const xd, int mi_x, int mi_y, + uint16_t **pre, SubpelParams *subpel_params, + int *src_stride, PadBlock *block, +#if CONFIG_OPTFLOW_REFINEMENT + int use_optflow_refinement, +#endif // CONFIG_OPTFLOW_REFINEMENT + MV32 *scaled_mv, int *subpel_x_mv, + int *subpel_y_mv) { + const struct scale_factors *sf = inter_pred_params->scale_factors; + struct buf_2d *pre_buf = &inter_pred_params->ref_frame_buf; + +#if CONFIG_REFINEMV + const int bw = inter_pred_params->original_pu_width; + const int bh = inter_pred_params->original_pu_height; +#else + +#if CONFIG_OPTFLOW_REFINEMENT + // Use original block size to clamp MV and to extend block boundary + const int bw = use_optflow_refinement ? inter_pred_params->orig_block_width + : inter_pred_params->block_width; + const int bh = use_optflow_refinement ? inter_pred_params->orig_block_height + : inter_pred_params->block_height; +#else + const int bw = inter_pred_params->block_width; + const int bh = inter_pred_params->block_height; +#endif // CONFIG_OPTFLOW_REFINEMENT +#endif // CONFIG_REFINEMV + + const int is_scaled = av1_is_scaled(sf); + if (is_scaled) { + int ssx = inter_pred_params->subsampling_x; + int ssy = inter_pred_params->subsampling_y; + int orig_pos_y = inter_pred_params->pix_row << SUBPEL_BITS; + int orig_pos_x = inter_pred_params->pix_col << SUBPEL_BITS; +#if CONFIG_OPTFLOW_REFINEMENT + if (use_optflow_refinement) { + orig_pos_y += ROUND_POWER_OF_TWO_SIGNED(src_mv->row * (1 << SUBPEL_BITS), + MV_REFINE_PREC_BITS + ssy); + orig_pos_x += ROUND_POWER_OF_TWO_SIGNED(src_mv->col * (1 << SUBPEL_BITS), + MV_REFINE_PREC_BITS + ssx); + } else { + orig_pos_y += src_mv->row * (1 << (1 - ssy)); + orig_pos_x += src_mv->col * (1 << (1 - ssx)); + } +#else + orig_pos_y += src_mv->row * (1 << (1 - ssy)); + orig_pos_x += src_mv->col * (1 << (1 - ssx)); +#endif // CONFIG_OPTFLOW_REFINEMENT + int pos_y = sf->scale_value_y(orig_pos_y, sf); + int pos_x = 
sf->scale_value_x(orig_pos_x, sf); + pos_x += SCALE_EXTRA_OFF; + pos_y += SCALE_EXTRA_OFF; + + const int top = -AOM_LEFT_TOP_MARGIN_SCALED(ssy); + const int left = -AOM_LEFT_TOP_MARGIN_SCALED(ssx); + const int bottom = (pre_buf->height + AOM_INTERP_EXTEND) + << SCALE_SUBPEL_BITS; + const int right = (pre_buf->width + AOM_INTERP_EXTEND) << SCALE_SUBPEL_BITS; + pos_y = clamp(pos_y, top, bottom); + pos_x = clamp(pos_x, left, right); + + subpel_params->subpel_x = pos_x & SCALE_SUBPEL_MASK; + subpel_params->subpel_y = pos_y & SCALE_SUBPEL_MASK; + subpel_params->xs = sf->x_step_q4; + subpel_params->ys = sf->y_step_q4; + + // Get reference block top left coordinate. + block->x0 = pos_x >> SCALE_SUBPEL_BITS; + block->y0 = pos_y >> SCALE_SUBPEL_BITS; + + // Get reference block bottom right coordinate. + block->x1 = + ((pos_x + (inter_pred_params->block_width - 1) * subpel_params->xs) >> + SCALE_SUBPEL_BITS) + + 1; + block->y1 = + ((pos_y + (inter_pred_params->block_height - 1) * subpel_params->ys) >> + SCALE_SUBPEL_BITS) + + 1; + + MV temp_mv; + temp_mv = clamp_mv_to_umv_border_sb(xd, src_mv, bw, bh, +#if CONFIG_OPTFLOW_REFINEMENT + use_optflow_refinement, +#endif // CONFIG_OPTFLOW_REFINEMENT + inter_pred_params->subsampling_x, + inter_pred_params->subsampling_y); + *scaled_mv = av1_scale_mv(&temp_mv, mi_x, mi_y, sf); + scaled_mv->row += SCALE_EXTRA_OFF; + scaled_mv->col += SCALE_EXTRA_OFF; + + *subpel_x_mv = scaled_mv->col & SCALE_SUBPEL_MASK; + *subpel_y_mv = scaled_mv->row & SCALE_SUBPEL_MASK; + } else { + // Get block position in current frame. 
+ int pos_x = inter_pred_params->pix_col << SUBPEL_BITS; + int pos_y = inter_pred_params->pix_row << SUBPEL_BITS; + + const MV mv_q4 = clamp_mv_to_umv_border_sb( + xd, src_mv, bw, bh, +#if CONFIG_OPTFLOW_REFINEMENT + use_optflow_refinement, +#endif // CONFIG_OPTFLOW_REFINEMENT + inter_pred_params->subsampling_x, inter_pred_params->subsampling_y); + subpel_params->xs = subpel_params->ys = SCALE_SUBPEL_SHIFTS; + subpel_params->subpel_x = (mv_q4.col & SUBPEL_MASK) << SCALE_EXTRA_BITS; + subpel_params->subpel_y = (mv_q4.row & SUBPEL_MASK) << SCALE_EXTRA_BITS; + + // Get reference block top left coordinate. + pos_x += mv_q4.col; + pos_y += mv_q4.row; + block->x0 = pos_x >> SUBPEL_BITS; + block->y0 = pos_y >> SUBPEL_BITS; + + // Get reference block bottom right coordinate. + block->x1 = + (pos_x >> SUBPEL_BITS) + (inter_pred_params->block_width - 1) + 1; + block->y1 = + (pos_y >> SUBPEL_BITS) + (inter_pred_params->block_height - 1) + 1; + + scaled_mv->row = mv_q4.row; + scaled_mv->col = mv_q4.col; + *subpel_x_mv = scaled_mv->col & SUBPEL_MASK; + *subpel_y_mv = scaled_mv->row & SUBPEL_MASK; + } + *pre = pre_buf->buf0 + block->y0 * pre_buf->stride + block->x0; + *src_stride = pre_buf->stride; + +#if CONFIG_D071_IMP_MSK_BLD + if (inter_pred_params->border_data.enable_bacp) { + subpel_params->x0 = block->x0; + subpel_params->x1 = block->x1; + subpel_params->y0 = block->y0; + subpel_params->y1 = block->y1; + } +#endif // CONFIG_D071_IMP_MSK_BLD +} + +void common_calc_subpel_params_and_extend( + const MV *const src_mv, InterPredParams *const inter_pred_params, + MACROBLOCKD *const xd, int mi_x, int mi_y, int ref, +#if CONFIG_OPTFLOW_REFINEMENT + int use_optflow_refinement, +#endif // CONFIG_OPTFLOW_REFINEMENT + uint16_t **mc_buf, uint16_t **pre, SubpelParams *subpel_params, + int *src_stride) { + (void)ref; + (void)mc_buf; + + PadBlock block; + MV32 scaled_mv; + int subpel_x_mv, subpel_y_mv; + assert(inter_pred_params->use_ref_padding); + dec_calc_subpel_params(src_mv, 
inter_pred_params, xd, mi_x, mi_y, pre, + subpel_params, src_stride, &block, +#if CONFIG_OPTFLOW_REFINEMENT + use_optflow_refinement, +#endif // CONFIG_OPTFLOW_REFINEMENT + &scaled_mv, &subpel_x_mv, &subpel_y_mv); + + // printf(" Use ref padding \n"); + const int paded_ref_buf_stride = + inter_pred_params->ref_area->paded_ref_buf_stride; + refinemv_extend_mc_border( + inter_pred_params->scale_factors, &inter_pred_params->ref_frame_buf, + scaled_mv, block, subpel_x_mv, subpel_y_mv, + inter_pred_params->mode == WARP_PRED, inter_pred_params->is_intrabc, + &inter_pred_params->ref_area->paded_ref_buf[0], paded_ref_buf_stride, pre, + src_stride, inter_pred_params->ref_area); +} + +static void get_ref_area_info(const MV *const src_mv, + InterPredParams *const inter_pred_params, + MACROBLOCKD *const xd, int mi_x, int mi_y, +#if CONFIG_OPTFLOW_REFINEMENT + int use_optflow_refinement, +#endif // CONFIG_OPTFLOW_REFINEMENT + uint16_t **pre, SubpelParams *subpel_params, + int *src_stride, ReferenceArea *ref_area, + int is_tip) { + PadBlock block; + MV32 scaled_mv; + int subpel_x_mv, subpel_y_mv; + + if (is_tip) { + tip_dec_calc_subpel_params(src_mv, inter_pred_params, mi_x, mi_y, pre, + subpel_params, src_stride, &block, +#if CONFIG_OPTFLOW_REFINEMENT + use_optflow_refinement, +#endif // CONFIG_OPTFLOW_REFINEMENT + &scaled_mv, &subpel_x_mv, &subpel_y_mv); + + } else { + dec_calc_subpel_params(src_mv, inter_pred_params, xd, mi_x, mi_y, pre, + subpel_params, src_stride, &block, +#if CONFIG_OPTFLOW_REFINEMENT + use_optflow_refinement, +#endif // CONFIG_OPTFLOW_REFINEMENT + &scaled_mv, &subpel_x_mv, &subpel_y_mv); + } + + struct buf_2d *const pre_buf = &inter_pred_params->ref_frame_buf; + int frame_height = pre_buf->height; + int frame_width = pre_buf->width; + block.x0 -= REF_LEFT_BORDER; + block.x1 += REF_RIGHT_BORDER; + block.y0 -= REF_TOP_BORDER; + block.y1 += REF_BOTTOM_BORDER; + + ref_area->pad_block.x0 = CLIP(block.x0, 0, frame_width - 1); + ref_area->pad_block.y0 = 
CLIP(block.y0, 0, frame_height - 1); + ref_area->pad_block.x1 = CLIP(block.x1, 0, frame_width); + ref_area->pad_block.y1 = CLIP(block.y1, 0, frame_height); +} + +void av1_get_reference_area_with_padding(const AV1_COMMON *cm, MACROBLOCKD *xd, + int plane, MB_MODE_INFO *mi, int bw, + int bh, int mi_x, int mi_y, + ReferenceArea ref_area[2], + const int comp_pixel_x, + const int comp_pixel_y) { + const int is_tip = mi->ref_frame[0] == TIP_FRAME; + assert(IMPLIES(!is_tip, has_second_ref(mi))); + assert(!is_intrabc_block(mi, xd->tree_type)); + struct macroblockd_plane *const pd = &xd->plane[plane]; + + int row_start = 0; + int col_start = 0; + const int mi_row = -xd->mb_to_top_edge >> MI_SUBPEL_SIZE_LOG2; + const int mi_col = -xd->mb_to_left_edge >> MI_SUBPEL_SIZE_LOG2; + row_start = plane ? (mi->chroma_ref_info.mi_row_chroma_base - mi_row) : 0; + col_start = plane ? (mi->chroma_ref_info.mi_col_chroma_base - mi_col) : 0; + + const int pre_x = is_tip + ? comp_pixel_x + : ((mi_x + MI_SIZE * col_start) >> pd->subsampling_x); + const int pre_y = is_tip + ? comp_pixel_y + : ((mi_y + MI_SIZE * row_start) >> pd->subsampling_y); + + for (int ref = 0; ref < 2; ++ref) { + const struct scale_factors *const sf = + is_tip ? cm->tip_ref.ref_scale_factor[ref] + : xd->block_ref_scale_factors[ref]; + const struct buf_2d *const pre_buf = + is_tip ? &cm->tip_ref.tip_plane[plane].pred[ref] : &pd->pre[ref]; + + // initialize the reference buffer + ref_area[ref].pad_block.x0 = 0; + ref_area[ref].pad_block.y0 = 0; + ref_area[ref].pad_block.x1 = cm->width; + ref_area[ref].pad_block.y1 = cm->height; + ref_area[ref].paded_ref_buf_stride = REF_BUFFER_WIDTH; + + InterPredParams inter_pred_params; + av1_init_inter_params(&inter_pred_params, bw, bh, pre_y, pre_x, + pd->subsampling_x, pd->subsampling_y, xd->bd, + mi->use_intrabc[0], sf, pre_buf, + is_tip ? 
MULTITAP_SHARP : mi->interp_fltr); + + inter_pred_params.original_pu_width = bw; + inter_pred_params.original_pu_height = bh; + +#if CONFIG_TIP + const int width = (cm->mi_params.mi_cols << MI_SIZE_LOG2); + const int height = (cm->mi_params.mi_rows << MI_SIZE_LOG2); + inter_pred_params.dist_to_top_edge = -GET_MV_SUBPEL(pre_y); + inter_pred_params.dist_to_bottom_edge = GET_MV_SUBPEL(height - bh - pre_y); + inter_pred_params.dist_to_left_edge = -GET_MV_SUBPEL(pre_x); + inter_pred_params.dist_to_right_edge = GET_MV_SUBPEL(width - bw - pre_x); +#endif + + SubpelParams subpel_params; + uint16_t *src; + int src_stride; + + assert(!inter_pred_params.use_ref_padding); + + MV *src_mv = ref == 0 ? &mi->mv[0].as_mv : &mi->mv[1].as_mv; + get_ref_area_info(src_mv, &inter_pred_params, xd, mi_x, mi_y, +#if CONFIG_OPTFLOW_REFINEMENT + 0, /* use_optflow_refinement */ +#endif // CONFIG_OPTFLOW_REFINEMENT + &src, &subpel_params, &src_stride, &ref_area[ref], + is_tip); + } +} + +int av1_refinemv_build_predictors_and_get_sad( + MACROBLOCKD *xd, int bw, int bh, int mi_x, int mi_y, uint16_t **mc_buf, + CalcSubpelParamsFunc calc_subpel_params_func, uint16_t *dst_ref0, + uint16_t *dst_ref1, MV mv0, MV mv1, InterPredParams *inter_pred_params) { + for (int ref = 0; ref < 2; ref++) { + SubpelParams subpel_params; + uint16_t *src; + int src_stride; + uint16_t *dst_ref = ref == 0 ? dst_ref0 : dst_ref1; + MV *src_mv = ref == 0 ? 
&mv0 : &mv1; + calc_subpel_params_func(src_mv, &inter_pred_params[ref], xd, mi_x, mi_y, + ref, +#if CONFIG_OPTFLOW_REFINEMENT + 0, /* use_optflow_refinement */ +#endif // CONFIG_OPTFLOW_REFINEMENT + mc_buf, &src, &subpel_params, &src_stride); + assert(inter_pred_params[ref].comp_mode == UNIFORM_SINGLE || + inter_pred_params[ref].comp_mode == UNIFORM_COMP); + av1_make_inter_predictor(src, src_stride, dst_ref, bw, + &inter_pred_params[ref], &subpel_params); + } + + return get_refinemv_sad(dst_ref0, dst_ref1, bw, bh, xd->bd); +} +void apply_mv_refinement(const AV1_COMMON *cm, MACROBLOCKD *xd, int plane, + MB_MODE_INFO *mi, int bw, int bh, int mi_x, int mi_y, + uint16_t **mc_buf, + CalcSubpelParamsFunc calc_subpel_params_func, + int pre_x, int pre_y, uint16_t *dst_ref0, + uint16_t *dst_ref1, MV *best_mv_ref, int pu_width, + int pu_height) { + // initialize basemv as best MV + best_mv_ref[0] = mi->mv[0].as_mv; + best_mv_ref[1] = mi->mv[1].as_mv; + + const MV center_mvs[2] = { best_mv_ref[0], best_mv_ref[1] }; + assert(mi->refinemv_flag < REFINEMV_NUM_MODES); + assert(cm->seq_params.enable_refinemv); + + // Generate MV independent inter_pred_params for both references + InterPredParams inter_pred_params[2]; + for (int ref = 0; ref < 2; ref++) { + const int is_compound = 0; + const int is_intrabc = is_intrabc_block(mi, xd->tree_type); + const int is_tip = mi->ref_frame[0] == TIP_FRAME; + + assert(is_intrabc == 0); + assert(plane == 0); + struct macroblockd_plane *const pd = &xd->plane[plane]; + struct buf_2d *const dst_buf = &pd->dst; + + const struct scale_factors *const sf = + is_tip ? cm->tip_ref.ref_scale_factor[ref] + : (is_intrabc ? &cm->sf_identity + : xd->block_ref_scale_factors[ref]); + const struct buf_2d *const pre_buf = + is_tip ? &cm->tip_ref.tip_plane[plane].pred[ref] + : (is_intrabc ? 
dst_buf : &pd->pre[ref]); + + av1_init_inter_params(&inter_pred_params[ref], bw, bh, pre_y, pre_x, + pd->subsampling_x, pd->subsampling_y, xd->bd, + mi->use_intrabc[0], sf, pre_buf, BILINEAR); + +#if CONFIG_REFINEMV + inter_pred_params[ref].original_pu_width = pu_width; + inter_pred_params[ref].original_pu_height = pu_height; +#endif // CONFIG_REFINEMV + +#if CONFIG_TIP + const int width = (cm->mi_params.mi_cols << MI_SIZE_LOG2); + const int height = (cm->mi_params.mi_rows << MI_SIZE_LOG2); + inter_pred_params[ref].dist_to_top_edge = -GET_MV_SUBPEL(pre_y); + inter_pred_params[ref].dist_to_bottom_edge = + GET_MV_SUBPEL(height - bh - pre_y); + inter_pred_params[ref].dist_to_left_edge = -GET_MV_SUBPEL(pre_x); + inter_pred_params[ref].dist_to_right_edge = + GET_MV_SUBPEL(width - bw - pre_x); +#endif + + inter_pred_params[ref].conv_params = get_conv_params_no_round( + 0, plane, xd->tmp_conv_dst, MAX_SB_SIZE, is_compound, xd->bd); + + assert(inter_pred_params[ref].mode == TRANSLATION_PRED); + assert(inter_pred_params[ref].comp_mode == UNIFORM_SINGLE); + assert(inter_pred_params[ref].conv_params.is_compound == 0); + assert(inter_pred_params[ref].conv_params.do_average == 0); + assert(mi->interinter_comp.type == COMPOUND_AVERAGE); + } + +#if !SINGLE_STEP_SEARCH + // Search integer-delta values + int search_range = 2; +#endif + + int switchable_refinemv_flags = + (mi->ref_frame[0] != TIP_FRAME) && switchable_refinemv_flag(cm, mi); + assert(mi->refinemv_flag); + + // If we signal the refinemv_flags we do not select sad0 + // Set sad0 a large value so that it does not be selected + int sad0 = switchable_refinemv_flags + ? 
(INT32_MAX >> 1) + : av1_refinemv_build_predictors_and_get_sad( + xd, bw, bh, mi_x, mi_y, mc_buf, calc_subpel_params_func, + dst_ref0, dst_ref1, center_mvs[0], center_mvs[1], + inter_pred_params); + + assert(IMPLIES(mi->ref_frame[0] == TIP_FRAME, bw == 8 && bh == 8)); + if (mi->ref_frame[0] == TIP_FRAME) { + const int tip_sad_thres = bw * bh; + if (!switchable_refinemv_flags && sad0 < tip_sad_thres) return; + } + + if (!switchable_refinemv_flags) { + int shift = 3; + int th = (bw * bh) << 1; + sad0 -= (sad0 >> shift); + assert(sad0 >= 0); + if (sad0 < th) return; + } + + int min_sad = sad0; + MV refined_mv0, refined_mv1; + refined_mv0 = center_mvs[0]; + refined_mv1 = center_mvs[1]; + int et_sad_th = (bw * bh) << 1; + +#if !SINGLE_STEP_SEARCH + uint8_t already_searched[5][5]; + for (int i = 0; i < 5; i++) { + for (int j = 0; j < 5; j++) { + already_searched[i][j] = 0; + } + } +#endif + + MV best_offset = { 0, 0 }; + +#if SINGLE_STEP_SEARCH + const int num_neighbors = 24; + static const MV neighbors[24] = { + { -1, -1 }, { -1, 0 }, { -1, 1 }, { 0, 1 }, { 1, 1 }, { 1, 0 }, + { 1, -1 }, { 0, -1 }, { 0, -2 }, { -1, -2 }, { -2, -2 }, { -2, -1 }, + { -2, 0 }, { -2, 1 }, { -2, 2 }, { -1, 2 }, { 0, 2 }, { 1, 2 }, + { 2, 2 }, { 2, 1 }, { 2, 0 }, { 2, -1 }, { 2, -2 }, { 1, -2 } + + }; + +#else + const int num_neighbors = 8; + // Apply two-step full pel refinement + static const MV neighbors[8] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 }, + { 1, -1 }, { 1, 1 }, { -1, -1 }, { -1, 1 } }; + + const int num_iterations = search_range; + already_searched[0 + search_range][0 + search_range] = + 1; // center point is already searched before + for (int ite = 0; ite < num_iterations; ++ite) { +#endif // SINGLE_STEP_SEARCH + + int best_idx = -1; + + for (int idx = 0; idx < num_neighbors; ++idx) { + MV offset = { best_offset.row + neighbors[idx].row, + best_offset.col + neighbors[idx].col }; +#if !SINGLE_STEP_SEARCH + if (already_searched[offset.row + search_range][offset.col + 
search_range]) + continue; +#endif + refined_mv0.row = center_mvs[0].row + 8 * offset.row; + refined_mv0.col = center_mvs[0].col + 8 * offset.col; + refined_mv1.row = center_mvs[1].row - 8 * offset.row; + refined_mv1.col = center_mvs[1].col - 8 * offset.col; + + int this_sad = av1_refinemv_build_predictors_and_get_sad( + xd, bw, bh, mi_x, mi_y, mc_buf, calc_subpel_params_func, dst_ref0, + dst_ref1, refined_mv0, refined_mv1, inter_pred_params); + +#if !SINGLE_STEP_SEARCH + already_searched[offset.row + search_range][offset.col + search_range] = 1; +#endif + + if (this_sad < min_sad) { + min_sad = this_sad; + best_idx = idx; + // if the SAD is less than predefined threshold consider this candidate + // as good enough to skip rest of the search. + if (min_sad < et_sad_th) { + best_mv_ref[0] = refined_mv0; + best_mv_ref[1] = refined_mv1; + return; + } + } + } + + // if the center is best, skip rest of the search. + if (best_idx == -1) { + best_mv_ref[0].row = center_mvs[0].row + 8 * best_offset.row; + best_mv_ref[0].col = center_mvs[0].col + 8 * best_offset.col; + best_mv_ref[1].row = center_mvs[1].row - 8 * best_offset.row; + best_mv_ref[1].col = center_mvs[1].col - 8 * best_offset.col; + + return; + } + + if (best_idx >= 0) { + best_offset.row += neighbors[best_idx].row; + best_offset.col += neighbors[best_idx].col; + } +#if !SINGLE_STEP_SEARCH +} +#endif + +best_mv_ref[0].row = center_mvs[0].row + 8 * best_offset.row; +best_mv_ref[0].col = center_mvs[0].col + 8 * best_offset.col; +best_mv_ref[1].row = center_mvs[1].row - 8 * best_offset.row; +best_mv_ref[1].col = center_mvs[1].col - 8 * best_offset.col; + +assert(min_sad <= sad0); + +assert(IMPLIES(switchable_refinemv_flags, + !(best_mv_ref[0].row == center_mvs[0].row && + best_mv_ref[0].col == center_mvs[0].col && + best_mv_ref[1].row == center_mvs[1].row && + best_mv_ref[1].col == center_mvs[1].col))); +} + +static void build_inter_predictors_8x8_and_bigger_refinemv( + const AV1_COMMON *cm, MACROBLOCKD *xd, int 
plane, MB_MODE_INFO *mi, + int build_for_obmc, int bw, int bh, int mi_x, int mi_y, uint16_t **mc_buf, + CalcSubpelParamsFunc calc_subpel_params_func, uint16_t *dst, int dst_stride, + int pu_width, int pu_height, uint16_t *dst0_16_refinemv, + uint16_t *dst1_16_refinemv, int16_t *opt_gx0, int16_t *opt_gx1, + int row_start, int col_start, MV *sb_refined_mv, MV *chroma_refined_mv, + int build_for_refine_mv_only, ReferenceArea ref_area[2]) { + const int is_compound = has_second_ref(mi); + struct macroblockd_plane *const pd = &xd->plane[plane]; + assert(!is_intrabc_block(mi, xd->tree_type)); + assert(is_compound); + assert(!mi->bawp_flag); + assert(!build_for_obmc); + assert(!is_masked_compound_type(mi->interinter_comp.type)); + assert(!is_tip_ref_frame(mi->ref_frame[0])); + +#if CONFIG_CWP + assert(mi->cwp_idx == CWP_EQUAL); +#endif + + int is_global[2] = { 0, 0 }; + for (int ref = 0; ref < 1 + is_compound; ++ref) { +#if CONFIG_TIP + if (!is_tip_ref_frame(mi->ref_frame[ref])) { +#endif // CONFIG_TIP + const WarpedMotionParams *const wm = + &xd->global_motion[mi->ref_frame[ref]]; + is_global[ref] = is_global_mv_block(mi, wm->wmtype); +#if CONFIG_TIP + } +#endif // CONFIG_TIP + } + + assert(!is_global[0] && !is_global[1]); + + const int pre_x = (mi_x + MI_SIZE * col_start) >> pd->subsampling_x; + const int pre_y = (mi_y + MI_SIZE * row_start) >> pd->subsampling_y; + + int apply_refinemv = (plane == 0); + + MV best_mv_ref[2] = { { mi->mv[0].as_mv.row, mi->mv[0].as_mv.col }, + { mi->mv[1].as_mv.row, mi->mv[1].as_mv.col } }; + if (apply_refinemv) { + uint16_t *dst_ref0 = NULL, *dst_ref1 = NULL; + dst_ref0 = &dst0_16_refinemv[0]; + dst_ref1 = &dst1_16_refinemv[0]; + + assert(IMPLIES(!mi->skip_mode, + is_refinemv_allowed(cm, mi, mi->sb_type[PLANE_TYPE_Y]))); + assert(IMPLIES(mi->skip_mode, is_refinemv_allowed_skip_mode(cm, mi))); + apply_mv_refinement(cm, xd, plane, mi, bw, bh, mi_x, mi_y, mc_buf, + calc_subpel_params_func, pre_x, pre_y, dst_ref0, + dst_ref1, best_mv_ref, 
pu_width, pu_height); + if (sb_refined_mv) { + // store the DMVR refined MV so that chroma can use it + sb_refined_mv[0] = best_mv_ref[0]; + sb_refined_mv[1] = best_mv_ref[1]; + } + assert(IMPLIES(plane, !build_for_refine_mv_only)); + // if build_for_refine_mv_only is non-zero, we build only to get the + // refinemv values The actual prediction values are not necessary + if (build_for_refine_mv_only) { + return; + } + } else { + best_mv_ref[0] = chroma_refined_mv[0]; + best_mv_ref[1] = chroma_refined_mv[1]; + } + +#if CONFIG_OPTFLOW_REFINEMENT + int_mv mv_refined[2 * N_OF_OFFSETS]; + const int use_optflow_refinement = + (mi->mode >= NEAR_NEARMV_OPTFLOW || + (cm->features.opfl_refine_type == REFINE_ALL && + mi->mode != GLOBAL_GLOBALMV && + mi->interinter_comp.type == COMPOUND_AVERAGE)) && + is_compound && is_opfl_refine_allowed(cm, mi); + assert(IMPLIES(use_optflow_refinement, + cm->features.opfl_refine_type != REFINE_NONE)); + assert(IMPLIES(use_optflow_refinement, !build_for_obmc)); + + // Optical flow refinement with masked comp types or with non-sharp + // interpolation filter should only exist in REFINE_ALL. + assert(IMPLIES( + use_optflow_refinement && mi->interinter_comp.type != COMPOUND_AVERAGE, + cm->features.opfl_refine_type == REFINE_ALL)); + assert(IMPLIES(use_optflow_refinement && mi->interp_fltr != MULTITAP_SHARP, + cm->features.opfl_refine_type == REFINE_ALL)); + + // Arrays to hold optical flow offsets. 
+ int vx0[N_OF_OFFSETS] = { 0 }; + int vx1[N_OF_OFFSETS] = { 0 }; + int vy0[N_OF_OFFSETS] = { 0 }; + int vy1[N_OF_OFFSETS] = { 0 }; + + // Pointers to gradient and dst buffers + int16_t *gx0, *gy0, *gx1, *gy1; + uint16_t *dst0 = NULL, *dst1 = NULL; + + if (use_optflow_refinement && plane == 0) { + // Allocate gradient and dst buffers + // gx0 = aom_memalign(32, 2 * MAX_SB_SIZE * MAX_SB_SIZE * sizeof(*gx0)); + // gx1 = aom_memalign(32, 2 * MAX_SB_SIZE * MAX_SB_SIZE * sizeof(*gx1)); + gx0 = &opt_gx0[0]; + gx1 = &opt_gx1[0]; + gy0 = gx0 + (REFINEMV_SUBBLOCK_WIDTH * REFINEMV_SUBBLOCK_HEIGHT); + gy1 = gx1 + (REFINEMV_SUBBLOCK_WIDTH * REFINEMV_SUBBLOCK_HEIGHT); + + // Initialize refined mv + const MV mv0 = best_mv_ref[0]; + const MV mv1 = best_mv_ref[1]; + + for (int mvi = 0; mvi < N_OF_OFFSETS; mvi++) { + mv_refined[mvi * 2].as_mv = mv0; + mv_refined[mvi * 2 + 1].as_mv = mv1; + } + // Refine MV using optical flow. The final output MV will be in 1/16 + // precision. + dst0 = &dst0_16_refinemv[0]; + dst1 = &dst1_16_refinemv[0]; + // dst0 = aom_calloc(1, MAX_SB_SIZE * MAX_SB_SIZE * sizeof(uint16_t)); + // dst1 = aom_calloc(1, MAX_SB_SIZE * MAX_SB_SIZE * sizeof(uint16_t)); + + av1_get_optflow_based_mv_highbd(cm, xd, plane, mi, mv_refined, bw, bh, mi_x, + mi_y, mc_buf, calc_subpel_params_func, gx0, + gy0, gx1, gy1, vx0, vy0, vx1, vy1, dst0, + dst1 +#if CONFIG_OPTFLOW_ON_TIP + , + 1, 1 +#endif // CONFIG_OPTFLOW_ON_TIP + , + best_mv_ref, pu_width, pu_height); + } +#endif // CONFIG_OPTFLOW_REFINEMENT + +#if CONFIG_D071_IMP_MSK_BLD + BacpBlockData bacp_block_data[2 * N_OF_OFFSETS]; + uint8_t use_bacp = !build_for_obmc && use_border_aware_compound(cm, mi) && + mi->cwp_idx == CWP_EQUAL && + cm->features.enable_imp_msk_bld; +#endif // CONFIG_D071_IMP_MSK_BLD + + for (int ref = 0; ref < 1 + is_compound; ++ref) { + const struct scale_factors *const sf = xd->block_ref_scale_factors[ref]; + struct buf_2d *const pre_buf = &pd->pre[ref]; + + const MV mv = best_mv_ref[ref]; + const 
WarpTypesAllowed warp_types = { is_global[ref], + is_warp_mode(mi->motion_mode) }; + InterPredParams inter_pred_params; + av1_init_inter_params(&inter_pred_params, bw, bh, pre_y, pre_x, + pd->subsampling_x, pd->subsampling_y, xd->bd, + mi->use_intrabc[0], sf, pre_buf, mi->interp_fltr); + +#if CONFIG_REFINEMV + inter_pred_params.use_ref_padding = 1; + inter_pred_params.ref_area = &ref_area[ref]; +#endif // CONFIG_REFINEMV + + inter_pred_params.original_pu_width = pu_width; + inter_pred_params.original_pu_height = pu_height; + + if (is_compound) av1_init_comp_mode(&inter_pred_params); +#if CONFIG_D071_IMP_MSK_BLD + inter_pred_params.border_data.enable_bacp = use_bacp; + inter_pred_params.border_data.bacp_block_data = + &bacp_block_data[0]; // Always point to the first ref +#endif // CONFIG_D071_IMP_MSK_BLD + inter_pred_params.conv_params = get_conv_params_no_round( + ref, plane, xd->tmp_conv_dst, MAX_SB_SIZE, is_compound, xd->bd); + + if (!build_for_obmc) + av1_init_warp_params(&inter_pred_params, &warp_types, ref, xd, mi); + +#if CONFIG_D071_IMP_MSK_BLD + if (is_compound) { + inter_pred_params.sb_type = mi->sb_type[PLANE_TYPE_Y]; + inter_pred_params.mask_comp = mi->interinter_comp; + } +#endif // CONFIG_D071_IMP_MSK_BLD + +#if CONFIG_OPTFLOW_REFINEMENT + if (use_optflow_refinement && plane == 0) { + int n = opfl_get_subblock_size(bw, bh, plane +#if CONFIG_OPTFLOW_ON_TIP + , + 1 +#endif // CONFIG_OPTFLOW_ON_TIP + ); + inter_pred_params.interp_filter_params[0] = + av1_get_interp_filter_params_with_block_size(mi->interp_fltr, n); + + inter_pred_params.interp_filter_params[1] = + av1_get_interp_filter_params_with_block_size(mi->interp_fltr, n); + + av1_opfl_rebuild_inter_predictor(dst, dst_stride, plane, mv_refined, + &inter_pred_params, xd, mi_x, mi_y, ref, + mc_buf, calc_subpel_params_func +#if CONFIG_OPTFLOW_ON_TIP + , + 1 +#endif // CONFIG_OPTFLOW_ON_TIP + ); + continue; + } +#endif // CONFIG_OPTFLOW_REFINEMENT + + av1_build_one_inter_predictor(dst, dst_stride, &mv, 
&inter_pred_params, xd, + mi_x, mi_y, ref, mc_buf, + calc_subpel_params_func); + } + +#if CONFIG_PEF + if (use_optflow_refinement && plane == 0) { + enhance_prediction(cm, xd, plane, dst, dst_stride, bw, bh +#if CONFIG_OPTFLOW_REFINEMENT + , + mv_refined, use_optflow_refinement +#endif // CONFIG_OPTFLOW_REFINEMENT + +#if CONFIG_REFINEMV + , + 0, NULL +#endif // CONFIG_REFINEMV + ); + } +#endif // CONFIG_PEF +} + +#endif // CONFIG_REFINEMV + static void build_inter_predictors_8x8_and_bigger( const AV1_COMMON *cm, MACROBLOCKD *xd, int plane, MB_MODE_INFO *mi, #if CONFIG_BAWP const BUFFER_SET *dst_orig, #endif // CONFIG_BAWP int build_for_obmc, int bw, int bh, int mi_x, int mi_y, uint16_t **mc_buf, - CalcSubpelParamsFunc calc_subpel_params_func) { + CalcSubpelParamsFunc calc_subpel_params_func +#if CONFIG_REFINEMV + , + int build_for_refine_mv_only +#endif // CONFIG_REFINEMV +) { const int is_compound = has_second_ref(mi); const int is_intrabc = is_intrabc_block(mi, xd->tree_type); assert(IMPLIES(is_intrabc, !is_compound)); @@ -1768,6 +3280,115 @@ struct buf_2d *const dst_buf = &pd->dst; uint16_t *const dst = dst_buf->buf; +#if CONFIG_REFINEMV + assert(IMPLIES(mi->refinemv_flag, !is_intrabc)); + assert(IMPLIES(mi->refinemv_flag && !build_for_obmc, is_compound)); + assert(IMPLIES( + !build_for_obmc && mi->refinemv_flag && switchable_refinemv_flag(cm, mi), + mi->interinter_comp.type == COMPOUND_AVERAGE)); + assert(IMPLIES(mi->refinemv_flag, mi->bawp_flag == 0)); + assert(IMPLIES(mi->refinemv_flag, mi->interp_fltr == MULTITAP_SHARP)); + + int apply_sub_block_refinemv = mi->refinemv_flag && (!build_for_obmc) && + !is_tip_ref_frame(mi->ref_frame[0]); + + if (apply_sub_block_refinemv && default_refinemv_modes(mi)) + apply_sub_block_refinemv &= (mi->comp_group_idx == 0 && + mi->interinter_comp.type == COMPOUND_AVERAGE); + + if (apply_sub_block_refinemv) { +#if CONFIG_CWP + assert(IMPLIES(mi->refinemv_flag, mi->cwp_idx == CWP_EQUAL)); +#endif + int refinemv_sb_size_width = + 
AOMMIN((REFINEMV_SUBBLOCK_WIDTH >> pd->subsampling_x), bw); + int refinemv_sb_size_height = + AOMMIN(REFINEMV_SUBBLOCK_HEIGHT >> pd->subsampling_y, bh); + uint16_t + dst0_16_refinemv[REFINEMV_SUBBLOCK_WIDTH * REFINEMV_SUBBLOCK_HEIGHT]; + uint16_t + dst1_16_refinemv[REFINEMV_SUBBLOCK_WIDTH * REFINEMV_SUBBLOCK_HEIGHT]; + DECLARE_ALIGNED( + 32, int16_t, + opt_gx0[2 * REFINEMV_SUBBLOCK_WIDTH * REFINEMV_SUBBLOCK_HEIGHT]); + DECLARE_ALIGNED( + 32, int16_t, + opt_gx1[2 * REFINEMV_SUBBLOCK_WIDTH * REFINEMV_SUBBLOCK_HEIGHT]); + + ReferenceArea ref_area[2]; + av1_get_reference_area_with_padding(cm, xd, plane, mi, bw, bh, mi_x, mi_y, + ref_area, 0, 0); + + int dst_stride = dst_buf->stride; + CONV_BUF_TYPE *tmp_conv_dst = xd->tmp_conv_dst; + assert(bw % refinemv_sb_size_width == 0); + assert(bh % refinemv_sb_size_height == 0); + for (int h = 0; h < bh; h += refinemv_sb_size_height) { + for (int w = 0; w < bw; w += refinemv_sb_size_width) { + dst_buf->buf = dst + h * dst_stride + w; + xd->tmp_conv_dst = tmp_conv_dst + h * MAX_SB_SIZE + w; + + const int mi_row = -xd->mb_to_top_edge >> MI_SUBPEL_SIZE_LOG2; + const int mi_col = -xd->mb_to_left_edge >> MI_SUBPEL_SIZE_LOG2; + int row_start = + plane ? (mi->chroma_ref_info.mi_row_chroma_base - mi_row) : 0; + int col_start = + plane ? 
(mi->chroma_ref_info.mi_col_chroma_base - mi_col) : 0; + MV luma_refined_mv[2] = { { mi->mv[0].as_mv.row, mi->mv[0].as_mv.col }, + { mi->mv[1].as_mv.row, + mi->mv[1].as_mv.col } }; + + MV chroma_refined_mv[2] = { + { mi->mv[0].as_mv.row, mi->mv[0].as_mv.col }, + { mi->mv[1].as_mv.row, mi->mv[1].as_mv.col } + }; + + if (plane != 0) { + int luma_h = (h << pd->subsampling_y); + int luma_w = (w << pd->subsampling_x); + REFINEMV_SUBMB_INFO *refinemv_subinfo = + &xd->refinemv_subinfo[(luma_h >> MI_SIZE_LOG2) * MAX_MIB_SIZE + + (luma_w >> MI_SIZE_LOG2)]; + chroma_refined_mv[0] = refinemv_subinfo->refinemv[0].as_mv; + chroma_refined_mv[1] = refinemv_subinfo->refinemv[1].as_mv; + } + // mi_x, and mi_y are the top-left position of the luma samples of the + // sub-block + build_inter_predictors_8x8_and_bigger_refinemv( + cm, xd, plane, mi, build_for_obmc, refinemv_sb_size_width, + refinemv_sb_size_height, mi_x + w * (1 << pd->subsampling_x), + mi_y + h * (1 << pd->subsampling_y), mc_buf, + calc_subpel_params_func, dst_buf->buf, dst_stride, bw, bh, + dst0_16_refinemv, dst1_16_refinemv, opt_gx0, opt_gx1, row_start, + col_start, plane == 0 ? 
luma_refined_mv : NULL, chroma_refined_mv, + build_for_refine_mv_only, ref_area); + + if (plane == 0) { + REFINEMV_SUBMB_INFO *refinemv_subinfo = + &xd->refinemv_subinfo[(h >> MI_SIZE_LOG2) * MAX_MIB_SIZE + + (w >> MI_SIZE_LOG2)]; + fill_subblock_refine_mv(refinemv_subinfo, refinemv_sb_size_width, + refinemv_sb_size_height, luma_refined_mv[0], + luma_refined_mv[1]); + } + } + } + +#if CONFIG_PEF + enhance_prediction(cm, xd, plane, dst, dst_stride, bw, bh +#if CONFIG_OPTFLOW_REFINEMENT + , + NULL, 0 +#endif // CONFIG_OPTFLOW_REFINEMENT + , + apply_sub_block_refinemv, &xd->refinemv_subinfo[0]); +#endif // CONFIG_PEF + dst_buf->buf = dst; + xd->tmp_conv_dst = tmp_conv_dst; + return; + } +#endif // CONFIG_REFINEMV + int is_global[2] = { 0, 0 }; for (int ref = 0; ref < 1 + is_compound; ++ref) { #if CONFIG_TIP @@ -1791,17 +3412,23 @@ } const int pre_x = (mi_x + MI_SIZE * col_start) >> pd->subsampling_x; const int pre_y = (mi_y + MI_SIZE * row_start) >> pd->subsampling_y; - +#if CONFIG_REFINEMV + MV best_mv_ref[2] = { { mi->mv[0].as_mv.row, mi->mv[0].as_mv.col }, + { mi->mv[1].as_mv.row, mi->mv[1].as_mv.col } }; +#endif // CONFIG_REFINEMV #if CONFIG_OPTFLOW_REFINEMENT int_mv mv_refined[2 * N_OF_OFFSETS]; const int use_optflow_refinement = (mi->mode >= NEAR_NEARMV_OPTFLOW || (cm->features.opfl_refine_type == REFINE_ALL && mi->mode != GLOBAL_GLOBALMV && +#if CONFIG_CWP + mi->cwp_idx == CWP_EQUAL && +#endif // CONFIG_CWP mi->interinter_comp.type == COMPOUND_AVERAGE)) && is_compound && is_opfl_refine_allowed(cm, mi); assert(IMPLIES(use_optflow_refinement, - cm->features.opfl_refine_type == REFINE_SWITCHABLE)); + cm->features.opfl_refine_type != REFINE_NONE)); assert(IMPLIES(use_optflow_refinement, !build_for_obmc)); // Optical flow refinement with masked comp types or with non-sharp @@ -1838,8 +3465,13 @@ gy1 = g1_buf + MAX_SB_SQUARE; // Initialize refined mv - const MV mv0 = mi->mv[0].as_mv; - const MV mv1 = mi->mv[1].as_mv; +#if CONFIG_REFINEMV + const MV mv0 = 
best_mv_ref[0]; + const MV mv1 = best_mv_ref[1]; +#else + const MV mv0 = mi->mv[0].as_mv; + const MV mv1 = mi->mv[1].as_mv; +#endif // CONFIG_REFINEMV for (int mvi = 0; mvi < n_blocks; mvi++) { mv_refined[mvi * 2].as_mv = mv0; mv_refined[mvi * 2 + 1].as_mv = mv1; @@ -1855,10 +3487,21 @@ , 1, 1 #endif // CONFIG_OPTFLOW_ON_TIP +#if CONFIG_REFINEMV + , + best_mv_ref, bw, bh +#endif // CONFIG_REFINEMV ); } #endif // CONFIG_OPTFLOW_REFINEMENT +#if CONFIG_D071_IMP_MSK_BLD + BacpBlockData bacp_block_data[2 * N_OF_OFFSETS]; + uint8_t use_bacp = !build_for_obmc && use_border_aware_compound(cm, mi) && + mi->cwp_idx == CWP_EQUAL && + cm->features.enable_imp_msk_bld; +#endif // CONFIG_D071_IMP_MSK_BLD + for (int ref = 0; ref < 1 + is_compound; ++ref) { const struct scale_factors *const sf = is_intrabc ? &cm->sf_identity : xd->block_ref_scale_factors[ref]; @@ -1872,15 +3515,31 @@ pd->subsampling_x, pd->subsampling_y, xd->bd, mi->use_intrabc[0], sf, pre_buf, mi->interp_fltr); if (is_compound) av1_init_comp_mode(&inter_pred_params); +#if CONFIG_D071_IMP_MSK_BLD + inter_pred_params.border_data.enable_bacp = use_bacp; + inter_pred_params.border_data.bacp_block_data = + &bacp_block_data[0]; // Always point to the first ref +#endif // CONFIG_D071_IMP_MSK_BLD + inter_pred_params.conv_params = get_conv_params_no_round( ref, plane, xd->tmp_conv_dst, MAX_SB_SIZE, is_compound, xd->bd); if (!build_for_obmc) av1_init_warp_params(&inter_pred_params, &warp_types, ref, xd, mi); - if (is_masked_compound_type(mi->interinter_comp.type)) { +#if CONFIG_D071_IMP_MSK_BLD + if (is_compound) { inter_pred_params.sb_type = mi->sb_type[PLANE_TYPE_Y]; inter_pred_params.mask_comp = mi->interinter_comp; + } +#endif // CONFIG_D071_IMP_MSK_BLD + + if (is_masked_compound_type(mi->interinter_comp.type)) { +#if !CONFIG_D071_IMP_MSK_BLD + inter_pred_params.sb_type = mi->sb_type[PLANE_TYPE_Y]; + inter_pred_params.mask_comp = mi->interinter_comp; +#endif // !CONFIG_D071_IMP_MSK_BLD + if (ref == 1) { 
inter_pred_params.conv_params.do_average = 0; inter_pred_params.comp_mode = MASK_COMP; @@ -1889,6 +3548,18 @@ inter_pred_params.mask_comp.seg_mask = xd->seg_mask; } +#if CONFIG_CWP + if (ref == 1 && inter_pred_params.conv_params.do_average == 1) { + if (get_cwp_idx(mi) != CWP_EQUAL) { + int8_t weight = get_cwp_idx(mi); + assert(mi->cwp_idx >= CWP_MIN && mi->cwp_idx <= CWP_MAX); + inter_pred_params.conv_params.fwd_offset = weight; + inter_pred_params.conv_params.bck_offset = + (1 << CWP_WEIGHT_BITS) - weight; + } + } +#endif // CONFIG_CWP + #if CONFIG_OPTFLOW_REFINEMENT if (use_optflow_refinement && plane == 0) { const int n = opfl_get_subblock_size(bw, bh, plane @@ -1930,6 +3601,10 @@ , mv_refined, use_optflow_refinement #endif // CONFIG_OPTFLOW_REFINEMENT +#if CONFIG_REFINEMV + , + 0, NULL +#endif // CONFIG_REFINEMV ); #endif // CONFIG_PEF } @@ -1939,6 +3614,9 @@ #if CONFIG_BAWP const BUFFER_SET *dst_orig, #endif +#if CONFIG_REFINEMV + int build_for_refine_mv_only, +#endif // CONFIG_REFINEMV int build_for_obmc, int bw, int bh, int mi_x, int mi_y, uint16_t **mc_buf, CalcSubpelParamsFunc calc_subpel_params_func) { @@ -1946,7 +3624,12 @@ // just for debugging purpose // Can be removed later on if (mi->mode == WARPMV) { - assert(mi->ref_mv_idx == 0); +#if CONFIG_SEP_COMP_DRL + assert(mi->ref_mv_idx[0] == 0); + assert(mi->ref_mv_idx[1] == 0); +#else + assert(mi->ref_mv_idx == 0); +#endif // CONFIG_SEP_COMP_DRL assert(mi->motion_mode == WARP_DELTA || mi->motion_mode == WARPED_CAUSAL); } #endif // CONFIG_WARPMV @@ -1963,7 +3646,12 @@ dst_orig, #endif build_for_obmc, bw, bh, mi_x, mi_y, - mc_buf, calc_subpel_params_func); + mc_buf, calc_subpel_params_func +#if CONFIG_REFINEMV + , + build_for_refine_mv_only +#endif // CONFIG_REFINEMV + ); } } @@ -2061,7 +3749,7 @@ return; foreach_overlappable_nb_above(cm, xd, INT_MAX, increment_uint8_t_ptr, - &mbmi->overlappable_neighbors[0]); + &mbmi->overlappable_neighbors[0], true); if (mbmi->overlappable_neighbors[0]) return; 
foreach_overlappable_nb_left(cm, xd, INT_MAX, increment_uint8_t_ptr, &mbmi->overlappable_neighbors[1]); @@ -2174,9 +3862,9 @@ // handle above row struct obmc_inter_pred_ctxt ctxt_above = { above, above_stride }; - foreach_overlappable_nb_above(cm, xd, - max_neighbor_obmc[mi_size_wide_log2[bsize]], - build_obmc_inter_pred_above, &ctxt_above); + foreach_overlappable_nb_above( + cm, xd, max_neighbor_obmc[mi_size_wide_log2[bsize]], + build_obmc_inter_pred_above, &ctxt_above, false); // handle left column struct obmc_inter_pred_ctxt ctxt_left = { left, left_stride }; @@ -2387,6 +4075,18 @@ return (above_mpp_flag + left_mpp_flag); } +#if CONFIG_REFINEMV +// Derive the context index for refinemv flag +int av1_get_refinemv_context(const AV1_COMMON *cm, const MACROBLOCKD *xd, + BLOCK_SIZE bsize) { + (void)cm; + (void)bsize; + const MB_MODE_INFO *const mbmi = xd->mi[0]; + if (mbmi->skip_mode) return 0; + return (1 + (mbmi->mode - NEAR_NEARMV)); +} +#endif // CONFIG_REFINEMV + int av1_get_pb_mv_precision_down_context(const AV1_COMMON *cm, const MACROBLOCKD *xd) { (void)cm; @@ -2480,7 +4180,11 @@ } void set_precision_set(const AV1_COMMON *const cm, MACROBLOCKD *const xd, MB_MODE_INFO *mbmi, const BLOCK_SIZE bsize, - uint8_t ref_mv_idx) { +#if CONFIG_SEP_COMP_DRL + int *ref_mv_idx) { +#else + uint8_t ref_mv_idx) { +#endif // CONFIG_SEP_COMP_DRL (void)bsize; (void)cm; (void)xd; @@ -2527,5 +4231,22 @@ cm->features.use_pb_mv_precision && have_newmv_in_inter_mode(mbmi->mode); } - #endif + +#if CONFIG_REFINEMV +// Copy mv0 and mv1 to the sub-blocks +// submi is the top-left corner of the sub-block need to fill +// bw is the block width in the unit of pixel +// bh is the block height in unit of pixel +void fill_subblock_refine_mv(REFINEMV_SUBMB_INFO *refinemv_subinfo, int bw, + int bh, MV mv0, MV mv1) { + const int stride = MAX_MIB_SIZE; + for (int y = 0; y < (bh >> MI_SIZE_LOG2); y++) { + for (int x = 0; x < (bw >> MI_SIZE_LOG2); x++) { + refinemv_subinfo[x].refinemv[0].as_mv = mv0; 
+ refinemv_subinfo[x].refinemv[1].as_mv = mv1; + } + refinemv_subinfo += stride; + } +} +#endif // CONFIG_REFINEMV
diff --git a/av1/common/reconinter.h b/av1/common/reconinter.h index 18ea840..5a5a146 100644 --- a/av1/common/reconinter.h +++ b/av1/common/reconinter.h
@@ -167,6 +167,13 @@ int ys; int subpel_x; int subpel_y; +#if CONFIG_D071_IMP_MSK_BLD + int x0; // top left sample horizontal cood. + int y0; // top left sample vertical cood. + int x1; // x0 + bw + int y1; // y0 + bh +#endif // CONFIG_D071_IMP_MSK_BLD + } SubpelParams; struct build_prediction_ctxt { @@ -179,6 +186,14 @@ void *dcb; // Decoder-only coding block. }; +#if CONFIG_REFINEMV +#define REFINE_MV_MAX_OFFSET 1 +#define REF_TOP_BORDER (AOM_INTERP_EXTEND - 1 + REFINE_MV_MAX_OFFSET) +#define REF_LEFT_BORDER (AOM_INTERP_EXTEND - 1 + REFINE_MV_MAX_OFFSET) +#define REF_RIGHT_BORDER (AOM_INTERP_EXTEND + REFINE_MV_MAX_OFFSET) +#define REF_BOTTOM_BORDER (AOM_INTERP_EXTEND + REFINE_MV_MAX_OFFSET) +#endif // CONFIG_REFINEMV + typedef enum InterPredMode { TRANSLATION_PRED, WARP_PRED, @@ -206,6 +221,15 @@ int orig_block_width; int orig_block_height; #endif // CONFIG_OPTFLOW_REFINEMENT + +#if CONFIG_REFINEMV + // In refinemV, the prediction is generated maximum 16x16 sub-block basis + // original_pu_width and original_pu_height represents the width and height + // of the original block. 
+ int original_pu_width; + int original_pu_height; +#endif // CONFIG_REFINEMV + int pix_row; int pix_col; struct buf_2d ref_frame_buf; @@ -226,6 +250,15 @@ int dist_to_top_edge; /*!< Distance from top edge */ int dist_to_bottom_edge; /*!< Distance from bottom edge */ #endif // CONFIG_TIP + +#if CONFIG_REFINEMV + int use_ref_padding; + ReferenceArea *ref_area; +#endif // CONFIG_REFINEMV + +#if CONFIG_D071_IMP_MSK_BLD + INTERINTER_COMPOUND_BORDER_DATA border_data; +#endif // CONFIG_D071_IMP_MSK_BLD } InterPredParams; #if CONFIG_OPTFLOW_REFINEMENT @@ -300,15 +333,49 @@ #if CONFIG_WARP_REF_LIST // Check if the signaling of the warp delta parameters are allowed -static INLINE int allow_warp_parameter_signaling(const MB_MODE_INFO *mbmi) { +static INLINE int allow_warp_parameter_signaling( +#if CONFIG_CWG_D067_IMPROVED_WARP + const AV1_COMMON *const cm, +#endif // CONFIG_CWG_D067_IMPROVED_WARP + const MB_MODE_INFO *mbmi) { return ( #if CONFIG_WARPMV mbmi->mode != WARPMV && #endif // CONFIG_WARPMV +#if CONFIG_CWG_D067_IMPROVED_WARP + cm->features.allow_warpmv_mode && +#endif // CONFIG_CWG_D067_IMPROVED_WARP mbmi->motion_mode == WARP_DELTA && mbmi->warp_ref_idx == 1); } #endif // CONFIG_WARP_REF_LIST +#if CONFIG_CWP +// Map the index to weighting factor for compound weighted prediction +static INLINE int get_cwp_coding_idx(int val, int encode, + const AV1_COMMON *const cm, + const MB_MODE_INFO *const mbmi) { + int is_same_side = 0; + int cur_ref_side = 0; + int other_ref_side = 0; + if (has_second_ref(mbmi)) { + cur_ref_side = cm->ref_frame_side[mbmi->ref_frame[0]]; + other_ref_side = cm->ref_frame_side[mbmi->ref_frame[1]]; + + is_same_side = (cur_ref_side > 0 && other_ref_side > 0) || + (cur_ref_side == 0 && other_ref_side == 0); + } + + if (encode) { + for (int i = 0; i < MAX_CWP_NUM; i++) { + if (cwp_weighting_factor[is_same_side][i] == val) return i; + } + return 0; + } else { + return cwp_weighting_factor[is_same_side][val]; + } +} +#endif // CONFIG_CWP + #if 
CONFIG_ADAPTIVE_MVD static INLINE int enable_adaptive_mvd_resolution(const AV1_COMMON *const cm, const MB_MODE_INFO *mbmi) { @@ -474,10 +541,316 @@ #if CONFIG_BAWP const BUFFER_SET *dst_orig, #endif +#if CONFIG_REFINEMV + int build_for_refine_mv_only, +#endif // CONFIG_REFINEMV int build_for_obmc, int bw, int bh, int mi_x, int mi_y, uint16_t **mc_buf, CalcSubpelParamsFunc calc_subpel_params_func); +#if CONFIG_REFINEMV +// Generate one prediction signal for a TIP block +void tip_build_one_inter_predictor( + uint16_t *dst, int dst_stride, const MV *const src_mv, + InterPredParams *inter_pred_params, MACROBLOCKD *xd, int mi_x, int mi_y, + int ref, uint16_t **mc_buf, CalcSubpelParamsFunc calc_subpel_params_func); + +// Compute the SAD between the two predictors when refinemv is ON +int get_refinemv_sad(uint16_t *src1, uint16_t *src2, int width, int height, + int bd); +// Genrate two prediction signals and compute SAD of a given mv0 and mv1 +int av1_refinemv_build_predictors_and_get_sad( + MACROBLOCKD *xd, int bw, int bh, int mi_x, int mi_y, uint16_t **mc_buf, + CalcSubpelParamsFunc calc_subpel_params_func, uint16_t *dst_ref0, + uint16_t *dst_ref1, MV mv0, MV mv1, InterPredParams *inter_pred_params); + +// Get the context index to code refinemv flag +int av1_get_refinemv_context(const AV1_COMMON *cm, const MACROBLOCKD *xd, + BLOCK_SIZE bsize); + +// Full blocks refine MVs are stored in 4x4 grid so that the MVs can be reused +// for chroma +void fill_subblock_refine_mv(REFINEMV_SUBMB_INFO *refinemv_subinfo, int bw, + int bh, MV mv0, MV mv1); + +// Generate the reference area ( bounding box) based on the signaled MV +void av1_get_reference_area_with_padding(const AV1_COMMON *cm, MACROBLOCKD *xd, + int plane, MB_MODE_INFO *mi, int bw, + int bh, int mi_x, int mi_y, + ReferenceArea ref_area[2], + const int comp_pixel_x, + const int comp_pixel_y); + +// Derive the sub-pixel related parameters of TIP blocks +// Sub-pel related parameters are stored in the structures pointed by 
+// "subpel_params" and "block" +void tip_dec_calc_subpel_params(const MV *const src_mv, + InterPredParams *const inter_pred_params, + int mi_x, int mi_y, uint16_t **pre, + SubpelParams *subpel_params, int *src_stride, + PadBlock *block, +#if CONFIG_OPTFLOW_REFINEMENT + int use_optflow_refinement, +#endif // CONFIG_OPTFLOW_REFINEMENT + MV32 *scaled_mv, int *subpel_x_mv, + int *subpel_y_mv); + +// Derive the sub-pixel related parameters of non-TIP blocks +// Sub-pel related parameters are stored in the structures pointed by +// "subpel_params" and "block" +void dec_calc_subpel_params(const MV *const src_mv, + InterPredParams *const inter_pred_params, + const MACROBLOCKD *const xd, int mi_x, int mi_y, + uint16_t **pre, SubpelParams *subpel_params, + int *src_stride, PadBlock *block, +#if CONFIG_OPTFLOW_REFINEMENT + int use_optflow_refinement, +#endif // CONFIG_OPTFLOW_REFINEMENT + MV32 *scaled_mv, int *subpel_x_mv, + int *subpel_y_mv); + +// check if the refinemv mode is allwed for a given blocksize +static INLINE int is_refinemv_allowed_bsize(BLOCK_SIZE bsize) { + assert(bsize < BLOCK_SIZES_ALL); + return (block_size_wide[bsize] >= 16 || block_size_high[bsize] >= 16); +} + +// check if the refinemv mode is allwed for a given mode and precision +static INLINE int is_refinemv_allowed_mode_precision( + PREDICTION_MODE mode, MvSubpelPrecision precision, + const AV1_COMMON *const cm) { + (void)precision; + if (mode == GLOBAL_GLOBALMV) return 0; + if (cm->features.opfl_refine_type == REFINE_SWITCHABLE && + (mode == JOINT_NEWMV || mode == JOINT_AMVDNEWMV || mode == NEAR_NEWMV || + mode == NEW_NEARMV || mode == NEW_NEWMV)) + return 0; + return (mode >= NEAR_NEARMV && mode <= JOINT_AMVDNEWMV_OPTFLOW); +} +// check if the prediction mode infered to refimemv to always 1. 
+static INLINE int default_refinemv_modes(const MB_MODE_INFO *mbmi) { + return (mbmi->skip_mode || mbmi->mode == NEAR_NEARMV || + mbmi->mode == NEAR_NEARMV_OPTFLOW || + mbmi->mode == JOINT_NEWMV_OPTFLOW); +} +// Check if the compound and equal distance references +static INLINE int is_refinemv_allowed_reference(const AV1_COMMON *cm, + const MB_MODE_INFO *mbmi) { + if (!cm->seq_params.enable_refinemv) return 0; +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + const unsigned int cur_index = cm->cur_frame->display_order_hint; +#else + const unsigned int cur_index = cm->cur_frame->order_hint; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + int d0, d1; + int is_tip = (mbmi->ref_frame[0] == TIP_FRAME); + + if (is_tip) { + d0 = cm->tip_ref.ref_offset[0]; + d1 = cm->tip_ref.ref_offset[1]; + } else { + if (!mbmi->ref_frame[1]) return 0; + const RefCntBuffer *const ref0 = get_ref_frame_buf(cm, mbmi->ref_frame[0]); + const RefCntBuffer *const ref1 = get_ref_frame_buf(cm, mbmi->ref_frame[1]); +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + d0 = get_relative_dist(&cm->seq_params.order_hint_info, cur_index, + ref0->display_order_hint); + d1 = get_relative_dist(&cm->seq_params.order_hint_info, cur_index, + ref1->display_order_hint); +#else + d0 = (int)cur_index - (int)ref0->order_hint; + d1 = (int)cur_index - (int)ref1->order_hint; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + } + + // reference frame has to be both sides to apply dmvr + if (!((d0 <= 0) ^ (d1 <= 0))) return 0; + + // Current implementation only supports when both has the same distance + if (abs(d0) != abs(d1)) return 0; + + return 1; +} + +// check if the refinemv mode is allowed for a given block +static INLINE int is_refinemv_allowed(const AV1_COMMON *const cm, + const MB_MODE_INFO *mbmi, + BLOCK_SIZE bsize) { + if (!cm->seq_params.enable_refinemv || + cm->superres_scale_denominator != SCALE_NUMERATOR) + return 0; + int is_tip = is_tip_ref_frame(mbmi->ref_frame[0]); + if (is_tip) return 0; + assert(!mbmi->skip_mode); + 
int is_compound = has_second_ref(mbmi); + return is_compound && is_refinemv_allowed_bsize(bsize) && + is_refinemv_allowed_mode_precision(mbmi->mode, mbmi->pb_mv_precision, + cm) && + is_refinemv_allowed_reference(cm, mbmi); +} + +// check if the refinemv mode is allowed for a given block for TIP mode +static INLINE int is_refinemv_allowed_tip_blocks(const AV1_COMMON *const cm, + const MB_MODE_INFO *mbmi) { + assert(is_tip_ref_frame(mbmi->ref_frame[0])); + return cm->seq_params.enable_refinemv && + cm->superres_scale_denominator == SCALE_NUMERATOR && + is_refinemv_allowed_reference(cm, mbmi); +} + +// check if the refinemv mode is allowed for a given block for skip mode +static INLINE int is_refinemv_allowed_skip_mode(const AV1_COMMON *const cm, + const MB_MODE_INFO *mbmi) { + assert(mbmi->skip_mode); + return cm->seq_params.enable_refinemv && + cm->superres_scale_denominator == SCALE_NUMERATOR && + is_refinemv_allowed_bsize(mbmi->sb_type[PLANE_TYPE_Y]) && + is_refinemv_allowed_reference(cm, mbmi); +} +static INLINE int get_default_refinemv_flag(const AV1_COMMON *const cm, + const MB_MODE_INFO *mbmi) { + if (!cm->seq_params.enable_refinemv || + cm->superres_scale_denominator != SCALE_NUMERATOR) + return 0; + int is_refinemv = + (mbmi->skip_mode + ? is_refinemv_allowed_skip_mode(cm, mbmi) + : is_refinemv_allowed(cm, mbmi, mbmi->sb_type[PLANE_TYPE_Y])); + if (is_refinemv) { + if (default_refinemv_modes(mbmi)) return 1; + } + return 0; +} + +// check if the refinemv mode is switchable for a given block +static INLINE int switchable_refinemv_flag(const AV1_COMMON *const cm, + const MB_MODE_INFO *mbmi) { + if (!cm->seq_params.enable_refinemv) return 0; + int is_refinemv = + (mbmi->skip_mode + ? 
is_refinemv_allowed_skip_mode(cm, mbmi) + : is_refinemv_allowed(cm, mbmi, mbmi->sb_type[PLANE_TYPE_Y])); + if (is_refinemv && !is_tip_ref_frame(mbmi->ref_frame[0])) { + if (default_refinemv_modes(mbmi)) return 0; + return 1; + } + + return 0; +} + +// Precision of refined MV returned, 0 being integer pel. For now, only 1/8 or +// 1/16-pel can be used. +#define MV_REFINE_PREC_BITS 4 // (1/16-pel) + +// Clamp MV to UMV border based on its distance to left/right/top/bottom edge +static AOM_INLINE MV tip_clamp_mv_to_umv_border_sb( + InterPredParams *const inter_pred_params, const MV *src_mv, int bw, int bh, +#if CONFIG_OPTFLOW_REFINEMENT + int use_optflow_refinement, +#endif // CONFIG_OPTFLOW_REFINEMENT + int ss_x, int ss_y) { + // If the MV points so far into the UMV border that no visible pixels + // are used for reconstruction, the subpel part of the MV can be + // discarded and the MV limited to 16 pixels with equivalent results. + const int spel_left = (AOM_INTERP_EXTEND + bw) << SUBPEL_BITS; + const int spel_right = spel_left - SUBPEL_SHIFTS; + const int spel_top = (AOM_INTERP_EXTEND + bh) << SUBPEL_BITS; + const int spel_bottom = spel_top - SUBPEL_SHIFTS; +#if CONFIG_OPTFLOW_REFINEMENT + MV clamped_mv; + if (use_optflow_refinement) { + // optflow refinement always returns MVs with 1/16 precision so it is not + // necessary to shift the MV before clamping + // Here it should be: + // clamped_mv.row = (int16_t)ROUND_POWER_OF_TWO_SIGNED( + // src_mv->row * (1 << SUBPEL_BITS), MV_REFINE_PREC_BITS + ss_y); + // But currently SUBPEL_BITS == MV_REFINE_PREC_BITS + assert(SUBPEL_BITS == MV_REFINE_PREC_BITS); + + if (ss_y || ss_x) { + clamped_mv.row = (int16_t)ROUND_POWER_OF_TWO_SIGNED( + src_mv->row * (1 << SUBPEL_BITS), MV_REFINE_PREC_BITS + ss_y); + clamped_mv.col = (int16_t)ROUND_POWER_OF_TWO_SIGNED( + src_mv->col * (1 << SUBPEL_BITS), MV_REFINE_PREC_BITS + ss_x); + } else { + clamped_mv = *src_mv; + } + } else { + clamped_mv.row = (int16_t)(src_mv->row * (1 << (1 - 
ss_y))); + clamped_mv.col = (int16_t)(src_mv->col * (1 << (1 - ss_x))); + } +#else + MV clamped_mv = { (int16_t)(src_mv->row * (1 << (1 - ss_y))), + (int16_t)(src_mv->col * (1 << (1 - ss_x))) }; +#endif // CONFIG_OPTFLOW_REFINEMENT + assert(ss_x <= 1); + assert(ss_y <= 1); + const SubpelMvLimits mv_limits = { + inter_pred_params->dist_to_left_edge * (1 << (1 - ss_x)) - spel_left, + inter_pred_params->dist_to_right_edge * (1 << (1 - ss_x)) + spel_right, + inter_pred_params->dist_to_top_edge * (1 << (1 - ss_y)) - spel_top, + inter_pred_params->dist_to_bottom_edge * (1 << (1 - ss_y)) + spel_bottom + }; + + clamp_mv(&clamped_mv, &mv_limits); + + return clamped_mv; +} + +// This function conduct the SAD search between two predictors and find the best +// MVs +void apply_mv_refinement(const AV1_COMMON *cm, MACROBLOCKD *xd, int plane, + MB_MODE_INFO *mi, int bw, int bh, int mi_x, int mi_y, + uint16_t **mc_buf, + CalcSubpelParamsFunc calc_subpel_params_func, + int pre_x, int pre_y, uint16_t *dst_ref0, + uint16_t *dst_ref1, MV *best_mv_ref, int pu_width, + int pu_height); + +// check if padding is required during motion compensation +// return 1 means reference pixel is outside of the reference range and padding +// is required return 0 means no padding. 
+int update_extend_mc_border_params(const struct scale_factors *const sf, + struct buf_2d *const pre_buf, MV32 scaled_mv, + PadBlock *block, int subpel_x_mv, + int subpel_y_mv, int do_warp, int is_intrabc, + int *x_pad, int *y_pad, + const ReferenceArea *ref_area); + +// Derive the sub-pixel related parameters of refinemv non-TIP blocks +// Sub-pel related parameters are stored in the structures pointed by +// "subpel_params" Also do padding if required This function is used for both +// encoder and decoder +void common_calc_subpel_params_and_extend( + const MV *const src_mv, InterPredParams *const inter_pred_params, + MACROBLOCKD *const xd, int mi_x, int mi_y, int ref, +#if CONFIG_OPTFLOW_REFINEMENT + int use_optflow_refinement, +#endif // CONFIG_OPTFLOW_REFINEMENT + uint16_t **mc_buf, uint16_t **pre, SubpelParams *subpel_params, + int *src_stride); + +// Derive the sub-pixel related parameters of refinemv TIP blocks +// Sub-pel related parameters are stored in the structures pointed by +// "subpel_params" Also do padding if required This function is used for both +// encoder and decoder +void tip_common_calc_subpel_params_and_extend( + const MV *const src_mv, InterPredParams *const inter_pred_params, + MACROBLOCKD *const xd, int mi_x, int mi_y, int ref, +#if CONFIG_OPTFLOW_REFINEMENT + int use_optflow_refinement, +#endif // CONFIG_OPTFLOW_REFINEMENT + uint16_t **mc_buf, uint16_t **pre, SubpelParams *subpel_params, + int *src_stride); +#endif // CONFIG_REFINEMV + +#if CONFIG_REFINEMV || CONFIG_OPTFLOW_ON_TIP + +unsigned int get_highbd_sad(const uint16_t *src_ptr, int source_stride, + const uint16_t *ref_ptr, int ref_stride, int bd, + int bw, int bh); +#endif // CONFIG_REFINEMV || CONFIG_OPTFLOW_ON_TIP + #if CONFIG_OPTFLOW_REFINEMENT // This parameter k=OPFL_DIST_RATIO_THR is used to prune MV refinement for the // case where d0 and d1 are very different. 
Assuming a = max(|d0|, |d1|) and @@ -497,9 +870,11 @@ #define OPFL_COV_CLAMP_BITS 28 #define OPFL_COV_CLAMP_VAL (1 << OPFL_COV_CLAMP_BITS) +#if !CONFIG_REFINEMV // Precision of refined MV returned, 0 being integer pel. For now, only 1/8 or // 1/16-pel can be used. #define MV_REFINE_PREC_BITS 4 // (1/16-pel) +#endif //! CONFIG_REFINEMV void av1_opfl_mv_refinement_highbd(const uint16_t *p0, int pstride0, const uint16_t *p1, int pstride1, const int16_t *gx0, const int16_t *gy0, @@ -512,14 +887,23 @@ const AV1_COMMON *cm, MACROBLOCKD *xd, int plane, const MB_MODE_INFO *mi, int bw, int bh, int mi_x, int mi_y, uint16_t **mc_buf, InterPredParams *inter_pred_params, - CalcSubpelParamsFunc calc_subpel_params_func, int ref, uint16_t *pred_dst); + CalcSubpelParamsFunc calc_subpel_params_func, int ref, uint16_t *pred_dst +#if CONFIG_REFINEMV + , + const MV *const src_mv, int pu_width, int pu_height +#endif // CONFIG_REFINEMV +); static INLINE int is_opfl_refine_allowed(const AV1_COMMON *cm, const MB_MODE_INFO *mbmi) { if (cm->seq_params.enable_opfl_refine == AOM_OPFL_REFINE_NONE || cm->features.opfl_refine_type == REFINE_NONE) return 0; +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + const unsigned int cur_index = cm->cur_frame->display_order_hint; +#else const unsigned int cur_index = cm->cur_frame->order_hint; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC int d0, d1; #if CONFIG_OPTFLOW_ON_TIP if (mbmi->ref_frame[0] == TIP_FRAME) { @@ -530,8 +914,15 @@ if (!mbmi->ref_frame[1]) return 0; const RefCntBuffer *const ref0 = get_ref_frame_buf(cm, mbmi->ref_frame[0]); const RefCntBuffer *const ref1 = get_ref_frame_buf(cm, mbmi->ref_frame[1]); - d0 = (int)cur_index - (int)ref0->order_hint; - d1 = (int)cur_index - (int)ref1->order_hint; +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + d0 = get_relative_dist(&cm->seq_params.order_hint_info, cur_index, + ref0->display_order_hint); + d1 = get_relative_dist(&cm->seq_params.order_hint_info, cur_index, + ref1->display_order_hint); +#else + d0 = 
(int)cur_index - (int)ref0->order_hint; + d1 = (int)cur_index - (int)ref1->order_hint; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC #if CONFIG_OPTFLOW_ON_TIP } #endif // CONFIG_OPTFLOW_ON_TIP @@ -553,6 +944,10 @@ , int do_pred, int use_4x4 #endif // CONFIG_OPTFLOW_ON_TIP +#if CONFIG_REFINEMV + , + MV *best_mv_ref, int pu_width, int pu_height +#endif // CONFIG_REFINEMV ); // With the refined MVs, generate the inter prediction for the block. @@ -669,6 +1064,30 @@ return clamped_mv; } +#if CONFIG_D071_IMP_MSK_BLD +void make_masked_inter_predictor(const uint16_t *pre, int pre_stride, + uint16_t *dst, int dst_stride, + InterPredParams *inter_pred_params, + const SubpelParams *subpel_params, + int use_bacp, int sub_block_id); + +static INLINE int use_border_aware_compound(const AV1_COMMON *cm, + const MB_MODE_INFO *mbmi) { + if (is_masked_compound_type(mbmi->interinter_comp.type) || + mbmi->mode == GLOBAL_GLOBALMV) + return 0; + + (void)cm; + return has_second_ref(mbmi) && + (mbmi->mode >= COMP_INTER_MODE_START && + mbmi->mode < COMP_INTER_MODE_END) && + (mbmi->interinter_comp.type == COMPOUND_DIFFWTD || + mbmi->interinter_comp.type == COMPOUND_AVERAGE); +} +int is_out_of_frame_block(InterPredParams const *inter_pred_params, + int frame_width, int frame_height, int sub_block_id); +#endif // CONFIG_D071_IMP_MSK_BLD + static INLINE int64_t scaled_buffer_offset(int x_offset, int y_offset, int stride, const struct scale_factors *sf) { @@ -725,7 +1144,11 @@ #endif // CONFIG_SKIP_MODE_ENHANCEMENT #if CONFIG_OPTFLOW_REFINEMENT mbmi->interp_fltr = - (mbmi->mode >= NEAR_NEARMV_OPTFLOW || use_opfl_refine_all(cm, mbmi)) + (mbmi->mode >= NEAR_NEARMV_OPTFLOW || use_opfl_refine_all(cm, mbmi) +#if CONFIG_REFINEMV + || mbmi->refinemv_flag +#endif // CONFIG_REFINEMV + ) ? 
MULTITAP_SHARP : av1_unswitchable_filter(frame_interp_filter); #else @@ -748,6 +1171,12 @@ if (mbmi->mode >= NEAR_NEARMV_OPTFLOW || use_opfl_refine_all(cm, mbmi)) return 0; #endif // CONFIG_OPTFLOW_REFINEMENT + +#if CONFIG_REFINEMV + // No interpolation filter search when MV refinement is used. + if (mbmi->refinemv_flag) return 0; +#endif // CONFIG_REFINEMV + if (is_warp_mode(mbmi->motion_mode)) return 0; if (is_nontrans_global_motion(xd, xd->mi[0])) return 0; return 1; @@ -790,6 +1219,13 @@ const uint8_t *av1_get_compound_type_mask( const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type); +#if CONFIG_CWP +// Init the masks for compound weighted prediction +void init_cwp_masks(); +// Get the mask for compound weighted prediction +const int8_t *av1_get_cwp_mask(int list_idx, int idx); +#endif // CONFIG_CWP + // build interintra_predictors for one plane void av1_build_interintra_predictor(const AV1_COMMON *cm, MACROBLOCKD *xd, uint16_t *pred, int stride, @@ -852,7 +1288,11 @@ // Set the precision set of the block. Currently, the value is 0. 
void set_precision_set(const AV1_COMMON *const cm, MACROBLOCKD *const xd, MB_MODE_INFO *mbmi, const BLOCK_SIZE bsize, +#if CONFIG_SEP_COMP_DRL + int *ref_mv_idx); +#else uint8_t ref_mv_idx); +#endif // CONFIG_SEP_COMP_DRL // Get the index of the precision // this index is signalled when precision is not same as the most probable // precision @@ -874,7 +1314,6 @@ // check if pb_mv_precision is allowed or not int is_pb_mv_precision_active(const AV1_COMMON *const cm, const MB_MODE_INFO *mbmi, const BLOCK_SIZE bsize); - #endif #if CONFIG_WARPMV @@ -888,17 +1327,31 @@ static INLINE int is_warpmv_mode_allowed(const AV1_COMMON *const cm, const MB_MODE_INFO *mbmi, BLOCK_SIZE bsize) { - if (has_second_ref(mbmi) || !cm->features.enabled_motion_modes + int frame_warp_delta_allowed = + (cm->features.enabled_motion_modes & (1 << WARP_DELTA)) != 0; + + if (has_second_ref(mbmi) || !frame_warp_delta_allowed #if CONFIG_TIP || is_tip_ref_frame(mbmi->ref_frame[0]) #endif // CONFIG_TIP +#if CONFIG_CWG_D067_IMPROVED_WARP + || !cm->features.allow_warpmv_mode +#endif // CONFIG_CWG_D067_IMPROVED_WARP ) return 0; - int frame_warp_delta_allowed = - cm->features.enabled_motion_modes & (1 << WARP_DELTA); return frame_warp_delta_allowed && is_warpmv_allowed_bsize(bsize); } + +#if CONFIG_CWG_D067_IMPROVED_WARP +// check if warpmv with mvd is allowed or not +static INLINE int allow_warpmv_with_mvd_coding(const AV1_COMMON *const cm, + const MB_MODE_INFO *mbmi) { + if (!cm->features.allow_warpmv_mode) return 0; + return (mbmi->mode == WARPMV && mbmi->warp_ref_idx < 2); +} +#endif // CONFIG_CWG_D067_IMPROVED_WARP + #endif // CONFIG_WARPMV #ifdef __cplusplus
diff --git a/av1/common/reconintra.c b/av1/common/reconintra.c index 6f4b4b0..36206f3 100644 --- a/av1/common/reconintra.c +++ b/av1/common/reconintra.c
@@ -56,9 +56,20 @@ #if CONFIG_ORIP | NEED_ABOVELEFT #endif - , // SMOOTH - NEED_LEFT | NEED_ABOVE, // SMOOTH_V - NEED_LEFT | NEED_ABOVE, // SMOOTH_H +#if CONFIG_BLEND_MODE + | NEED_ABOVERIGHT | NEED_BOTTOMLEFT +#endif // CONFIG_BLEND_MODE + , // SMOOTH + NEED_LEFT | NEED_ABOVE +#if CONFIG_BLEND_MODE + | NEED_BOTTOMLEFT +#endif // CONFIG_BLEND_MODE + , // SMOOTH_V + NEED_LEFT | NEED_ABOVE +#if CONFIG_BLEND_MODE + | NEED_ABOVERIGHT +#endif // CONFIG_BLEND_MODE + , // SMOOTH_H NEED_LEFT | NEED_ABOVE | NEED_ABOVELEFT, // PAETH }; @@ -793,6 +804,210 @@ } } +#if CONFIG_IDIF +// Directional prediction, zone 1: 0 < angle < 90 using IDIF +void av1_highbd_dr_prediction_z1_idif_c(uint16_t *dst, ptrdiff_t stride, int bw, + int bh, const uint16_t *above, + const uint16_t *left, int dx, int dy, + int bd, int mrl_index) { + int r, c, x, base, shift, val; + + uint16_t ref[4] = { 0 }; + + (void)left; + (void)dy; + (void)bd; + assert(dy == 1); + assert(dx > 0); + + const int max_base_x = (bw + bh) - 1 + (mrl_index << 1); + const int frac_bits = 6; + const int base_inc = 1; + + x = dx * (1 + mrl_index); + for (r = 0; r < bh; ++r, dst += stride, x += dx) { + base = x >> frac_bits; + shift = (x & 0x3F) >> 1; + + if (base >= max_base_x) { + for (int i = r; i < bh; ++i) { + aom_memset16(dst, above[max_base_x], bw); + dst += stride; + } + return; + } + + for (c = 0; c < bw; ++c, base += base_inc) { + if (base < max_base_x) { + // 4-tap filter + ref[0] = above[base - 1]; + ref[1] = above[base]; + ref[2] = above[base + 1]; + ref[3] = above[base + 2]; + + val = av1_dr_interp_filter[shift][0] * ref[0] + + av1_dr_interp_filter[shift][1] * ref[1] + + av1_dr_interp_filter[shift][2] * ref[2] + + av1_dr_interp_filter[shift][3] * ref[3]; + + dst[c] = clip_pixel_highbd( + ROUND_POWER_OF_TWO(val, POWER_DR_INTERP_FILTER), bd); + } else { + dst[c] = above[max_base_x]; + } + } + } +} + +// Directional prediction, zone 2: 90 < angle < 180 using IDIF +void av1_highbd_dr_prediction_z2_idif_c(uint16_t *dst, 
ptrdiff_t stride, int bw, + int bh, const uint16_t *above, + const uint16_t *left, int dx, int dy, + int bd, int mrl_index) { + (void)bd; + assert(dx > 0); + assert(dy > 0); + + const int min_base_x = -1 - mrl_index; + const int min_base_y = -1 - mrl_index; + + (void)min_base_y; + const int frac_bits_x = 6; + const int frac_bits_y = 6; + + uint16_t ref[4] = { 0 }; + + for (int r = 0; r < bh; ++r) { + for (int c = 0; c < bw; ++c) { + int val; + int y = r + 1; + int x = (c << 6) - (y + mrl_index) * dx; + const int base_x = x >> frac_bits_x; + if (base_x >= min_base_x) { + const int shift = (x & 0x3F) >> 1; + // 4-tap filter + ref[0] = above[base_x - 1]; + ref[1] = above[base_x]; + ref[2] = above[base_x + 1]; + ref[3] = above[base_x + 2]; + + val = av1_dr_interp_filter[shift][0] * ref[0] + + av1_dr_interp_filter[shift][1] * ref[1] + + av1_dr_interp_filter[shift][2] * ref[2] + + av1_dr_interp_filter[shift][3] * ref[3]; + + val = clip_pixel_highbd(ROUND_POWER_OF_TWO(val, POWER_DR_INTERP_FILTER), + bd); + } else { + x = c + 1; + y = (r << 6) - (x + mrl_index) * dy; + const int base_y = y >> frac_bits_y; + assert(base_y >= min_base_y); + const int shift = (y & 0x3F) >> 1; + // 4-tap filter + ref[0] = left[base_y - 1]; + ref[1] = left[base_y]; + ref[2] = left[base_y + 1]; + ref[3] = left[base_y + 2]; + + val = av1_dr_interp_filter[shift][0] * ref[0] + + av1_dr_interp_filter[shift][1] * ref[1] + + av1_dr_interp_filter[shift][2] * ref[2] + + av1_dr_interp_filter[shift][3] * ref[3]; + + val = clip_pixel_highbd(ROUND_POWER_OF_TWO(val, POWER_DR_INTERP_FILTER), + bd); + } + dst[c] = val; + } + dst += stride; + } +} + +// Directional prediction, zone 3: 180 < angle < 270 using IDIF +void av1_highbd_dr_prediction_z3_idif_c(uint16_t *dst, ptrdiff_t stride, int bw, + int bh, const uint16_t *above, + const uint16_t *left, int dx, int dy, + int bd, int mrl_index) { + int r, c, y, base, shift, val; + + (void)above; + (void)dx; + (void)bd; + assert(dx == 1); + assert(dy > 0); + + 
uint16_t ref[4] = { 0 }; + + const int max_base_y = (bw + bh) - 1 + (mrl_index << 1); + const int frac_bits = 6; + const int base_inc = 1; + + y = dy * (1 + mrl_index); + for (c = 0; c < bw; ++c, y += dy) { + base = y >> frac_bits; + shift = (y & 0x3F) >> 1; + + for (r = 0; r < bh; ++r, base += base_inc) { + if (base < max_base_y) { + // 4-tap filter + ref[0] = left[base - 1]; + ref[1] = left[base]; + ref[2] = left[base + 1]; + ref[3] = left[base + 2]; + + val = av1_dr_interp_filter[shift][0] * ref[0] + + av1_dr_interp_filter[shift][1] * ref[1] + + av1_dr_interp_filter[shift][2] * ref[2] + + av1_dr_interp_filter[shift][3] * ref[3]; + + dst[r * stride + c] = clip_pixel_highbd( + ROUND_POWER_OF_TWO(val, POWER_DR_INTERP_FILTER), bd); + } else { + for (; r < bh; ++r) dst[r * stride + c] = left[max_base_y]; + break; + } + } + } +} + +static void highbd_dr_predictor_idif(uint16_t *dst, ptrdiff_t stride, + TX_SIZE tx_size, uint16_t *above, + uint16_t *left, int angle, int bd, + int mrl_index) { + const int dx = av1_get_dx(angle); + const int dy = av1_get_dy(angle); + const int bw = tx_size_wide[tx_size]; + const int bh = tx_size_high[tx_size]; + assert(angle > 0 && angle < 270); + + const int min_base = -((1 + mrl_index)); + const int max_base = ((bw + bh) - 1 + (mrl_index << 1)); + + if (angle > 0 && angle < 90) { + above[max_base + 1] = above[max_base]; + av1_highbd_dr_prediction_z1_idif(dst, stride, bw, bh, above, left, dx, dy, + bd, mrl_index); + + } else if (angle > 90 && angle < 180) { + above[min_base - 1] = above[min_base]; + left[min_base - 1] = left[min_base]; + av1_highbd_dr_prediction_z2_idif(dst, stride, bw, bh, above, left, dx, dy, + bd, mrl_index); + + } else if (angle > 180 && angle < 270) { + left[max_base + 1] = left[max_base]; + av1_highbd_dr_prediction_z3_idif(dst, stride, bw, bh, above, left, dx, dy, + bd, mrl_index); + + } else if (angle == 90) { + pred_high[V_PRED][tx_size](dst, stride, above, left, bd); + } else if (angle == 180) { + 
pred_high[H_PRED][tx_size](dst, stride, above, left, bd); + } +} +#endif // CONFIG_IDIF + static void highbd_dr_predictor(uint16_t *dst, ptrdiff_t stride, TX_SIZE tx_size, const uint16_t *above, const uint16_t *left, int upsample_above, @@ -830,18 +1045,60 @@ const int bh = tx_size_high[tx_size]; if (angle > 0 && angle < 90) { +#if CONFIG_EXT_DIR + int dy = dr_intra_derivative[90 - angle]; +#else int dy = second_dr_intra_derivative[angle]; +#endif // CONFIG_EXT_DIR int dx = 1; av1_highbd_dr_prediction_z3(dst, stride, bw, bh, above, left, upsample_left, dx, dy, bd, 0); } else if (angle > 180 && angle < 270) { +#if CONFIG_EXT_DIR + int dx = dr_intra_derivative[angle - 180]; +#else int dx = second_dr_intra_derivative[270 - angle]; +#endif // CONFIG_EXT_DIR int dy = 1; av1_highbd_dr_prediction_z1(dst, stride, bw, bh, above, left, upsample_above, dx, dy, bd, 0); } } +#if CONFIG_IDIF +// Generate the second directional predictor for IBP +static void highbd_second_dr_predictor_idif(uint16_t *dst, ptrdiff_t stride, + TX_SIZE tx_size, uint16_t *above, + uint16_t *left, int angle, int bd) { + const int bw = tx_size_wide[tx_size]; + const int bh = tx_size_high[tx_size]; + + const int max_base = ((bw + bh) - 1); + + if (angle > 0 && angle < 90) { +#if CONFIG_EXT_DIR + int dy = dr_intra_derivative[90 - angle]; +#else + int dy = second_dr_intra_derivative[angle]; +#endif // CONFIG_EXT_DIR + int dx = 1; + left[max_base + 1] = left[max_base]; + av1_highbd_dr_prediction_z3_idif(dst, stride, bw, bh, above, left, dx, dy, + bd, 0); + } else if (angle > 180 && angle < 270) { +#if CONFIG_EXT_DIR + int dx = dr_intra_derivative[angle - 180]; +#else + int dx = second_dr_intra_derivative[270 - angle]; +#endif // CONFIG_EXT_DIR + int dy = 1; + above[max_base + 1] = above[max_base]; + av1_highbd_dr_prediction_z1_idif(dst, stride, bw, bh, above, left, dx, dy, + bd, 0); + } +} +#endif // CONFIG_IDIF + DECLARE_ALIGNED(16, const int8_t, av1_filter_intra_taps[FILTER_INTRA_MODES][8][8]) = { { @@ 
-1174,7 +1431,12 @@ #endif , const int seq_ibp_flag, - uint8_t *const ibp_weights[TX_SIZES_ALL][DIR_MODES_0_90]) { + uint8_t *const ibp_weights[TX_SIZES_ALL][DIR_MODES_0_90] +#if CONFIG_IDIF + , + const int enable_idif +#endif // CONFIG_IDIF +) { int i; DECLARE_ALIGNED(16, uint16_t, left_data[NUM_INTRA_NEIGHBOUR_PIXELS]); DECLARE_ALIGNED(16, uint16_t, above_data[NUM_INTRA_NEIGHBOUR_PIXELS]); @@ -1219,6 +1481,11 @@ if (is_dr_mode) { p_angle = mode_to_angle_map[mode] + angle_delta; +#if CONFIG_EXT_DIR + const int mrl_index_to_delta[4] = { 0, 1, -1, 0 }; + p_angle += mrl_index_to_delta[mrl_index]; + assert(p_angle > 0 && p_angle < 270); +#endif // CONFIG_EXT_DIR if (p_angle <= 90) need_above = 1, need_left = 0, need_above_left = 1; else if (p_angle < 180) @@ -1266,9 +1533,16 @@ if (is_dr_mode) need_bottom = seq_ibp_flag ? (p_angle < 90) || (p_angle > 180) : p_angle > 180; - +#if CONFIG_IDIF + int num_left_pixels_needed = + txhpx + (need_bottom ? txwpx : 3) + (mrl_index << 1) + 1; + if (enable_idif && (p_angle > 90 && p_angle < 180)) { + num_left_pixels_needed += 1; + } +#else const int num_left_pixels_needed = txhpx + (need_bottom ? txwpx : 3) + (mrl_index << 1); +#endif // CONFIG_IDIF i = 0; if (n_left_px > 0) { for (; i < n_left_px; i++) left_col[i] = left_ref[i * ref_stride]; @@ -1291,9 +1565,16 @@ if (is_dr_mode) need_right = seq_ibp_flag ? (p_angle < 90) || (p_angle > 180) : p_angle < 90; - +#if CONFIG_IDIF + int num_top_pixels_needed = + txwpx + (need_right ? txhpx : 0) + (mrl_index << 1); + if (enable_idif && (p_angle > 90 && p_angle < 180)) { + num_top_pixels_needed += 1; + } +#else const int num_top_pixels_needed = txwpx + (need_right ? 
txhpx : 0) + (mrl_index << 1); +#endif // CONFIG_IDIF if (n_top_px > 0) { memcpy(above_row, above_ref, n_top_px * sizeof(above_ref[0])); i = n_top_px; @@ -1377,31 +1658,63 @@ av1_filter_intra_edge_high(left_col - ab_le, n_px, strength); } } - upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, angle_above, - filt_type_above); - if (need_above && upsample_above) { - const int n_px = txwpx + (need_right ? txhpx : 0); - av1_upsample_intra_edge_high(above_row, n_px, xd->bd); +#if CONFIG_IDIF + if (!enable_idif) { +#endif // CONFIG_IDIF + upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, angle_above, + filt_type_above); + if (need_above && upsample_above) { + const int n_px = txwpx + (need_right ? txhpx : 0); + av1_upsample_intra_edge_high(above_row, n_px, xd->bd); + } + upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, angle_left, + filt_type_left); + if (need_left && upsample_left) { + const int n_px = txhpx + (need_bottom ? txwpx : 0); + av1_upsample_intra_edge_high(left_col, n_px, xd->bd); + } +#if CONFIG_IDIF } - upsample_left = - av1_use_intra_edge_upsample(txhpx, txwpx, angle_left, filt_type_left); - if (need_left && upsample_left) { - const int n_px = txhpx + (need_bottom ? 
txwpx : 0); - av1_upsample_intra_edge_high(left_col, n_px, xd->bd); - } +#endif // CONFIG_IDIF } +#if CONFIG_IDIF + if (enable_idif) { + highbd_dr_predictor_idif(dst, dst_stride, tx_size, above_row, left_col, + p_angle, xd->bd, mrl_index); + } else { + highbd_dr_predictor(dst, dst_stride, tx_size, above_row, left_col, + upsample_above, upsample_left, p_angle, xd->bd, + mrl_index); + } +#else highbd_dr_predictor(dst, dst_stride, tx_size, above_row, left_col, upsample_above, upsample_left, p_angle, xd->bd, mrl_index); - +#endif // CONFIG_IDIF if (seq_ibp_flag) { - if (mrl_index == 0) { + if (mrl_index == 0 +#if CONFIG_IMPROVED_ANGULAR_INTRA + && (angle_delta % 2 == 0) +#endif // CONFIG_IMPROVED_ANGULAR_INTRA + ) { if (p_angle > 0 && p_angle < 90) { int mode_index = angle_to_mode_index[p_angle]; uint8_t *weights = ibp_weights[tx_size][mode_index]; +#if CONFIG_IDIF + if (enable_idif) { + highbd_second_dr_predictor_idif(second_pred, txwpx, tx_size, + above_row, left_col, p_angle, + xd->bd); + } else { + highbd_second_dr_predictor(second_pred, txwpx, tx_size, above_row, + left_col, upsample_above, upsample_left, + p_angle, xd->bd); + } +#else highbd_second_dr_predictor(second_pred, txwpx, tx_size, above_row, left_col, upsample_above, upsample_left, p_angle, xd->bd); +#endif // CONFIG_IDIF av1_highbd_ibp_dr_prediction_z1_c(weights, dst, dst_stride, second_pred, txwpx, txwpx, txhpx); } @@ -1409,9 +1722,21 @@ int mode_index = angle_to_mode_index[270 - p_angle]; int transpose_tsize = transpose_tx_size[tx_size]; uint8_t *weights = ibp_weights[transpose_tsize][mode_index]; +#if CONFIG_IDIF + if (enable_idif) { + highbd_second_dr_predictor_idif(second_pred, txwpx, tx_size, + above_row, left_col, p_angle, + xd->bd); + } else { + highbd_second_dr_predictor(second_pred, txwpx, tx_size, above_row, + left_col, upsample_above, upsample_left, + p_angle, xd->bd); + } +#else highbd_second_dr_predictor(second_pred, txwpx, tx_size, above_row, left_col, upsample_above, upsample_left, 
p_angle, xd->bd); +#endif // CONFIG_IDIF av1_highbd_ibp_dr_prediction_z3_c(weights, dst, dst_stride, second_pred, txwpx, txwpx, txhpx); } @@ -1551,6 +1876,9 @@ row_off, col_off, ss_x, ss_y, yd, &px_bottom_left, bsize != init_bsize); const int disable_edge_filter = !cm->seq_params.enable_intra_edge_filter; +#if CONFIG_IDIF + const int enable_idif = cm->seq_params.enable_idif; +#endif // CONFIG_IDIF const int is_sb_boundary = (mi_row % cm->mib_size == 0 && row_off == 0) ? 1 : 0; @@ -1571,7 +1899,12 @@ cm->seq_params.enable_orip #endif , - cm->seq_params.enable_ibp, cm->ibp_directional_weights); + cm->seq_params.enable_ibp, cm->ibp_directional_weights +#if CONFIG_IDIF + , + enable_idif +#endif // CONFIG_IDIF + ); return; } @@ -1636,11 +1969,7 @@ av1_get_max_uv_txsize(mbmi->sb_type[PLANE_TYPE_UV], 0, 0); #if CONFIG_ADAPTIVE_DS_FILTER cfl_store_tx(xd, blk_row, blk_col, luma_tx_size, -#if DS_FRAME_LEVEL - cm->features.ds_filter_type); -#else cm->seq_params.enable_cfl_ds_filter); -#endif // DS_FRAME_LEVEL #else cfl_store_tx(xd, blk_row, blk_col, luma_tx_size); #endif // CONFIG_ADAPTIVE_DS_FILTER
diff --git a/av1/common/reconintra.h b/av1/common/reconintra.h index efd6533..0df6103 100644 --- a/av1/common/reconintra.h +++ b/av1/common/reconintra.h
@@ -14,6 +14,7 @@ #define AOM_AV1_COMMON_RECONINTRA_H_ #include <stdlib.h> +#include <math.h> #include "aom/aom_integer.h" #include "av1/common/av1_common_int.h" @@ -124,7 +125,9 @@ PLANE_TYPE plane_type, TX_TYPE tx_type, int is_inter) { bool allow_fsc = cm->seq_params.enable_fsc && +#if !CONFIG_ATC_DCTX_ALIGNED cm->features.allow_screen_content_tools && +#endif // !CONFIG_ATC_DCTX_ALIGNED plane_type == PLANE_TYPE_Y && is_inter && tx_type == IDTX; return allow_fsc; } @@ -213,6 +216,44 @@ extern const int8_t av1_filter_intra_taps[FILTER_INTRA_MODES][8][8]; +#if CONFIG_EXT_DIR +// moved to av1_common_int.h +#elif CONFIG_IMPROVED_ANGULAR_INTRA +static const int16_t dr_intra_derivative[90] = { + // Angles are dense around vertical and horizontal directions, and coarse + // close to + // diagonal directions. + // Approx angle + 0, 0, 0, // + 2048, 0, 0, // 3, ... + 1024, 0, 0, // 6, ... + 512, 0, 0, 0, 0, // 9, ... + 340, 0, 0, // 14, ... + 256, 0, 0, // 17, ... + 204, 0, 0, // 20, ... + 170, 0, 0, // 23, ... (113 & 203 are base angles) + 146, 0, 0, // 26, ... + 128, 0, 0, // 29, ... + 106, 0, 0, 0, // 32, ... + 92, 0, 0, // 36, ... + 82, 0, 0, // 39, ... + 72, 0, 0, // 42, ... + 64, 0, 0, // 45, ... (45 & 135 are base angles) + 56, 0, 0, // 48, ... + 50, 0, 0, // 51, ... + 44, 0, 0, 0, // 54, ... + 38, 0, 0, // 58, ... + 32, 0, 0, // 61, ... + 28, 0, 0, // 64, ... + 24, 0, 0, // 67, ... (67 & 157 are base angles) + 20, 0, 0, // 70, ... + 16, 0, 0, // 73, ... + 12, 0, 0, 0, 0, // 76, ... + 8, 0, 0, // 81, ... + 4, 0, 0, // 84, ... + 2, 0, 0, // 87, ... +}; +#else static const int16_t dr_intra_derivative[90] = { // More evenly spread out angles and limited to 10-bit // Values that are 0 will never be used @@ -246,6 +287,7 @@ 7, 0, 0, // 84, ... 3, 0, 0, // 87, ... }; +#endif // CONFIG_EXT_DIR // Get the shift (up-scaled by 256) in X w.r.t a unit change in Y. 
// If angle > 0 && angle < 90, dx = -((int)(256 / t)); @@ -301,6 +343,24 @@ } #endif // CONFIG_EXT_RECUR_PARTITIONS +#if CONFIG_IDIF +#define POWER_DR_INTERP_FILTER 7 + +DECLARE_ALIGNED(16, static const int16_t, av1_dr_interp_filter[32][4]) = { + { 0, 128, 0, 0 }, { -2, 127, 4, -1 }, { -3, 125, 8, -2 }, + { -5, 123, 13, -3 }, { -6, 121, 17, -4 }, { -7, 118, 22, -5 }, + { -9, 116, 27, -6 }, { -9, 112, 32, -7 }, { -10, 109, 37, -8 }, + { -11, 106, 41, -8 }, { -11, 102, 46, -9 }, { -12, 98, 52, -10 }, + { -12, 94, 56, -10 }, { -12, 90, 61, -11 }, { -12, 85, 66, -11 }, + { -12, 81, 71, -12 }, { -12, 76, 76, -12 }, { -12, 71, 81, -12 }, + { -11, 66, 85, -12 }, { -11, 61, 90, -12 }, { -10, 56, 94, -12 }, + { -10, 52, 98, -12 }, { -9, 46, 102, -11 }, { -8, 41, 106, -11 }, + { -8, 37, 109, -10 }, { -7, 32, 112, -9 }, { -6, 27, 116, -9 }, + { -5, 22, 118, -7 }, { -4, 17, 121, -6 }, { -3, 13, 123, -5 }, + { -2, 8, 125, -3 }, { -1, 4, 127, -2 } +}; +#endif // CONFIG_IDIF + #ifdef __cplusplus } // extern "C" #endif
diff --git a/av1/common/restoration.c b/av1/common/restoration.c index 219b507..c307424 100644 --- a/av1/common/restoration.c +++ b/av1/common/restoration.c
@@ -91,6 +91,15 @@ #endif // CONFIG_WIENER_NONSEP_CROSS_FILT }; +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER +// Filter configuration of cross component weiner filter +const int wienerns_config_uv_from_y_cross[][3] = { + { 1, 0, 0 }, { -1, 0, 0 }, { 0, 1, 1 }, { 0, -1, 1 }, + { 1, 1, 2 }, { -1, -1, 2 }, { -1, 1, 3 }, { 1, -1, 3 }, + { 2, 0, 4 }, { -2, 0, 4 }, { 0, 2, 5 }, { 0, -2, 5 }, +}; +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + #define WIENERNS_PREC_BITS_Y 7 const int wienerns_coeff_y[][WIENERNS_COEFCFG_LEN] = { #if ENABLE_LR_4PART_CODE @@ -163,14 +172,43 @@ #endif // ENABLE_LR_4PART_CODE }; +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER +const int wienerns_coeff_uv_from_y[][WIENERNS_COEFCFG_LEN] = { +#if ENABLE_LR_4PART_CODE + AOM_WIENERNS_COEFF(WIENERNS_PREC_BITS_UV, 5, -12, 0), + AOM_WIENERNS_COEFF(WIENERNS_PREC_BITS_UV, 5, -12, 0), + AOM_WIENERNS_COEFF(WIENERNS_PREC_BITS_UV, 4, -7, 1), + AOM_WIENERNS_COEFF(WIENERNS_PREC_BITS_UV, 4, -7, 1), + AOM_WIENERNS_COEFF(WIENERNS_PREC_BITS_UV, 4, -8, 1), + AOM_WIENERNS_COEFF(WIENERNS_PREC_BITS_UV, 4, -8, 1), +#else + AOM_WIENERNS_COEFF(WIENERNS_PREC_BITS_UV, 5, -12, 3), + AOM_WIENERNS_COEFF(WIENERNS_PREC_BITS_UV, 5, -12, 3), + AOM_WIENERNS_COEFF(WIENERNS_PREC_BITS_UV, 4, -7, 3), + AOM_WIENERNS_COEFF(WIENERNS_PREC_BITS_UV, 4, -7, 3), + AOM_WIENERNS_COEFF(WIENERNS_PREC_BITS_UV, 4, -8, 3), + AOM_WIENERNS_COEFF(WIENERNS_PREC_BITS_UV, 4, -8, 3), +#endif // ENABLE_LR_4PART_CODE +}; +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + const WienernsFilterParameters wienerns_filter_y = AOM_MAKE_WIENERNS_CONFIG( WIENERNS_PREC_BITS_Y, wienerns_config_y, wienerns_coeff_y); const WienernsFilterParameters wienerns_filter_uv = AOM_MAKE_WIENERNS_CONFIG2(WIENERNS_PREC_BITS_UV, wienerns_config_uv_from_uv, wienerns_config_uv_from_y, wienerns_coeff_uv); - +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER +const WienernsFilterParameters wienerns_cross_filter_uv = + AOM_MAKE_WIENERNS_CONFIG(WIENERNS_PREC_BITS_UV, + wienerns_config_uv_from_y_cross, + 
wienerns_coeff_uv_from_y); +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER const WienernsFilterPairParameters wienerns_filters_midqp = { &wienerns_filter_y, &wienerns_filter_uv +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + , + &wienerns_cross_filter_uv +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER }; // Configs for the first set of filters for the case without subtract center. @@ -263,6 +301,10 @@ const WienernsFilterPairParameters wienerns_filters_highqp = { &wienerns_filter_y2, &wienerns_filter_uv +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + , + &wienerns_cross_filter_uv +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER }; /////////////////////////////////////////////////////////////////////////// @@ -343,6 +385,10 @@ const WienernsFilterPairParameters wienerns_filters_lowqp = { &wienerns_filter_y3, &wienerns_filter_uv +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + , + &wienerns_cross_filter_uv +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER }; #endif // CONFIG_WIENER_NONSEP @@ -496,6 +542,36 @@ #endif } +#if CONFIG_FLEXIBLE_RU_SIZE +// set up the Minimum and maximum RU size for enacoder search +// As normative regulation: +// minimum RU size is equal to RESTORATION_UNITSIZE_MAX >> 2, +// maximum RU size is equal to RESTORATION_UNITSIZE_MAX +// The setting here is also for encoder search. +void set_restoration_unit_size(int width, int height, int sx, int sy, + RestorationInfo *rst) { + int s = AOMMIN(sx, sy); + + rst[0].max_restoration_unit_size = RESTORATION_UNITSIZE_MAX >> 0; + rst[0].min_restoration_unit_size = RESTORATION_UNITSIZE_MAX >> 2; + + // For large resolution, the minimum RU size is set to + // RESTORATION_UNITSIZE_MAX >> 1 to reduce the encode complexity. 
+ if (width * height > 1920 * 1080 * 2) + rst[0].min_restoration_unit_size = RESTORATION_UNITSIZE_MAX >> 1; + + rst[1].max_restoration_unit_size = rst[0].max_restoration_unit_size >> s; + rst[1].min_restoration_unit_size = rst[0].min_restoration_unit_size >> s; + + rst[2].max_restoration_unit_size = rst[1].max_restoration_unit_size; + rst[2].min_restoration_unit_size = rst[1].min_restoration_unit_size; + + rst[0].restoration_unit_size = rst[0].min_restoration_unit_size; + rst[1].restoration_unit_size = rst[1].min_restoration_unit_size; + rst[2].restoration_unit_size = rst[2].min_restoration_unit_size; +} +#endif // CONFIG_FLEXIBLE_RU_SIZE + static void extend_frame_highbd(uint16_t *data, int width, int height, int stride, int border_horz, int border_vert) { uint16_t *data_p; @@ -527,8 +603,12 @@ extend_frame_highbd(data, width, height, stride, border_horz, border_vert); } +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER +void copy_tile(int width, int height, const uint16_t *src, +#else static void copy_tile(int width, int height, const uint16_t *src, - int src_stride, uint16_t *dst, int dst_stride) { +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + int src_stride, uint16_t *dst, int dst_stride) { copy_tile_highbd(width, height, src, src_stride, dst, dst_stride); } @@ -1822,6 +1902,7 @@ #endif // CONFIG_WIENER_NONSEP_CROSS_FILT const int block_size = 4; + for (int r = 0; r < height; r += block_size) { const int h = AOMMIN(block_size, height - r); const uint16_t *dgd_row = dgd + r * stride; @@ -1852,7 +1933,11 @@ int is_uv = rui->plane != AOM_PLANE_Y; const NonsepFilterConfig *orig_config = +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + get_wienerns_config(rui->base_qindex, is_uv, 0); +#else get_wienerns_config(rui->base_qindex, is_uv); +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER #if ADD_CENTER_TAP_TO_WIENERNS NonsepFilterConfig adjusted_config; WienerNonsepInfo adjusted_info; @@ -1890,12 +1975,111 @@ } } +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER +// Convolving process of 
cross-component wiener filter for a 4x4 unit +void av1_convolve_nonsep_cross_highbd_c(const uint16_t *dgd, int width, + int height, int stride, + const uint16_t *dgd2, int stride2, + const NonsepFilterConfig *config, + const int16_t *filter, uint16_t *dst, + int dst_stride, int bit_depth) { + (void)dgd; + (void)stride; + for (int i = 0; i < height; ++i) { + for (int j = 0; j < width; ++j) { + const int dgd2_id = i * stride2 + j; + const int dst_id = i * dst_stride + j; + int32_t tmp = (int32_t)dst[dst_id] * (1 << config->prec_bits); + + for (int k = 0; k < config->num_pixels; ++k) { + const int pos = config->config[k][NONSEP_BUF_POS]; + const int r = config->config[k][NONSEP_ROW_ID]; + const int c = config->config[k][NONSEP_COL_ID]; + const int ir = config->strict_bounds + ? AOMMAX(AOMMIN(i + r, height - 1), 0) + : i + r; + const int jc = + config->strict_bounds ? AOMMAX(AOMMIN(j + c, width - 1), 0) : j + c; + int16_t diff = + clip_base(dgd2[(ir)*stride2 + (jc)] - dgd2[dgd2_id], bit_depth); + diff = k % 2 ? 
-diff : diff; + tmp += filter[pos] * diff; + } + tmp = ROUND_POWER_OF_TWO_SIGNED(tmp, config->prec_bits); + dst[dst_id] = clip_pixel_highbd(tmp, bit_depth); + } + } +} + +// Cross-component wiener filtering for a process unit +void apply_cross_wienerns_class_id_highbd( + const uint16_t *dgd, int width, int height, int stride, + const WienerNonsepInfo *wienerns_info, + const NonsepFilterConfig *nsfilter_config, uint16_t *dst, int dst_stride, + int plane, const uint16_t *luma, int luma_stride, int bit_depth) { + assert(plane != AOM_PLANE_Y); + (void)plane; + assert(nsfilter_config->num_pixels2 == 0); + assert(wienerns_info->num_classes == 1); + + const int16_t *filter = const_nsfilter_taps(wienerns_info, 0); + + const int block_size = 4; + for (int r = 0; r < height; r += block_size) { + const int h = AOMMIN(block_size, height - r); + const uint16_t *dgd_row = dgd + r * stride; + const uint16_t *luma_row = luma + r * luma_stride; + uint16_t *dst_row = dst + r * dst_stride; + + for (int c = 0; c < width; c += block_size) { + const int w = AOMMIN(block_size, width - c); + av1_convolve_nonsep_cross_highbd_c( + dgd_row + c, w, h, stride, luma_row + c, luma_stride, nsfilter_config, + filter, dst_row + c, dst_stride, bit_depth); + } + } +} + +// Cross-component wiener filtering for a stripe of process units +static void wiener_ns_cross_filter_stripe_highbd( + const RestorationUnitInfo *rui, int stripe_width, int stripe_height, + int procunit_width, const uint16_t *src, int src_stride, uint16_t *dst, + int dst_stride, int32_t *tmpbuf, int bit_depth) { + (void)tmpbuf; + (void)bit_depth; + + assert(rui->wienerns_cross_info.num_classes == 1); + + int is_uv = rui->plane != AOM_PLANE_Y; + + assert(is_uv); + const NonsepFilterConfig *orig_config = + get_wienerns_config(rui->base_qindex, is_uv, 1); + + const NonsepFilterConfig *nsfilter_config = orig_config; + const WienerNonsepInfo *nsfilter_info = &rui->wienerns_cross_info; + + for (int j = 0; j < stripe_width; j += 
procunit_width) { + int w = AOMMIN(procunit_width, stripe_width - j); + apply_cross_wienerns_class_id_highbd( + src + j, w, stripe_height, src_stride, nsfilter_info, nsfilter_config, + dst + j, dst_stride, rui->plane, rui->luma + j, rui->luma_stride, + bit_depth); + } +} +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + #if CONFIG_WIENER_NONSEP_CROSS_FILT uint16_t *wienerns_copy_luma_highbd(const uint16_t *dgd, int height_y, int width_y, int in_stride, uint16_t **luma_hbd, int height_uv, int width_uv, int border, int out_stride, - int bd) { + int bd +#if WIENERNS_CROSS_FILT_LUMA_TYPE == 2 + , + int ds_type +#endif +) { (void)bd; uint16_t *aug_luma = (uint16_t *)malloc( sizeof(uint16_t) * (width_uv + 2 * border) * (height_uv + 2 * border)); @@ -1973,6 +2157,25 @@ } else { assert(0 && "Invalid dimensions"); } +#elif WIENERNS_CROSS_FILT_LUMA_TYPE == 2 + const int ss_x = (((width_y + 1) >> 1) == width_uv); + const int ss_y = (((height_y + 1) >> 1) == height_uv); + if (ss_x && ss_y && ds_type == 1) { + for (int r = 0; r < height_uv; ++r) { + for (int c = 0; c < width_uv; ++c) { + (*luma)[r * out_stride + c] = (dgd[2 * r * in_stride + 2 * c] + + dgd[(2 * r + 1) * in_stride + 2 * c]) / + 2; + } + } + } else { + for (int r = 0; r < height_uv; ++r) { + for (int c = 0; c < width_uv; ++c) { + (*luma)[r * out_stride + c] = + dgd[(1 + ss_y) * r * in_stride + (1 + ss_x) * c]; + } + } + } #else av1_highbd_resize_plane(dgd, height_y, width_y, in_stride, *luma, height_uv, width_uv, out_stride, bd); @@ -2054,16 +2257,18 @@ #elif CONFIG_PC_WIENER #define NUM_STRIPE_FILTERS 3 -static const stripe_filter_fun stripe_filters[NUM_STRIPE_FILTERS] = { - wiener_filter_stripe_highbd, - sgrproj_filter_stripe_highbd, - pc_wiener_stripe_highbd, -}; + static const stripe_filter_fun + stripe_filters[NUM_STRIPE_FILTERS] = { + wiener_filter_stripe_highbd, + sgrproj_filter_stripe_highbd, + pc_wiener_stripe_highbd, + }; #else #define NUM_STRIPE_FILTERS 2 -static const stripe_filter_fun 
stripe_filters[NUM_STRIPE_FILTERS] = { - wiener_filter_stripe_highbd, sgrproj_filter_stripe_highbd -}; + static const stripe_filter_fun + stripe_filters[NUM_STRIPE_FILTERS] = { + wiener_filter_stripe_highbd, sgrproj_filter_stripe_highbd + }; #endif // CONFIG_WIENER_NONSEP && CONFIG_PC_WIENER // Filter one restoration unit @@ -2104,6 +2309,7 @@ const uint16_t *luma_in_ru = NULL; const int enable_cross_buffers = unit_rtype == RESTORE_WIENER_NONSEP && rui->plane != AOM_PLANE_Y; + if (enable_cross_buffers) luma_in_ru = rui->luma + limits->v_start * rui->luma_stride + limits->h_start; @@ -2195,6 +2401,86 @@ #endif // CONFIG_PC_WIENER } +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER +// Cross-component fFiltering for one restoration unit +void av1_wiener_ns_cross_filter_unit( + const RestorationTileLimits *limits, const RestorationUnitInfo *rui, + const RestorationStripeBoundaries *rsb, RestorationLineBuffers *rlbs, + const AV1PixelRect *tile_rect, int tile_stripe0, int ss_x, int ss_y, + int bit_depth, uint16_t *data, int stride, uint16_t *dst, int dst_stride, + int32_t *tmpbuf, int optimized_lr) { + (void)rsb; + (void)rlbs; + (void)optimized_lr; + (void)tile_stripe0; + + RestorationType unit_cross_rtype = rui->cross_restoration_type; + + const int unit_h = limits->v_end - limits->v_start; + const int unit_w = limits->h_end - limits->h_start; + uint16_t *data_tl = data + limits->v_start * stride + limits->h_start; + uint16_t *dst_tl = dst + limits->v_start * dst_stride + limits->h_start; + + if (unit_cross_rtype == RESTORE_NONE) { + return; + } + + assert(unit_cross_rtype == RESTORE_WIENER_NONSEP); + + const stripe_filter_fun stripe_filter = wiener_ns_cross_filter_stripe_highbd; + + const int procunit_width = RESTORATION_PROC_UNIT_SIZE >> ss_x; + + // rui is a pointer to a const but we modify its contents when calling + // stripe_filter(). Use a temporary for now and refactor the datastructure + // later. 
+ RestorationUnitInfo rui_contents = *rui; + RestorationUnitInfo *tmp_rui = &rui_contents; + + const uint16_t *luma_in_plane = rui->luma; + const uint16_t *luma_in_ru = + luma_in_plane + limits->v_start * rui->luma_stride + limits->h_start; + + // Convolve the whole tile one stripe at a time + RestorationTileLimits remaining_stripes = *limits; + int i = 0; + while (i < unit_h) { + int copy_above, copy_below; + remaining_stripes.v_start = limits->v_start + i; + + get_stripe_boundary_info(&remaining_stripes, tile_rect, ss_y, &copy_above, + &copy_below); + + const int full_stripe_height = RESTORATION_PROC_UNIT_SIZE >> ss_y; + const int runit_offset = RESTORATION_UNIT_OFFSET >> ss_y; + + // Work out where this stripe's boundaries are within + // rsb->stripe_boundary_{above,below} + const int tile_stripe = + (remaining_stripes.v_start - tile_rect->top + runit_offset) / + full_stripe_height; + // const int frame_stripe = tile_stripe0 + tile_stripe; + // const int rsb_row = RESTORATION_CTX_VERT * frame_stripe; + + // Calculate this stripe's height, based on two rules: + // * The topmost stripe in each tile is 8 luma pixels shorter than usual. + // * We can't extend past the end of the current restoration unit + const int nominal_stripe_height = + full_stripe_height - ((tile_stripe == 0) ?
runit_offset : 0); + const int h = AOMMIN(nominal_stripe_height, + remaining_stripes.v_end - remaining_stripes.v_start); + + tmp_rui->luma = luma_in_ru + i * rui->luma_stride; + + stripe_filter(tmp_rui, unit_w, h, procunit_width, data_tl + i * stride, + stride, dst_tl + i * dst_stride, dst_stride, tmpbuf, + bit_depth); + + i += h; + } +} +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + static void filter_frame_on_unit(const RestorationTileLimits *limits, const AV1PixelRect *tile_rect, int rest_unit_idx, int rest_unit_idx_seq, @@ -2227,6 +2513,15 @@ ctxt->tile_stripe0, ctxt->ss_x, ctxt->ss_y, ctxt->bit_depth, ctxt->data8, ctxt->data_stride, ctxt->dst8, ctxt->dst_stride, tmpbuf, rsi->optimized_lr); +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + const int is_uv = (ctxt->plane != AOM_PLANE_Y); + if (is_uv) + av1_wiener_ns_cross_filter_unit( + limits, &rsi->unit_info[rest_unit_idx], &rsi->boundaries, rlbs, + tile_rect, ctxt->tile_stripe0, ctxt->ss_x, ctxt->ss_y, ctxt->bit_depth, + ctxt->data8, ctxt->data_stride, ctxt->dst8, ctxt->dst_stride, tmpbuf, + rsi->optimized_lr); +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER } void av1_loop_restoration_filter_frame_init(AV1LrStruct *lr_ctxt, @@ -2251,9 +2546,16 @@ for (int plane = 0; plane < num_planes; ++plane) { RestorationInfo *rsi = &cm->rst_info[plane]; RestorationType rtype = rsi->frame_restoration_type; +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + RestorationType cross_rtype = rsi->frame_cross_restoration_type; +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER rsi->optimized_lr = optimized_lr; +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + if (rtype == RESTORE_NONE && cross_rtype == RESTORE_NONE) { +#else if (rtype == RESTORE_NONE) { +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER continue; } @@ -2289,7 +2591,13 @@ aom_yv12_partial_coloc_copy_v }; assert(num_planes <= 3); for (int plane = 0; plane < num_planes; ++plane) { - if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE) continue; + if 
(cm->rst_info[plane].frame_restoration_type == RESTORE_NONE +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + && cm->rst_info[plane].frame_cross_restoration_type == RESTORE_NONE +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + ) + continue; + AV1PixelRect tile_rect = loop_rest_ctxt->ctxt[plane].tile_rect; copy_funs[plane](loop_rest_ctxt->dst, loop_rest_ctxt->frame, tile_rect.left, tile_rect.right, tile_rect.top, tile_rect.bottom); @@ -2308,14 +2616,22 @@ dgd->buffers[AOM_PLANE_Y], dgd->crop_heights[AOM_PLANE_Y], dgd->crop_widths[AOM_PLANE_Y], dgd->strides[AOM_PLANE_Y], &luma, dgd->crop_heights[1], dgd->crop_widths[1], WIENERNS_UV_BRD, luma_stride, - cm->seq_params.bit_depth); + cm->seq_params.bit_depth +#if WIENERNS_CROSS_FILT_LUMA_TYPE == 2 + , + cm->seq_params.enable_cfl_ds_filter == 1 +#endif + ); assert(luma_buf != NULL); #endif // CONFIG_WIENER_NONSEP_CROSS_FILT for (int plane = 0; plane < num_planes; ++plane) { - if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE) { + if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + && cm->rst_info[plane].frame_cross_restoration_type == RESTORE_NONE +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + ) continue; - } #if CONFIG_WIENER_NONSEP || CONFIG_PC_WIENER ctxt[plane].plane = plane;
diff --git a/av1/common/restoration.h b/av1/common/restoration.h index 8091597..f66987d 100644 --- a/av1/common/restoration.h +++ b/av1/common/restoration.h
@@ -82,7 +82,11 @@ (RESTORATION_PROC_UNIT_SIZE + RESTORATION_BORDER_VERT * 2 + \ RESTORATION_PADDING)) +#if CONFIG_FLEXIBLE_RU_SIZE +#define RESTORATION_UNITSIZE_MAX 512 +#else #define RESTORATION_UNITSIZE_MAX 256 +#endif // CONFIG_FLEXIBLE_RU_SIZE #define RESTORATION_UNITPELS_HORZ_MAX \ (RESTORATION_UNITSIZE_MAX * 3 / 2 + 2 * RESTORATION_BORDER_HORZ + 16) #define RESTORATION_UNITPELS_VERT_MAX \ @@ -203,6 +207,9 @@ typedef struct { const WienernsFilterParameters *y; const WienernsFilterParameters *uv; +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + const WienernsFilterParameters *uv_cross; +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER } WienernsFilterPairParameters; extern const WienernsFilterPairParameters wienerns_filters_lowqp; @@ -212,17 +219,33 @@ #define USE_CENTER_WIENER_NONSEP 0 static INLINE const WienernsFilterParameters *get_wienerns_parameters( +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + int qindex, int is_uv, int is_cross) { +#else int qindex, int is_uv) { +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER const WienernsFilterPairParameters *pair_nsfilter_params = NULL; (void)qindex; pair_nsfilter_params = &wienerns_filters_midqp; +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + if (is_cross) return pair_nsfilter_params->uv_cross; +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER return is_uv ? 
pair_nsfilter_params->uv : pair_nsfilter_params->y; } static INLINE const NonsepFilterConfig *get_wienerns_config(int qindex, - int is_uv) { + int is_uv +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + , + int is_cross +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER +) { const WienernsFilterParameters *base_nsfilter_params = +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + get_wienerns_parameters(qindex, is_uv, is_cross); +#else get_wienerns_parameters(qindex, is_uv); +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER return &base_nsfilter_params->nsfilter_config; } #endif // CONFIG_WIENER_NONSEP @@ -258,6 +281,11 @@ */ RestorationType restoration_type; +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + /*! Cross restoration type*/ + RestorationType cross_restoration_type; +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + /*! * Wiener filter parameters if restoration_type indicates Wiener */ @@ -324,6 +352,17 @@ */ PcwienerBuffers *pcwiener_buffers; #endif // CONFIG_PC_WIENER +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + /*! + * Nonseparable Wiener cross filter information. + */ + WienerNonsepInfo wienerns_cross_info; + + /*! + * wienerns cross filter idx of the current RU + */ + int wienerns_cross_filter_idx; +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER } RestorationUnitInfo; /*!\cond */ @@ -382,6 +421,16 @@ */ int restoration_unit_size; +#if CONFIG_FLEXIBLE_RU_SIZE + /*! + * Maximum restoration unit size + */ + int max_restoration_unit_size; + /*! + * Minimum restoration unit size + */ + int min_restoration_unit_size; +#endif // CONFIG_FLEXIBLE_RU_SIZE /** * \name Fields allocated and initialised by av1_alloc_restoration_struct. * (horz_)units_per_tile give the number of restoration units in @@ -413,6 +462,11 @@ */ RestorationUnitInfo *unit_info; +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + /*! Cross restoration type for frame*/ + RestorationType frame_cross_restoration_type; +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + /*! 
* Restoration Stripe boundary info */ @@ -446,24 +500,33 @@ sgrproj_info->xqd[1] = (SGRPROJ_PRJ_MIN1 + SGRPROJ_PRJ_MAX1) / 2; } -static INLINE void set_default_wiener(WienerInfo *wiener_info) { - wiener_info->vfilter[0] = wiener_info->hfilter[0] = WIENER_FILT_TAP0_MIDV; +static INLINE void set_default_wiener(WienerInfo *wiener_info, int chroma) { + const int wiener_filt_tap0_midv = chroma ? 0 : WIENER_FILT_TAP0_MIDV; + wiener_info->vfilter[0] = wiener_info->hfilter[0] = wiener_filt_tap0_midv; wiener_info->vfilter[1] = wiener_info->hfilter[1] = WIENER_FILT_TAP1_MIDV; wiener_info->vfilter[2] = wiener_info->hfilter[2] = WIENER_FILT_TAP2_MIDV; wiener_info->vfilter[WIENER_HALFWIN] = wiener_info->hfilter[WIENER_HALFWIN] = -2 * - (WIENER_FILT_TAP2_MIDV + WIENER_FILT_TAP1_MIDV + WIENER_FILT_TAP0_MIDV); + (WIENER_FILT_TAP2_MIDV + WIENER_FILT_TAP1_MIDV + wiener_filt_tap0_midv); wiener_info->vfilter[4] = wiener_info->hfilter[4] = WIENER_FILT_TAP2_MIDV; wiener_info->vfilter[5] = wiener_info->hfilter[5] = WIENER_FILT_TAP1_MIDV; - wiener_info->vfilter[6] = wiener_info->hfilter[6] = WIENER_FILT_TAP0_MIDV; + wiener_info->vfilter[6] = wiener_info->hfilter[6] = wiener_filt_tap0_midv; } #if CONFIG_WIENER_NONSEP static INLINE void set_default_wienerns(WienerNonsepInfo *wienerns_info, - int qindex, int num_classes, - int chroma) { + int qindex, int num_classes, int chroma +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + , + int is_cross +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER +) { const WienernsFilterParameters *nsfilter_params = +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + get_wienerns_parameters(qindex, chroma, is_cross); +#else get_wienerns_parameters(qindex, chroma); +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER wienerns_info->num_classes = num_classes; for (int c_id = 0; c_id < wienerns_info->num_classes; ++c_id) { #if CONFIG_LR_MERGE_COEFFS @@ -482,13 +545,24 @@ // 0: Skip luma pixels to scale down to chroma (simplest) // 1: Average 4 or 2 luma pixels to scale down to chroma 
-// 2: Use 8-tap downsampling filter +// 2: Average 2 (top and down) luma pixels to scale down to chroma for 420, +// could be based on the luma downsampling type from CFL tool 3: Use 8-tap +// downsampling filter +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER +#define WIENERNS_CROSS_FILT_LUMA_TYPE 2 +#else #define WIENERNS_CROSS_FILT_LUMA_TYPE 0 +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER uint16_t *wienerns_copy_luma_highbd(const uint16_t *dgd, int height_y, int width_y, int in_stride, uint16_t **luma, int height_uv, int width_uv, int border, - int out_stride, int bd); + int out_stride, int bd +#if WIENERNS_CROSS_FILT_LUMA_TYPE == 2 + , + int ds_type +#endif +); #endif // CONFIG_WIENER_NONSEP_CROSS_FILT #endif // CONFIG_WIENER_NONSEP @@ -533,6 +607,9 @@ FilterFrameCtxt ctxt[MAX_MB_PLANE]; YV12_BUFFER_CONFIG *frame; YV12_BUFFER_CONFIG *dst; +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + YV12_BUFFER_CONFIG *pre_filter_frame; +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER } AV1LrStruct; extern const sgr_params_type av1_sgr_params[SGRPROJ_PARAMS]; @@ -587,6 +664,18 @@ int bit_depth, uint16_t *data, int stride, uint16_t *dst, int dst_stride, int32_t *tmpbuf, int optimized_lr); +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER +/*!\brief Function for applying cross wienerns filter to a single unit. 
+ * The inputs are same as those of av1_loop_restoration_filter_unit + */ +void av1_wiener_ns_cross_filter_unit( + const RestorationTileLimits *limits, const RestorationUnitInfo *rui, + const RestorationStripeBoundaries *rsb, RestorationLineBuffers *rlbs, + const AV1PixelRect *tile_rect, int tile_stripe0, int ss_x, int ss_y, + int bit_depth, uint16_t *data, int stride, uint16_t *dst, int dst_stride, + int32_t *tmpbuf, int optimized_lr); +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + /*!\brief Function for applying loop restoration filter to a frame * * \ingroup in_loop_restoration @@ -687,7 +776,15 @@ void av1_lr_sync_read_dummy(void *const lr_sync, int r, int c, int plane); void av1_lr_sync_write_dummy(void *const lr_sync, int r, int c, const int sb_cols, int plane); +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER +void copy_tile(int width, int height, const uint16_t *src, int src_stride, + uint16_t *dst, int dst_stride); +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER +#if CONFIG_FLEXIBLE_RU_SIZE +void set_restoration_unit_size(int width, int height, int sx, int sy, + RestorationInfo *rst); +#endif // CONFIG_FLEXIBLE_RU_SIZE /*!\endcond */ #ifdef __cplusplus
diff --git a/av1/common/scan.c b/av1/common/scan.c index af2d30e..96bae5e 100644 --- a/av1/common/scan.c +++ b/av1/common/scan.c
@@ -15,7 +15,7 @@ #include "av1/common/common_data.h" #include "av1/common/scan.h" -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC DECLARE_ALIGNED(16, static const int16_t, default_scan_4x4[16]) = { 0, 4, 1, 8, 5, 2, 12, 9, 6, 3, 13, 10, 7, 14, 11, 15, }; @@ -23,7 +23,7 @@ DECLARE_ALIGNED(16, static const int16_t, default_scan_4x4[16]) = { 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 }; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC DECLARE_ALIGNED(16, static const int16_t, mcol_scan_4x4[16]) = { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15, @@ -33,7 +33,7 @@ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, }; -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC DECLARE_ALIGNED(16, static const int16_t, default_scan_4x8[32]) = { 0, 4, 1, 8, 5, 2, 12, 9, 6, 3, 16, 13, 10, 7, 20, 17, 14, 11, 24, 21, 18, 15, 28, 25, 22, 19, 29, 26, 23, 30, 27, 31, @@ -43,7 +43,7 @@ 0, 1, 4, 2, 5, 8, 3, 6, 9, 12, 7, 10, 13, 16, 11, 14, 17, 20, 15, 18, 21, 24, 19, 22, 25, 28, 23, 26, 29, 27, 30, 31, }; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC DECLARE_ALIGNED(16, static const int16_t, mcol_scan_4x8[32]) = { 0, 4, 8, 12, 16, 20, 24, 28, 1, 5, 9, 13, 17, 21, 25, 29, @@ -70,7 +70,7 @@ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, }; -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC DECLARE_ALIGNED(16, static const int16_t, default_scan_4x16[64]) = { 0, 4, 1, 8, 5, 2, 12, 9, 6, 3, 16, 13, 10, 7, 20, 17, 14, 11, 24, 21, 18, 15, 28, 25, 22, 19, 32, 29, 26, 23, 36, 33, @@ -84,7 +84,7 @@ 33, 36, 31, 34, 37, 40, 35, 38, 41, 44, 39, 42, 45, 48, 43, 46, 49, 52, 47, 50, 53, 56, 51, 54, 57, 60, 55, 58, 61, 59, 62, 63, }; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC DECLARE_ALIGNED(16, static const int16_t, default_scan_16x4[64]) = { 0, 16, 1, 32, 17, 2, 48, 33, 18, 3, 49, 34, 19, 4, 50, 35, @@ -121,7 +121,7 @@ 12, 28, 44, 60, 13, 29, 45, 61, 14, 30, 46, 62, 15, 31, 47, 63, }; -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC DECLARE_ALIGNED(16, static const 
int16_t, default_scan_8x32[256]) = { 0, 8, 1, 16, 9, 2, 24, 17, 10, 3, 32, 25, 18, 11, 4, 40, 33, 26, 19, 12, 5, 48, 41, 34, 27, 20, 13, 6, 56, 49, @@ -163,7 +163,7 @@ 250, 223, 230, 237, 244, 251, 231, 238, 245, 252, 239, 246, 253, 247, 254, 255, }; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC DECLARE_ALIGNED(16, static const int16_t, default_scan_32x8[256]) = { 0, 32, 1, 64, 33, 2, 96, 65, 34, 3, 128, 97, 66, 35, 4, @@ -268,7 +268,7 @@ 30, 62, 94, 126, 158, 190, 222, 254, 31, 63, 95, 127, 159, 191, 223, 255, }; -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC DECLARE_ALIGNED(16, static const int16_t, default_scan_8x8[64]) = { 0, 8, 1, 16, 9, 2, 24, 17, 10, 3, 32, 25, 18, 11, 4, 40, 33, 26, 19, 12, 5, 48, 41, 34, 27, 20, 13, 6, 56, 49, 42, 35, @@ -282,7 +282,7 @@ 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63 }; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC DECLARE_ALIGNED(16, static const int16_t, mcol_scan_8x8[64]) = { 0, 8, 16, 24, 32, 40, 48, 56, 1, 9, 17, 25, 33, 41, 49, 57, @@ -298,7 +298,7 @@ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, }; -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC DECLARE_ALIGNED(16, static const int16_t, default_scan_8x16[128]) = { 0, 8, 1, 16, 9, 2, 24, 17, 10, 3, 32, 25, 18, 11, 4, 40, 33, 26, 19, 12, 5, 48, 41, 34, 27, 20, 13, 6, 56, 49, 42, 35, @@ -321,7 +321,7 @@ 114, 121, 87, 94, 101, 108, 115, 122, 95, 102, 109, 116, 123, 103, 110, 117, 124, 111, 118, 125, 119, 126, 127, }; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC DECLARE_ALIGNED(16, static const int16_t, default_scan_16x8[128]) = { 0, 16, 1, 32, 17, 2, 48, 33, 18, 3, 64, 49, 34, 19, 4, 80, @@ -380,7 +380,7 @@ 120, 121, 122, 123, 124, 125, 126, 127, }; -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC DECLARE_ALIGNED(16, static const int16_t, default_scan_16x32[512]) = { 0, 16, 1, 32, 17, 2, 48, 33, 18, 3, 64, 49, 34, 19, 4, 80, 65, 50, 35, 20, 5, 96, 81, 66, 51, 
36, 21, 6, 112, 97, @@ -456,7 +456,7 @@ 491, 506, 447, 462, 477, 492, 507, 463, 478, 493, 508, 479, 494, 509, 495, 510, 511, }; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC DECLARE_ALIGNED(16, static const int16_t, default_scan_32x16[512]) = { 0, 32, 1, 64, 33, 2, 96, 65, 34, 3, 128, 97, 66, 35, 4, @@ -645,7 +645,7 @@ 510, 511, }; -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC DECLARE_ALIGNED(16, static const int16_t, default_scan_16x16[256]) = { 0, 16, 1, 32, 17, 2, 48, 33, 18, 3, 64, 49, 34, 19, 4, 80, 65, 50, 35, 20, 5, 96, 81, 66, 51, 36, 21, 6, 112, 97, @@ -687,7 +687,7 @@ 250, 251, 236, 221, 206, 191, 207, 222, 237, 252, 253, 238, 223, 239, 254, 255 }; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC DECLARE_ALIGNED(16, static const int16_t, mcol_scan_16x16[256]) = { 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, @@ -888,7 +888,7 @@ 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023, }; -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC DECLARE_ALIGNED(16, static const int16_t, default_scan_32x32[1024]) = { 0, 32, 1, 64, 33, 2, 96, 65, 34, 3, 128, 97, 66, 35, 4, 160, 129, 98, 67, 36, 5, 192, 161, 130, 99, 68, 37, 6, @@ -1047,9 +1047,9 @@ 862, 831, 863, 894, 925, 956, 987, 1018, 1019, 988, 957, 926, 895, 927, 958, 989, 1020, 1021, 990, 959, 991, 1022, 1023 }; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_4x4[16]) = { 0, 2, 5, 9, 1, 4, 8, 12, 3, 7, 11, 14, 6, 10, 13, 15 }; @@ -1057,7 +1057,7 @@ DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_4x4[16]) = { 0, 1, 5, 6, 2, 4, 7, 12, 3, 8, 11, 13, 9, 10, 14, 15 }; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_4x4[16]) = { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15, @@ -1067,7 +1067,7 @@ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, }; -#if CONFIG_ATC_COEFCODING +#if 
CONFIG_ATC DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_4x8[32]) = { 0, 2, 5, 9, 1, 4, 8, 13, 3, 7, 12, 17, 6, 11, 16, 21, 10, 15, 20, 25, 14, 19, 24, 28, 18, 23, 27, 30, 22, 26, 29, 31 @@ -1077,7 +1077,7 @@ 0, 1, 3, 6, 2, 4, 7, 10, 5, 8, 11, 14, 9, 12, 15, 18, 13, 16, 19, 22, 17, 20, 23, 26, 21, 24, 27, 29, 25, 28, 30, 31, }; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC DECLARE_ALIGNED(16, static const int16_t, av1_mcol_iscan_4x8[32]) = { 0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27, @@ -1104,7 +1104,7 @@ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, }; -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_4x16[64]) = { 0, 2, 5, 9, 1, 4, 8, 13, 3, 7, 12, 17, 6, 11, 16, 21, 10, 15, 20, 25, 14, 19, 24, 29, 18, 23, 28, 33, 22, 27, 32, 37, @@ -1118,7 +1118,7 @@ 29, 32, 35, 38, 33, 36, 39, 42, 37, 40, 43, 46, 41, 44, 47, 50, 45, 48, 51, 54, 49, 52, 55, 58, 53, 56, 59, 61, 57, 60, 62, 63, }; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_16x4[64]) = { 0, 2, 5, 9, 13, 17, 21, 25, 29, 33, 37, 41, 45, 49, 53, 57, @@ -1155,7 +1155,7 @@ 3, 7, 11, 15, 19, 23, 27, 31, 35, 39, 43, 47, 51, 55, 59, 63, }; -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_8x32[256]) = { 0, 2, 5, 9, 14, 20, 27, 35, 1, 4, 8, 13, 19, 26, 34, 43, 3, 7, 12, 18, 25, 33, 42, 51, 6, 11, 17, 24, 32, 41, @@ -1197,7 +1197,7 @@ 219, 226, 233, 239, 244, 248, 251, 253, 227, 234, 240, 245, 249, 252, 254, 255, }; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_32x8[256]) = { 0, 2, 5, 9, 14, 20, 27, 35, 43, 51, 59, 67, 75, 83, 91, @@ -1316,7 +1316,7 @@ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, }; -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_8x8[64]) 
= { 0, 2, 5, 9, 14, 20, 27, 35, 1, 4, 8, 13, 19, 26, 34, 42, 3, 7, 12, 18, 25, 33, 41, 48, 6, 11, 17, 24, 32, 40, 47, 53, @@ -1330,9 +1330,9 @@ 10, 19, 23, 32, 39, 45, 52, 54, 20, 22, 33, 38, 46, 51, 55, 60, 21, 34, 37, 47, 50, 56, 59, 61, 35, 36, 48, 49, 57, 58, 62, 63 }; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_8x16[128]) = { 0, 2, 5, 9, 14, 20, 27, 35, 1, 4, 8, 13, 19, 26, 34, 43, 3, 7, 12, 18, 25, 33, 42, 51, 6, 11, 17, 24, 32, 41, 50, 59, @@ -1354,7 +1354,7 @@ 75, 82, 89, 96, 103, 109, 114, 118, 83, 90, 97, 104, 110, 115, 119, 122, 91, 98, 105, 111, 116, 120, 123, 125, 99, 106, 112, 117, 121, 124, 126, 127, }; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_16x8[128]) = { 0, 2, 5, 9, 14, 20, 27, 35, 43, 51, 59, 67, 75, 83, 91, 99, @@ -1413,7 +1413,7 @@ 120, 121, 122, 123, 124, 125, 126, 127, }; -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_16x32[512]) = { 0, 2, 5, 9, 14, 20, 27, 35, 44, 54, 65, 77, 90, 104, 119, 135, 1, 4, 8, 13, 19, 26, 34, 43, 53, 64, 76, 89, 103, 118, @@ -1489,7 +1489,7 @@ 509, 391, 406, 420, 433, 445, 456, 466, 475, 483, 490, 496, 501, 505, 508, 510, 511, }; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_32x16[512]) = { 0, 2, 5, 9, 14, 20, 27, 35, 44, 54, 65, 77, 90, 104, 119, @@ -1718,7 +1718,7 @@ 255, }; -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_16x16[256]) = { 0, 2, 5, 9, 14, 20, 27, 35, 44, 54, 65, 77, 90, 104, 119, 135, 1, 4, 8, 13, 19, 26, 34, 43, 53, 64, 76, 89, 103, 118, @@ -1760,7 +1760,7 @@ 135, 136, 164, 165, 189, 190, 210, 211, 227, 228, 240, 241, 249, 250, 254, 255 }; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC DECLARE_ALIGNED(16, static const int16_t, 
av1_mcol_iscan_32x32[1024]) = { 0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, @@ -1921,7 +1921,7 @@ 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023, }; -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC DECLARE_ALIGNED(16, static const int16_t, av1_default_iscan_32x32[1024]) = { 0, 2, 5, 9, 14, 20, 27, 35, 44, 54, 65, 77, 90, 104, 119, 135, 152, 170, 189, 209, 230, 252, 275, 299, 324, 350, @@ -2085,7 +2085,7 @@ 748, 792, 793, 833, 834, 870, 871, 903, 904, 932, 933, 957, 958, 978, 979, 995, 996, 1008, 1009, 1017, 1018, 1022, 1023 }; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC const SCAN_ORDER av1_default_scan_orders[TX_SIZES] = { { default_scan_4x4, av1_default_iscan_4x4 },
diff --git a/av1/common/thread_common.c b/av1/common/thread_common.c index 31e1c30..9be67cb 100644 --- a/av1/common/thread_common.c +++ b/av1/common/thread_common.c
@@ -708,7 +708,13 @@ lr_sync->jobs_dequeued = 0; for (int plane = 0; plane < num_planes; plane++) { +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE && + cm->rst_info[plane].frame_cross_restoration_type == RESTORE_NONE) + continue; +#else if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE) continue; +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER num_even_lr_jobs = num_even_lr_jobs + ((ctxt[plane].rsi->vert_units_per_tile + 1) >> 1); } @@ -716,7 +722,12 @@ lr_job_counter[1] = num_even_lr_jobs; for (int plane = 0; plane < num_planes; plane++) { - if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE) continue; + if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + && cm->rst_info[plane].frame_cross_restoration_type == RESTORE_NONE +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + ) + continue; const int is_uv = plane > 0; const int ss_y = is_uv && cm->seq_params.subsampling_y; @@ -865,7 +876,12 @@ dgd->buffers[AOM_PLANE_Y], dgd->crop_heights[AOM_PLANE_Y], dgd->crop_widths[AOM_PLANE_Y], dgd->strides[AOM_PLANE_Y], &luma, dgd->crop_heights[1], dgd->crop_widths[1], WIENERNS_UV_BRD, luma_stride, - cm->seq_params.bit_depth); + cm->seq_params.bit_depth +#if WIENERNS_CROSS_FILT_LUMA_TYPE == 2 + , + cm->seq_params.enable_cfl_ds_filter == 1 +#endif + ); assert(luma_buf != NULL); #endif // CONFIG_WIENER_NONSEP_CROSS_FILT @@ -875,7 +891,12 @@ int num_rows_lr = 0; for (int plane = 0; plane < num_planes; plane++) { - if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE) continue; + if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + && cm->rst_info[plane].frame_cross_restoration_type == RESTORE_NONE +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + ) + continue; #if CONFIG_WIENER_NONSEP || CONFIG_PC_WIENER ctxt[plane].plane = plane;
diff --git a/av1/common/tip.c b/av1/common/tip.c index 6e175d6..c427a4e 100644 --- a/av1/common/tip.c +++ b/av1/common/tip.c
@@ -37,14 +37,22 @@ if (!order_hint_info->enable_order_hint || frame_is_intra_only(cm)) return; +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + const int cur_order_hint = cm->current_frame.display_order_hint; +#else const int cur_order_hint = cm->current_frame.order_hint; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC // Identify the nearest forward and backward references. for (int i = 0; i < INTER_REFS_PER_FRAME; i++) { const RefCntBuffer *const buf = get_ref_frame_buf(cm, i); if (buf == NULL) continue; +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + const int ref_order_hint = buf->display_order_hint; +#else const int ref_order_hint = buf->order_hint; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC const int ref_to_cur_dist = get_relative_dist(order_hint_info, ref_order_hint, cur_order_hint); if (ref_to_cur_dist < 0) { @@ -72,7 +80,12 @@ const RefCntBuffer *const start_frame_buf = get_ref_frame_buf(cm, start_frame); +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + const int *const ref_order_hints = + &start_frame_buf->ref_display_order_hint[0]; +#else const int *const ref_order_hints = &start_frame_buf->ref_order_hints[0]; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC for (MV_REFERENCE_FRAME rf = 0; rf < INTER_REFS_PER_FRAME; ++rf) { if (ref_order_hints[rf] == target_frame_order) { return 1; @@ -111,13 +124,21 @@ get_ref_frame_buf(cm, start_frame); if (!is_ref_motion_field_eligible(cm, start_frame_buf)) return 0; +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + const int start_frame_order_hint = start_frame_buf->display_order_hint; +#else const int start_frame_order_hint = start_frame_buf->order_hint; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC assert(start_frame_buf->width == cm->width && start_frame_buf->height == cm->height); - +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + const int *const ref_order_hints = start_frame_buf->ref_display_order_hint; + const int cur_order_hint = cm->cur_frame->display_order_hint; +#else const int *const ref_order_hints = start_frame_buf->ref_order_hints; const 
int cur_order_hint = cm->cur_frame->order_hint; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC int start_to_current_frame_offset = get_relative_dist( order_hint_info, start_frame_order_hint, cur_order_hint); @@ -461,7 +482,11 @@ } const OrderHintInfo *const order_hint_info = &cm->seq_params.order_hint_info; +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + const int cur_order_hint = cm->cur_frame->display_order_hint; +#else const int cur_order_hint = cm->cur_frame->order_hint; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC MV_REFERENCE_FRAME nearest_rf[2] = { tip_ref->ref_frame[0], tip_ref->ref_frame[1] }; @@ -477,12 +502,20 @@ if (cm->features.tip_frame_mode) { cm->features.allow_tip_hole_fill = cm->seq_params.enable_tip_hole_fill; RefCntBuffer *ref0_frame_buf = get_ref_frame_buf(cm, nearest_rf[0]); +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + const int ref0_frame_order_hint = ref0_frame_buf->display_order_hint; +#else const int ref0_frame_order_hint = ref0_frame_buf->order_hint; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC const int cur_to_ref0_offset = get_relative_dist( order_hint_info, cur_order_hint, ref0_frame_order_hint); RefCntBuffer *ref1_frame_buf = get_ref_frame_buf(cm, nearest_rf[1]); +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + const int ref1_frame_order_hint = ref1_frame_buf->display_order_hint; +#else const int ref1_frame_order_hint = ref1_frame_buf->order_hint; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC const int cur_to_ref1_offset = get_relative_dist( order_hint_info, cur_order_hint, ref1_frame_order_hint); @@ -569,10 +602,15 @@ } } -static AOM_INLINE void tip_build_one_inter_predictor( - uint16_t *dst, int dst_stride, const MV *const src_mv, - InterPredParams *inter_pred_params, MACROBLOCKD *xd, int mi_x, int mi_y, - int ref, uint16_t **mc_buf, CalcSubpelParamsFunc calc_subpel_params_func) { +#if !CONFIG_REFINEMV +static AOM_INLINE +#endif //! 
CONFIG_REFINEMV + void + tip_build_one_inter_predictor( + uint16_t *dst, int dst_stride, const MV *const src_mv, + InterPredParams *inter_pred_params, MACROBLOCKD *xd, int mi_x, int mi_y, + int ref, uint16_t **mc_buf, + CalcSubpelParamsFunc calc_subpel_params_func) { SubpelParams subpel_params; uint16_t *src; int src_stride; @@ -582,15 +620,55 @@ #endif // CONFIG_OPTFLOW_REFINEMENT mc_buf, &src, &subpel_params, &src_stride); - tip_highbd_inter_predictor( - src, src_stride, dst, dst_stride, &subpel_params, - inter_pred_params->block_width, inter_pred_params->block_height, - &inter_pred_params->conv_params, inter_pred_params->interp_filter_params, - inter_pred_params->bit_depth); +#if CONFIG_D071_IMP_MSK_BLD + int use_bacp = 0; + int n_blocks = 0; + if (inter_pred_params->border_data.enable_bacp) { + const int sub_blk_idx = n_blocks * 2 + ref; + inter_pred_params->border_data.bacp_block_data[sub_blk_idx].x0 = + subpel_params.x0; + inter_pred_params->border_data.bacp_block_data[sub_blk_idx].x1 = + subpel_params.x1; + inter_pred_params->border_data.bacp_block_data[sub_blk_idx].y0 = + subpel_params.y0; + inter_pred_params->border_data.bacp_block_data[sub_blk_idx].y1 = + subpel_params.y1; + if (ref == 1) { + use_bacp = is_out_of_frame_block( + inter_pred_params, inter_pred_params->ref_frame_buf.width, + inter_pred_params->ref_frame_buf.height, n_blocks); + + if (use_bacp && inter_pred_params->mask_comp.type == COMPOUND_AVERAGE) { + inter_pred_params->conv_params.do_average = 0; + inter_pred_params->comp_mode = MASK_COMP; + inter_pred_params->mask_comp.seg_mask = xd->seg_mask; + } + } + } + + assert(IMPLIES(ref == 0, !use_bacp)); + + if (use_bacp) { + assert(inter_pred_params->comp_mode == MASK_COMP); + make_masked_inter_predictor(src, src_stride, dst, dst_stride, + inter_pred_params, &subpel_params, use_bacp, + n_blocks); + + } else { +#endif // CONFIG_D071_IMP_MSK_BLD + + tip_highbd_inter_predictor( + src, src_stride, dst, dst_stride, &subpel_params, + 
inter_pred_params->block_width, inter_pred_params->block_height, + &inter_pred_params->conv_params, + inter_pred_params->interp_filter_params, inter_pred_params->bit_depth); +#if CONFIG_D071_IMP_MSK_BLD + } +#endif // CONFIG_D071_IMP_MSK_BLD } -#if CONFIG_OPTFLOW_ON_TIP -#define MAKE_BFP_SAD_WRAPPER_COMMON(fnname) \ +#if CONFIG_OPTFLOW_ON_TIP || CONFIG_REFINEMV +#define MAKE_BFP_SAD_WRAPPER_COMMON8x8(fnname) \ static unsigned int fnname##_8(const uint16_t *src_ptr, int source_stride, \ const uint16_t *ref_ptr, int ref_stride) { \ return fnname(src_ptr, source_stride, ref_ptr, ref_stride); \ @@ -604,31 +682,119 @@ return fnname(src_ptr, source_stride, ref_ptr, ref_stride) >> 4; \ } -MAKE_BFP_SAD_WRAPPER_COMMON(aom_highbd_sad8x8) +MAKE_BFP_SAD_WRAPPER_COMMON8x8(aom_highbd_sad8x8) +#define MAKE_BFP_SAD_WRAPPER_COMMON16x8(fnname) \ + static unsigned int fnname##_8(const uint16_t *src_ptr, int source_stride, \ + const uint16_t *ref_ptr, int ref_stride) { \ + return fnname(src_ptr, source_stride, ref_ptr, ref_stride); \ + } \ + static unsigned int fnname##_10(const uint16_t *src_ptr, int source_stride, \ + const uint16_t *ref_ptr, int ref_stride) { \ + return fnname(src_ptr, source_stride, ref_ptr, ref_stride) >> 2; \ + } \ + static unsigned int fnname##_12(const uint16_t *src_ptr, int source_stride, \ + const uint16_t *ref_ptr, int ref_stride) { \ + return fnname(src_ptr, source_stride, ref_ptr, ref_stride) >> 4; \ + } -// Get the proper sad calculation function for an 8x8 block -static unsigned int get_highbd_sad_8X8(const uint16_t *src_ptr, - int source_stride, - const uint16_t *ref_ptr, int ref_stride, - int bd) { + MAKE_BFP_SAD_WRAPPER_COMMON16x8(aom_highbd_sad16x8) + +#define MAKE_BFP_SAD_WRAPPER_COMMON8x16(fnname) \ + static unsigned int fnname##_8(const uint16_t *src_ptr, int source_stride, \ + const uint16_t *ref_ptr, int ref_stride) { \ + return fnname(src_ptr, source_stride, ref_ptr, ref_stride); \ + } \ + static unsigned int fnname##_10(const uint16_t 
*src_ptr, int source_stride, \ + const uint16_t *ref_ptr, int ref_stride) { \ + return fnname(src_ptr, source_stride, ref_ptr, ref_stride) >> 2; \ + } \ + static unsigned int fnname##_12(const uint16_t *src_ptr, int source_stride, \ + const uint16_t *ref_ptr, int ref_stride) { \ + return fnname(src_ptr, source_stride, ref_ptr, ref_stride) >> 4; \ + } + + MAKE_BFP_SAD_WRAPPER_COMMON8x16(aom_highbd_sad8x16) + +#define MAKE_BFP_SAD_WRAPPER_COMMON16x16(fnname) \ + static unsigned int fnname##_8(const uint16_t *src_ptr, int source_stride, \ + const uint16_t *ref_ptr, int ref_stride) { \ + return fnname(src_ptr, source_stride, ref_ptr, ref_stride); \ + } \ + static unsigned int fnname##_10(const uint16_t *src_ptr, int source_stride, \ + const uint16_t *ref_ptr, int ref_stride) { \ + return fnname(src_ptr, source_stride, ref_ptr, ref_stride) >> 2; \ + } \ + static unsigned int fnname##_12(const uint16_t *src_ptr, int source_stride, \ + const uint16_t *ref_ptr, int ref_stride) { \ + return fnname(src_ptr, source_stride, ref_ptr, ref_stride) >> 4; \ + } + + MAKE_BFP_SAD_WRAPPER_COMMON16x16(aom_highbd_sad16x16) + + unsigned int get_highbd_sad(const uint16_t *src_ptr, + int source_stride, + const uint16_t *ref_ptr, + int ref_stride, int bd, int bw, + int bh) { if (bd == 8) { - return aom_highbd_sad8x8_8(src_ptr, source_stride, ref_ptr, ref_stride); + if (bw == 8 && bh == 8) + return aom_highbd_sad8x8_8(src_ptr, source_stride, ref_ptr, ref_stride); + else if (bw == 16 && bh == 8) + return aom_highbd_sad16x8_8(src_ptr, source_stride, ref_ptr, ref_stride); + else if (bw == 8 && bh == 16) + return aom_highbd_sad8x16_8(src_ptr, source_stride, ref_ptr, ref_stride); + else if (bw == 16 && bh == 16) + return aom_highbd_sad16x16_8(src_ptr, source_stride, ref_ptr, ref_stride); + else { + assert(0); + return 0; + } } else if (bd == 10) { - return aom_highbd_sad8x8_10(src_ptr, source_stride, ref_ptr, ref_stride); + if (bw == 8 && bh == 8) + return aom_highbd_sad8x8_10(src_ptr, 
source_stride, ref_ptr, ref_stride); + else if (bw == 16 && bh == 8) + return aom_highbd_sad16x8_10(src_ptr, source_stride, ref_ptr, ref_stride); + else if (bw == 8 && bh == 16) + return aom_highbd_sad8x16_10(src_ptr, source_stride, ref_ptr, ref_stride); + else if (bw == 16 && bh == 16) + return aom_highbd_sad16x16_10(src_ptr, source_stride, ref_ptr, + ref_stride); + else { + assert(0); + return 0; + } } else if (bd == 12) { - return aom_highbd_sad8x8_12(src_ptr, source_stride, ref_ptr, ref_stride); + if (bw == 8 && bh == 8) + return aom_highbd_sad8x8_12(src_ptr, source_stride, ref_ptr, ref_stride); + else if (bw == 16 && bh == 8) + return aom_highbd_sad16x8_12(src_ptr, source_stride, ref_ptr, ref_stride); + else if (bw == 8 && bh == 16) + return aom_highbd_sad8x16_12(src_ptr, source_stride, ref_ptr, ref_stride); + else if (bw == 16 && bh == 16) + return aom_highbd_sad16x16_12(src_ptr, source_stride, ref_ptr, + ref_stride); + else { + assert(0); + return 0; + } } else { assert(0); return 0; } } - // Build an 8x8 block in the TIP frame static AOM_INLINE void tip_build_inter_predictors_8x8( const AV1_COMMON *cm, MACROBLOCKD *xd, int plane, TIP_PLANE *tip_plane, const MV mv[2], int mi_x, int mi_y, uint16_t **mc_buf, CONV_BUF_TYPE *tmp_conv_dst, CalcSubpelParamsFunc calc_subpel_params_func, - uint16_t *dst, int dst_stride) { + uint16_t *dst, int dst_stride +#if CONFIG_REFINEMV + , + uint16_t *dst0_16_refinemv, uint16_t *dst1_16_refinemv, + ReferenceArea ref_area[2] +#endif // CONFIG_REFINEMV + +) { // TODO(any): currently this only works for y plane assert(plane == 0); @@ -664,6 +830,29 @@ mbmi->motion_mode = SIMPLE_TRANSLATION; mbmi->sb_type[PLANE_TYPE_Y] = BLOCK_8X8; mbmi->interinter_comp.type = COMPOUND_AVERAGE; +#if CONFIG_FLEX_MVRES + mbmi->max_mv_precision = MV_PRECISION_ONE_EIGHTH_PEL; + mbmi->pb_mv_precision = MV_PRECISION_ONE_EIGHTH_PEL; +#endif + +#if CONFIG_REFINEMV + MV best_mv_ref[2] = { { mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col }, + { 
mbmi->mv[1].as_mv.row, mbmi->mv[1].as_mv.col } }; + + int apply_refinemv = (is_refinemv_allowed_tip_blocks(cm, mbmi) && plane == 0); + + if (apply_refinemv) { + uint16_t *dst_ref0 = NULL, *dst_ref1 = NULL; + dst_ref0 = &dst0_16_refinemv[0]; + dst_ref1 = &dst1_16_refinemv[0]; + mbmi->refinemv_flag = 1; + + apply_mv_refinement(cm, xd, plane, mbmi, bw, bh, mi_x, mi_y, mc_buf, + calc_subpel_params_func, comp_pixel_x, comp_pixel_y, + dst_ref0, dst_ref1, best_mv_ref, bw, bh); + } + +#endif // CONFIG_REFINEMV // Arrays to hold optical flow offsets. int vx0[4] = { 0 }; @@ -688,12 +877,22 @@ InterPredParams params0, params1; av1_opfl_build_inter_predictor(cm, xd, plane, mbmi, bw, bh, mi_x, mi_y, mc_buf, ¶ms0, calc_subpel_params_func, 0, - dst0); + dst0 +#if CONFIG_REFINEMV + , + &best_mv_ref[0], bw, bh +#endif // CONFIG_REFINEMV + ); av1_opfl_build_inter_predictor(cm, xd, plane, mbmi, bw, bh, mi_x, mi_y, mc_buf, ¶ms1, calc_subpel_params_func, 1, - dst1); - const unsigned int sad = get_highbd_sad_8X8(dst0, bw, dst1, bw, bd); + dst1 +#if CONFIG_REFINEMV + , + &best_mv_ref[1], bw, bh +#endif // CONFIG_REFINEMV + ); + const unsigned int sad = get_highbd_sad(dst0, bw, dst1, bw, bd, 8, 8); if (sad < sad_thres) { do_opfl = 0; } @@ -701,8 +900,13 @@ if (do_opfl) { // Initialize refined mv +#if CONFIG_REFINEMV + const MV mv0 = best_mv_ref[0]; + const MV mv1 = best_mv_ref[1]; +#else const MV mv0 = mv[0]; const MV mv1 = mv[1]; +#endif // CONFIG_REFINEMV for (int mvi = 0; mvi < 4; mvi++) { mv_refined[mvi * 2].as_mv = mv0; mv_refined[mvi * 2 + 1].as_mv = mv1; @@ -712,9 +916,20 @@ av1_get_optflow_based_mv_highbd(cm, xd, plane, mbmi, mv_refined, bw, bh, mi_x, mi_y, mc_buf, calc_subpel_params_func, gx0, gy0, gx1, gy1, vx0, vy0, vx1, vy1, - dst0, dst1, 0, use_4x4); + dst0, dst1, 0, use_4x4 + +#if CONFIG_REFINEMV + , + best_mv_ref, bw, bh +#endif // CONFIG_REFINEMV + ); } +#if CONFIG_D071_IMP_MSK_BLD + BacpBlockData bacp_block_data[2 * N_OF_OFFSETS]; + uint8_t use_bacp = 
cm->features.enable_imp_msk_bld; +#endif // CONFIG_D071_IMP_MSK_BLD + for (int ref = 0; ref < 2; ++ref) { const struct scale_factors *const sf = cm->tip_ref.ref_scale_factor[ref]; struct buf_2d *const pred_buf = &tip->pred[ref]; @@ -724,8 +939,23 @@ comp_pixel_x, ss_x, ss_y, bd, 0, sf, pred_buf, MULTITAP_SHARP); +#if CONFIG_REFINEMV + if (apply_refinemv) { + inter_pred_params.use_ref_padding = 1; + inter_pred_params.ref_area = &ref_area[ref]; + } +#endif // CONFIG_REFINEMV + inter_pred_params.comp_mode = UNIFORM_COMP; +#if CONFIG_D071_IMP_MSK_BLD + inter_pred_params.border_data.enable_bacp = use_bacp; + inter_pred_params.border_data.bacp_block_data = + &bacp_block_data[0]; // Always point to the first ref + inter_pred_params.sb_type = mbmi->sb_type[PLANE_TYPE_Y]; + inter_pred_params.mask_comp = mbmi->interinter_comp; +#endif // CONFIG_D071_IMP_MSK_BLD + const int width = (cm->mi_params.mi_cols << MI_SIZE_LOG2); const int height = (cm->mi_params.mi_rows << MI_SIZE_LOG2); inter_pred_params.dist_to_top_edge = -GET_MV_SUBPEL(mi_y); @@ -741,7 +971,12 @@ dst, dst_stride, plane, mv_refined, &inter_pred_params, xd, mi_x, mi_y, ref, mc_buf, calc_subpel_params_func, use_4x4); } else { - tip_build_one_inter_predictor(dst, dst_stride, &mv[ref], + tip_build_one_inter_predictor(dst, dst_stride, +#if CONFIG_REFINEMV + &best_mv_ref[ref], +#else + &mv[ref], +#endif // CONFIG_REFINEMV &inter_pred_params, xd, mi_x, mi_y, ref, mc_buf, calc_subpel_params_func); } @@ -749,7 +984,7 @@ xd->tmp_conv_dst = org_buf; } -#endif // CONFIG_OPTFLOW_ON_TIP +#endif // CONFIG_OPTFLOW_ON_TIP || CONFIG_REFINEMV static AOM_INLINE void tip_build_inter_predictors_8x8_and_bigger( const AV1_COMMON *cm, MACROBLOCKD *xd, int plane, TIP_PLANE *tip_plane, @@ -759,9 +994,44 @@ struct buf_2d *const dst_buf = &tip->dst; uint16_t *const dst = dst_buf->buf; -#if CONFIG_OPTFLOW_ON_TIP +#if CONFIG_REFINEMV || CONFIG_OPTFLOW_ON_TIP +#if CONFIG_REFINEMV + uint16_t dst0_16_refinemv[REFINEMV_SUBBLOCK_WIDTH * 
REFINEMV_SUBBLOCK_HEIGHT]; + uint16_t dst1_16_refinemv[REFINEMV_SUBBLOCK_WIDTH * REFINEMV_SUBBLOCK_HEIGHT]; + int apply_refinemv = (plane == 0); + ReferenceArea ref_area[2]; + if (apply_refinemv) { + MB_MODE_INFO *mbmi = aom_calloc(1, sizeof(*mbmi)); + mbmi->mv[0].as_mv = mv[0]; + mbmi->mv[1].as_mv = mv[1]; + mbmi->ref_frame[0] = TIP_FRAME; + mbmi->ref_frame[1] = NONE_FRAME; + mbmi->interp_fltr = EIGHTTAP_REGULAR; + mbmi->use_intrabc[xd->tree_type == CHROMA_PART] = 0; + mbmi->use_intrabc[0] = 0; + mbmi->motion_mode = SIMPLE_TRANSLATION; + mbmi->sb_type[PLANE_TYPE_Y] = BLOCK_8X8; + mbmi->interinter_comp.type = COMPOUND_AVERAGE; +#if CONFIG_FLEX_MVRES + mbmi->max_mv_precision = MV_PRECISION_ONE_EIGHTH_PEL; + mbmi->pb_mv_precision = MV_PRECISION_ONE_EIGHTH_PEL; +#endif + const int ss_x = plane ? cm->seq_params.subsampling_x : 0; + const int ss_y = plane ? cm->seq_params.subsampling_y : 0; + const int comp_pixel_x = (mi_x >> ss_x); + const int comp_pixel_y = (mi_y >> ss_y); + av1_get_reference_area_with_padding(cm, xd, plane, mbmi, bw, bh, mi_x, mi_y, + ref_area, comp_pixel_x, comp_pixel_y); + aom_free(mbmi); + } +#endif // CONFIG_REFINEMV + int dst_stride = dst_buf->stride; - if (plane == 0 && cm->features.use_optflow_tip) { + if (plane == 0 && (cm->features.use_optflow_tip +#if CONFIG_REFINEMV + || apply_refinemv +#endif // CONFIG_REFINEMV + )) { if (bw != 8 || bh != 8) { for (int h = 0; h < bh; h += 8) { for (int w = 0; w < bw; w += 8) { @@ -776,10 +1046,15 @@ } tip_build_inter_predictors_8x8(cm, xd, plane, tip_plane, mv, mi_x, mi_y, mc_buf, tmp_conv_dst, - calc_subpel_params_func, dst, dst_stride); + calc_subpel_params_func, dst, dst_stride +#if CONFIG_REFINEMV + , + dst0_16_refinemv, dst1_16_refinemv, ref_area +#endif // CONFIG_REFINEMV + ); return; } -#endif // CONFIG_OPTFLOW_ON_TIP +#endif // CONFIG_OPTFLOW_ON_TIP || CONFIG_REFINEMV const int bd = cm->seq_params.bit_depth; @@ -789,6 +1064,12 @@ const int comp_pixel_y = (mi_y >> ss_y); const int comp_bw = bw >> 
ss_x; const int comp_bh = bh >> ss_y; + +#if CONFIG_D071_IMP_MSK_BLD + BacpBlockData bacp_block_data[2 * N_OF_OFFSETS]; + uint8_t use_bacp = cm->features.enable_imp_msk_bld; +#endif // CONFIG_D071_IMP_MSK_BLD + for (int ref = 0; ref < 2; ++ref) { const struct scale_factors *const sf = cm->tip_ref.ref_scale_factor[ref]; struct buf_2d *const pred_buf = &tip->pred[ref]; @@ -800,6 +1081,16 @@ inter_pred_params.comp_mode = UNIFORM_COMP; +#if CONFIG_D071_IMP_MSK_BLD + inter_pred_params.border_data.enable_bacp = use_bacp; + inter_pred_params.border_data.bacp_block_data = + &bacp_block_data[0]; // Always point to the first ref + inter_pred_params.sb_type = BLOCK_8X8; + assert(bw == 8 && + bh == 8); // Currently BACP is supported only for 8x8 block + inter_pred_params.mask_comp.type = COMPOUND_AVERAGE; +#endif // CONFIG_D071_IMP_MSK_BLD + const int width = (cm->mi_params.mi_cols << MI_SIZE_LOG2); const int height = (cm->mi_params.mi_rows << MI_SIZE_LOG2); inter_pred_params.dist_to_top_edge = -GET_MV_SUBPEL(mi_y);
diff --git a/av1/common/tip.h b/av1/common/tip.h index c199d06..d5c50d6 100644 --- a/av1/common/tip.h +++ b/av1/common/tip.h
@@ -118,6 +118,7 @@ *mv = get_mv_from_fullmv(&fullmv); } +#if !CONFIG_REFINEMV // Clamp MV to UMV border based on its distance to left/right/top/bottom edge static AOM_INLINE MV tip_clamp_mv_to_umv_border_sb( InterPredParams *const inter_pred_params, const MV *src_mv, int bw, int bh, @@ -172,7 +173,7 @@ return clamped_mv; } - +#endif  // !CONFIG_REFINEMV #ifdef __cplusplus } // extern "C" #endif
diff --git a/av1/common/token_cdfs.h b/av1/common/token_cdfs.h index 546dd7a..b70a2dd 100644 --- a/av1/common/token_cdfs.h +++ b/av1/common/token_cdfs.h
@@ -398,6 +398,24 @@ } }; #endif // CONFIG_CONTEXT_DERIVATION +#if CONFIG_ATC_DCTX_ALIGNED +static const aom_cdf_prob av1_default_coeff_base_bob_multi_cdfs + [TOKEN_CDF_Q_CTXS][SIG_COEF_CONTEXTS_BOB][CDF_SIZE(NUM_BASE_LEVELS + 1)] = { + { { AOM_CDF3(10923, 21845) }, + { AOM_CDF3(10923, 21845) }, + { AOM_CDF3(10923, 21845) } }, + { { AOM_CDF3(18786, 24298) }, + { AOM_CDF3(24159, 27856) }, + { AOM_CDF3(25533, 28778) } }, + { { AOM_CDF3(19757, 25371) }, + { AOM_CDF3(20834, 26403) }, + { AOM_CDF3(20013, 26339) } }, + { { AOM_CDF3(21974, 28446) }, + { AOM_CDF3(23020, 29244) }, + { AOM_CDF3(23858, 29893) } } + }; +#endif // CONFIG_ATC_DCTX_ALIGNED + static const aom_cdf_prob av1_default_eob_extra_cdfs[TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES] [EOB_COEF_CONTEXTS][CDF_SIZE(2)] = { @@ -843,6 +861,110 @@ } } } }; +#if CONFIG_ATC_DCTX_ALIGNED +static const aom_cdf_prob + av1_default_eob_multi16_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][CDF_SIZE( + EOB_MAX_SYMS - 6)] = { { { AOM_CDF5(6554, 13107, 19661, 26214) }, + { AOM_CDF5(6554, 13107, 19661, 26214) } }, + { { AOM_CDF5(222, 339, 759, 2493) }, + { AOM_CDF5(4613, 6877, 12106, 18380) } }, + { { AOM_CDF5(1418, 2074, 4194, 10380) }, + { AOM_CDF5(14420, 18173, 23582, 28265) } }, + { { AOM_CDF5(2817, 4572, 9636, 19946) }, + { AOM_CDF5(18618, 21512, 26133, 29765) } } }; + +static const aom_cdf_prob + av1_default_eob_multi32_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][CDF_SIZE( + EOB_MAX_SYMS - 5)] = { + { { AOM_CDF6(5461, 10923, 16384, 21845, 27307) }, + { AOM_CDF6(5461, 10923, 16384, 21845, 27307) } }, + { { AOM_CDF6(169, 487, 1046, 1986, 4403) }, + { AOM_CDF6(4144, 7636, 13889, 20369, 26338) } }, + { { AOM_CDF6(1365, 1804, 3348, 6966, 13052) }, + { AOM_CDF6(12169, 14967, 19396, 23910, 28156) } }, + { { AOM_CDF6(2394, 3348, 6277, 12419, 20179) }, + { AOM_CDF6(16608, 19308, 23841, 28108, 31336) } } + }; + +static const aom_cdf_prob + av1_default_eob_multi64_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][CDF_SIZE( + EOB_MAX_SYMS - 4)] = { + { { 
AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) }, + { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) } }, + { { AOM_CDF7(100, 388, 969, 2002, 4280, 9308) }, + { AOM_CDF7(4333, 8167, 14492, 20418, 26250, 29955) } }, + { { AOM_CDF7(1148, 1569, 2970, 5924, 10850, 20433) }, + { AOM_CDF7(11727, 14694, 19634, 24043, 27541, 30259) } }, + { { AOM_CDF7(2471, 3361, 6306, 11907, 19426, 28048) }, + { AOM_CDF7(16268, 19103, 23823, 27751, 30728, 32149) } } + }; + +static const aom_cdf_prob + av1_default_eob_multi128_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][CDF_SIZE( + EOB_MAX_SYMS - 3)] = { + { { AOM_CDF8(4096, 8192, 12288, 16384, 20480, 24576, 28672) }, + { AOM_CDF8(4096, 8192, 12288, 16384, 20480, 24576, 28672) } }, + { { AOM_CDF8(34, 889, 2098, 3797, 6595, 10857, 15412) }, + { AOM_CDF8(2654, 4900, 10162, 15197, 21643, 27251, 31178) } }, + { { AOM_CDF8(1804, 2359, 3999, 7338, 12798, 19374, 26178) }, + { AOM_CDF8(12268, 15216, 20355, 24508, 27905, 29986, 31541) } }, + { { AOM_CDF8(3169, 4356, 7874, 13681, 20866, 27270, 30960) }, + { AOM_CDF8(17157, 19749, 24432, 27880, 30345, 31718, 32514) } } + }; + +static const aom_cdf_prob + av1_default_eob_multi256_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][CDF_SIZE( + EOB_MAX_SYMS - 2)] = { + { { AOM_CDF9(3641, 7282, 10923, 14564, 18204, 21845, 25486, 29127) }, + { AOM_CDF9(3641, 7282, 10923, 14564, 18204, 21845, 25486, 29127) } }, + { { AOM_CDF9(33, 670, 2335, 4307, 7421, 13364, 18212, 24272) }, + { AOM_CDF9(4218, 6101, 11211, 14560, 18294, 23147, 29103, 31965) } }, + { { AOM_CDF9(2847, 3904, 6472, 10515, 16418, 23542, 27413, 30171) }, + { AOM_CDF9(10704, 13812, 18880, 22949, 26500, 29132, 31028, 31981) } }, + { { AOM_CDF9(3871, 5381, 9315, 15338, 22376, 28297, 31018, 32233) }, + { AOM_CDF9(16748, 19447, 24213, 27658, 30076, 31428, 32280, 32665) } } + }; + +static const aom_cdf_prob + av1_default_eob_multi512_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][CDF_SIZE( + EOB_MAX_SYMS - 1)] = { + { { AOM_CDF10(3277, 6554, 9830, 13107, 16384, 19661, 22938, 
26214, + 29491) }, + { AOM_CDF10(3277, 6554, 9830, 13107, 16384, 19661, 22938, 26214, + 29491) } }, + { { AOM_CDF10(210, 734, 3354, 8943, 13834, 20297, 25921, 28366, 28820) }, + { AOM_CDF10(7846, 10783, 17565, 23721, 26809, 28393, 30689, 32349, + 32667) } }, + { { AOM_CDF10(4390, 7204, 11548, 15823, 20831, 26069, 28315, 30071, + 31063) }, + { AOM_CDF10(9109, 12852, 19233, 23707, 27291, 29681, 31403, 32259, + 32589) } }, + { { AOM_CDF10(7299, 9302, 13207, 18796, 25134, 30084, 31270, 32095, + 32513) }, + { AOM_CDF10(18536, 20753, 25277, 28762, 31002, 31931, 32489, 32703, + 32707) } } + }; + +static const aom_cdf_prob + av1_default_eob_multi1024_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][CDF_SIZE( + EOB_MAX_SYMS)] = { { { AOM_CDF11(2979, 5958, 8937, 11916, 14895, 17873, + 20852, 23831, 26810, 29789) }, + { AOM_CDF11(2979, 5958, 8937, 11916, 14895, 17873, + 20852, 23831, 26810, 29789) } }, + { { AOM_CDF11(401, 535, 3344, 6821, 8426, 13375, + 14043, 17788, 19527, 21667) }, + { AOM_CDF11(4161, 8108, 14066, 17761, 20706, 21700, + 22771, 26741, 31827, 32653) } }, + { { AOM_CDF11(4252, 6240, 9718, 13784, 19105, 24225, + 26454, 28321, 29757, 30648) }, + { AOM_CDF11(11539, 14807, 19777, 22884, 26491, + 28818, 30433, 31677, 32422, 32654) } }, + { { AOM_CDF11(8416, 10839, 15261, 20667, 26077, + 30169, 31223, 31886, 32276, 32527) }, + { AOM_CDF11(21740, 24160, 28114, 30490, 31733, + 32248, 32571, 32700, 32704, + 32708) } } }; +#else static const aom_cdf_prob av1_default_eob_multi16_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][CDF_SIZE( 5)] = { { { { AOM_CDF5(840, 1039, 1980, 4895) }, @@ -1013,6 +1135,7 @@ 29486, 29724, 29807, 32570) }, { AOM_CDF11(2979, 5958, 8937, 11916, 14895, 17873, 20852, 23831, 26810, 29789) } } } }; +#endif // CONFIG_ATC_DCTX_ALIGNED static const aom_cdf_prob av1_default_coeff_lps_multi_cdfs_idtx[TOKEN_CDF_Q_CTXS][IDTX_LEVEL_CONTEXTS] @@ -1075,7 +1198,7 @@ { AOM_CDF4(13384, 20248, 26029) } } }; -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC static const aom_cdf_prob 
av1_default_coeff_lps_multi_cdfs[TOKEN_CDF_Q_CTXS][PLANE_TYPES] [LEVEL_CONTEXTS][CDF_SIZE(BR_CDF_SIZE)] = { @@ -1901,7 +2024,7 @@ { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) }, { AOM_CDF4(8192, 16384, 24576) } } } } }; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC static const aom_cdf_prob av1_default_coeff_base_multi_cdfs_idtx [TOKEN_CDF_Q_CTXS][IDTX_SIG_COEF_CONTEXTS] @@ -1962,7 +2085,7 @@ { AOM_CDF4(2674, 5476, 12215) }, { AOM_CDF4(1568, 3054, 5228) } } }; -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC static const aom_cdf_prob av1_default_coeff_base_lf_multi_cdfs [TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES][LF_SIG_COEF_CONTEXTS] [CDF_SIZE(LF_BASE_SYMBOLS)] = { @@ -6461,7 +6584,7 @@ { AOM_CDF3(10923, 21845) }, { AOM_CDF3(10923, 21845) }, { AOM_CDF3(10923, 21845) } } } } }; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC #if CONFIG_PAR_HIDING static const aom_cdf_prob av1_default_coeff_base_ph_cdfs[TOKEN_CDF_Q_CTXS][COEFF_BASE_PH_CONTEXTS]
diff --git a/av1/common/txb_common.h b/av1/common/txb_common.h index 79f6929..0b82352 100644 --- a/av1/common/txb_common.h +++ b/av1/common/txb_common.h
@@ -109,16 +109,16 @@ AOMMIN(levels[pos + stride], MAX_BASE_BR_RANGE) + AOMMIN(levels[pos + 1 + stride], MAX_BASE_BR_RANGE); mag = AOMMIN((mag + 1) >> 1, 6); -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC return mag; #else //((row | col) < 2) is equivalent to ((row < 2) && (col < 2)) if ((row | col) < 2) return mag + 7; return mag + 14; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC } -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC // This function returns the low range context index for // the low-frequency region for the EOB coefficient. static AOM_FORCE_INLINE int get_br_ctx_lf_eob(const int c, // raster order @@ -139,9 +139,9 @@ return 7; return 14; } -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC // This function returns the low range context index/increment for the // coefficients residing in the low-frequency region for 2D transforms. // Not used for the DC term. @@ -251,9 +251,9 @@ return mag + 14; } -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC static const uint8_t clip_max5[256] = { 0, 1, 2, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, @@ -266,7 +266,7 @@ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 }; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC static const uint8_t clip_max3[256] = { 0, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, @@ -321,7 +321,7 @@ return sign_ctx; } -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC // This function returns the template sum of absolute values // for coefficient coding for the low-frequency region. 
static AOM_FORCE_INLINE int get_nz_mag_lf(const uint8_t *const levels, @@ -346,7 +346,7 @@ } return mag; } -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC // This function returns the template sum of absolute values // for coefficient coding for the higher-frequency default region. @@ -399,7 +399,7 @@ return ctx + 7; } -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC // This function returns the base range context index/increment for the // coefficients residing in the low-frequency region for 1D/2D transforms. static AOM_FORCE_INLINE int get_nz_map_ctx_from_stats_lf( @@ -449,7 +449,7 @@ } return 0; } -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC // This function returns the base range context index/increment for the // coefficients residing in the higher-frequency region for 1D/2D transforms. @@ -457,9 +457,9 @@ const int stats, const int coeff_idx, // raster order const int bwl, -#if !CONFIG_ATC_COEFCODING +#if !CONFIG_ATC const TX_SIZE tx_size, -#endif // !CONFIG_ATC_COEFCODING +#endif // !CONFIG_ATC const TX_CLASS tx_class) { // tx_class == 0(TX_CLASS_2D) if ((tx_class | coeff_idx) == 0) return 0; @@ -467,7 +467,7 @@ ctx = AOMMIN(ctx, 4); switch (tx_class) { case TX_CLASS_2D: { -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC const int row = coeff_idx >> bwl; const int col = coeff_idx - (row << bwl); if (row + col < 6) return ctx; @@ -486,33 +486,33 @@ // if (row + col < 4) return 5 + ctx + 1; // return 21 + ctx; return ctx + av1_nz_map_ctx_offset[tx_size][coeff_idx]; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC } case TX_CLASS_HORIZ: { -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC return ctx + 15; #else const int row = coeff_idx >> bwl; const int col = coeff_idx - (row << bwl); return ctx + nz_map_ctx_offset_1d[col]; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC } case TX_CLASS_VERT: { -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC return ctx + 15; #else const int row = coeff_idx >> bwl; return ctx + nz_map_ctx_offset_1d[row]; -#endif // 
CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC } default: break; } return 0; } -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC typedef aom_cdf_prob (*base_lf_cdf_arr)[CDF_SIZE(LF_BASE_SYMBOLS)]; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC typedef aom_cdf_prob (*base_cdf_arr)[CDF_SIZE(4)]; typedef aom_cdf_prob (*br_cdf_arr)[CDF_SIZE(BR_CDF_SIZE)]; #if CONFIG_PAR_HIDING @@ -563,6 +563,15 @@ return 3; } +#if CONFIG_ATC_DCTX_ALIGNED +// Return context index for first position. +static INLINE int get_lower_levels_ctx_bob(int bwl, int height, int scan_idx) { + if (scan_idx <= (height << bwl) / 8) return 0; + if (scan_idx <= (height << bwl) / 4) return 1; + return 2; +} +#endif // CONFIG_ATC_DCTX_ALIGNED + static INLINE int get_upper_levels_ctx_2d(const uint8_t *levels, int coeff_idx, int bwl) { int mag; @@ -576,7 +585,7 @@ return ctx + 7; } -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC // This function returns the base range context index/increment for the // coefficients residing in the low-frequency region for 2D transforms. 
static INLINE int get_lower_levels_ctx_lf_2d(const uint8_t *levels, @@ -612,14 +621,14 @@ get_nz_mag_lf(levels + get_padded_idx(coeff_idx, bwl), bwl, tx_class); return get_nz_map_ctx_from_stats_lf(stats, coeff_idx, bwl, tx_class); } -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC static INLINE int get_lower_levels_ctx_2d(const uint8_t *levels, int coeff_idx, int bwl -#if !CONFIG_ATC_COEFCODING +#if !CONFIG_ATC , TX_SIZE tx_size -#endif // !CONFIG_ATC_COEFCODING +#endif // !CONFIG_ATC ) { assert(coeff_idx > 0); int mag; @@ -632,7 +641,7 @@ mag += AOMMIN(levels[(2 << bwl) + (2 << TX_PAD_HOR_LOG2)], 3); // { 2, 0 } const int ctx = AOMMIN((mag + 1) >> 1, 4); -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC const int row = coeff_idx >> bwl; const int col = coeff_idx - (row << bwl); if (row + col < 6) return ctx; @@ -640,10 +649,10 @@ return ctx + 10; #else return ctx + av1_nz_map_ctx_offset[tx_size][coeff_idx]; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC } -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC // This function determines the limits to separate the low-frequency // coefficient coding region from the higher-frequency default // region. 
It is based on the diagonal sum (row+col) or row, columns @@ -661,35 +670,35 @@ } return limits; } -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC static AOM_FORCE_INLINE int get_lower_levels_ctx(const uint8_t *levels, int coeff_idx, int bwl, -#if !CONFIG_ATC_COEFCODING +#if !CONFIG_ATC TX_SIZE tx_size, -#endif // !CONFIG_ATC_COEFCODING +#endif // !CONFIG_ATC TX_CLASS tx_class) { const int stats = get_nz_mag(levels + get_padded_idx(coeff_idx, bwl), bwl, tx_class); -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC return get_nz_map_ctx_from_stats(stats, coeff_idx, bwl, tx_class); #else return get_nz_map_ctx_from_stats(stats, coeff_idx, bwl, tx_size, tx_class); -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC } static INLINE int get_lower_levels_ctx_general(int is_last, int scan_idx, int bwl, int height, const uint8_t *levels, int coeff_idx, -#if !CONFIG_ATC_COEFCODING +#if !CONFIG_ATC TX_SIZE tx_size, -#endif // !CONFIG_ATC_COEFCODING +#endif // !CONFIG_ATC TX_CLASS tx_class -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC , int plane -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC ) { if (is_last) { if (scan_idx == 0) return 0; @@ -697,7 +706,7 @@ if (scan_idx <= (height << bwl) >> 2) return 2; return 3; } -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC const int row = coeff_idx >> bwl; const int col = coeff_idx - (row << bwl); int limits = get_lf_limits(row, col, tx_class, plane); @@ -708,7 +717,7 @@ } #else return get_lower_levels_ctx(levels, coeff_idx, bwl, tx_size, tx_class); -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC } static INLINE void set_dc_sign(int *cul_level, int dc_val) {
diff --git a/av1/common/warped_motion.c b/av1/common/warped_motion.c index 1293edf..6fa6b52 100644 --- a/av1/common/warped_motion.c +++ b/av1/common/warped_motion.c
@@ -983,3 +983,188 @@ return 0; } #endif // CONFIG_EXTENDED_WARP_PREDICTION + +#if CONFIG_CWG_D067_IMPROVED_WARP +// From the warp model, derive the MV in (x,y) position. +// (x,y) is the horizontal and vertical position of the frame +//(0,0) is the top-left co-ordinate of the frame +int_mv get_warp_motion_vector_xy_pos(const WarpedMotionParams *model, + const int x, const int y, + MvSubpelPrecision precision) { + int_mv res; + + if (model->wmtype == IDENTITY) { + res.as_int = 0; + return res; + } + + if (model->wmtype == TRANSLATION) { + // All global motion vectors are stored with WARPEDMODEL_PREC_BITS (16) + // bits of fractional precision. The offset for a translation is stored in + // entries 0 and 1. For translations, all but the top three (two if + // precision < MV_SUBPEL_EIGHTH) fractional bits are always + // zero. + // +#if CONFIG_FLEX_MVRES + // After the right shifts, there are 3 fractional bits of precision. If + // precision < MV_SUBPEL_EIGHTH is false, the bottom bit is always zero + // (so we don't need a call to convert_to_trans_prec here) + res.as_mv.col = model->wmmat[0] >> GM_TRANS_ONLY_PREC_DIFF; + res.as_mv.row = model->wmmat[1] >> GM_TRANS_ONLY_PREC_DIFF; + + // When extended warp prediction is enabled, the warp model can be derived + // from the neighbor. Neighbor may have different MV precision than current + // block. Therefore, this assertion is not valid when + // CONFIG_EXTENDED_WARP_PREDICTION is enabled +#if !CONFIG_EXTENDED_WARP_PREDICTION + assert(IMPLIES(1 & (res.as_mv.row | res.as_mv.col), + precision == MV_PRECISION_ONE_EIGHTH_PEL)); +#endif +#if CONFIG_C071_SUBBLK_WARPMV + if (precision < MV_PRECISION_HALF_PEL) +#endif // CONFIG_C071_SUBBLK_WARPMV + lower_mv_precision(&res.as_mv, precision); +#else + // After the right shifts, there are 3 fractional bits of precision. 
If + // allow_hp is false, the bottom bit is always zero (so we don't need a + // call to convert_to_trans_prec here) + res.as_mv.col = model->wmmat[0] >> GM_TRANS_ONLY_PREC_DIFF; + res.as_mv.row = model->wmmat[1] >> GM_TRANS_ONLY_PREC_DIFF; + assert(IMPLIES(1 & (res.as_mv.row | res.as_mv.col), allow_hp)); + if (is_integer) { + integer_mv_precision(&res.as_mv); + } +#endif + return res; + } + + const int32_t *mat = model->wmmat; + int tx, ty; + + if (model->wmtype == ROTZOOM) { + assert(model->wmmat[5] == model->wmmat[2]); + assert(model->wmmat[4] == -model->wmmat[3]); + } + + int xc = + (mat[2] * x + mat[3] * y + mat[0]) - (1 << WARPEDMODEL_PREC_BITS) * x; + int yc = + (mat[4] * x + mat[5] * y + mat[1]) - (1 << WARPEDMODEL_PREC_BITS) * y; + +#if CONFIG_FLEX_MVRES + tx = convert_to_trans_prec(precision, xc); + ty = convert_to_trans_prec(precision, yc); +#else + tx = convert_to_trans_prec(allow_hp, xc); + ty = convert_to_trans_prec(allow_hp, yc); +#endif + + res.as_mv.row = ty; + res.as_mv.col = tx; + +#if CONFIG_FLEX_MVRES +#if CONFIG_C071_SUBBLK_WARPMV + if (precision < MV_PRECISION_HALF_PEL) +#endif // CONFIG_C071_SUBBLK_WARPMV + lower_mv_precision(&res.as_mv, precision); +#else + if (is_integer) { + integer_mv_precision(&res.as_mv); + } +#endif + return res; +} + +// return 0 if the model is invalid +// pts (col, row) is the array of source points in the unit of integer pixel +// mvs are the array of the MVs corresponding to the source points +// for nth point, +// pts[2*n] is the col value of the source position. pts[2*n + 1] is the row +// value of the source position mvs[2*n] is the col value of mv. mvs[2*n + 1] +// is the row value of mv pts_inref[2*n] is the col value of the projected +// position. 
pts_inref[2*n + 1] is the row value of the projected position +int get_model_from_corner_mvs(WarpedMotionParams *derive_model, int *pts, + int np, int *mvs, const BLOCK_SIZE bsize) { + // In order to derive the warp model we need 3 projected points + // If the number of projected points (np) is not equal to 3, model is not + // valid. + if (np != 3) { + derive_model->invalid = 1; + return 0; + } + + int x0, y0; + int ref_x0, ref_x1, ref_x2, ref_y0, ref_y1, ref_y2; + int pts_inref[2 * 3]; + const int width_log2 = mi_size_wide_log2[bsize] + MI_SIZE_LOG2; + const int height_log2 = mi_size_high_log2[bsize] + MI_SIZE_LOG2; + + assert(derive_model != NULL); + + for (int n = 0; n < np; n++) { + pts_inref[2 * n] = pts[2 * n] * (1 << WARPEDMODEL_PREC_BITS) + + mvs[2 * n] * (1 << GM_TRANS_ONLY_PREC_DIFF); + pts_inref[2 * n + 1] = pts[2 * n + 1] * (1 << WARPEDMODEL_PREC_BITS) + + mvs[2 * n + 1] * (1 << GM_TRANS_ONLY_PREC_DIFF); + int valid_point = (pts[2 * n] >= 0 && pts[2 * n + 1] >= 0 && + pts_inref[2 * n] >= 0 && pts_inref[2 * n + 1] >= 0); + if (!valid_point) return 0; + } + + int all_mvs_same = 1; + for (int k = 1; k < np; k++) { + all_mvs_same &= (mvs[0] == mvs[2 * k]) & (mvs[1] == mvs[2 * k + 1]); + } + if (all_mvs_same) { + derive_model->invalid = 1; + return 0; + } + + // Top-left point + x0 = pts[2 * 0]; + y0 = pts[2 * 0 + 1]; + ref_x0 = pts_inref[2 * 0]; + ref_y0 = pts_inref[2 * 0 + 1]; + + // Top-right point + ref_x1 = pts_inref[2 * 1]; + ref_y1 = pts_inref[2 * 1 + 1]; + + // Bottom-left point + ref_x2 = pts_inref[2 * 2]; + ref_y2 = pts_inref[2 * 2 + 1]; + + derive_model->wmmat[2] = (ref_x1 - ref_x0) >> width_log2; + derive_model->wmmat[4] = (ref_y1 - ref_y0) >> width_log2; + + derive_model->wmmat[3] = (ref_x2 - ref_x0) >> height_log2; + derive_model->wmmat[5] = (ref_y2 - ref_y0) >> height_log2; + + int64_t wmmat0 = (int64_t)ref_x0 - + (int64_t)derive_model->wmmat[2] * (int64_t)x0 - + (int64_t)derive_model->wmmat[3] * (int64_t)y0; + int64_t wmmat1 = 
(int64_t)ref_y0 - + (int64_t)derive_model->wmmat[4] * (int64_t)x0 - + (int64_t)derive_model->wmmat[5] * (int64_t)y0; + + derive_model->wmtype = AFFINE; + derive_model->invalid = 0; + + av1_reduce_warp_model(derive_model); + + // check compatibility with the fast warp filter + if (!av1_get_shear_params(derive_model)) { + derive_model->invalid = 1; + return 0; + } + + derive_model->wmmat[0] = (int32_t)clamp64(wmmat0, -WARPEDMODEL_TRANS_CLAMP, + WARPEDMODEL_TRANS_CLAMP - 1); + derive_model->wmmat[1] = (int32_t)clamp64(wmmat1, -WARPEDMODEL_TRANS_CLAMP, + WARPEDMODEL_TRANS_CLAMP - 1); + + derive_model->wmmat[6] = derive_model->wmmat[7] = 0; + + return 1; +} +#endif // CONFIG_CWG_D067_IMPROVED_WARP
diff --git a/av1/common/warped_motion.h b/av1/common/warped_motion.h index 444ce88..9722b22 100644 --- a/av1/common/warped_motion.h +++ b/av1/common/warped_motion.h
@@ -302,4 +302,86 @@ WarpedMotionParams *wm_params); #endif // CONFIG_EXTENDED_WARP_PREDICTION +#if CONFIG_IMPROVED_GLOBAL_MOTION +// Given a warp model which was initially used at a temporal distance of +// `in_distance`, rescale it to a new temporal distance of `out_distance`. +// Both distances are allowed to be negative, but they must be nonzero. +// +// The mathematically ideal way to rescale a warp model from one temporal +// distance to another would be to use a matrix exponential: If we write the +// input model as a 3x3 matrix M, then the output model should be +// +// ideal output = M ^ (out_distance / in_distance) +// +// However, computing a matrix exponential is complicated, especially in +// fixed point, and so would not be very hardware friendly. In addition, +// this function is mainly used to predict global motion parameters, with +// the true values being coded as a delta from this prediction. As the +// global motion will not be perfectly consistent, there's a limit to how +// accurate our prediction can be. +// +// For these reasons, we approximate the matrix exponential using its +// first-order Taylor series: +// +// output = I + (M - I) * (out_distance / in_distance) +// +// This is far easier to compute, and provides a "good enough" approximation +// for the models we use in practice, which are all reasonably near to the +// identity model (all parameters except for the translational part are +// within +/- 1/2 of the identity). 
+static INLINE void av1_scale_warp_model(const WarpedMotionParams *in_params, + int in_distance, + WarpedMotionParams *out_params, + int out_distance) { + static int param_shift[MAX_PARAMDIM - 1] = { + GM_TRANS_PREC_DIFF, GM_TRANS_PREC_DIFF, GM_ALPHA_PREC_DIFF, + GM_ALPHA_PREC_DIFF, GM_ALPHA_PREC_DIFF, GM_ALPHA_PREC_DIFF, + GM_ROW3HOMO_PREC_DIFF, GM_ROW3HOMO_PREC_DIFF + }; + + static int param_min[MAX_PARAMDIM - 1] = { GM_TRANS_MIN, GM_TRANS_MIN, + GM_ALPHA_MIN, GM_ALPHA_MIN, + GM_ALPHA_MIN, GM_ALPHA_MIN, + GM_ROW3HOMO_MIN, GM_ROW3HOMO_MIN }; + + static int param_max[MAX_PARAMDIM - 1] = { GM_TRANS_MAX, GM_TRANS_MAX, + GM_ALPHA_MAX, GM_ALPHA_MAX, + GM_ALPHA_MAX, GM_ALPHA_MAX, + GM_ROW3HOMO_MAX, GM_ROW3HOMO_MAX }; + + assert(in_distance != 0); + assert(out_distance != 0); + + // Flip signs so that in_distance is positive. + // We do this because + // scaled_value = (... + divisor/2) / divisor + // is the simplest way to implement division with round-to-nearest in C, + // but it only works correctly if the divisor is positive + if (in_distance < 0) { + in_distance = -in_distance; + out_distance = -out_distance; + } + + out_params->wmtype = in_params->wmtype; + for (int param = 0; param < MAX_PARAMDIM - 1; param++) { + int center = default_warp_params.wmmat[param]; + + int input = in_params->wmmat[param] - center; + int divisor = in_distance * (1 << param_shift[param]); + int output = (int)(((int64_t)input * out_distance + divisor / 2) / divisor); + output = clamp(output, param_min[param], param_max[param]) * + (1 << param_shift[param]); + + out_params->wmmat[param] = center + output; + } +} +#endif // CONFIG_IMPROVED_GLOBAL_MOTION + +#if CONFIG_CWG_D067_IMPROVED_WARP +int_mv get_warp_motion_vector_xy_pos(const WarpedMotionParams *model, + const int x, const int y, + MvSubpelPrecision precision); +int get_model_from_corner_mvs(WarpedMotionParams *derive_model, int *pts, + int np, int *mvs, const BLOCK_SIZE bsize); +#endif // CONFIG_CWG_D067_IMPROVED_WARP #endif // 
AOM_AV1_COMMON_WARPED_MOTION_H_
diff --git a/av1/common/x86/av1_inv_txfm_ssse3.h b/av1/common/x86/av1_inv_txfm_ssse3.h index 25b45bd..7704498 100644 --- a/av1/common/x86/av1_inv_txfm_ssse3.h +++ b/av1/common/x86/av1_inv_txfm_ssse3.h
@@ -68,11 +68,11 @@ }; DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_8x16_default[16]) = { -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC 0x0707, 0x0707, 0x0707, 0x0707, 0x0f07, 0x0f07, 0x0f07, 0x0f07, #else 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0f07, 0x0f07, 0x0f07, -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, }; @@ -83,11 +83,11 @@ DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_16x32_default[32]) = { 0x0707, 0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, #else 0x0f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, }; @@ -99,13 +99,13 @@ }; DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_8x32_default[32]) = { -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC 0x0707, 0x0707, 0x0707, 0x0707, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, #else 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x1f07, 0x1f07, 0x1f07, -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, };
diff --git a/av1/decoder/accounting.c b/av1/decoder/accounting.c index b8c1e5a..0062189 100644 --- a/av1/decoder/accounting.c +++ b/av1/decoder/accounting.c
@@ -18,26 +18,51 @@ #include "aom/aom_integer.h" #include "av1/decoder/accounting.h" -static int accounting_hash(const char *str) { +static int accounting_hash(AccountingSymbolInfo *acct_info) { uint32_t val; const unsigned char *ustr; val = 0; - ustr = (const unsigned char *)str; + ustr = (const unsigned char *)acct_info->c_file; /* This is about the worst hash one can design, but it should be good enough here. */ while (*ustr) val += *ustr++; + + for (int i = 0; i < AOM_ACCOUNTING_MAX_TAGS; i++) { + if (acct_info->tags[i] == NULL) break; + ustr = (const unsigned char *)acct_info->tags[i]; + while (*ustr) val += *ustr++; + } + val += acct_info->c_line; return val % AOM_ACCOUNTING_HASH_SIZE; } +int tags_equal(AccountingSymbolInfo *a, AccountingSymbolInfo *b) { + for (int i = 0; i < AOM_ACCOUNTING_MAX_TAGS; i++) { + if (a->tags[i] == NULL && b->tags[i] != NULL) return 0; + if (a->tags[i] != NULL && b->tags[i] == NULL) return 0; + if (a->tags[i] != b->tags[i]) { + if (strcmp(a->tags[i], b->tags[i]) != 0) { + return 0; + } + } + } + return 1; +} + /* Dictionary lookup based on an open-addressing hash table. */ -int aom_accounting_dictionary_lookup(Accounting *accounting, const char *str) { +int aom_accounting_dictionary_lookup(Accounting *accounting, + AccountingSymbolInfo *acct_info) { int hash; - size_t len; AccountingDictionary *dictionary; dictionary = &accounting->syms.dictionary; - hash = accounting_hash(str); + hash = accounting_hash(acct_info); while (accounting->hash_dictionary[hash] != -1) { - if (strcmp(dictionary->strs[accounting->hash_dictionary[hash]], str) == 0) { + if (strcmp(dictionary->acct_infos[accounting->hash_dictionary[hash]].c_file, + acct_info->c_file) == 0 && + dictionary->acct_infos[accounting->hash_dictionary[hash]].c_line == + acct_info->c_line && + tags_equal(&dictionary->acct_infos[accounting->hash_dictionary[hash]], + acct_info)) { return accounting->hash_dictionary[hash]; } hash++; @@ -46,9 +71,8 @@ /* No match found. 
*/ assert(dictionary->num_strs + 1 < MAX_SYMBOL_TYPES); accounting->hash_dictionary[hash] = dictionary->num_strs; - len = strlen(str); - dictionary->strs[dictionary->num_strs] = malloc(len + 1); - snprintf(dictionary->strs[dictionary->num_strs], len + 1, "%s", str); + dictionary->acct_infos[dictionary->num_strs] = *acct_info; + dictionary->num_strs++; return dictionary->num_strs - 1; } @@ -77,42 +101,25 @@ } void aom_accounting_clear(Accounting *accounting) { - int i; - AccountingDictionary *dictionary; free(accounting->syms.syms); - dictionary = &accounting->syms.dictionary; - for (i = 0; i < dictionary->num_strs; i++) { - free(dictionary->strs[i]); - } } -void aom_accounting_set_context(Accounting *accounting, int16_t x, int16_t y) { +void aom_accounting_set_context(Accounting *accounting, int16_t x, int16_t y, + TREE_TYPE tree_type) { accounting->context.x = x; accounting->context.y = y; + accounting->context.tree_type = tree_type; } -void aom_accounting_record(Accounting *accounting, const char *str, - uint32_t bits) { +void aom_accounting_record(Accounting *accounting, int value, + SYMBOL_CODING_MODE coding_mode, + AccountingSymbolInfo acct_info, uint64_t bits) { AccountingSymbol sym; - // Reuse previous symbol if it has the same context and symbol id. 
- if (accounting->syms.num_syms) { - AccountingSymbol *last_sym; - last_sym = &accounting->syms.syms[accounting->syms.num_syms - 1]; - if (memcmp(&last_sym->context, &accounting->context, - sizeof(AccountingSymbolContext)) == 0) { - uint32_t id; - id = aom_accounting_dictionary_lookup(accounting, str); - if (id == last_sym->id) { - last_sym->bits += bits; - last_sym->samples++; - return; - } - } - } sym.context = accounting->context; - sym.samples = 1; + sym.value = value; + sym.coding_mode = coding_mode; sym.bits = bits; - sym.id = aom_accounting_dictionary_lookup(accounting, str); + sym.id = aom_accounting_dictionary_lookup(accounting, &acct_info); assert(sym.id <= 255); if (accounting->syms.num_syms == accounting->num_syms_allocated) { accounting->num_syms_allocated *= 2; @@ -134,8 +141,21 @@ accounting->syms.num_binary_syms); for (i = 0; i < accounting->syms.num_syms; i++) { sym = &accounting->syms.syms[i]; - printf("%s x: %d, y: %d bits: %f samples: %d\n", - accounting->syms.dictionary.strs[sym->id], sym->context.x, - sym->context.y, (float)sym->bits / 8.0, sym->samples); + printf("%s x: %d, y: %d, tree: %d, bits: %f value: %d\n", + accounting->syms.dictionary.acct_infos[sym->id].c_func, + sym->context.x, sym->context.y, sym->context.tree_type, + (double)sym->bits / (double)(1 << AOM_ACCT_BITRES), 1); } } + +AccountingSymbolInfo aom_accounting_make_info( + const char *c_func, const char *c_file, int c_line, const char *tag0, + const char *tag1, const char *tag2, const char *tag3) { + AccountingSymbolInfo info = { + .c_func = c_func, + .c_file = c_file, + .c_line = c_line, + .tags = { tag0, tag1, tag2, tag3 }, + }; + return info; +}
diff --git a/av1/decoder/accounting.h b/av1/decoder/accounting.h index e4d3037..26cc0d9 100644 --- a/av1/decoder/accounting.h +++ b/av1/decoder/accounting.h
@@ -13,6 +13,7 @@ #define AOM_AV1_DECODER_ACCOUNTING_H_ #include <stdlib.h> #include "aom/aomdx.h" +#include "av1/common/enums.h" #ifdef __cplusplus extern "C" { @@ -25,25 +26,66 @@ #define MAX_SYMBOL_TYPES (256) /*The resolution of fractional-precision bit usage measurements, i.e., - 3 => 1/8th bits.*/ -#define AOM_ACCT_BITRES (3) + 16 => 1/65536th bits.*/ +#define AOM_ACCT_BITRES (16) + +#define AOM_ACCOUNTING_MAX_TAGS (4) + +enum { + SYMBOL_BIT = 0, + SYMBOL_BIT_BYPASS = 1, + SYMBOL_LITERAL_BYPASS = 2, + SYMBOL_UNARY = 3, + SYMBOL_CDF = 4, +} UENUM1BYTE(SYMBOL_CODING_MODE); typedef struct { int16_t x; int16_t y; + TREE_TYPE tree_type; } AccountingSymbolContext; typedef struct { AccountingSymbolContext context; uint32_t id; - /** Number of bits in units of 1/8 bit. */ - uint32_t bits; - uint32_t samples; + /** Number of bits in units of 1/65536 bit. */ + uint64_t bits; + int value; + SYMBOL_CODING_MODE coding_mode; + int coding_type; } AccountingSymbol; +typedef struct { + const char *c_func; + const char *c_file; + int c_line; + const char *tags[AOM_ACCOUNTING_MAX_TAGS]; +} AccountingSymbolInfo; + +AccountingSymbolInfo aom_accounting_make_info( + const char *c_func, const char *c_file, int c_line, const char *tag0, + const char *tag1, const char *tag2, const char *tag3); + +#define ACCT_INFO0() \ + aom_accounting_make_info(__func__, __FILE__, __LINE__, NULL, NULL, NULL, NULL) +#define ACCT_INFO1(tag0) \ + aom_accounting_make_info(__func__, __FILE__, __LINE__, tag0, NULL, NULL, NULL) +#define ACCT_INFO2(tag0, tag1) \ + aom_accounting_make_info(__func__, __FILE__, __LINE__, tag0, tag1, NULL, NULL) +#define ACCT_INFO3(tag0, tag1, tag2) \ + aom_accounting_make_info(__func__, __FILE__, __LINE__, tag0, tag1, tag2, NULL) +#define ACCT_INFO4(tag0, tag1, tag2, tag3) \ + aom_accounting_make_info(__func__, __FILE__, __LINE__, tag0, tag1, tag2, tag3) + +#define GET_ACCT_INFO_MACRO(_0, _1, _2, _3, _4, NAME, ...) NAME +#define ACCT_INFO(...) 
\ + GET_ACCT_INFO_MACRO(_0 __VA_OPT__(, ) __VA_ARGS__, ACCT_INFO4, ACCT_INFO3, \ + ACCT_INFO2, ACCT_INFO1, ACCT_INFO0) \ + (__VA_ARGS__) + /** Dictionary for translating strings into id. */ typedef struct { - char *strs[MAX_SYMBOL_TYPES]; + AccountingSymbolInfo acct_infos[MAX_SYMBOL_TYPES]; int num_strs; } AccountingDictionary; @@ -70,16 +112,20 @@ int num_syms_allocated; int16_t hash_dictionary[AOM_ACCOUNTING_HASH_SIZE]; AccountingSymbolContext context; - uint32_t last_tell_frac; + uint64_t last_tell_frac; }; void aom_accounting_init(Accounting *accounting); void aom_accounting_reset(Accounting *accounting); void aom_accounting_clear(Accounting *accounting); -void aom_accounting_set_context(Accounting *accounting, int16_t x, int16_t y); -int aom_accounting_dictionary_lookup(Accounting *accounting, const char *str); -void aom_accounting_record(Accounting *accounting, const char *str, - uint32_t bits); + +void aom_accounting_set_context(Accounting *accounting, int16_t x, int16_t y, + TREE_TYPE tree_type); +int aom_accounting_dictionary_lookup(Accounting *accounting, + AccountingSymbolInfo *acct_info); +void aom_accounting_record(Accounting *accounting, int value, + SYMBOL_CODING_MODE coding_mode, + AccountingSymbolInfo acct_info, uint64_t bits); void aom_accounting_dump(Accounting *accounting); #ifdef __cplusplus } // extern "C"
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c index e59c8f5..dd52456 100644 --- a/av1/decoder/decodeframe.c +++ b/av1/decoder/decodeframe.c
@@ -72,8 +72,6 @@ #include "av1/decoder/decodetxb.h" #include "av1/decoder/detokenize.h" -#define ACCT_STR __func__ - #define AOM_MIN_THREADS_PER_TILE 1 #define AOM_MAX_THREADS_PER_TILE 2 @@ -128,7 +126,11 @@ } static AOM_INLINE void loop_restoration_read_sb_coeffs( +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + AV1_COMMON *const cm, MACROBLOCKD *xd, aom_reader *const r, int plane, +#else const AV1_COMMON *const cm, MACROBLOCKD *xd, aom_reader *const r, int plane, +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER int runit_idx); static int read_is_valid(const uint8_t *start, size_t len, const uint8_t *end) { @@ -247,6 +249,37 @@ PLANE_TYPE plane_type = get_plane_type(plane); av1_predict_intra_block_facade(cm, xd, plane, col, row, tx_size); +#if CONFIG_INSPECTION + { + const int txwpx = tx_size_wide[tx_size]; + const int txhpx = tx_size_high[tx_size]; + + struct macroblockd_plane *const pd = &xd->plane[plane]; + const int dst_stride = pd->dst.stride; + uint16_t *dst = &pd->dst.buf[(row * dst_stride + col) << MI_SIZE_LOG2]; + for (int i = 0; i < txhpx; i++) { + for (int j = 0; j < txwpx; j++) { + uint16_t pixel = dst[i * dst_stride + j]; + int stride = cm->predicted_pixels.strides[plane > 0]; + int pixel_c, pixel_r; + + if (plane) { + mi_to_pixel_loc(&pixel_c, &pixel_r, + mbmi->chroma_ref_info.mi_col_chroma_base, + mbmi->chroma_ref_info.mi_row_chroma_base, col, row, + pd->subsampling_x, pd->subsampling_y); + } else { + mi_to_pixel_loc(&pixel_c, &pixel_r, xd->mi_col, xd->mi_row, col, row, + pd->subsampling_x, pd->subsampling_y); + } + + pixel_c += j; + pixel_r += i; + cm->predicted_pixels.buffers[plane][pixel_r * stride + pixel_c] = pixel; + } + } + } +#endif // CONFIG_INSPECTION #if CONFIG_MISMATCH_DEBUG const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2); @@ -267,8 +300,12 @@ pd->subsampling_x, pd->subsampling_y); } mismatch_check_block_pre(pd->dst.buf, pd->dst.stride, - cm->current_frame.order_hint, plane, pixel_c, - pixel_r, blk_w, blk_h); +#if 
CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + cm->current_frame.display_order_hint, +#else + cm->current_frame.order_hint, +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + plane, pixel_c, pixel_r, blk_w, blk_h); } #endif // CONFIG_MISMATCH_DEBUG @@ -317,7 +354,12 @@ mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, col, row, pd->subsampling_x, pd->subsampling_y); } - mismatch_check_block_tx(dst, pd->dst.stride, cm->current_frame.order_hint, + mismatch_check_block_tx(dst, pd->dst.stride, +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + cm->current_frame.display_order_hint, +#else + cm->current_frame.order_hint, +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC plane, pixel_c, pixel_r, blk_w, blk_h); } #endif // CONFIG_MISMATCH_DEBUG @@ -325,12 +367,7 @@ if (plane == AOM_PLANE_Y && store_cfl_required(cm, xd) && xd->tree_type == SHARED_PART) { #if CONFIG_ADAPTIVE_DS_FILTER - cfl_store_tx(xd, row, col, tx_size, -#if DS_FRAME_LEVEL - cm->features.ds_filter_type); -#else - cm->seq_params.enable_cfl_ds_filter); -#endif // DS_FRAME_LEVEL + cfl_store_tx(xd, row, col, tx_size, cm->seq_params.enable_cfl_ds_filter); #else cfl_store_tx(xd, row, col, tx_size); #endif // CONFIG_ADAPTIVE_DS_FILTER @@ -394,7 +431,12 @@ mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, blk_col, blk_row, pd->subsampling_x, pd->subsampling_y); } - mismatch_check_block_tx(dst, pd->dst.stride, cm->current_frame.order_hint, + mismatch_check_block_tx(dst, pd->dst.stride, +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + cm->current_frame.display_order_hint, +#else + cm->current_frame.order_hint, +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC plane, pixel_c, pixel_r, blk_w, blk_h); #endif // CONFIG_MISMATCH_DEBUG } @@ -561,8 +603,8 @@ } CHROMA_REF_INFO *chroma_ref_info = &xd->mi[0]->chroma_ref_info; - set_chroma_ref_info(mi_row, mi_col, index, bsize, chroma_ref_info, - parent ? &parent->chroma_ref_info : NULL, + set_chroma_ref_info(xd->tree_type, mi_row, mi_col, index, bsize, + chroma_ref_info, parent ? 
&parent->chroma_ref_info : NULL, parent ? parent->bsize : BLOCK_INVALID, parent ? parent->partition : PARTITION_NONE, xd->plane[1].subsampling_x, xd->plane[1].subsampling_y); @@ -578,12 +620,14 @@ num_planes, chroma_ref_info); } +#if !CONFIG_REFINEMV typedef struct PadBlock { int x0; int x1; int y0; int y1; } PadBlock; +#endif //! CONFIG_REFINEMV static AOM_INLINE void highbd_build_mc_border(const uint16_t *src, int src_stride, uint16_t *dst, @@ -622,10 +666,12 @@ } while (--b_h); } -static INLINE int update_extend_mc_border_params( - const struct scale_factors *const sf, struct buf_2d *const pre_buf, - MV32 scaled_mv, PadBlock *block, int subpel_x_mv, int subpel_y_mv, - int do_warp, int is_intrabc, int *x_pad, int *y_pad) { +#if !CONFIG_REFINEMV +int update_extend_mc_border_params(const struct scale_factors *const sf, + struct buf_2d *const pre_buf, MV32 scaled_mv, + PadBlock *block, int subpel_x_mv, + int subpel_y_mv, int do_warp, int is_intrabc, + int *x_pad, int *y_pad) { // Get reference width and height. int frame_width = pre_buf->width; int frame_height = pre_buf->height; @@ -662,6 +708,7 @@ } return 0; } +#endif //! CONFIG_REFINEMV static INLINE void extend_mc_border(const struct scale_factors *const sf, struct buf_2d *const pre_buf, @@ -673,7 +720,13 @@ int x_pad = 0, y_pad = 0; if (update_extend_mc_border_params(sf, pre_buf, scaled_mv, &block, subpel_x_mv, subpel_y_mv, do_warp, - is_intrabc, &x_pad, &y_pad)) { + is_intrabc, &x_pad, &y_pad +#if CONFIG_REFINEMV + , + NULL +#endif // CONFIG_REFINEMV + + )) { // Get reference block pointer. 
const uint16_t *const buf_ptr = pre_buf->buf0 + block.y0 * pre_buf->stride + block.x0; @@ -690,7 +743,7 @@ x_pad * (AOM_INTERP_EXTEND - 1); } } - +#if !CONFIG_REFINEMV static void dec_calc_subpel_params( const MV *const src_mv, InterPredParams *const inter_pred_params, const MACROBLOCKD *const xd, int mi_x, int mi_y, uint16_t **pre, @@ -810,8 +863,17 @@ } *pre = pre_buf->buf0 + block->y0 * pre_buf->stride + block->x0; *src_stride = pre_buf->stride; -} +#if CONFIG_D071_IMP_MSK_BLD + if (inter_pred_params->border_data.enable_bacp) { + subpel_params->x0 = block->x0; + subpel_params->x1 = block->x1; + subpel_params->y0 = block->y0; + subpel_params->y1 = block->y1; + } +#endif // CONFIG_D071_IMP_MSK_BLD +} +#endif //! CONFIG_REFINEMV static void dec_calc_subpel_params_and_extend( const MV *const src_mv, InterPredParams *const inter_pred_params, MACROBLOCKD *const xd, int mi_x, int mi_y, int ref, @@ -820,6 +882,19 @@ #endif // CONFIG_OPTFLOW_REFINEMENT uint16_t **mc_buf, uint16_t **pre, SubpelParams *subpel_params, int *src_stride) { + +#if CONFIG_REFINEMV + if (inter_pred_params->use_ref_padding) { + common_calc_subpel_params_and_extend( + src_mv, inter_pred_params, xd, mi_x, mi_y, ref, +#if CONFIG_OPTFLOW_REFINEMENT + use_optflow_refinement, +#endif // CONFIG_OPTFLOW_REFINEMENT + mc_buf, pre, subpel_params, src_stride); + return; + } +#endif + PadBlock block; MV32 scaled_mv; int subpel_x_mv, subpel_y_mv; @@ -837,6 +912,7 @@ } #if CONFIG_TIP +#if !CONFIG_REFINEMV static AOM_INLINE void tip_dec_calc_subpel_params( const MV *const src_mv, InterPredParams *const inter_pred_params, int mi_x, int mi_y, uint16_t **pre, SubpelParams *subpel_params, int *src_stride, @@ -847,6 +923,11 @@ MV32 *scaled_mv, int *subpel_x_mv, int *subpel_y_mv) { const struct scale_factors *sf = inter_pred_params->scale_factors; struct buf_2d *pre_buf = &inter_pred_params->ref_frame_buf; + +#if CONFIG_REFINEMV + const int bw = inter_pred_params->original_pu_width; + const int bh = 
inter_pred_params->original_pu_height; +#else #if CONFIG_OPTFLOW_REFINEMENT // Use original block size to clamp MV and to extend block boundary const int bw = use_optflow_refinement ? inter_pred_params->orig_block_width @@ -857,6 +938,8 @@ const int bw = inter_pred_params->block_width; const int bh = inter_pred_params->block_height; #endif // CONFIG_OPTFLOW_REFINEMENT +#endif // CONFIG_REFINEMV + const int is_scaled = av1_is_scaled(sf); if (is_scaled) { const int ssx = inter_pred_params->subsampling_x; @@ -900,10 +983,21 @@ block->y0 = pos_y >> SCALE_SUBPEL_BITS; // Get reference block bottom right coordinate. +#if CONFIG_D071_IMP_MSK_BLD + block->x1 = + ((pos_x + (inter_pred_params->block_width - 1) * subpel_params->xs) >> + SCALE_SUBPEL_BITS) + + 1; + block->y1 = + ((pos_y + (inter_pred_params->block_height - 1) * subpel_params->ys) >> + SCALE_SUBPEL_BITS) + + 1; +#else block->x1 = ((pos_x + (bw - 1) * subpel_params->xs) >> SCALE_SUBPEL_BITS) + 1; block->y1 = ((pos_y + (bh - 1) * subpel_params->ys) >> SCALE_SUBPEL_BITS) + 1; +#endif // CONFIG_D071_IMP_MSK_BLD MV temp_mv; temp_mv = tip_clamp_mv_to_umv_border_sb(inter_pred_params, src_mv, bw, bh, @@ -942,8 +1036,13 @@ block->y0 = pos_y; // Get reference block bottom right coordinate. 
+#if CONFIG_D071_IMP_MSK_BLD + block->x1 = pos_x + inter_pred_params->block_width; + block->y1 = pos_y + inter_pred_params->block_height; +#else block->x1 = pos_x + bw; block->y1 = pos_y + bh; +#endif // CONFIG_D071_IMP_MSK_BLD scaled_mv->row = mv_q4.row; scaled_mv->col = mv_q4.col; @@ -952,8 +1051,17 @@ } *pre = pre_buf->buf0 + block->y0 * pre_buf->stride + block->x0; *src_stride = pre_buf->stride; -} +#if CONFIG_D071_IMP_MSK_BLD + if (inter_pred_params->border_data.enable_bacp) { + subpel_params->x0 = block->x0; + subpel_params->x1 = block->x1; + subpel_params->y0 = block->y0; + subpel_params->y1 = block->y1; + } +#endif // CONFIG_D071_IMP_MSK_BLD +} +#endif static void tip_dec_calc_subpel_params_and_extend( const MV *const src_mv, InterPredParams *const inter_pred_params, MACROBLOCKD *const xd, int mi_x, int mi_y, int ref, @@ -962,7 +1070,22 @@ #endif // CONFIG_OPTFLOW_REFINEMENT uint16_t **mc_buf, uint16_t **pre, SubpelParams *subpel_params, int *src_stride) { + +#if CONFIG_REFINEMV + if (inter_pred_params->use_ref_padding) { + // printf(" used pading in the decoder \n"); + tip_common_calc_subpel_params_and_extend( + src_mv, inter_pred_params, xd, mi_x, mi_y, ref, +#if CONFIG_OPTFLOW_REFINEMENT + use_optflow_refinement, +#endif // CONFIG_OPTFLOW_REFINEMENT + mc_buf, pre, subpel_params, src_stride); + return; + } +#else + (void)xd; +#endif // CONFIG_REFINEMV PadBlock block; MV32 scaled_mv; int subpel_x_mv, subpel_y_mv; @@ -1083,7 +1206,7 @@ MACROBLOCKD *const xd = &dcb->xd; #if CONFIG_ACCOUNTING - aom_accounting_set_context(&pbi->accounting, mi_col, mi_row); + aom_accounting_set_context(&pbi->accounting, mi_col, mi_row, xd->tree_type); #endif set_offsets(cm, xd, bsize, mi_row, mi_col, bw, bh, x_mis, y_mis, parent, index); @@ -1158,11 +1281,19 @@ static void dec_build_inter_predictors(const AV1_COMMON *cm, DecoderCodingBlock *dcb, int plane, MB_MODE_INFO *mi, int build_for_obmc, - int bw, int bh, int mi_x, int mi_y) { + int bw, int bh, int mi_x, int mi_y +#if 
CONFIG_REFINEMV + , + int build_for_refine_mv_only +#endif // CONFIG_REFINEMV +) { av1_build_inter_predictors(cm, &dcb->xd, plane, mi, #if CONFIG_BAWP NULL, #endif +#if CONFIG_REFINEMV + build_for_refine_mv_only, +#endif // CONFIG_REFINEMV build_for_obmc, bw, bh, mi_x, mi_y, dcb->mc_buf, dec_calc_subpel_params_and_extend); } @@ -1173,13 +1304,35 @@ BLOCK_SIZE bsize) { MACROBLOCKD *const xd = &dcb->xd; const int num_planes = av1_num_planes(cm); + +#if CONFIG_REFINEMV + MB_MODE_INFO *mbmi = xd->mi[0]; + int need_subblock_mvs = xd->is_chroma_ref && mbmi->refinemv_flag && + !is_intrabc_block(mbmi, xd->tree_type); + assert(IMPLIES(need_subblock_mvs, !is_interintra_pred(mbmi))); + if (need_subblock_mvs && default_refinemv_modes(mbmi)) + need_subblock_mvs &= (mbmi->comp_group_idx == 0 && + mbmi->interinter_comp.type == COMPOUND_AVERAGE); + if (need_subblock_mvs) { + fill_subblock_refine_mv(xd->refinemv_subinfo, xd->plane[0].width, + xd->plane[0].height, mbmi->mv[0].as_mv, + mbmi->mv[1].as_mv); + } +#endif // CONFIG_REFINEMV + for (int plane = 0; plane < num_planes; ++plane) { if (plane && !xd->is_chroma_ref) break; const int mi_x = mi_col * MI_SIZE; const int mi_y = mi_row * MI_SIZE; dec_build_inter_predictors(cm, dcb, plane, xd->mi[0], 0, xd->plane[plane].width, xd->plane[plane].height, - mi_x, mi_y); + mi_x, mi_y +#if CONFIG_REFINEMV + , + 0 +#endif // CONFIG_REFINEMV + ); + if (is_interintra_pred(xd->mi[0])) { BUFFER_SET ctx = { { xd->plane[0].dst.buf, xd->plane[1].dst.buf, xd->plane[2].dst.buf }, @@ -1217,7 +1370,12 @@ if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue; dec_build_inter_predictors(ctxt->cm, (DecoderCodingBlock *)ctxt->dcb, j, - &backup_mbmi, 1, bw, bh, mi_x, mi_y); + &backup_mbmi, 1, bw, bh, mi_x, mi_y +#if CONFIG_REFINEMV + , + 0 +#endif // CONFIG_REFINEMV + ); } } @@ -1238,9 +1396,9 @@ cm, tmp_buf, tmp_width, tmp_height, tmp_stride, xd->mb_to_right_edge, dcb }; const BLOCK_SIZE bsize = xd->mi[0]->sb_type[PLANE_TYPE_Y]; - 
foreach_overlappable_nb_above(cm, xd, - max_neighbor_obmc[mi_size_wide_log2[bsize]], - dec_build_prediction_by_above_pred, &ctxt); + foreach_overlappable_nb_above( + cm, xd, max_neighbor_obmc[mi_size_wide_log2[bsize]], + dec_build_prediction_by_above_pred, &ctxt, false); xd->mb_to_left_edge = -GET_MV_SUBPEL(xd->mi_col * MI_SIZE); xd->mb_to_right_edge = ctxt.mb_to_far_edge; @@ -1272,7 +1430,12 @@ if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue; dec_build_inter_predictors(ctxt->cm, (DecoderCodingBlock *)ctxt->dcb, j, - &backup_mbmi, 1, bw, bh, mi_x, mi_y); + &backup_mbmi, 1, bw, bh, mi_x, mi_y +#if CONFIG_REFINEMV + , + 0 +#endif // CONFIG_REFINEMV + ); } } @@ -1335,12 +1498,7 @@ if (store_cfl_required(cm, xd) && xd->tree_type == SHARED_PART) { #if CONFIG_ADAPTIVE_DS_FILTER cfl_store_block(xd, mbmi->sb_type[PLANE_TYPE_Y], mbmi->tx_size, -#if DS_FRAME_LEVEL - cm->features.ds_filter_type -#else - cm->seq_params.enable_cfl_ds_filter -#endif - ); + cm->seq_params.enable_cfl_ds_filter); #else cfl_store_block(xd, mbmi->sb_type[PLANE_TYPE_Y], mbmi->tx_size); #endif // CONFIG_ADAPTIVE_DS_FILTER @@ -1394,10 +1552,44 @@ pd->subsampling_x, pd->subsampling_y); } mismatch_check_block_pre(pd->dst.buf, pd->dst.stride, - cm->current_frame.order_hint, plane, pixel_c, - pixel_r, pd->width, pd->height); +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + cm->current_frame.display_order_hint, +#else + cm->current_frame.order_hint, +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + plane, pixel_c, pixel_r, pd->width, pd->height); } #endif // CONFIG_MISMATCH_DEBUG + +#if CONFIG_INSPECTION + for (int plane = 0; plane < num_planes; plane++) { + struct macroblockd_plane *const pd = &xd->plane[plane]; + const int dst_stride = pd->dst.stride; + const int plane_block_size = + get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y); + const int plane_width = mi_size_wide[plane_block_size]; + const int plane_height = mi_size_high[plane_block_size]; + for (int i = 0; i < plane_height * 
MI_SIZE; i++) { + for (int j = 0; j < plane_width * MI_SIZE; j++) { + uint16_t pixel = pd->dst.buf[i * dst_stride + j]; + int stride = cm->predicted_pixels.strides[plane > 0]; + int pixel_c, pixel_r; + if (plane) { + mi_to_pixel_loc(&pixel_c, &pixel_r, + mbmi->chroma_ref_info.mi_col_chroma_base, + mbmi->chroma_ref_info.mi_row_chroma_base, 0, 0, + pd->subsampling_x, pd->subsampling_y); + } else { + mi_to_pixel_loc(&pixel_c, &pixel_r, xd->mi_col, xd->mi_row, 0, 0, + pd->subsampling_x, pd->subsampling_y); + } + pixel_c += j; + pixel_r += i; + cm->predicted_pixels.buffers[plane][pixel_r * stride + pixel_c] = pixel; + } + } + } +#endif // CONFIG_INSPECTION } static AOM_INLINE void set_color_index_map_offset(MACROBLOCKD *const xd, @@ -1672,7 +1864,7 @@ is_inter ? ec_ctx->inter_4way_txfm_partition_cdf[is_rect][split4_ctx] : ec_ctx->intra_4way_txfm_partition_cdf[is_rect][split4_ctx]; const TX_PARTITION_TYPE split4_partition = - aom_read_symbol(r, split4_cdf, 4, ACCT_STR); + aom_read_symbol(r, split4_cdf, 4, ACCT_INFO("split4_partition")); partition = split4_partition; /* If only one split type (horizontal or vertical) is allowed for this block, @@ -1684,7 +1876,8 @@ // Read bit to indicate if there is any split at all aom_cdf_prob *split2_cdf = is_inter ? ec_ctx->inter_2way_txfm_partition_cdf : ec_ctx->intra_2way_txfm_partition_cdf; - const int has_first_split = aom_read_symbol(r, split2_cdf, 2, ACCT_STR); + const int has_first_split = + aom_read_symbol(r, split2_cdf, 2, ACCT_INFO("has_first_split")); partition = has_first_split ? (allow_horz ? 
TX_PARTITION_HORZ : TX_PARTITION_VERT) : TX_PARTITION_NONE; @@ -1751,7 +1944,8 @@ const int ctx = txfm_partition_context(xd->above_txfm_context + blk_col, xd->left_txfm_context + blk_row, mbmi->sb_type[plane_type], tx_size); - is_split = aom_read_symbol(r, ec_ctx->txfm_partition_cdf[ctx], 2, ACCT_STR); + is_split = aom_read_symbol(r, ec_ctx->txfm_partition_cdf[ctx], 2, + ACCT_INFO("is_split")); if (is_split) { const TX_SIZE sub_txs = sub_tx_size_map[tx_size]; @@ -1817,7 +2011,7 @@ const int ctx = get_tx_size_context(xd); FRAME_CONTEXT *ec_ctx = xd->tile_ctx; const int depth = aom_read_symbol(r, ec_ctx->tx_size_cdf[tx_size_cat][ctx], - max_depths + 1, ACCT_STR); + max_depths + 1, ACCT_INFO("depth")); assert(depth >= 0 && depth <= max_depths); const TX_SIZE tx_size = depth_to_tx_size(depth, bsize); return tx_size; @@ -2023,8 +2217,8 @@ #endif // CONFIG_CROSS_CHROMA_TX CHROMA_REF_INFO *chroma_ref_info = &xd->mi[0]->chroma_ref_info; - set_chroma_ref_info(mi_row, mi_col, index, bsize, chroma_ref_info, - parent ? &parent->chroma_ref_info : NULL, + set_chroma_ref_info(xd->tree_type, mi_row, mi_col, index, bsize, + chroma_ref_info, parent ? &parent->chroma_ref_info : NULL, parent ? parent->bsize : BLOCK_INVALID, parent ? parent->partition : PARTITION_NONE, xd->plane[1].subsampling_x, xd->plane[1].subsampling_y); @@ -2049,6 +2243,46 @@ } #if CONFIG_EXT_RECUR_PARTITIONS +#if CONFIG_UNEVEN_4WAY +/*!\brief Maps (ext_part, 4way, 4way_type, rect_type) to partition_type. 
*/ +static PARTITION_TYPE + rect_part_table[2][2][NUM_UNEVEN_4WAY_PARTS][NUM_RECT_PARTS] = { + { + // !do_ext_partition + { + // !do_4way + { // UNEVEN_4A + PARTITION_HORZ, PARTITION_VERT }, + { // UNEVEN_4B + PARTITION_HORZ, PARTITION_VERT }, + }, + { + // do_4way + { // UNEVEN_4A + PARTITION_HORZ, PARTITION_VERT }, + { // UNEVEN_4B + PARTITION_HORZ, PARTITION_VERT }, + }, + }, + { + // do_ext_partition + { + // !do_4way + { // UNEVEN_4A + PARTITION_HORZ_3, PARTITION_VERT_3 }, + { // UNEVEN_4B + PARTITION_HORZ_3, PARTITION_VERT_3 }, + }, + { + // do_4way + { // UNEVEN_4A + PARTITION_HORZ_4A, PARTITION_VERT_4A }, + { // UNEVEN_4B + PARTITION_HORZ_4B, PARTITION_VERT_4B }, + }, + }, + }; +#else /*!\brief Maps (ext_part, rect_type) to partition_type. */ static PARTITION_TYPE rect_part_table[2][NUM_RECT_PARTS] = { // !do_ext_partition @@ -2056,6 +2290,7 @@ // do_ext_partition { PARTITION_HORZ_3, PARTITION_VERT_3 }, }; +#endif // CONFIG_UNEVEN_4WAY #endif // CONFIG_EXT_RECUR_PARTITIONS static PARTITION_TYPE read_partition(const AV1_COMMON *const cm, @@ -2066,11 +2301,8 @@ const PARTITION_TREE *ptree_luma, #endif // CONFIG_EXT_RECUR_PARTITIONS BLOCK_SIZE bsize) { -#if CONFIG_EXT_RECUR_PARTITIONS - if (!is_partition_point(bsize)) return PARTITION_NONE; -#endif // CONFIG_EXT_RECUR_PARTITIONS - const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize); + assert(ctx >= 0); FRAME_CONTEXT *ec_ctx = xd->tile_ctx; #if CONFIG_EXT_RECUR_PARTITIONS @@ -2079,28 +2311,24 @@ const int plane = xd->tree_type == CHROMA_PART; const int ssx = cm->seq_params.subsampling_x; const int ssy = cm->seq_params.subsampling_y; - if (plane == 1 && bsize == BLOCK_8X8) { - return PARTITION_NONE; - } - if (is_luma_chroma_share_same_partition(xd->tree_type, ptree_luma, bsize)) { - return sdp_chroma_part_from_luma(bsize, ptree_luma->partition, ssx, ssy); + const PARTITION_TYPE derived_partition = + av1_get_normative_forced_partition_type( + &cm->mi_params, xd->tree_type, ssx, ssy, mi_row, mi_col, 
bsize, + ptree_luma, &ptree->chroma_ref_info); + if (derived_partition != PARTITION_INVALID) { + return derived_partition; } - PARTITION_TYPE implied_partition; - const bool is_part_implied = is_partition_implied_at_boundary( - &cm->mi_params, xd->tree_type, ssx, ssy, mi_row, mi_col, bsize, - &ptree->chroma_ref_info, &implied_partition); - if (is_part_implied) return implied_partition; - - const bool do_split = - aom_read_symbol(r, ec_ctx->do_split_cdf[plane][ctx], 2, ACCT_STR); + const bool do_split = aom_read_symbol(r, ec_ctx->do_split_cdf[plane][ctx], 2, + ACCT_INFO("do_split")); if (!do_split) { return PARTITION_NONE; } const int square_split_ctx = square_split_context(xd, mi_row, mi_col, bsize); if (is_square_split_eligible(bsize, cm->sb_size)) { - const bool do_square_split = aom_read_symbol( - r, ec_ctx->do_square_split_cdf[plane][square_split_ctx], 2, ACCT_STR); + const bool do_square_split = + aom_read_symbol(r, ec_ctx->do_square_split_cdf[plane][square_split_ctx], + 2, ACCT_INFO("do_square_split")); if (do_square_split) { return PARTITION_SPLIT; } @@ -2108,20 +2336,46 @@ RECT_PART_TYPE rect_type = rect_type_implied_by_bsize(bsize, xd->tree_type); if (rect_type == RECT_INVALID) { - rect_type = - aom_read_symbol(r, ec_ctx->rect_type_cdf[plane][ctx], 2, ACCT_STR); + rect_type = aom_read_symbol(r, ec_ctx->rect_type_cdf[plane][ctx], + NUM_RECT_PARTS, ACCT_INFO("rect_type")); } - const bool disable_ext_part = !cm->seq_params.enable_ext_partitions; - const bool ext_partition_allowed = - !disable_ext_part && - is_ext_partition_allowed(bsize, rect_type, xd->tree_type); bool do_ext_partition = false; +#if CONFIG_UNEVEN_4WAY + bool do_uneven_4way_partition = false; + UNEVEN_4WAY_PART_TYPE uneven_4way_partition_type = UNEVEN_4A; +#endif // CONFIG_UNEVEN_4WAY + + const bool ext_partition_allowed = + cm->seq_params.enable_ext_partitions && + is_ext_partition_allowed(bsize, rect_type, xd->tree_type); if (ext_partition_allowed) { - do_ext_partition = aom_read_symbol( - 
r, ec_ctx->do_ext_partition_cdf[plane][rect_type][ctx], 2, ACCT_STR); + do_ext_partition = + aom_read_symbol(r, ec_ctx->do_ext_partition_cdf[plane][rect_type][ctx], + 2, ACCT_INFO("do_ext_partition")); +#if CONFIG_UNEVEN_4WAY + if (do_ext_partition) { + const bool uneven_4way_partition_allowed = + is_uneven_4way_partition_allowed(bsize, rect_type, xd->tree_type); + if (uneven_4way_partition_allowed) { + do_uneven_4way_partition = aom_read_symbol( + r, ec_ctx->do_uneven_4way_partition_cdf[plane][rect_type][ctx], 2, + ACCT_INFO("do_uneven_4way_partition")); + if (do_uneven_4way_partition) { + uneven_4way_partition_type = aom_read_symbol( + r, ec_ctx->uneven_4way_partition_type_cdf[plane][rect_type][ctx], + NUM_UNEVEN_4WAY_PARTS, ACCT_INFO("uneven_4way_partition_type")); + } + } + } +#endif // CONFIG_UNEVEN_4WAY } +#if CONFIG_UNEVEN_4WAY + return rect_part_table[do_ext_partition][do_uneven_4way_partition] + [uneven_4way_partition_type][rect_type]; +#else return rect_part_table[do_ext_partition][rect_type]; +#endif // CONFIG_UNEVEN_4WAY #else // !CONFIG_EXT_RECUR_PARTITIONS if (!has_rows && !has_cols) return PARTITION_SPLIT; @@ -2141,20 +2395,22 @@ aom_cdf_prob *partition_cdf = ec_ctx->partition_cdf[plane][ctx]; if (has_rows && has_cols) { return (PARTITION_TYPE)aom_read_symbol( - r, partition_cdf, partition_cdf_length(bsize), ACCT_STR); + r, partition_cdf, partition_cdf_length(bsize), ACCT_INFO()); } else if (!has_rows && has_cols) { assert(bsize > BLOCK_8X8); aom_cdf_prob cdf[2]; partition_gather_vert_alike(cdf, partition_cdf, bsize); assert(cdf[1] == AOM_ICDF(CDF_PROB_TOP)); - return aom_read_cdf(r, cdf, 2, ACCT_STR) ? PARTITION_SPLIT : PARTITION_HORZ; + return aom_read_cdf(r, cdf, 2, ACCT_INFO()) ? PARTITION_SPLIT + : PARTITION_HORZ; } else { assert(has_rows && !has_cols); assert(bsize > BLOCK_8X8); aom_cdf_prob cdf[2]; partition_gather_horz_alike(cdf, partition_cdf, bsize); assert(cdf[1] == AOM_ICDF(CDF_PROB_TOP)); - return aom_read_cdf(r, cdf, 2, ACCT_STR) ? 
PARTITION_SPLIT : PARTITION_VERT; + return aom_read_cdf(r, cdf, 2, ACCT_INFO()) ? PARTITION_SPLIT + : PARTITION_VERT; } #endif // CONFIG_EXT_RECUR_PARTITIONS } @@ -2185,12 +2441,19 @@ MACROBLOCKD *const xd = &dcb->xd; const int ss_x = xd->plane[1].subsampling_x; const int ss_y = xd->plane[1].subsampling_y; + // Half block width/height. const int hbs_w = mi_size_wide[bsize] / 2; const int hbs_h = mi_size_high[bsize] / 2; -#if !CONFIG_H_PARTITION +#if CONFIG_UNEVEN_4WAY + // One-eighth block width/height. + const int ebs_w = mi_size_wide[bsize] / 8; + const int ebs_h = mi_size_high[bsize] / 8; +#endif // CONFIG_UNEVEN_4WAY +#if !CONFIG_EXT_RECUR_PARTITIONS + // Quarter block width/height. const int qbs_w = mi_size_wide[bsize] / 4; const int qbs_h = mi_size_high[bsize] / 4; -#endif // !CONFIG_H_PARTITION +#endif // !CONFIG_EXT_RECUR_PARTITIONS PARTITION_TYPE partition; const int has_rows = (mi_row + hbs_h) < cm->mi_params.mi_rows; const int has_cols = (mi_col + hbs_w) < cm->mi_params.mi_cols; @@ -2220,7 +2483,11 @@ get_partition_plane_end(xd->tree_type, av1_num_planes(cm)); for (int plane = plane_start; plane < plane_end; ++plane) { int rcol0, rcol1, rrow0, rrow1; - if (cm->rst_info[plane].frame_restoration_type != RESTORE_NONE && + if ((cm->rst_info[plane].frame_restoration_type != RESTORE_NONE +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + || cm->rst_info[plane].frame_cross_restoration_type != RESTORE_NONE +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + ) && av1_loop_restoration_corners_in_sb(cm, plane, mi_row, mi_col, bsize, &rcol0, &rcol1, &rrow0, &rrow1)) { const int rstride = cm->rst_info[plane].horz_units_per_tile; @@ -2239,8 +2506,8 @@ ptree->is_settled = 1; PARTITION_TREE *parent = ptree->parent; set_chroma_ref_info( - mi_row, mi_col, ptree->index, bsize, &ptree->chroma_ref_info, - parent ? &parent->chroma_ref_info : NULL, + xd->tree_type, mi_row, mi_col, ptree->index, bsize, + &ptree->chroma_ref_info, parent ? &parent->chroma_ref_info : NULL, parent ? 
parent->bsize : BLOCK_INVALID, parent ? parent->partition : PARTITION_NONE, ss_x, ss_y); @@ -2263,6 +2530,12 @@ ptree->partition = partition; switch (partition) { +#if CONFIG_UNEVEN_4WAY + case PARTITION_HORZ_4A: + case PARTITION_HORZ_4B: + case PARTITION_VERT_4A: + case PARTITION_VERT_4B: +#endif // CONFIG_UNEVEN_4WAY case PARTITION_SPLIT: ptree->sub_tree[0] = av1_alloc_ptree_node(ptree, 0); ptree->sub_tree[1] = av1_alloc_ptree_node(ptree, 1); @@ -2280,9 +2553,7 @@ ptree->sub_tree[0] = av1_alloc_ptree_node(ptree, 0); ptree->sub_tree[1] = av1_alloc_ptree_node(ptree, 1); ptree->sub_tree[2] = av1_alloc_ptree_node(ptree, 2); -#if CONFIG_H_PARTITION ptree->sub_tree[3] = av1_alloc_ptree_node(ptree, 3); -#endif // CONFIG_H_PARTITION break; #endif // CONFIG_EXT_RECUR_PARTITIONS default: break; @@ -2310,7 +2581,8 @@ const int index = (partition == PARTITION_HORZ || partition == PARTITION_VERT) + (partition == PARTITION_HORZ_3 || partition == PARTITION_VERT_3); - set_chroma_ref_info(mi_row, mi_col, index, bsize, &chroma_ref_info, + set_chroma_ref_info(xd->tree_type, mi_row, mi_col, index, bsize, + &chroma_ref_info, parent ? &parent->chroma_ref_info : NULL, parent ? parent->bsize : BLOCK_INVALID, parent ? 
parent->partition : PARTITION_NONE, @@ -2391,7 +2663,81 @@ #endif // CONFIG_EXT_RECUR_PARTITIONS break; #if CONFIG_EXT_RECUR_PARTITIONS -#if CONFIG_H_PARTITION + +#if CONFIG_UNEVEN_4WAY + case PARTITION_HORZ_4A: { + const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_HORZ); + const BLOCK_SIZE bsize_med = + get_partition_subsize(bsize_big, PARTITION_HORZ); + assert(subsize == get_partition_subsize(bsize_med, PARTITION_HORZ)); + int this_mi_row = mi_row; + DEC_PARTITION(this_mi_row, mi_col, subsize, 0); + this_mi_row += ebs_h; + if (this_mi_row >= cm->mi_params.mi_rows) break; + DEC_PARTITION(this_mi_row, mi_col, bsize_med, 1); + this_mi_row += 2 * ebs_h; + if (this_mi_row >= cm->mi_params.mi_rows) break; + DEC_PARTITION(this_mi_row, mi_col, bsize_big, 2); + this_mi_row += 4 * ebs_h; + if (this_mi_row >= cm->mi_params.mi_rows) break; + DEC_PARTITION(this_mi_row, mi_col, subsize, 3); + break; + } + case PARTITION_HORZ_4B: { + const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_HORZ); + const BLOCK_SIZE bsize_med = + get_partition_subsize(bsize_big, PARTITION_HORZ); + assert(subsize == get_partition_subsize(bsize_med, PARTITION_HORZ)); + int this_mi_row = mi_row; + DEC_PARTITION(this_mi_row, mi_col, subsize, 0); + this_mi_row += ebs_h; + if (this_mi_row >= cm->mi_params.mi_rows) break; + DEC_PARTITION(this_mi_row, mi_col, bsize_big, 1); + this_mi_row += 4 * ebs_h; + if (this_mi_row >= cm->mi_params.mi_rows) break; + DEC_PARTITION(this_mi_row, mi_col, bsize_med, 2); + this_mi_row += 2 * ebs_h; + if (this_mi_row >= cm->mi_params.mi_rows) break; + DEC_PARTITION(this_mi_row, mi_col, subsize, 3); + break; + } + case PARTITION_VERT_4A: { + const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_VERT); + const BLOCK_SIZE bsize_med = + get_partition_subsize(bsize_big, PARTITION_VERT); + assert(subsize == get_partition_subsize(bsize_med, PARTITION_VERT)); + int this_mi_col = mi_col; + DEC_PARTITION(mi_row, this_mi_col, subsize, 0); + 
this_mi_col += ebs_w; + if (this_mi_col >= cm->mi_params.mi_cols) break; + DEC_PARTITION(mi_row, this_mi_col, bsize_med, 1); + this_mi_col += 2 * ebs_w; + if (this_mi_col >= cm->mi_params.mi_cols) break; + DEC_PARTITION(mi_row, this_mi_col, bsize_big, 2); + this_mi_col += 4 * ebs_w; + if (this_mi_col >= cm->mi_params.mi_cols) break; + DEC_PARTITION(mi_row, this_mi_col, subsize, 3); + break; + } + case PARTITION_VERT_4B: { + const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_VERT); + const BLOCK_SIZE bsize_med = + get_partition_subsize(bsize_big, PARTITION_VERT); + assert(subsize == get_partition_subsize(bsize_med, PARTITION_VERT)); + int this_mi_col = mi_col; + DEC_PARTITION(mi_row, this_mi_col, subsize, 0); + this_mi_col += ebs_w; + if (this_mi_col >= cm->mi_params.mi_cols) break; + DEC_PARTITION(mi_row, this_mi_col, bsize_big, 1); + this_mi_col += 4 * ebs_w; + if (this_mi_col >= cm->mi_params.mi_cols) break; + DEC_PARTITION(mi_row, this_mi_col, bsize_med, 2); + this_mi_col += 2 * ebs_w; + if (this_mi_col >= cm->mi_params.mi_cols) break; + DEC_PARTITION(mi_row, this_mi_col, subsize, 3); + break; + } +#endif // CONFIG_UNEVEN_4WAY case PARTITION_HORZ_3: case PARTITION_VERT_3: { for (int i = 0; i < 4; ++i) { @@ -2414,39 +2760,13 @@ } break; } -#else - case PARTITION_HORZ_3: { - const BLOCK_SIZE bsize3 = get_partition_subsize(bsize, PARTITION_HORZ); - int this_mi_row = mi_row; - DEC_PARTITION(this_mi_row, mi_col, subsize, 0); - this_mi_row += qbs_h; - if (this_mi_row >= cm->mi_params.mi_rows) break; - DEC_PARTITION(this_mi_row, mi_col, bsize3, 1); - this_mi_row += 2 * qbs_h; - if (this_mi_row >= cm->mi_params.mi_rows) break; - DEC_PARTITION(this_mi_row, mi_col, subsize, 2); - break; - } - case PARTITION_VERT_3: { - const BLOCK_SIZE bsize3 = get_partition_subsize(bsize, PARTITION_VERT); - int this_mi_col = mi_col; - DEC_PARTITION(mi_row, this_mi_col, subsize, 0); - this_mi_col += qbs_w; - if (this_mi_col >= cm->mi_params.mi_cols) break; - 
DEC_PARTITION(mi_row, this_mi_col, bsize3, 1); - this_mi_col += 2 * qbs_w; - if (this_mi_col >= cm->mi_params.mi_cols) break; - DEC_PARTITION(mi_row, this_mi_col, subsize, 2); - break; - } -#endif // CONFIG_H_PARTITION case PARTITION_SPLIT: DEC_PARTITION(mi_row, mi_col, subsize, 0); DEC_PARTITION(mi_row, mi_col + hbs_w, subsize, 1); DEC_PARTITION(mi_row + hbs_h, mi_col, subsize, 2); DEC_PARTITION(mi_row + hbs_h, mi_col + hbs_w, subsize, 3); break; -#else +#else // !CONFIG_EXT_RECUR_PARTITIONS case PARTITION_SPLIT: DEC_PARTITION(mi_row, mi_col, subsize, 0); DEC_PARTITION(mi_row, mi_col + hbs_w, subsize, 1); @@ -2561,6 +2881,11 @@ parse_decode_flag); xd->tree_type = SHARED_PART; } +#if CONFIG_INSPECTION + if (pbi->inspect_sb_cb != NULL) { + (*pbi->inspect_sb_cb)(pbi, pbi->inspect_ctx); + } +#endif // CONFIG_INSPECTION } static AOM_INLINE void setup_segmentation(AV1_COMMON *const cm, @@ -2652,7 +2977,7 @@ #if CONFIG_LR_FLEX_SYNTAX // Converts decoded index to frame restoration type depending on lr tools -// thta are enabled for the frame for a given plane. +// that are enabled for the frame for a given plane. 
static RestorationType index_to_frame_restoration_type( const AV1_COMMON *const cm, int plane, int ndx) { RestorationType r = RESTORE_NONE; @@ -2671,8 +2996,19 @@ struct aom_read_bit_buffer *rb) { assert(!cm->features.all_lossless); const int num_planes = av1_num_planes(cm); +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + for (int p = 0; p < num_planes; ++p) { + RestorationInfo *rsi = &cm->rst_info[p]; + rsi->frame_restoration_type = RESTORE_NONE; + rsi->frame_cross_restoration_type = RESTORE_NONE; + } +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER if (is_global_intrabc_allowed(cm)) return; +#if CONFIG_FLEXIBLE_RU_SIZE + int luma_none = 1, chroma_none = 1; +#else int all_none = 1, chroma_none = 1; +#endif // CONFIG_FLEXIBLE_RU_SIZE for (int p = 0; p < num_planes; ++p) { RestorationInfo *rsi = &cm->rst_info[p]; #if CONFIG_LR_FLEX_SYNTAX @@ -2690,9 +3026,16 @@ if (rsi->frame_restoration_type == RESTORE_SWITCHABLE && cm->features.lr_tools_count[p] > 2) { if (aom_rb_read_bit(rb)) { + int tools_count = cm->features.lr_tools_count[p]; for (int i = 1; i < RESTORE_SWITCHABLE_TYPES; ++i) { - if (!(plane_lr_tools_disable_mask & (1 << i))) - plane_lr_tools_disable_mask |= (aom_rb_read_bit(rb) << i); + if (!(plane_lr_tools_disable_mask & (1 << i))) { + const int disable_tool = aom_rb_read_bit(rb); + plane_lr_tools_disable_mask |= (disable_tool << i); + tools_count -= disable_tool; + // if tools_count becomes 2 break from the loop since we + // do not allow any other tool to be disabled. 
+ if (tools_count == 2) break; + } } av1_set_lr_tools(plane_lr_tools_disable_mask, p, &cm->features); } @@ -2728,8 +3071,23 @@ } } #endif // CONFIG_LR_FLEX_SYNTAX + +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + if (p > 0) { + if (aom_rb_read_bit(rb)) { + rsi->frame_cross_restoration_type = RESTORE_WIENER_NONSEP; + } + } + if (rsi->frame_restoration_type != RESTORE_NONE || + rsi->frame_cross_restoration_type != RESTORE_NONE) { +#else if (rsi->frame_restoration_type != RESTORE_NONE) { +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER +#if CONFIG_FLEXIBLE_RU_SIZE + luma_none &= p > 0; +#else all_none = 0; +#endif // CONFIG_FLEXIBLE_RU_SIZE chroma_none &= p == 0; } #if CONFIG_WIENER_NONSEP @@ -2742,6 +3100,44 @@ : NUM_WIENERNS_CLASS_INIT_CHROMA; #endif // CONFIG_WIENER_NONSEP } +#if CONFIG_FLEXIBLE_RU_SIZE + const int frame_width = cm->superres_upscaled_width; + const int frame_height = cm->superres_upscaled_height; + set_restoration_unit_size(frame_width, frame_height, + cm->seq_params.subsampling_x, + cm->seq_params.subsampling_y, cm->rst_info); + int size = cm->rst_info[0].max_restoration_unit_size; + + cm->rst_info[0].restoration_unit_size = + cm->rst_info[0].max_restoration_unit_size; + if (!luma_none) { + if (aom_rb_read_bit(rb)) + cm->rst_info[0].restoration_unit_size = size >> 1; + else { + if (aom_rb_read_bit(rb)) + cm->rst_info[0].restoration_unit_size = size; + else + cm->rst_info[0].restoration_unit_size = size >> 2; + } + } + if (num_planes > 1) { + cm->rst_info[1].restoration_unit_size = + cm->rst_info[1].max_restoration_unit_size; + if (!chroma_none) { + size = cm->rst_info[1].max_restoration_unit_size; + if (aom_rb_read_bit(rb)) + cm->rst_info[1].restoration_unit_size = size >> 1; + else { + if (aom_rb_read_bit(rb)) + cm->rst_info[1].restoration_unit_size = size; + else + cm->rst_info[1].restoration_unit_size = size >> 2; + } + } + cm->rst_info[2].restoration_unit_size = + cm->rst_info[1].restoration_unit_size; + } +#else if (!all_none) { #if 
CONFIG_BLOCK_256 assert(cm->sb_size == BLOCK_64X64 || cm->sb_size == BLOCK_128X128 || @@ -2796,6 +3192,7 @@ cm->rst_info[2].restoration_unit_size = cm->rst_info[1].restoration_unit_size; } +#endif // CONFIG_FLEXIBLE_RU_SIZE } static AOM_INLINE void read_wiener_filter(MACROBLOCKD *xd, int wiener_win, @@ -2803,11 +3200,11 @@ WienerInfoBank *bank, aom_reader *rb) { #if CONFIG_LR_MERGE_COEFFS - const int exact_match = - aom_read_symbol(rb, xd->tile_ctx->merged_param_cdf, 2, ACCT_STR); + const int exact_match = aom_read_symbol(rb, xd->tile_ctx->merged_param_cdf, 2, + ACCT_INFO("exact_match")); int k; for (k = 0; k < bank->bank_size - 1; ++k) { - if (aom_read_literal(rb, 1, ACCT_STR)) break; + if (aom_read_literal(rb, 1, ACCT_INFO("bank_size"))) break; } const int ref = k; if (exact_match) { @@ -2830,7 +3227,8 @@ aom_read_primitive_refsubexpfin( rb, WIENER_FILT_TAP0_MAXV - WIENER_FILT_TAP0_MINV + 1, WIENER_FILT_TAP0_SUBEXP_K, - ref_wiener_info->vfilter[0] - WIENER_FILT_TAP0_MINV, ACCT_STR) + + ref_wiener_info->vfilter[0] - WIENER_FILT_TAP0_MINV, + ACCT_INFO("vfilter[0]")) + WIENER_FILT_TAP0_MINV; else wiener_info->vfilter[0] = wiener_info->vfilter[WIENER_WIN - 1] = 0; @@ -2838,13 +3236,15 @@ aom_read_primitive_refsubexpfin( rb, WIENER_FILT_TAP1_MAXV - WIENER_FILT_TAP1_MINV + 1, WIENER_FILT_TAP1_SUBEXP_K, - ref_wiener_info->vfilter[1] - WIENER_FILT_TAP1_MINV, ACCT_STR) + + ref_wiener_info->vfilter[1] - WIENER_FILT_TAP1_MINV, + ACCT_INFO("vfilter[1]")) + WIENER_FILT_TAP1_MINV; wiener_info->vfilter[2] = wiener_info->vfilter[WIENER_WIN - 3] = aom_read_primitive_refsubexpfin( rb, WIENER_FILT_TAP2_MAXV - WIENER_FILT_TAP2_MINV + 1, WIENER_FILT_TAP2_SUBEXP_K, - ref_wiener_info->vfilter[2] - WIENER_FILT_TAP2_MINV, ACCT_STR) + + ref_wiener_info->vfilter[2] - WIENER_FILT_TAP2_MINV, + ACCT_INFO("vfilter[2]")) + WIENER_FILT_TAP2_MINV; // The central element has an implicit +WIENER_FILT_STEP wiener_info->vfilter[WIENER_HALFWIN] = @@ -2856,7 +3256,8 @@ aom_read_primitive_refsubexpfin( 
rb, WIENER_FILT_TAP0_MAXV - WIENER_FILT_TAP0_MINV + 1, WIENER_FILT_TAP0_SUBEXP_K, - ref_wiener_info->hfilter[0] - WIENER_FILT_TAP0_MINV, ACCT_STR) + + ref_wiener_info->hfilter[0] - WIENER_FILT_TAP0_MINV, + ACCT_INFO("hfilter[0]")) + WIENER_FILT_TAP0_MINV; else wiener_info->hfilter[0] = wiener_info->hfilter[WIENER_WIN - 1] = 0; @@ -2864,13 +3265,15 @@ aom_read_primitive_refsubexpfin( rb, WIENER_FILT_TAP1_MAXV - WIENER_FILT_TAP1_MINV + 1, WIENER_FILT_TAP1_SUBEXP_K, - ref_wiener_info->hfilter[1] - WIENER_FILT_TAP1_MINV, ACCT_STR) + + ref_wiener_info->hfilter[1] - WIENER_FILT_TAP1_MINV, + ACCT_INFO("hfilter[1]")) + WIENER_FILT_TAP1_MINV; wiener_info->hfilter[2] = wiener_info->hfilter[WIENER_WIN - 3] = aom_read_primitive_refsubexpfin( rb, WIENER_FILT_TAP2_MAXV - WIENER_FILT_TAP2_MINV + 1, WIENER_FILT_TAP2_SUBEXP_K, - ref_wiener_info->hfilter[2] - WIENER_FILT_TAP2_MINV, ACCT_STR) + + ref_wiener_info->hfilter[2] - WIENER_FILT_TAP2_MINV, + ACCT_INFO("hfilter[2]")) + WIENER_FILT_TAP2_MINV; // The central element has an implicit +WIENER_FILT_STEP wiener_info->hfilter[WIENER_HALFWIN] = @@ -2884,11 +3287,11 @@ SgrprojInfoBank *bank, aom_reader *rb) { #if CONFIG_LR_MERGE_COEFFS - const int exact_match = - aom_read_symbol(rb, xd->tile_ctx->merged_param_cdf, 2, ACCT_STR); + const int exact_match = aom_read_symbol(rb, xd->tile_ctx->merged_param_cdf, 2, + ACCT_INFO("exact_match")); int k; for (k = 0; k < bank->bank_size - 1; ++k) { - if (aom_read_literal(rb, 1, ACCT_STR)) break; + if (aom_read_literal(rb, 1, ACCT_INFO("bank"))) break; } const int ref = k; if (exact_match) { @@ -2904,7 +3307,7 @@ #endif // CONFIG_LR_MERGE_COEFFS SgrprojInfo *ref_sgrproj_info = av1_ref_from_sgrproj_bank(bank, ref); - sgrproj_info->ep = aom_read_literal(rb, SGRPROJ_PARAMS_BITS, ACCT_STR); + sgrproj_info->ep = aom_read_literal(rb, SGRPROJ_PARAMS_BITS, ACCT_INFO("ep")); const sgr_params_type *params = &av1_sgr_params[sgrproj_info->ep]; if (params->r[0] == 0) { @@ -2912,13 +3315,13 @@ 
sgrproj_info->xqd[1] = aom_read_primitive_refsubexpfin( rb, SGRPROJ_PRJ_MAX1 - SGRPROJ_PRJ_MIN1 + 1, SGRPROJ_PRJ_SUBEXP_K, - ref_sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1, ACCT_STR) + + ref_sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1, ACCT_INFO()) + SGRPROJ_PRJ_MIN1; } else if (params->r[1] == 0) { sgrproj_info->xqd[0] = aom_read_primitive_refsubexpfin( rb, SGRPROJ_PRJ_MAX0 - SGRPROJ_PRJ_MIN0 + 1, SGRPROJ_PRJ_SUBEXP_K, - ref_sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0, ACCT_STR) + + ref_sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0, ACCT_INFO()) + SGRPROJ_PRJ_MIN0; sgrproj_info->xqd[1] = clamp((1 << SGRPROJ_PRJ_BITS) - sgrproj_info->xqd[0], SGRPROJ_PRJ_MIN1, SGRPROJ_PRJ_MAX1); @@ -2926,12 +3329,12 @@ sgrproj_info->xqd[0] = aom_read_primitive_refsubexpfin( rb, SGRPROJ_PRJ_MAX0 - SGRPROJ_PRJ_MIN0 + 1, SGRPROJ_PRJ_SUBEXP_K, - ref_sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0, ACCT_STR) + + ref_sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0, ACCT_INFO()) + SGRPROJ_PRJ_MIN0; sgrproj_info->xqd[1] = aom_read_primitive_refsubexpfin( rb, SGRPROJ_PRJ_MAX1 - SGRPROJ_PRJ_MIN1 + 1, SGRPROJ_PRJ_SUBEXP_K, - ref_sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1, ACCT_STR) + + ref_sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1, ACCT_INFO()) + SGRPROJ_PRJ_MIN1; } @@ -2948,11 +3351,11 @@ assert(num_classes <= WIENERNS_MAX_CLASSES); #if CONFIG_LR_MERGE_COEFFS for (int c_id = 0; c_id < num_classes; ++c_id) { - const int exact_match = - aom_read_symbol(rb, xd->tile_ctx->merged_param_cdf, 2, ACCT_STR); + const int exact_match = aom_read_symbol(rb, xd->tile_ctx->merged_param_cdf, + 2, ACCT_INFO("exact_match")); int ref; for (ref = 0; ref < bank->bank_size_for_class[c_id] - 1; ++ref) { - if (aom_read_literal(rb, 1, ACCT_STR)) break; + if (aom_read_literal(rb, 1, ACCT_INFO("bank"))) break; } if (exact_match) { copy_nsfilter_taps_for_class( @@ -2967,7 +3370,12 @@ (void)xd; #endif // CONFIG_LR_MERGE_COEFFS const WienernsFilterParameters *nsfilter_params = +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + 
get_wienerns_parameters(xd->current_base_qindex, is_uv, + wienerns_info->is_cross_filter); +#else get_wienerns_parameters(xd->current_base_qindex, is_uv); +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER const int beg_feat = 0; const int end_feat = nsfilter_params->ncoeffs; const int(*wienerns_coeffs)[WIENERNS_COEFCFG_LEN] = nsfilter_params->coeffs; @@ -2997,28 +3405,33 @@ const int rodd = is_uv ? 0 : (end_feat & 1); for (int i = beg_feat; i < end_feat; ++i) { if (rodd && i == end_feat - 5 && i != beg_feat) { - reduce_step[0] = aom_read_symbol( - rb, xd->tile_ctx->wienerns_reduce_cdf[0], 2, ACCT_STR); + reduce_step[0] = + aom_read_symbol(rb, xd->tile_ctx->wienerns_reduce_cdf[0], 2, + ACCT_INFO("wienerns_reduce_cdf0")); if (reduce_step[0]) break; } if (!rodd && i == end_feat - 4 && i != beg_feat) { - reduce_step[1] = aom_read_symbol( - rb, xd->tile_ctx->wienerns_reduce_cdf[1], 2, ACCT_STR); + reduce_step[1] = + aom_read_symbol(rb, xd->tile_ctx->wienerns_reduce_cdf[1], 2, + ACCT_INFO("wienerns_reduce_cdf1")); if (reduce_step[1]) break; } if (rodd && i == end_feat - 3 && i != beg_feat) { - reduce_step[2] = aom_read_symbol( - rb, xd->tile_ctx->wienerns_reduce_cdf[2], 2, ACCT_STR); + reduce_step[2] = + aom_read_symbol(rb, xd->tile_ctx->wienerns_reduce_cdf[2], 2, + ACCT_INFO("wienerns_reduce_cdf2")); if (reduce_step[2]) break; } if (!rodd && i == end_feat - 2 && i != beg_feat) { - reduce_step[3] = aom_read_symbol( - rb, xd->tile_ctx->wienerns_reduce_cdf[3], 2, ACCT_STR); + reduce_step[3] = + aom_read_symbol(rb, xd->tile_ctx->wienerns_reduce_cdf[3], 2, + ACCT_INFO("wienerns_reduce_cdf3")); if (reduce_step[3]) break; } if (rodd && i == end_feat - 1 && i != beg_feat) { - reduce_step[4] = aom_read_symbol( - rb, xd->tile_ctx->wienerns_reduce_cdf[4], 2, ACCT_STR); + reduce_step[4] = + aom_read_symbol(rb, xd->tile_ctx->wienerns_reduce_cdf[4], 2, + ACCT_INFO("wienerns_reduce_cdf4")); if (reduce_step[4]) break; } #if ENABLE_LR_4PART_CODE @@ -3029,7 +3442,8 @@ wienerns_coeffs[i - 
beg_feat][WIENERNS_MIN_ID], xd->tile_ctx->wienerns_4part_cdf [wienerns_coeffs[i - beg_feat][WIENERNS_PAR_ID]], - wienerns_coeffs[i - beg_feat][WIENERNS_BIT_ID], ACCT_STR) + + wienerns_coeffs[i - beg_feat][WIENERNS_BIT_ID], + ACCT_INFO("wienerns_info_nsfilter")) + wienerns_coeffs[i - beg_feat][WIENERNS_MIN_ID]; #else wienerns_info_nsfilter[i] = @@ -3038,7 +3452,7 @@ wienerns_coeffs[i - beg_feat][WIENERNS_PAR_ID], ref_wienerns_info_nsfilter[i] - wienerns_coeffs[i - beg_feat][WIENERNS_MIN_ID], - ACCT_STR) + + ACCT_INFO("wienerns_info_nsfilter")) + wienerns_coeffs[i - beg_feat][WIENERNS_MIN_ID]; #endif // ENABLE_LR_4PART_CODE } @@ -3048,17 +3462,39 @@ #endif // CONFIG_WIENER_NONSEP static AOM_INLINE void loop_restoration_read_sb_coeffs( +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + AV1_COMMON *const cm, MACROBLOCKD *xd, aom_reader *const r, int plane, +#else const AV1_COMMON *const cm, MACROBLOCKD *xd, aom_reader *const r, int plane, +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER int runit_idx) { +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + RestorationInfo *rsi = &cm->rst_info[plane]; +#else const RestorationInfo *rsi = &cm->rst_info[plane]; +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER RestorationUnitInfo *rui = &rsi->unit_info[runit_idx]; +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + assert(rsi->frame_restoration_type != RESTORE_NONE || + rsi->frame_cross_restoration_type != RESTORE_NONE); + rui->restoration_type = RESTORE_NONE; + rui->cross_restoration_type = RESTORE_NONE; +#else assert(rsi->frame_restoration_type != RESTORE_NONE); +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER assert(!cm->features.all_lossless); const int wiener_win = (plane > 0) ? 
WIENER_WIN_CHROMA : WIENER_WIN; #if CONFIG_WIENER_NONSEP rui->wienerns_info.num_classes = rsi->num_filter_classes; +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + rui->wienerns_cross_info.num_classes = + xd->wienerns_cross_info[plane].filter[0].num_classes; + + rui->wienerns_info.is_cross_filter = 0; + rui->wienerns_cross_info.is_cross_filter = 1; +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER #endif // CONFIG_WIENER_NONSEP if (rsi->frame_restoration_type == RESTORE_SWITCHABLE) { @@ -3067,16 +3503,17 @@ for (int re = 0; re <= cm->features.lr_last_switchable_ndx[plane]; re++) { if (cm->features.lr_tools_disable_mask[plane] & (1 << re)) continue; const int found = aom_read_symbol( - r, xd->tile_ctx->switchable_flex_restore_cdf[re][plane], 2, ACCT_STR); + r, xd->tile_ctx->switchable_flex_restore_cdf[re][plane], 2, + ACCT_INFO("found")); if (found) { rui->restoration_type = re; break; } } #else - rui->restoration_type = - aom_read_symbol(r, xd->tile_ctx->switchable_restore_cdf, - RESTORE_SWITCHABLE_TYPES, ACCT_STR); + rui->restoration_type = aom_read_symbol( + r, xd->tile_ctx->switchable_restore_cdf, RESTORE_SWITCHABLE_TYPES, + ACCT_INFO("restoration_type")); #endif // CONFIG_LR_FLEX_SYNTAX switch (rui->restoration_type) { case RESTORE_WIENER: @@ -3101,7 +3538,8 @@ default: assert(rui->restoration_type == RESTORE_NONE); break; } } else if (rsi->frame_restoration_type == RESTORE_WIENER) { - if (aom_read_symbol(r, xd->tile_ctx->wiener_restore_cdf, 2, ACCT_STR)) { + if (aom_read_symbol(r, xd->tile_ctx->wiener_restore_cdf, 2, + ACCT_INFO("wiener_restore_cdf"))) { rui->restoration_type = RESTORE_WIENER; read_wiener_filter(xd, wiener_win, &rui->wiener_info, &xd->wiener_info[plane], r); @@ -3109,7 +3547,8 @@ rui->restoration_type = RESTORE_NONE; } } else if (rsi->frame_restoration_type == RESTORE_SGRPROJ) { - if (aom_read_symbol(r, xd->tile_ctx->sgrproj_restore_cdf, 2, ACCT_STR)) { + if (aom_read_symbol(r, xd->tile_ctx->sgrproj_restore_cdf, 2, + 
ACCT_INFO("sgrproj_restore_cdf"))) { rui->restoration_type = RESTORE_SGRPROJ; read_sgrproj_filter(xd, &rui->sgrproj_info, &xd->sgrproj_info[plane], r); } else { @@ -3117,7 +3556,8 @@ } #if CONFIG_WIENER_NONSEP } else if (rsi->frame_restoration_type == RESTORE_WIENER_NONSEP) { - if (aom_read_symbol(r, xd->tile_ctx->wienerns_restore_cdf, 2, ACCT_STR)) { + if (aom_read_symbol(r, xd->tile_ctx->wienerns_restore_cdf, 2, + ACCT_INFO("wienerns_restore_cdf"))) { rui->restoration_type = RESTORE_WIENER_NONSEP; read_wienerns_filter(xd, plane != AOM_PLANE_Y, &rui->wienerns_info, &xd->wienerns_info[plane], r); @@ -3127,7 +3567,8 @@ #endif // CONFIG_WIENER_NONSEP #if CONFIG_PC_WIENER } else if (rsi->frame_restoration_type == RESTORE_PC_WIENER) { - if (aom_read_symbol(r, xd->tile_ctx->pc_wiener_restore_cdf, 2, ACCT_STR)) { + if (aom_read_symbol(r, xd->tile_ctx->pc_wiener_restore_cdf, 2, + ACCT_INFO("pc_wiener_restore_cdf"))) { rui->restoration_type = RESTORE_PC_WIENER; // No side-information for now. } else { @@ -3135,6 +3576,18 @@ } #endif // CONFIG_PC_WIENER } + +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + if (rsi->frame_cross_restoration_type == RESTORE_WIENER_NONSEP) { + if (aom_read_symbol(r, xd->tile_ctx->wienerns_restore_cdf, 2, + ACCT_INFO())) { + rui->cross_restoration_type = RESTORE_WIENER_NONSEP; + read_wienerns_filter(xd, plane != AOM_PLANE_Y, &rui->wienerns_cross_info, + &xd->wienerns_cross_info[plane], r); + } + } +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + #if CONFIG_LR_FLEX_SYNTAX assert(((cm->features.lr_tools_disable_mask[plane] >> rui->restoration_type) & 1) == 0); @@ -3790,6 +4243,11 @@ cdef_info->cdef_strengths[0] == 0 && cdef_info->cdef_uv_strengths[0] == 0; const int no_restoration = +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + rst_info[0].frame_cross_restoration_type == RESTORE_NONE && + rst_info[1].frame_cross_restoration_type == RESTORE_NONE && + rst_info[2].frame_cross_restoration_type == RESTORE_NONE && +#endif // 
CONFIG_HIGH_PASS_CROSS_WIENER_FILTER rst_info[0].frame_restoration_type == RESTORE_NONE && rst_info[1].frame_restoration_type == RESTORE_NONE && rst_info[2].frame_restoration_type == RESTORE_NONE; @@ -4065,7 +4523,15 @@ for (int plane = 0; plane < num_planes; ++plane) { dcb->dqcoeff_block[plane] = cb_buffer->dqcoeff[plane]; +#if CONFIG_INSPECTION + dcb->dqcoeff_block_copy[plane] = cb_buffer->dqcoeff_copy[plane]; + dcb->qcoeff_block[plane] = cb_buffer->qcoeff[plane]; + dcb->dequant_values[plane] = cb_buffer->dequant_values[plane]; +#endif // CONFIG_INSPECTION dcb->eob_data[plane] = cb_buffer->eob_data[plane]; +#if CONFIG_ATC_DCTX_ALIGNED + dcb->bob_data[plane] = cb_buffer->bob_data[plane]; +#endif // CONFIG_ATC_DCTX_ALIGNED dcb->cb_offset[plane] = 0; dcb->txb_offset[plane] = 0; } @@ -4344,7 +4810,7 @@ av1_zero_left_context(xd); #if CONFIG_REF_MV_BANK av1_zero(xd->ref_mv_bank); -#if !CONFIG_C043_MVP_IMPROVEMENTS +#if !CONFIG_MVP_IMPROVEMENT xd->ref_mv_bank_pt = &td->ref_mv_bank; #endif #endif // CONFIG_REF_MV_BANK @@ -4366,9 +4832,9 @@ // for MV referencing during decoding the tile. // xd->ref_mv_bank is updated as decoding goes. 
xd->ref_mv_bank.rmb_sb_hits = 0; -#if !CONFIG_C043_MVP_IMPROVEMENTS +#if !CONFIG_MVP_IMPROVEMENT td->ref_mv_bank = xd->ref_mv_bank; -#endif // !CONFIG_C043_MVP_IMPROVEMENTS +#endif // !CONFIG_MVP_IMPROVEMENT #endif // CONFIG_REF_MV_BANK #if CONFIG_WARP_REF_LIST @@ -4865,7 +5331,7 @@ av1_zero_left_context(xd); #if CONFIG_REF_MV_BANK av1_zero(xd->ref_mv_bank); -#if !CONFIG_C043_MVP_IMPROVEMENTS +#if !CONFIG_MVP_IMPROVEMENT xd->ref_mv_bank_pt = &td->ref_mv_bank; #endif #endif // CONFIG_REF_MV_BANK @@ -4885,9 +5351,9 @@ #if CONFIG_REF_MV_BANK xd->ref_mv_bank.rmb_sb_hits = 0; -#if !CONFIG_C043_MVP_IMPROVEMENTS +#if !CONFIG_MVP_IMPROVEMENT td->ref_mv_bank = xd->ref_mv_bank; -#endif // !CONFIG_C043_MVP_IMPROVEMENTS +#endif // !CONFIG_MVP_IMPROVEMENT #endif // CONFIG_REF_MV_BANK #if CONFIG_WARP_REF_LIST @@ -6003,7 +6469,7 @@ if (aom_rb_read_bit(rb)) { seq_params->lr_tools_disable_mask[1] = DEF_UV_LR_TOOLS_DISABLE_MASK; for (int i = 1; i < RESTORE_SWITCHABLE_TYPES; ++i) { - if (DEF_UV_LR_TOOLS_DISABLE_MASK | (1 << i)) continue; + if (DEF_UV_LR_TOOLS_DISABLE_MASK & (1 << i)) continue; seq_params->lr_tools_disable_mask[1] |= (aom_rb_read_bit(rb) << i); } } else { @@ -6016,10 +6482,15 @@ void av1_read_sequence_header_beyond_av1(struct aom_read_bit_buffer *rb, SequenceHeader *seq_params) { + // printf("print sps\n"); #if CONFIG_REF_MV_BANK seq_params->enable_refmvbank = aom_rb_read_bit(rb); #endif // CONFIG_REF_MV_BANK seq_params->explicit_ref_frame_map = aom_rb_read_bit(rb); +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + // 0 : use show_existing_frame, 1: use implicit derivation + seq_params->enable_frame_output_order = aom_rb_read_bit(rb); +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT // A bit is sent here to indicate if the max number of references is 7. If // this bit is 0, then two more bits are sent to indicate the exact number // of references allowed (range: 3 to 6). 
@@ -6048,6 +6519,12 @@ #if CONFIG_BAWP seq_params->enable_bawp = aom_rb_read_bit(rb); #endif // CONFIG_BAWP +#if CONFIG_CWP + seq_params->enable_cwp = aom_rb_read_bit(rb); +#endif // CONFIG_CWP +#if CONFIG_D071_IMP_MSK_BLD + seq_params->enable_imp_msk_bld = aom_rb_read_bit(rb); +#endif // CONFIG_D071_IMP_MSK_BLD seq_params->enable_fsc = aom_rb_read_bit(rb); #if CONFIG_CCSO seq_params->enable_ccso = aom_rb_read_bit(rb); @@ -6058,6 +6535,9 @@ #if CONFIG_ORIP seq_params->enable_orip = aom_rb_read_bit(rb); #endif +#if CONFIG_IDIF + seq_params->enable_idif = aom_rb_read_bit(rb); +#endif // CONFIG_IDIF #if CONFIG_OPTFLOW_REFINEMENT seq_params->enable_opfl_refine = seq_params->order_hint_info.enable_order_hint ? aom_rb_read_literal(rb, 2) @@ -6067,6 +6547,10 @@ #if CONFIG_ADAPTIVE_MVD seq_params->enable_adaptive_mvd = aom_rb_read_bit(rb); #endif // CONFIG_ADAPTIVE_MVD + +#if CONFIG_REFINEMV + seq_params->enable_refinemv = aom_rb_read_bit(rb); +#endif // CONFIG_REFINEMV #if CONFIG_FLEX_MVRES seq_params->enable_flex_mvres = aom_rb_read_bit(rb); #endif // CONFIG_FLEX_MVRES @@ -6081,6 +6565,13 @@ #if CONFIG_EXT_RECUR_PARTITIONS seq_params->enable_ext_partitions = aom_rb_read_bit(rb); #endif // CONFIG_EXT_RECUR_PARTITIONS +#if CONFIG_IMPROVED_GLOBAL_MOTION + if (seq_params->reduced_still_picture_hdr) { + seq_params->enable_global_motion = 0; + } else { + seq_params->enable_global_motion = aom_rb_read_bit(rb); + } +#endif // CONFIG_IMPROVED_GLOBAL_MOTION } static int read_global_motion_params(WarpedMotionParams *params, @@ -6088,16 +6579,27 @@ struct aom_read_bit_buffer *rb, #if !CONFIG_FLEX_MVRES int allow_hp) { +#if CONFIG_IMPROVED_GLOBAL_MOTION + (void)allow_hp; +#endif // CONFIG_IMPROVED_GLOBAL_MOTION #else MvSubpelPrecision precision) { const int precision_loss = get_gm_precision_loss(precision); -#endif +#if CONFIG_IMPROVED_GLOBAL_MOTION + (void)precision_loss; +#endif // CONFIG_IMPROVED_GLOBAL_MOTION +#endif // !CONFIG_FLEX_MVRES TransformationType type = 
aom_rb_read_bit(rb); if (type != IDENTITY) { - if (aom_rb_read_bit(rb)) + if (aom_rb_read_bit(rb)) { type = ROTZOOM; - else + } else { +#if CONFIG_IMPROVED_GLOBAL_MOTION + type = AFFINE; +#else type = aom_rb_read_bit(rb) ? TRANSLATION : AFFINE; +#endif // CONFIG_IMPROVED_GLOBAL_MOTION + } } *params = default_warp_params; @@ -6133,6 +6635,11 @@ } if (type >= TRANSLATION) { +#if CONFIG_IMPROVED_GLOBAL_MOTION + const int trans_dec_factor = GM_TRANS_DECODE_FACTOR; + const int trans_prec_diff = GM_TRANS_PREC_DIFF; + const int trans_max = GM_TRANS_MAX; +#else const int trans_bits = (type == TRANSLATION) #if CONFIG_FLEX_MVRES ? GM_ABS_TRANS_ONLY_BITS - precision_loss @@ -6155,13 +6662,15 @@ ? GM_TRANS_ONLY_PREC_DIFF + !allow_hp #endif : GM_TRANS_PREC_DIFF; + const int trans_max = (1 << trans_bits); +#endif // CONFIG_IMPROVED_GLOBAL_MOTION params->wmmat[0] = aom_rb_read_signed_primitive_refsubexpfin( - rb, (1 << trans_bits) + 1, SUBEXPFIN_K, + rb, trans_max + 1, SUBEXPFIN_K, (ref_params->wmmat[0] >> trans_prec_diff)) * trans_dec_factor; params->wmmat[1] = aom_rb_read_signed_primitive_refsubexpfin( - rb, (1 << trans_bits) + 1, SUBEXPFIN_K, + rb, trans_max + 1, SUBEXPFIN_K, (ref_params->wmmat[1] >> trans_prec_diff)) * trans_dec_factor; } @@ -6179,10 +6688,86 @@ static AOM_INLINE void read_global_motion(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) { +#if CONFIG_IMPROVED_GLOBAL_MOTION + const SequenceHeader *const seq_params = &cm->seq_params; + int num_total_refs = cm->ref_frames_info.num_total_refs; + bool use_global_motion = false; + if (seq_params->enable_global_motion) { + use_global_motion = aom_rb_read_bit(rb); + } + if (!use_global_motion) { + for (int frame = 0; frame < INTER_REFS_PER_FRAME; ++frame) { + cm->global_motion[frame] = default_warp_params; + cm->cur_frame->global_motion[frame] = default_warp_params; + } + return; + } + + int our_ref = aom_rb_read_primitive_quniform(rb, num_total_refs + 1); + if (our_ref == num_total_refs) { + // Special case: Use 
IDENTITY model + cm->base_global_motion_model = default_warp_params; + cm->base_global_motion_distance = 1; + } else { + RefCntBuffer *buf = get_ref_frame_buf(cm, our_ref); + assert(buf); + int their_num_refs = buf->num_ref_frames; + if (their_num_refs == 0) { + // Special case: if an intra/key frame is used as a ref, use an + // IDENTITY model + cm->base_global_motion_model = default_warp_params; + cm->base_global_motion_distance = 1; + } else { + int their_ref = aom_rb_read_primitive_quniform(rb, their_num_refs); +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + const int our_ref_order_hint = buf->display_order_hint; + const int their_ref_order_hint = buf->ref_display_order_hint[their_ref]; +#else + const int our_ref_order_hint = buf->order_hint; + const int their_ref_order_hint = buf->ref_order_hints[their_ref]; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + cm->base_global_motion_model = buf->global_motion[their_ref]; + cm->base_global_motion_distance = + get_relative_dist(&seq_params->order_hint_info, our_ref_order_hint, + their_ref_order_hint); + } + } +#endif // CONFIG_IMPROVED_GLOBAL_MOTION + for (int frame = 0; frame < cm->ref_frames_info.num_total_refs; ++frame) { +#if CONFIG_IMPROVED_GLOBAL_MOTION + int temporal_distance; + if (seq_params->order_hint_info.enable_order_hint) { + const RefCntBuffer *const ref_buf = get_ref_frame_buf(cm, frame); +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + const int ref_order_hint = ref_buf->display_order_hint; + const int cur_order_hint = cm->cur_frame->display_order_hint; +#else + const int ref_order_hint = ref_buf->order_hint; + const int cur_order_hint = cm->cur_frame->order_hint; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + temporal_distance = get_relative_dist(&seq_params->order_hint_info, + cur_order_hint, ref_order_hint); + } else { + temporal_distance = 1; + } + + if (temporal_distance == 0) { + // Don't code global motion for frames at the same temporal instant + cm->global_motion[frame] = default_warp_params; + 
continue; + } + + WarpedMotionParams ref_params_; + av1_scale_warp_model(&cm->base_global_motion_model, + cm->base_global_motion_distance, &ref_params_, + temporal_distance); + WarpedMotionParams *ref_params = &ref_params_; +#else const WarpedMotionParams *ref_params = cm->prev_frame ? &cm->prev_frame->global_motion[frame] : &default_warp_params; +#endif // CONFIG_IMPROVED_GLOBAL_MOTION int good_params = #if !CONFIG_FLEX_MVRES read_global_motion_params(&cm->global_motion[frame], ref_params, rb, @@ -6291,6 +6876,10 @@ continue; } frame_bufs[i].order_hint = 0; +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + frame_bufs[i].display_order_hint = 0; + av1_zero(frame_bufs[i].ref_display_order_hint); +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC av1_zero(frame_bufs[i].ref_order_hints); } av1_zero_unused_internal_frame_buffers(&cm->buffer_pool->int_frame_buffers); @@ -6336,6 +6925,33 @@ return cur_disp_order_hint; } +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC +static INLINE int get_ref_frame_disp_order_hint(AV1_COMMON *const cm, + const RefCntBuffer *const buf) { + // Find the reference frame with the largest order_hint + int max_disp_order_hint = 0; + for (int map_idx = 0; map_idx < INTER_REFS_PER_FRAME; map_idx++) { + if ((int)buf->ref_display_order_hint[map_idx] > max_disp_order_hint) + max_disp_order_hint = buf->ref_display_order_hint[map_idx]; + } + + // If the order_hint is above the threshold distance of 35 frames (largest + // possible lag_in_frames) from the found reference frame, we assume it was + // modified using: + // order_hint = display_order_hint % display_order_hint_factor + // Here, the actual display_order_hint is recovered. 
+ const int display_order_hint_factor = + 1 << (cm->seq_params.order_hint_info.order_hint_bits_minus_1 + 1); + int disp_order_hint = buf->order_hint; + while (abs(max_disp_order_hint - disp_order_hint) > 35) { + if (disp_order_hint > max_disp_order_hint) return disp_order_hint; + + disp_order_hint += display_order_hint_factor; + } + return disp_order_hint; +} +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + // On success, returns 0. On failure, calls aom_internal_error and does not // return. static int read_uncompressed_header(AV1Decoder *pbi, @@ -6507,11 +7123,7 @@ } } features->disable_cdf_update = aom_rb_read_bit(rb); -#if DS_FRAME_LEVEL - if (current_frame->frame_type == KEY_FRAME) { - features->ds_filter_type = aom_rb_read_literal(rb, 2); - } -#endif // DS_FRAME_LEVEL + if (seq_params->force_screen_content_tools == 2) { features->allow_screen_content_tools = aom_rb_read_bit(rb); } else { @@ -6707,7 +7319,11 @@ buf->order_hint = order_hint; // TODO(kslu) This is a workaround for error resilient mode. Make // it more consistent with get_disp_order_hint(). +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + buf->display_order_hint = get_ref_frame_disp_order_hint(cm, buf); +#else buf->display_order_hint = order_hint; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC } } } @@ -6736,17 +7352,21 @@ features->allow_global_intrabc = aom_rb_read_bit(rb); features->allow_local_intrabc = features->allow_global_intrabc ? 
aom_rb_read_bit(rb) : 1; -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT features->max_drl_bits = aom_rb_read_primitive_quniform( rb, MAX_MAX_DRL_BITS - MIN_MAX_DRL_BITS + 1) + MIN_MAX_DRL_BITS; -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT } #endif // CONFIG_IBC_SR_EXT features->allow_ref_frame_mvs = 0; cm->prev_frame = NULL; + +#if CONFIG_IMPROVED_GLOBAL_MOTION + cm->cur_frame->num_ref_frames = 0; +#endif // CONFIG_IMPROVED_GLOBAL_MOTION } else { features->allow_ref_frame_mvs = 0; #if CONFIG_TIP @@ -6763,15 +7383,19 @@ features->allow_global_intrabc = aom_rb_read_bit(rb); features->allow_local_intrabc = features->allow_global_intrabc ? aom_rb_read_bit(rb) : 1; -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT features->max_drl_bits = aom_rb_read_primitive_quniform( rb, MAX_MAX_DRL_BITS - MIN_MAX_DRL_BITS + 1) + MIN_MAX_DRL_BITS; -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT } #endif // CONFIG_IBC_SR_EXT +#if CONFIG_IMPROVED_GLOBAL_MOTION + cm->cur_frame->num_ref_frames = 0; +#endif // CONFIG_IMPROVED_GLOBAL_MOTION + } else if (pbi->need_resync != 1) { /* Skip if need resync */ // Implicitly derive the reference mapping RefFrameMapPair ref_frame_map_pairs[REF_FRAMES]; @@ -6852,6 +7476,9 @@ } av1_get_past_future_cur_ref_lists(cm, scores); } +#if CONFIG_IMPROVED_GLOBAL_MOTION + cm->cur_frame->num_ref_frames = cm->ref_frames_info.num_total_refs; +#endif // CONFIG_IMPROVED_GLOBAL_MOTION if (!features->error_resilient_mode && frame_size_override_flag) { setup_frame_size_with_refs(cm, rb); @@ -6975,7 +7602,7 @@ if (!(current_frame->frame_type == INTRA_ONLY_FRAME) && pbi->need_resync != 1) { - for (int i = 0; i < INTER_REFS_PER_FRAME; ++i) { + for (int i = 0; i < cm->ref_frames_info.num_total_refs; ++i) { const RefCntBuffer *const ref_buf = get_ref_frame_buf(cm, i); if (!ref_buf) continue; struct scale_factors *const ref_scale_factors = @@ -7066,6 +7693,11 @@ cm->rst_info[0].frame_restoration_type = 
RESTORE_NONE; cm->rst_info[1].frame_restoration_type = RESTORE_NONE; cm->rst_info[2].frame_restoration_type = RESTORE_NONE; +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + cm->rst_info[0].frame_cross_restoration_type = RESTORE_NONE; + cm->rst_info[1].frame_cross_restoration_type = RESTORE_NONE; + cm->rst_info[2].frame_cross_restoration_type = RESTORE_NONE; +#endif } #if CONFIG_TIP @@ -7182,6 +7814,11 @@ cm->rst_info[0].frame_restoration_type = RESTORE_NONE; cm->rst_info[1].frame_restoration_type = RESTORE_NONE; cm->rst_info[2].frame_restoration_type = RESTORE_NONE; +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + cm->rst_info[0].frame_cross_restoration_type = RESTORE_NONE; + cm->rst_info[1].frame_cross_restoration_type = RESTORE_NONE; + cm->rst_info[2].frame_cross_restoration_type = RESTORE_NONE; +#endif } setup_loopfilter(cm, rb); @@ -7225,6 +7862,21 @@ features->enable_bawp = 0; #endif // CONFIG_BAWP +#if CONFIG_CWP + features->enable_cwp = seq_params->enable_cwp; +#endif // CONFIG_CWP +#if CONFIG_CWG_D067_IMPROVED_WARP + features->allow_warpmv_mode = 0; + if (!frame_is_intra_only(cm) && + (features->enabled_motion_modes & (1 << WARP_DELTA)) != 0) { + features->allow_warpmv_mode = aom_rb_read_bit(rb); + } +#endif // CONFIG_CWG_D067_IMPROVED_WARP + +#if CONFIG_D071_IMP_MSK_BLD + features->enable_imp_msk_bld = seq_params->enable_imp_msk_bld; +#endif // CONFIG_D071_IMP_MSK_BLD + features->reduced_tx_set_used = aom_rb_read_bit(rb); if (features->allow_ref_frame_mvs && !frame_might_allow_ref_frame_mvs(cm)) { @@ -7385,10 +8037,10 @@ cm->mi_params.setup_mi(&cm->mi_params); if (cm->features.allow_ref_frame_mvs) av1_setup_motion_field(cm); -#if CONFIG_SMVP_IMPROVEMENT +#if CONFIG_MVP_IMPROVEMENT else av1_setup_ref_frame_sides(cm); -#endif // CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT #if CONFIG_PEF if (cm->seq_params.enable_pef && cm->features.allow_pef) { @@ -7426,7 +8078,13 @@ if (cm->rst_info[0].frame_restoration_type != RESTORE_NONE || 
cm->rst_info[1].frame_restoration_type != RESTORE_NONE || - cm->rst_info[2].frame_restoration_type != RESTORE_NONE) { + cm->rst_info[2].frame_restoration_type != RESTORE_NONE +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + || cm->rst_info[0].frame_cross_restoration_type != RESTORE_NONE || + cm->rst_info[1].frame_cross_restoration_type != RESTORE_NONE || + cm->rst_info[2].frame_cross_restoration_type != RESTORE_NONE +#endif + ) { av1_alloc_restoration_buffers(cm); } const int buf_size = MC_TEMP_BUF_PELS << 1; @@ -7450,7 +8108,16 @@ #if CONFIG_LPF_MASK av1_loop_filter_frame_init(cm, 0, num_planes); #endif - +#if CONFIG_INSPECTION + aom_realloc_frame_buffer( + &cm->predicted_pixels, cm->width, cm->height, + cm->seq_params.subsampling_x, cm->seq_params.subsampling_y, + AOM_DEC_BORDER_IN_PIXELS, cm->features.byte_alignment, NULL, NULL, NULL); + aom_realloc_frame_buffer( + &cm->prefiltered_pixels, cm->width, cm->height, + cm->seq_params.subsampling_x, cm->seq_params.subsampling_y, + AOM_DEC_BORDER_IN_PIXELS, cm->features.byte_alignment, NULL, NULL, NULL); +#endif // CONFIG_INSPECTION if (pbi->max_threads > 1 && !(tiles->large_scale && !pbi->ext_tile_debug) && pbi->row_mt) *p_data_end = @@ -7466,6 +8133,11 @@ set_planes_to_neutral_grey(&cm->seq_params, xd->cur_buf, 1); } +#if CONFIG_INSPECTION + memcpy(cm->prefiltered_pixels.buffer_alloc, cm->cur_frame->buf.buffer_alloc, + cm->prefiltered_pixels.frame_size); +#endif // CONFIG_INSPECTION + if (end_tile != tiles->rows * tiles->cols - 1) { return; } @@ -7539,6 +8211,11 @@ #endif const int do_loop_restoration = +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + cm->rst_info[0].frame_cross_restoration_type != RESTORE_NONE || + cm->rst_info[1].frame_cross_restoration_type != RESTORE_NONE || + cm->rst_info[2].frame_cross_restoration_type != RESTORE_NONE || +#endif cm->rst_info[0].frame_restoration_type != RESTORE_NONE || cm->rst_info[1].frame_restoration_type != RESTORE_NONE || cm->rst_info[2].frame_restoration_type != RESTORE_NONE; @@ 
-7633,7 +8310,7 @@ if (pbi->inspect_cb != NULL) { (*pbi->inspect_cb)(pbi, pbi->inspect_ctx); } -#endif +#endif // CONFIG_INSPECTION // Non frame parallel update frame context here. if (!tiles->large_scale) {
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c index 7467a62..6c8ce50 100644 --- a/av1/decoder/decodemv.c +++ b/av1/decoder/decodemv.c
@@ -17,6 +17,9 @@ #include "av1/common/cdef_block.h" #include "av1/common/cfl.h" #include "av1/common/common.h" +#if CONFIG_ATC_DCTX_ALIGNED +#include "av1/common/txb_common.h" +#endif // CONFIG_ATC_DCTX_ALIGNED #include "av1/common/entropy.h" #include "av1/common/entropymode.h" #include "av1/common/entropymv.h" @@ -32,13 +35,11 @@ #include "aom_dsp/aom_dsp_common.h" -#define ACCT_STR __func__ - #define DEC_MISMATCH_DEBUG 0 #if !CONFIG_AIMC static PREDICTION_MODE read_intra_mode(aom_reader *r, aom_cdf_prob *cdf) { - return (PREDICTION_MODE)aom_read_symbol(r, cdf, INTRA_MODES, ACCT_STR); + return (PREDICTION_MODE)aom_read_symbol(r, cdf, INTRA_MODES, ACCT_INFO()); } #endif // !CONFIG_AIMC @@ -83,8 +84,8 @@ get_mi_grid_idx(mi_params, xd->mi_row & first_block_mask, xd->mi_col & first_block_mask); MB_MODE_INFO *const mbmi = mi_params->mi_grid_base[grid_idx]; - mbmi->cdef_strength = - aom_read_literal(r, cm->cdef_info.cdef_bits, ACCT_STR); + mbmi->cdef_strength = aom_read_literal(r, cm->cdef_info.cdef_bits, + ACCT_INFO("cdef_strength")); xd->cdef_transmitted[index] = true; } } @@ -105,7 +106,7 @@ if (!(mi_row & blk_size_y) && !(mi_col & blk_size_x) && cm->ccso_info.ccso_enable[0]) { const int blk_idc = - aom_read_symbol(r, xd->tile_ctx->ccso_cdf[0], 2, ACCT_STR); + aom_read_symbol(r, xd->tile_ctx->ccso_cdf[0], 2, ACCT_INFO("blk_idc")); xd->ccso_blk_y = blk_idc; mi_params ->mi_grid_base[(mi_row & ~blk_size_y) * mi_params->mi_stride + @@ -118,10 +119,10 @@ #if CONFIG_CCSO_EXT cm->ccso_info.ccso_enable[1]) { const int blk_idc = - aom_read_symbol(r, xd->tile_ctx->ccso_cdf[1], 2, ACCT_STR); + aom_read_symbol(r, xd->tile_ctx->ccso_cdf[1], 2, ACCT_INFO("blk_idc")); #else cm->ccso_info.ccso_enable[0]) { - const int blk_idc = aom_read_bit(r, ACCT_STR); + const int blk_idc = aom_read_bit(r, ACCT_INFO("blk_idc")); #endif xd->ccso_blk_u = blk_idc; mi_params @@ -134,10 +135,10 @@ #if CONFIG_CCSO_EXT cm->ccso_info.ccso_enable[2]) { const int blk_idc = - aom_read_symbol(r, 
xd->tile_ctx->ccso_cdf[2], 2, ACCT_STR); + aom_read_symbol(r, xd->tile_ctx->ccso_cdf[2], 2, ACCT_INFO("blk_idc")); #else cm->ccso_info.ccso_enable[1]) { - const int blk_idc = aom_read_bit(r, ACCT_STR); + const int blk_idc = aom_read_bit(r, ACCT_INFO("blk_idc")); #endif xd->ccso_blk_v = blk_idc; mi_params @@ -159,17 +160,18 @@ if ((bsize != cm->sb_size || mbmi->skip_txfm[xd->tree_type == CHROMA_PART] == 0) && read_delta_q_flag) { - abs = aom_read_symbol(r, ec_ctx->delta_q_cdf, DELTA_Q_PROBS + 1, ACCT_STR); + abs = aom_read_symbol(r, ec_ctx->delta_q_cdf, DELTA_Q_PROBS + 1, + ACCT_INFO("abs")); const int smallval = (abs < DELTA_Q_SMALL); if (!smallval) { - const int rem_bits = aom_read_literal(r, 3, ACCT_STR) + 1; + const int rem_bits = aom_read_literal(r, 3, ACCT_INFO("rem_bits")) + 1; const int thr = (1 << rem_bits) + 1; - abs = aom_read_literal(r, rem_bits, ACCT_STR) + thr; + abs = aom_read_literal(r, rem_bits, ACCT_INFO("abs")) + thr; } if (abs) { - sign = aom_read_bit(r, ACCT_STR); + sign = aom_read_bit(r, ACCT_INFO("sign")); } else { sign = 1; } @@ -190,34 +192,47 @@ const int read_delta_lf_flag = (b_col == 0 && b_row == 0); if ((bsize != cm->sb_size || mbmi->skip_txfm[plane_type] == 0) && read_delta_lf_flag) { - int abs = aom_read_symbol(r, cdf, DELTA_LF_PROBS + 1, ACCT_STR); + int abs = aom_read_symbol(r, cdf, DELTA_LF_PROBS + 1, ACCT_INFO("abs")); const int smallval = (abs < DELTA_LF_SMALL); if (!smallval) { - const int rem_bits = aom_read_literal(r, 3, ACCT_STR) + 1; + const int rem_bits = aom_read_literal(r, 3, ACCT_INFO("rem_bits")) + 1; const int thr = (1 << rem_bits) + 1; - abs = aom_read_literal(r, rem_bits, ACCT_STR) + thr; + abs = aom_read_literal(r, rem_bits, ACCT_INFO("abs")) + thr; } - const int sign = abs ? aom_read_bit(r, ACCT_STR) : 1; + const int sign = abs ? aom_read_bit(r, ACCT_INFO("sign")) : 1; reduced_delta_lflevel = sign ? 
-abs : abs; } return reduced_delta_lflevel; } -static uint8_t read_mrl_index(FRAME_CONTEXT *ec_ctx, aom_reader *r) { +static uint8_t read_mrl_index(FRAME_CONTEXT *ec_ctx, aom_reader *r +#if CONFIG_EXT_DIR + , + const MB_MODE_INFO *neighbor0, + const MB_MODE_INFO *neighbor1 +#endif // CONFIG_EXT_DIR +) { +#if CONFIG_EXT_DIR + int ctx = get_mrl_index_ctx(neighbor0, neighbor1); + aom_cdf_prob *mrl_cdf = ec_ctx->mrl_index_cdf[ctx]; const uint8_t mrl_index = - aom_read_symbol(r, ec_ctx->mrl_index_cdf, MRL_LINE_NUMBER, ACCT_STR); + aom_read_symbol(r, mrl_cdf, MRL_LINE_NUMBER, ACCT_INFO()); +#else + const uint8_t mrl_index = + aom_read_symbol(r, ec_ctx->mrl_index_cdf, MRL_LINE_NUMBER, ACCT_INFO()); +#endif // CONFIG_EXT_DIR return mrl_index; } static uint8_t read_fsc_mode(aom_reader *r, aom_cdf_prob *fsc_cdf) { - const uint8_t fsc_mode = aom_read_symbol(r, fsc_cdf, FSC_MODES, ACCT_STR); + const uint8_t fsc_mode = aom_read_symbol(r, fsc_cdf, FSC_MODES, ACCT_INFO()); return fsc_mode; } #if CONFIG_IMPROVED_CFL static uint8_t read_cfl_index(FRAME_CONTEXT *ec_ctx, aom_reader *r) { uint8_t cfl_index = - aom_read_symbol(r, ec_ctx->cfl_index_cdf, CFL_TYPE_COUNT, ACCT_STR); + aom_read_symbol(r, ec_ctx->cfl_index_cdf, CFL_TYPE_COUNT, ACCT_INFO()); return cfl_index; } #endif @@ -229,25 +244,27 @@ PREDICTION_MODE y_mode) { const UV_PREDICTION_MODE uv_mode = aom_read_symbol(r, ec_ctx->uv_mode_cdf[cfl_allowed][y_mode], - UV_INTRA_MODES - !cfl_allowed, ACCT_STR); + UV_INTRA_MODES - !cfl_allowed, ACCT_INFO()); return uv_mode; } #endif // !CONFIG_AIMC static uint8_t read_cfl_alphas(FRAME_CONTEXT *const ec_ctx, aom_reader *r, int8_t *signs_out) { - const int8_t joint_sign = - aom_read_symbol(r, ec_ctx->cfl_sign_cdf, CFL_JOINT_SIGNS, "cfl:signs"); + const int8_t joint_sign = aom_read_symbol( + r, ec_ctx->cfl_sign_cdf, CFL_JOINT_SIGNS, ACCT_INFO("cfl:signs")); uint8_t idx = 0; // Magnitudes are only coded for nonzero values if (CFL_SIGN_U(joint_sign) != CFL_SIGN_ZERO) { aom_cdf_prob *cdf_u = 
ec_ctx->cfl_alpha_cdf[CFL_CONTEXT_U(joint_sign)]; - idx = (uint8_t)aom_read_symbol(r, cdf_u, CFL_ALPHABET_SIZE, "cfl:alpha_u") + idx = (uint8_t)aom_read_symbol(r, cdf_u, CFL_ALPHABET_SIZE, + ACCT_INFO("cfl:alpha_u")) << CFL_ALPHABET_SIZE_LOG2; } if (CFL_SIGN_V(joint_sign) != CFL_SIGN_ZERO) { aom_cdf_prob *cdf_v = ec_ctx->cfl_alpha_cdf[CFL_CONTEXT_V(joint_sign)]; - idx += (uint8_t)aom_read_symbol(r, cdf_v, CFL_ALPHABET_SIZE, "cfl:alpha_v"); + idx += (uint8_t)aom_read_symbol(r, cdf_v, CFL_ALPHABET_SIZE, + ACCT_INFO("cfl:alpha_v")); } *signs_out = joint_sign; return idx; @@ -257,7 +274,7 @@ int size_group) { const INTERINTRA_MODE ii_mode = (INTERINTRA_MODE)aom_read_symbol( r, xd->tile_ctx->interintra_mode_cdf[size_group], INTERINTRA_MODES, - ACCT_STR); + ACCT_INFO()); return ii_mode; } @@ -277,8 +294,9 @@ int is_warpmv = 0; if (is_warpmv_mode_allowed(cm, mbmi, bsize)) { const int16_t iswarpmvmode_ctx = inter_warpmv_mode_ctx(cm, xd, mbmi); - is_warpmv = aom_read_symbol( - r, ec_ctx->inter_warp_mode_cdf[iswarpmvmode_ctx], 2, ACCT_STR); + is_warpmv = + aom_read_symbol(r, ec_ctx->inter_warp_mode_cdf[iswarpmvmode_ctx], 2, + ACCT_INFO("is_warpmv")); if (is_warpmv) { return WARPMV; } @@ -287,7 +305,7 @@ return SINGLE_INTER_MODE_START + aom_read_symbol(r, ec_ctx->inter_single_mode_cdf[ismode_ctx], - INTER_SINGLE_MODES, ACCT_STR); + INTER_SINGLE_MODES, ACCT_INFO("inter_single_mode")); } static void read_drl_idx(int max_drl_bits, const int16_t mode_ctx, @@ -295,50 +313,87 @@ MB_MODE_INFO *mbmi, aom_reader *r) { MACROBLOCKD *const xd = &dcb->xd; uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame); +#if CONFIG_SEP_COMP_DRL + mbmi->ref_mv_idx[0] = 0; + mbmi->ref_mv_idx[1] = 0; +#if !CONFIG_SKIP_MODE_ENHANCEMENT + assert(!mbmi->skip_mode); +#endif // CONFIG_SKIP_MODE_ENHANCEMENT + // if (has_second_drl(mbmi)) + if (has_second_drl(mbmi)) { + if (mbmi->mode == NEAR_NEWMV) + max_drl_bits = AOMMIN(max_drl_bits, SEP_COMP_DRL_SIZE); + else + assert(mbmi->mode == NEAR_NEARMV); + 
} + for (int ref = 0; ref < 1 + has_second_drl(mbmi); ref++) { + for (int idx = 0; idx < max_drl_bits; ++idx) { + const uint16_t *weight = has_second_drl(mbmi) + ? xd->weight[mbmi->ref_frame[ref]] + : xd->weight[ref_frame_type]; + aom_cdf_prob *drl_cdf = +#if CONFIG_SKIP_MODE_ENHANCEMENT + mbmi->skip_mode ? ec_ctx->skip_drl_cdf[AOMMIN(idx, 2)] + : av1_get_drl_cdf(ec_ctx, weight, mode_ctx, idx); +#else + av1_get_drl_cdf(ec_ctx, xd->weight[ref_frame_type], mode_ctx, idx); +#endif // CONFIG_SKIP_MODE_ENHANCEMENT + int drl_idx = aom_read_symbol(r, drl_cdf, 2, ACCT_INFO("drl_idx")); + mbmi->ref_mv_idx[ref] = idx + drl_idx; + if (!drl_idx) break; + } + assert(mbmi->ref_mv_idx[ref] < max_drl_bits + 1); + } +#else mbmi->ref_mv_idx = 0; #if !CONFIG_SKIP_MODE_ENHANCEMENT assert(!mbmi->skip_mode); #endif // CONFIG_SKIP_MODE_ENHANCEMENT for (int idx = 0; idx < max_drl_bits; ++idx) { aom_cdf_prob *drl_cdf = -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT mbmi->skip_mode ? ec_ctx->skip_drl_cdf[AOMMIN(idx, 2)] : av1_get_drl_cdf(ec_ctx, xd->weight[ref_frame_type], mode_ctx, idx); #else av1_get_drl_cdf(ec_ctx, xd->weight[ref_frame_type], mode_ctx, idx); -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX - int drl_idx = aom_read_symbol(r, drl_cdf, 2, ACCT_STR); +#endif // CONFIG_SKIP_MODE_ENHANCEMENT + int drl_idx = aom_read_symbol(r, drl_cdf, 2, ACCT_INFO("drl_idx")); mbmi->ref_mv_idx = idx + drl_idx; if (!drl_idx) break; } assert(mbmi->ref_mv_idx < max_drl_bits + 1); +#endif // CONFIG_SEP_COMP_DRL } #if CONFIG_WEDGE_MOD_EXT static int8_t read_wedge_mode(aom_reader *r, FRAME_CONTEXT *ec_ctx, const BLOCK_SIZE bsize) { - int wedge_angle_dir = - aom_read_symbol(r, ec_ctx->wedge_angle_dir_cdf[bsize], 2, ACCT_STR); + int wedge_angle_dir = aom_read_symbol(r, ec_ctx->wedge_angle_dir_cdf[bsize], + 2, ACCT_INFO("wedge_angle_dir")); int wedge_angle = WEDGE_ANGLES; if (wedge_angle_dir == 0) { - wedge_angle = aom_read_symbol(r, ec_ctx->wedge_angle_0_cdf[bsize], - 
H_WEDGE_ANGLES, ACCT_STR); + wedge_angle = + aom_read_symbol(r, ec_ctx->wedge_angle_0_cdf[bsize], H_WEDGE_ANGLES, + ACCT_INFO("wedge_angle", "wedge_angle_0_cdf")); } else { wedge_angle = - H_WEDGE_ANGLES + aom_read_symbol(r, ec_ctx->wedge_angle_1_cdf[bsize], - H_WEDGE_ANGLES, ACCT_STR); + H_WEDGE_ANGLES + + aom_read_symbol(r, ec_ctx->wedge_angle_1_cdf[bsize], H_WEDGE_ANGLES, + ACCT_INFO("wedge_angle", "wedge_angle_1_cdf")); } int wedge_dist = 0; if ((wedge_angle >= H_WEDGE_ANGLES) || (wedge_angle == WEDGE_90 || wedge_angle == WEDGE_180)) { - wedge_dist = aom_read_symbol(r, ec_ctx->wedge_dist_cdf2[bsize], - NUM_WEDGE_DIST - 1, ACCT_STR) + - 1; + wedge_dist = + aom_read_symbol(r, ec_ctx->wedge_dist_cdf2[bsize], NUM_WEDGE_DIST - 1, + ACCT_INFO("wedge_dist", "wedge_dist_cdf2")) + + 1; } else { assert(wedge_angle < H_WEDGE_ANGLES); - wedge_dist = aom_read_symbol(r, ec_ctx->wedge_dist_cdf[bsize], - NUM_WEDGE_DIST, ACCT_STR); + wedge_dist = + aom_read_symbol(r, ec_ctx->wedge_dist_cdf[bsize], NUM_WEDGE_DIST, + ACCT_INFO("wedge_dist", "wedge_dist_cdf")); } return wedge_angle_dist_2_index[wedge_angle][wedge_dist]; } @@ -356,11 +411,22 @@ int max_idx_bits = mbmi->max_num_warp_candidates - 1; for (int bit_idx = 0; bit_idx < max_idx_bits; ++bit_idx) { aom_cdf_prob *warp_ref_idx_cdf = av1_get_warp_ref_idx_cdf(ec_ctx, bit_idx); - int warp_idx = aom_read_symbol(r, warp_ref_idx_cdf, 2, ACCT_STR); + int warp_idx = + aom_read_symbol(r, warp_ref_idx_cdf, 2, ACCT_INFO("warp_idx")); mbmi->warp_ref_idx = bit_idx + warp_idx; if (!warp_idx) break; } } + +#if CONFIG_CWG_D067_IMPROVED_WARP +static void read_warpmv_with_mvd_flag(FRAME_CONTEXT *ec_ctx, MB_MODE_INFO *mbmi, + aom_reader *r) { + mbmi->warpmv_with_mvd_flag = aom_read_symbol( + r, ec_ctx->warpmv_with_mvd_flag_cdf[mbmi->sb_type[PLANE_TYPE_Y]], 2, + ACCT_INFO("warpmv_with_mvd_flag")); +} +#endif // CONFIG_CWG_D067_IMPROVED_WARP + #endif // CONFIG_WARP_REF_LIST // Read the delta for a single warp parameter // Each delta is coded as a 
symbol in the range @@ -372,7 +438,7 @@ int coded_value = aom_read_symbol(r, xd->tile_ctx->warp_delta_param_cdf[index_type], - WARP_DELTA_NUM_SYMBOLS, ACCT_STR); + WARP_DELTA_NUM_SYMBOLS, ACCT_INFO()); return (coded_value - WARP_DELTA_CODED_MAX) * WARP_DELTA_STEP; } @@ -414,7 +480,11 @@ // TODO(rachelbarker): Allow signaling warp type? #if CONFIG_WARP_REF_LIST - if (allow_warp_parameter_signaling(mbmi)) { + if (allow_warp_parameter_signaling( +#if CONFIG_CWG_D067_IMPROVED_WARP + cm, +#endif // CONFIG_CWG_D067_IMPROVED_WARP + mbmi)) { #endif // CONFIG_WARP_REF_LIST params->wmtype = ROTZOOM; params->wmmat[2] = base_params.wmmat[2] + read_warp_delta_param(xd, 2, r); @@ -455,8 +525,9 @@ #if CONFIG_WARPMV if (mbmi->mode == WARPMV) { if (allowed_motion_modes & (1 << WARPED_CAUSAL)) { - int use_warped_causal = aom_read_symbol( - r, xd->tile_ctx->warped_causal_warpmv_cdf[bsize], 2, ACCT_STR); + int use_warped_causal = + aom_read_symbol(r, xd->tile_ctx->warped_causal_warpmv_cdf[bsize], 2, + ACCT_INFO("use_warped_causal")); return use_warped_causal ? 
WARPED_CAUSAL : WARP_DELTA; } return WARP_DELTA; @@ -466,8 +537,9 @@ mbmi->use_wedge_interintra = 0; if (allowed_motion_modes & (1 << INTERINTRA)) { const int bsize_group = size_group_lookup[bsize]; - const int use_interintra = aom_read_symbol( - r, xd->tile_ctx->interintra_cdf[bsize_group], 2, ACCT_STR); + const int use_interintra = + aom_read_symbol(r, xd->tile_ctx->interintra_cdf[bsize_group], 2, + ACCT_INFO("use_interintra")); assert(mbmi->ref_frame[1] == NONE_FRAME); if (use_interintra) { const INTERINTRA_MODE interintra_mode = @@ -478,8 +550,9 @@ mbmi->angle_delta[PLANE_TYPE_UV] = 0; mbmi->filter_intra_mode_info.use_filter_intra = 0; if (av1_is_wedge_used(bsize)) { - mbmi->use_wedge_interintra = aom_read_symbol( - r, xd->tile_ctx->wedge_interintra_cdf[bsize], 2, ACCT_STR); + mbmi->use_wedge_interintra = + aom_read_symbol(r, xd->tile_ctx->wedge_interintra_cdf[bsize], 2, + ACCT_INFO("use_wedge_interintra")); if (mbmi->use_wedge_interintra) { #if CONFIG_WEDGE_MOD_EXT mbmi->interintra_wedge_index = @@ -487,7 +560,8 @@ assert(mbmi->interintra_wedge_index != -1); #else mbmi->interintra_wedge_index = (int8_t)aom_read_symbol( - r, xd->tile_ctx->wedge_idx_cdf[bsize], MAX_WEDGE_TYPES, ACCT_STR); + r, xd->tile_ctx->wedge_idx_cdf[bsize], MAX_WEDGE_TYPES, + ACCT_INFO("interintra_wedge_index")); #endif } } @@ -496,8 +570,8 @@ } if (allowed_motion_modes & (1 << OBMC_CAUSAL)) { - int use_obmc = - aom_read_symbol(r, xd->tile_ctx->obmc_cdf[bsize], 2, ACCT_STR); + int use_obmc = aom_read_symbol(r, xd->tile_ctx->obmc_cdf[bsize], 2, + ACCT_INFO("use_obmc")); if (use_obmc) { return OBMC_CAUSAL; } @@ -506,8 +580,9 @@ if (allowed_motion_modes & (1 << WARP_EXTEND)) { const int ctx1 = av1_get_warp_extend_ctx1(xd, mbmi); const int ctx2 = av1_get_warp_extend_ctx2(xd, mbmi); - int use_warp_extend = aom_read_symbol( - r, xd->tile_ctx->warp_extend_cdf[ctx1][ctx2], 2, ACCT_STR); + int use_warp_extend = + aom_read_symbol(r, xd->tile_ctx->warp_extend_cdf[ctx1][ctx2], 2, + 
ACCT_INFO("use_warp_extend")); if (use_warp_extend) { return WARP_EXTEND; } @@ -515,15 +590,16 @@ if (allowed_motion_modes & (1 << WARPED_CAUSAL)) { int use_warped_causal = - aom_read_symbol(r, xd->tile_ctx->warped_causal_cdf[bsize], 2, ACCT_STR); + aom_read_symbol(r, xd->tile_ctx->warped_causal_cdf[bsize], 2, + ACCT_INFO("use_warped_causal")); if (use_warped_causal) { return WARPED_CAUSAL; } } if (allowed_motion_modes & (1 << WARP_DELTA)) { - int use_warp_delta = - aom_read_symbol(r, xd->tile_ctx->warp_delta_cdf[bsize], 2, ACCT_STR); + int use_warp_delta = aom_read_symbol(r, xd->tile_ctx->warp_delta_cdf[bsize], + 2, ACCT_INFO("use_warp_delta")); if (use_warp_delta) { mbmi->motion_mode = WARP_DELTA; #if !CONFIG_WARPMV @@ -569,13 +645,14 @@ if (last_motion_mode_allowed == SIMPLE_TRANSLATION) return SIMPLE_TRANSLATION; if (last_motion_mode_allowed == OBMC_CAUSAL) { - motion_mode = aom_read_symbol( - r, xd->tile_ctx->obmc_cdf[mbmi->sb_type[PLANE_TYPE_Y]], 2, ACCT_STR); + motion_mode = + aom_read_symbol(r, xd->tile_ctx->obmc_cdf[mbmi->sb_type[PLANE_TYPE_Y]], + 2, ACCT_INFO("motion_mode", "obmc_cdf")); return (MOTION_MODE)(SIMPLE_TRANSLATION + motion_mode); } else { motion_mode = aom_read_symbol( r, xd->tile_ctx->motion_mode_cdf[mbmi->sb_type[PLANE_TYPE_Y]], - MOTION_MODES, ACCT_STR); + MOTION_MODES, ACCT_INFO("motion_mode", "motion_mode_cdf")); return (MOTION_MODE)(SIMPLE_TRANSLATION + motion_mode); } } @@ -593,17 +670,38 @@ : xd->tile_ctx->jmvd_scale_mode_cdf; const int jmvd_scale_cnt = is_joint_amvd_mode ? 
JOINT_AMVD_SCALE_FACTOR_CNT : JOINT_NEWMV_SCALE_FACTOR_CNT; - const int jmvd_scale_mode = - aom_read_symbol(r, jmvd_scale_mode_cdf, jmvd_scale_cnt, ACCT_STR); + const int jmvd_scale_mode = aom_read_symbol( + r, jmvd_scale_mode_cdf, jmvd_scale_cnt, ACCT_INFO("jmvd_scale_mode")); #else - const int jmvd_scale_mode = - aom_read_symbol(r, xd->tile_ctx->jmvd_scale_mode_cdf, - JOINT_NEWMV_SCALE_FACTOR_CNT, ACCT_STR); + const int jmvd_scale_mode = aom_read_symbol( + r, xd->tile_ctx->jmvd_scale_mode_cdf, JOINT_NEWMV_SCALE_FACTOR_CNT, + ACCT_INFO("jmvd_scale_mode")); #endif // CONFIG_ADAPTIVE_MVD return jmvd_scale_mode; } #endif // CONFIG_IMPROVED_JMVD && CONFIG_JOINT_MVD +#if CONFIG_CWP +// Read index for the weighting factor of compound weighted prediction +static int read_cwp_idx(MACROBLOCKD *xd, aom_reader *r, const AV1_COMMON *cm, + MB_MODE_INFO *const mbmi) { + int8_t cwp_idx = 0; + int bit_cnt = 0; + const int ctx = 0; + for (int idx = 0; idx < MAX_CWP_NUM - 1; ++idx) { + const int tmp_idx = aom_read_symbol( + r, xd->tile_ctx->cwp_idx_cdf[ctx][bit_cnt], 2, ACCT_INFO()); + cwp_idx = idx + tmp_idx; + if (!tmp_idx) break; + ++bit_cnt; + } + assert(cwp_idx <= CWP_MAX); + + // convert index to weight + return get_cwp_coding_idx(cwp_idx, 0, cm, mbmi); +} +#endif // CONFIG_CWP + static PREDICTION_MODE read_inter_compound_mode(MACROBLOCKD *xd, aom_reader *r, #if CONFIG_OPTFLOW_REFINEMENT const AV1_COMMON *cm, @@ -614,17 +712,19 @@ int use_optical_flow = 0; if (cm->features.opfl_refine_type == REFINE_SWITCHABLE && is_opfl_refine_allowed(cm, mbmi)) { - use_optical_flow = - aom_read_symbol(r, xd->tile_ctx->use_optflow_cdf[ctx], 2, ACCT_STR); + use_optical_flow = aom_read_symbol(r, xd->tile_ctx->use_optflow_cdf[ctx], 2, + ACCT_INFO("use_optical_flow")); } #endif // CONFIG_OPTFLOW_REFINEMENT const int mode = #if CONFIG_OPTFLOW_REFINEMENT aom_read_symbol(r, xd->tile_ctx->inter_compound_mode_cdf[ctx], - INTER_COMPOUND_REF_TYPES, ACCT_STR); + INTER_COMPOUND_REF_TYPES, + 
ACCT_INFO("inter_compound_mode_cdf")); #else aom_read_symbol(r, xd->tile_ctx->inter_compound_mode_cdf[ctx], - INTER_COMPOUND_MODES, ACCT_STR); + INTER_COMPOUND_MODES, + ACCT_INFO("inter_compound_mode_cdf")); #endif // CONFIG_OPTFLOW_REFINEMENT #if CONFIG_OPTFLOW_REFINEMENT if (use_optical_flow) { @@ -668,7 +768,8 @@ struct segmentation *const seg = &cm->seg; struct segmentation_probs *const segp = &ec_ctx->seg; aom_cdf_prob *pred_cdf = segp->spatial_pred_seg_cdf[cdf_num]; - const int coded_id = aom_read_symbol(r, pred_cdf, MAX_SEGMENTS, ACCT_STR); + const int coded_id = + aom_read_symbol(r, pred_cdf, MAX_SEGMENTS, ACCT_INFO("coded_id")); const int segment_id = av1_neg_deinterleave(coded_id, pred, seg->last_active_segid + 1); @@ -789,7 +890,8 @@ FRAME_CONTEXT *ec_ctx = xd->tile_ctx; struct segmentation_probs *const segp = &ec_ctx->seg; aom_cdf_prob *pred_cdf = segp->pred_cdf[ctx]; - mbmi->seg_id_predicted = aom_read_symbol(r, pred_cdf, 2, ACCT_STR); + mbmi->seg_id_predicted = + aom_read_symbol(r, pred_cdf, 2, ACCT_INFO("seg_id_predicted")); if (mbmi->seg_id_predicted) { segment_id = get_predicted_segment_id(cm, mi_offset, x_inside_boundary, y_inside_boundary); @@ -823,8 +925,8 @@ const int ctx = av1_get_skip_mode_context(xd); FRAME_CONTEXT *ec_ctx = xd->tile_ctx; - const int skip_mode = - aom_read_symbol(r, ec_ctx->skip_mode_cdfs[ctx], 2, ACCT_STR); + const int skip_mode = aom_read_symbol(r, ec_ctx->skip_mode_cdfs[ctx], 2, + ACCT_INFO("skip_mode")); return skip_mode; } @@ -835,13 +937,13 @@ } else { const int ctx = av1_get_skip_txfm_context(xd); FRAME_CONTEXT *ec_ctx = xd->tile_ctx; - const int skip_txfm = - aom_read_symbol(r, ec_ctx->skip_txfm_cdfs[ctx], 2, ACCT_STR); + const int skip_txfm = aom_read_symbol(r, ec_ctx->skip_txfm_cdfs[ctx], 2, + ACCT_INFO("skip_txfm")); return skip_txfm; } } -#if !CONFIG_INDEP_PALETTE_PARSING +#if !CONFIG_PALETTE_IMPROVEMENTS // Merge the sorted list of cached colors(cached_colors[0...n_cached_colors-1]) // and the sorted list of 
transmitted colors(colors[n_cached_colors...n-1]) into // one single sorted list(colors[...]). @@ -860,27 +962,29 @@ } } } -#endif //! CONFIG_INDEP_PALETTE_PARSING +#endif //! CONFIG_PALETTE_IMPROVEMENTS static void read_palette_colors_y(MACROBLOCKD *const xd, int bit_depth, PALETTE_MODE_INFO *const pmi, aom_reader *r) { -#if CONFIG_INDEP_PALETTE_PARSING +#if CONFIG_PALETTE_IMPROVEMENTS uint16_t color_cache[2 * PALETTE_MAX_SIZE]; const int n_cache = av1_get_palette_cache(xd, 0, color_cache); const int n = pmi->palette_size[0]; int idx = 0; for (int i = 0; i < n_cache && idx < n; ++i) { - if (aom_read_bit(r, ACCT_STR)) pmi->palette_colors[idx++] = color_cache[i]; + if (aom_read_bit(r, ACCT_INFO("color_cache"))) + pmi->palette_colors[idx++] = color_cache[i]; } if (idx < n) { - pmi->palette_colors[idx++] = aom_read_literal(r, bit_depth, ACCT_STR); + pmi->palette_colors[idx++] = + aom_read_literal(r, bit_depth, ACCT_INFO("palette_colors")); if (idx < n) { const int min_bits = bit_depth - 3; - int bits = min_bits + aom_read_literal(r, 2, ACCT_STR); + int bits = min_bits + aom_read_literal(r, 2, ACCT_INFO("bits")); int range = (1 << bit_depth) - pmi->palette_colors[idx - 1] - 1; for (; idx < n; ++idx) { assert(range >= 0); - const int delta = aom_read_literal(r, bits, ACCT_STR) + 1; + const int delta = aom_read_literal(r, bits, ACCT_INFO("delta")) + 1; pmi->palette_colors[idx] = clamp(pmi->palette_colors[idx - 1] + delta, 0, (1 << bit_depth) - 1); range -= (pmi->palette_colors[idx] - pmi->palette_colors[idx - 1]); @@ -905,17 +1009,19 @@ const int n = pmi->palette_size[0]; int idx = 0; for (int i = 0; i < n_cache && idx < n; ++i) - if (aom_read_bit(r, ACCT_STR)) cached_colors[idx++] = color_cache[i]; + if (aom_read_bit(r, ACCT_INFO("color_cache"))) + cached_colors[idx++] = color_cache[i]; if (idx < n) { const int n_cached_colors = idx; - pmi->palette_colors[idx++] = aom_read_literal(r, bit_depth, ACCT_STR); + pmi->palette_colors[idx++] = + aom_read_literal(r, bit_depth, 
ACCT_INFO("palette_colors")); if (idx < n) { const int min_bits = bit_depth - 3; - int bits = min_bits + aom_read_literal(r, 2, ACCT_STR); + int bits = min_bits + aom_read_literal(r, 2, ACCT_INFO("bits")); int range = (1 << bit_depth) - pmi->palette_colors[idx - 1] - 1; for (; idx < n; ++idx) { assert(range >= 0); - const int delta = aom_read_literal(r, bits, ACCT_STR) + 1; + const int delta = aom_read_literal(r, bits, ACCT_INFO("delta")) + 1; pmi->palette_colors[idx] = clamp(pmi->palette_colors[idx - 1] + delta, 0, (1 << bit_depth) - 1); range -= (pmi->palette_colors[idx] - pmi->palette_colors[idx - 1]); @@ -926,29 +1032,31 @@ } else { memcpy(pmi->palette_colors, cached_colors, n * sizeof(cached_colors[0])); } -#endif // CONFIG_INDEP_PALETTE_PARSING +#endif // CONFIG_PALETTE_IMPROVEMENTS } static void read_palette_colors_uv(MACROBLOCKD *const xd, int bit_depth, PALETTE_MODE_INFO *const pmi, aom_reader *r) { -#if CONFIG_INDEP_PALETTE_PARSING +#if CONFIG_PALETTE_IMPROVEMENTS const int n = pmi->palette_size[1]; // U channel colors. 
uint16_t color_cache[2 * PALETTE_MAX_SIZE]; const int n_cache = av1_get_palette_cache(xd, 1, color_cache); int idx = PALETTE_MAX_SIZE; for (int i = 0; i < n_cache && idx < PALETTE_MAX_SIZE + n; ++i) - if (aom_read_bit(r, ACCT_STR)) pmi->palette_colors[idx++] = color_cache[i]; + if (aom_read_bit(r, ACCT_INFO("color_cache"))) + pmi->palette_colors[idx++] = color_cache[i]; if (idx < PALETTE_MAX_SIZE + n) { - pmi->palette_colors[idx++] = aom_read_literal(r, bit_depth, ACCT_STR); + pmi->palette_colors[idx++] = + aom_read_literal(r, bit_depth, ACCT_INFO("palette_colors")); if (idx < PALETTE_MAX_SIZE + n) { const int min_bits = bit_depth - 3; - int bits = min_bits + aom_read_literal(r, 2, ACCT_STR); + int bits = min_bits + aom_read_literal(r, 2, ACCT_INFO("bits")); int range = (1 << bit_depth) - pmi->palette_colors[idx - 1]; for (; idx < PALETTE_MAX_SIZE + n; ++idx) { assert(range >= 0); - const int delta = aom_read_literal(r, bits, ACCT_STR); + const int delta = aom_read_literal(r, bits, ACCT_INFO("delta")); pmi->palette_colors[idx] = clamp(pmi->palette_colors[idx - 1] + delta, 0, (1 << bit_depth) - 1); range -= (pmi->palette_colors[idx] - pmi->palette_colors[idx - 1]); @@ -976,18 +1084,20 @@ const int n_cache = av1_get_palette_cache(xd, 1, color_cache); int idx = 0; for (int i = 0; i < n_cache && idx < n; ++i) - if (aom_read_bit(r, ACCT_STR)) cached_colors[idx++] = color_cache[i]; + if (aom_read_bit(r, ACCT_INFO("color_cache"))) + cached_colors[idx++] = color_cache[i]; if (idx < n) { const int n_cached_colors = idx; idx += PALETTE_MAX_SIZE; - pmi->palette_colors[idx++] = aom_read_literal(r, bit_depth, ACCT_STR); + pmi->palette_colors[idx++] = + aom_read_literal(r, bit_depth, ACCT_INFO("palette_colors")); if (idx < PALETTE_MAX_SIZE + n) { const int min_bits = bit_depth - 3; - int bits = min_bits + aom_read_literal(r, 2, ACCT_STR); + int bits = min_bits + aom_read_literal(r, 2, ACCT_INFO("bits")); int range = (1 << bit_depth) - pmi->palette_colors[idx - 1]; for (; idx < 
PALETTE_MAX_SIZE + n; ++idx) { assert(range >= 0); - const int delta = aom_read_literal(r, bits, ACCT_STR); + const int delta = aom_read_literal(r, bits, ACCT_INFO("delta")); pmi->palette_colors[idx] = clamp(pmi->palette_colors[idx - 1] + delta, 0, (1 << bit_depth) - 1); range -= (pmi->palette_colors[idx] - pmi->palette_colors[idx - 1]); @@ -1000,17 +1110,17 @@ memcpy(pmi->palette_colors + PALETTE_MAX_SIZE, cached_colors, n * sizeof(cached_colors[0])); } -#endif // CONFIG_INDEP_PALETTE_PARSING +#endif // CONFIG_PALETTE_IMPROVEMENTS // V channel colors. - if (aom_read_bit(r, ACCT_STR)) { // Delta encoding. + if (aom_read_bit(r, ACCT_INFO("use_delta"))) { // Delta encoding. const int min_bits_v = bit_depth - 4; const int max_val = 1 << bit_depth; - int bits = min_bits_v + aom_read_literal(r, 2, ACCT_STR); + int bits = min_bits_v + aom_read_literal(r, 2, ACCT_INFO("bits")); pmi->palette_colors[2 * PALETTE_MAX_SIZE] = - aom_read_literal(r, bit_depth, ACCT_STR); + aom_read_literal(r, bit_depth, ACCT_INFO("palette_colors")); for (int i = 1; i < n; ++i) { - int delta = aom_read_literal(r, bits, ACCT_STR); - if (delta && aom_read_bit(r, ACCT_STR)) delta = -delta; + int delta = aom_read_literal(r, bits, ACCT_INFO("delta")); + if (delta && aom_read_bit(r, ACCT_INFO("negate"))) delta = -delta; int val = (int)pmi->palette_colors[2 * PALETTE_MAX_SIZE + i - 1] + delta; if (val < 0) val += max_val; if (val >= max_val) val -= max_val; @@ -1019,7 +1129,7 @@ } else { for (int i = 0; i < n; ++i) { pmi->palette_colors[2 * PALETTE_MAX_SIZE + i] = - aom_read_literal(r, bit_depth, ACCT_STR); + aom_read_literal(r, bit_depth, ACCT_INFO("palette_colors")); } } } @@ -1036,11 +1146,11 @@ const int palette_mode_ctx = av1_get_palette_mode_ctx(xd); const int modev = aom_read_symbol( r, xd->tile_ctx->palette_y_mode_cdf[bsize_ctx][palette_mode_ctx], 2, - ACCT_STR); + ACCT_INFO("modev", "luma")); if (modev) { pmi->palette_size[0] = aom_read_symbol(r, xd->tile_ctx->palette_y_size_cdf[bsize_ctx], - 
PALETTE_SIZES, ACCT_STR) + + PALETTE_SIZES, ACCT_INFO("palette_size", "luma")) + 2; read_palette_colors_y(xd, cm->seq_params.bit_depth, pmi, r); } @@ -1049,11 +1159,12 @@ mbmi->uv_mode == UV_DC_PRED && xd->is_chroma_ref) { const int palette_uv_mode_ctx = (pmi->palette_size[0] > 0); const int modev = aom_read_symbol( - r, xd->tile_ctx->palette_uv_mode_cdf[palette_uv_mode_ctx], 2, ACCT_STR); + r, xd->tile_ctx->palette_uv_mode_cdf[palette_uv_mode_ctx], 2, + ACCT_INFO("modev", "chroma")); if (modev) { pmi->palette_size[1] = aom_read_symbol(r, xd->tile_ctx->palette_uv_size_cdf[bsize_ctx], - PALETTE_SIZES, ACCT_STR) + + PALETTE_SIZES, ACCT_INFO("palette_size", "chroma")) + 2; read_palette_colors_uv(xd, cm->seq_params.bit_depth, pmi, r); } @@ -1062,7 +1173,7 @@ #if !CONFIG_AIMC static int read_angle_delta(aom_reader *r, aom_cdf_prob *cdf) { - const int sym = aom_read_symbol(r, cdf, 2 * MAX_ANGLE_DELTA + 1, ACCT_STR); + const int sym = aom_read_symbol(r, cdf, 2 * MAX_ANGLE_DELTA + 1, ACCT_INFO()); return sym - MAX_ANGLE_DELTA; } #endif // !CONFIG_AIMC @@ -1075,10 +1186,11 @@ if (av1_filter_intra_allowed(cm, mbmi) && xd->tree_type != CHROMA_PART) { filter_intra_mode_info->use_filter_intra = aom_read_symbol( r, xd->tile_ctx->filter_intra_cdfs[mbmi->sb_type[PLANE_TYPE_Y]], 2, - ACCT_STR); + ACCT_INFO("use_filter_intra")); if (filter_intra_mode_info->use_filter_intra) { - filter_intra_mode_info->filter_intra_mode = aom_read_symbol( - r, xd->tile_ctx->filter_intra_mode_cdf, FILTER_INTRA_MODES, ACCT_STR); + filter_intra_mode_info->filter_intra_mode = + aom_read_symbol(r, xd->tile_ctx->filter_intra_mode_cdf, + FILTER_INTRA_MODES, ACCT_INFO("filter_intra_mode")); } } else { filter_intra_mode_info->use_filter_intra = 0; @@ -1086,12 +1198,22 @@ } void av1_read_tx_type(const AV1_COMMON *const cm, MACROBLOCKD *xd, int blk_row, - int blk_col, TX_SIZE tx_size, aom_reader *r) { + int blk_col, TX_SIZE tx_size, aom_reader *r +#if CONFIG_ATC_DCTX_ALIGNED + , + const int plane, const int 
eob, const int dc_skip) { + if (plane != PLANE_TYPE_Y) return; +#else +) { +#endif // CONFIG_ATC_DCTX_ALIGNED MB_MODE_INFO *mbmi = xd->mi[0]; TX_TYPE *tx_type = &xd->tx_type_map[blk_row * xd->tx_type_map_stride + blk_col]; *tx_type = DCT_DCT; +#if CONFIG_ATC_DCTX_ALIGNED + if (dc_skip == 1) return; +#endif // CONFIG_ATC_DCTX_ALIGNED // No need to read transform type if block is skipped. if (mbmi->skip_txfm[xd->tree_type == CHROMA_PART] || segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) @@ -1114,9 +1236,16 @@ const TX_SIZE square_tx_size = txsize_sqr_map[tx_size]; FRAME_CONTEXT *ec_ctx = xd->tile_ctx; if (inter_block) { +#if CONFIG_ATC_DCTX_ALIGNED + const int eob_tx_ctx = get_lp2tx_ctx(tx_size, get_txb_bwl(tx_size), eob); + *tx_type = av1_ext_tx_inv[tx_set_type][aom_read_symbol( + r, ec_ctx->inter_ext_tx_cdf[eset][eob_tx_ctx][square_tx_size], + av1_num_ext_tx_set[tx_set_type], ACCT_INFO("tx_type"))]; +#else *tx_type = av1_ext_tx_inv[tx_set_type][aom_read_symbol( r, ec_ctx->inter_ext_tx_cdf[eset][square_tx_size], - av1_num_ext_tx_set[tx_set_type], ACCT_STR)]; + av1_num_ext_tx_set[tx_set_type], ACCT_INFO("tx_type"))]; +#endif // CONFIG_ATC_DCTX_ALIGNED } else { if (mbmi->fsc_mode[xd->tree_type == CHROMA_PART]) { *tx_type = IDTX; @@ -1127,7 +1256,7 @@ ? fimode_to_intradir[mbmi->filter_intra_mode_info .filter_intra_mode] : mbmi->mode; -#if CONFIG_ATC_NEWTXSETS +#if CONFIG_ATC #if CONFIG_ATC_REDUCED_TXSET const int size_info = av1_size_class[tx_size]; *tx_type = av1_tx_idx_to_type( @@ -1138,21 +1267,21 @@ cm->features.reduced_tx_set_used ? 
av1_num_reduced_tx_set : av1_num_ext_tx_set_intra[tx_set_type], - ACCT_STR), + ACCT_INFO("tx_type")), tx_set_type, intra_mode, size_info); #else const int size_info = av1_size_class[tx_size]; *tx_type = av1_tx_idx_to_type( aom_read_symbol( r, ec_ctx->intra_ext_tx_cdf[eset][square_tx_size][intra_mode], - av1_num_ext_tx_set_intra[tx_set_type], ACCT_STR), + av1_num_ext_tx_set_intra[tx_set_type], ACCT_INFO("tx_type")), tx_set_type, intra_mode, size_info); #endif // CONFIG_ATC_REDUCED_TXSET #else *tx_type = av1_ext_tx_inv_intra[tx_set_type][aom_read_symbol( r, ec_ctx->intra_ext_tx_cdf[eset][square_tx_size][intra_mode], - av1_num_ext_tx_set_intra[tx_set_type], ACCT_STR)]; -#endif // CONFIG_ATC_NEWTXSETS + av1_num_ext_tx_set_intra[tx_set_type], ACCT_INFO("tx_type"))]; +#endif // CONFIG_ATC } } } @@ -1193,8 +1322,9 @@ get_above_and_left_cctx_type(cm, xd, tx_size, &above_cctx, &left_cctx); #endif // CONFIG_EXT_RECUR_PARTITIONS const int cctx_ctx = get_cctx_context(xd, &above_cctx, &left_cctx); - cctx_type = aom_read_symbol( - r, ec_ctx->cctx_type_cdf[square_tx_size][cctx_ctx], CCTX_TYPES, ACCT_STR); + cctx_type = + aom_read_symbol(r, ec_ctx->cctx_type_cdf[square_tx_size][cctx_ctx], + CCTX_TYPES, ACCT_INFO("cctx_type")); update_cctx_array(xd, blk_row, blk_col, row_offset, col_offset, tx_size, cctx_type); } @@ -1222,8 +1352,9 @@ const TX_SIZE square_tx_size = txsize_sqr_map[tx_size]; if (!inter_block) { if (block_signals_sec_tx_type(xd, tx_size, *tx_type, *eob)) { - const uint8_t stx_flag = aom_read_symbol( - r, ec_ctx->stx_cdf[square_tx_size], STX_TYPES, ACCT_STR); + const uint8_t stx_flag = + aom_read_symbol(r, ec_ctx->stx_cdf[square_tx_size], STX_TYPES, + ACCT_INFO("stx_flag")); *tx_type |= (stx_flag << 4); } } @@ -1232,7 +1363,7 @@ const TX_SIZE square_tx_size = txsize_sqr_map[tx_size]; if (block_signals_sec_tx_type(xd, tx_size, *tx_type, *eob)) { const uint8_t stx_flag = aom_read_symbol( - r, ec_ctx->stx_cdf[square_tx_size], STX_TYPES, ACCT_STR); + r, 
ec_ctx->stx_cdf[square_tx_size], STX_TYPES, ACCT_INFO("stx_flag")); *tx_type |= (stx_flag << 4); } } @@ -1258,12 +1389,12 @@ const int_mv *ref_mv, int mi_row, int mi_col, BLOCK_SIZE bsize, aom_reader *r) { FRAME_CONTEXT *ec_ctx = xd->tile_ctx; -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT const MB_MODE_INFO *const mbmi = xd->mi[0]; if (mbmi->intrabc_mode == 1) { mv->as_int = ref_mv->as_int; } else { -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT #if CONFIG_FLEX_MVRES read_mv(r, &mv->as_mv, ref_mv->as_mv, #if CONFIG_ADAPTIVE_MVD @@ -1278,9 +1409,9 @@ &ec_ctx->ndvc, MV_SUBPEL_NONE); #endif -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT } -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT // DV should not have sub-pel. assert((mv->as_mv.col & 7) == 0); assert((mv->as_mv.row & 7) == 0); @@ -1292,21 +1423,21 @@ return valid; } -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT static void read_intrabc_drl_idx(int max_ref_bv_cnt, FRAME_CONTEXT *ec_ctx, MB_MODE_INFO *mbmi, aom_reader *r) { mbmi->intrabc_drl_idx = 0; int bit_cnt = 0; for (int idx = 0; idx < max_ref_bv_cnt - 1; ++idx) { - const int intrabc_drl_idx = - aom_read_symbol(r, ec_ctx->intrabc_drl_idx_cdf[bit_cnt], 2, ACCT_STR); + const int intrabc_drl_idx = aom_read_symbol( + r, ec_ctx->intrabc_drl_idx_cdf[bit_cnt], 2, ACCT_INFO()); mbmi->intrabc_drl_idx = idx + intrabc_drl_idx; if (!intrabc_drl_idx) break; ++bit_cnt; } assert(mbmi->intrabc_drl_idx < max_ref_bv_cnt); } -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT static void read_intrabc_info(AV1_COMMON *const cm, DecoderCodingBlock *dcb, aom_reader *r) { @@ -1314,16 +1445,18 @@ MB_MODE_INFO *const mbmi = xd->mi[0]; FRAME_CONTEXT *ec_ctx = xd->tile_ctx; assert(xd->tree_type != CHROMA_PART); +#if !CONFIG_SKIP_TXFM_OPT #if CONFIG_NEW_CONTEXT_MODELING mbmi->use_intrabc[0] = 0; mbmi->use_intrabc[1] = 0; const int intrabc_ctx = get_intrabc_ctx(xd); 
mbmi->use_intrabc[xd->tree_type == CHROMA_PART] = - aom_read_symbol(r, ec_ctx->intrabc_cdf[intrabc_ctx], 2, ACCT_STR); + aom_read_symbol(r, ec_ctx->intrabc_cdf[intrabc_ctx], 2, ACCT_INFO()); #else mbmi->use_intrabc[xd->tree_type == CHROMA_PART] = - aom_read_symbol(r, ec_ctx->intrabc_cdf, 2, ACCT_STR); + aom_read_symbol(r, ec_ctx->intrabc_cdf, 2, ACCT_INFO()); #endif // CONFIG_NEW_CONTEXT_MODELING +#endif // !CONFIG_SKIP_TXFM_OPT if (xd->tree_type == CHROMA_PART) assert(mbmi->use_intrabc[PLANE_TYPE_UV] == 0); if (mbmi->use_intrabc[xd->tree_type == CHROMA_PART]) { @@ -1344,6 +1477,10 @@ set_most_probable_mv_precision(cm, mbmi, bsize); #endif +#if CONFIG_REFINEMV + mbmi->refinemv_flag = 0; +#endif // CONFIG_REFINEMV + #if CONFIG_BAWP mbmi->bawp_flag = 0; #endif @@ -1354,7 +1491,7 @@ // TODO(kslu): Rework av1_find_mv_refs to avoid having this big array // ref_mvs int_mv ref_mvs[INTRA_FRAME + 1][MAX_MV_REF_CANDIDATES]; -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT for (int i = 0; i < MAX_REF_BV_STACK_SIZE; ++i) { xd->ref_mv_stack[INTRA_FRAME][i].this_mv.as_int = 0; xd->ref_mv_stack[INTRA_FRAME][i].comp_mv.as_int = 0; @@ -1362,8 +1499,11 @@ xd->ref_mv_stack[INTRA_FRAME][i].row_offset = OFFSET_NONSPATIAL; xd->ref_mv_stack[INTRA_FRAME][i].col_offset = OFFSET_NONSPATIAL; #endif // CONFIG_EXTENDED_WARP_PREDICTION +#if CONFIG_CWP + xd->ref_mv_stack[INTRA_FRAME][i].cwp_idx = CWP_EQUAL; +#endif // CONFIG_CWP } -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT av1_find_mv_refs(cm, xd, mbmi, INTRA_FRAME, dcb->ref_mv_count, xd->ref_mv_stack, xd->weight, ref_mvs, /*global_mvs=*/NULL @@ -1378,9 +1518,9 @@ ); -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT mbmi->intrabc_mode = - aom_read_symbol(r, ec_ctx->intrabc_mode_cdf, 2, ACCT_STR); + aom_read_symbol(r, ec_ctx->intrabc_mode_cdf, 2, ACCT_INFO()); read_intrabc_drl_idx(MAX_REF_BV_STACK_SIZE, ec_ctx, mbmi, r); int_mv dv_ref = xd->ref_mv_stack[INTRA_FRAME][mbmi->intrabc_drl_idx].this_mv; @@ 
-1396,7 +1536,7 @@ av1_find_best_ref_mvs(0, ref_mvs[INTRA_FRAME], &nearestmv, &nearmv, 0); #endif int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv; -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT if (dv_ref.as_int == 0) av1_find_ref_dv(&dv_ref, &xd->tile, cm->mib_size, xd->mi_row); // Ref DV should not have sub-pel. @@ -1467,14 +1607,17 @@ uint8_t mode_idx = 0; const int context = get_y_mode_idx_ctx(xd); int mode_set_index = - aom_read_symbol(r, ec_ctx->y_mode_set_cdf, INTRA_MODE_SETS, ACCT_STR); + aom_read_symbol(r, ec_ctx->y_mode_set_cdf, INTRA_MODE_SETS, + ACCT_INFO("mode_set_index", "y_mode_set_cdf")); if (mode_set_index == 0) { - mode_idx = aom_read_symbol(r, ec_ctx->y_mode_idx_cdf_0[context], - FIRST_MODE_COUNT, ACCT_STR); + mode_idx = + aom_read_symbol(r, ec_ctx->y_mode_idx_cdf_0[context], FIRST_MODE_COUNT, + ACCT_INFO("mode_idx", "y_mode_idx_cdf_0")); } else { - mode_idx = FIRST_MODE_COUNT + (mode_set_index - 1) * SECOND_MODE_COUNT + - aom_read_symbol(r, ec_ctx->y_mode_idx_cdf_1[context], - SECOND_MODE_COUNT, ACCT_STR); + mode_idx = + FIRST_MODE_COUNT + (mode_set_index - 1) * SECOND_MODE_COUNT + + aom_read_symbol(r, ec_ctx->y_mode_idx_cdf_1[context], SECOND_MODE_COUNT, + ACCT_INFO("mode_idx", "y_mode_idx_cdf_1")); } assert(mode_idx < LUMA_MODE_COUNT); get_y_intra_mode_set(mbmi, xd); @@ -1493,7 +1636,7 @@ const int context = av1_is_directional_mode(mbmi->mode) ? 
1 : 0; const int uv_mode_idx = aom_read_symbol(r, ec_ctx->uv_mode_cdf[cfl_allowed][context], - UV_INTRA_MODES - !cfl_allowed, ACCT_STR); + UV_INTRA_MODES - !cfl_allowed, ACCT_INFO("uv_mode_idx")); assert(uv_mode_idx >= 0 && uv_mode_idx < UV_INTRA_MODES); get_uv_intra_mode_set(mbmi); mbmi->uv_mode = mbmi->uv_intra_mode_list[uv_mode_idx]; @@ -1520,8 +1663,30 @@ mbmi->skip_mode = 0; #endif // CONFIG_SKIP_MODE_ENHANCEMENT +#if CONFIG_SKIP_TXFM_OPT + if (av1_allow_intrabc(cm) && xd->tree_type != CHROMA_PART) { +#if CONFIG_NEW_CONTEXT_MODELING + mbmi->use_intrabc[0] = 0; + mbmi->use_intrabc[1] = 0; + const int intrabc_ctx = get_intrabc_ctx(xd); + mbmi->use_intrabc[xd->tree_type == CHROMA_PART] = + aom_read_symbol(r, ec_ctx->intrabc_cdf[intrabc_ctx], 2, + ACCT_INFO("use_intrabc", "chroma")); +#else + mbmi->use_intrabc[xd->tree_type == CHROMA_PART] = aom_read_symbol( + r, ec_ctx->intrabc_cdf, 2, ACCT_INFO("use_intrabc", "chroma")); +#endif // CONFIG_NEW_CONTEXT_MODELING + } + if (is_intrabc_block(mbmi, xd->tree_type)) { + mbmi->skip_txfm[xd->tree_type == CHROMA_PART] = + read_skip_txfm(cm, xd, mbmi->segment_id, r); + } else { + mbmi->skip_txfm[xd->tree_type == CHROMA_PART] = 0; + } +#else mbmi->skip_txfm[xd->tree_type == CHROMA_PART] = read_skip_txfm(cm, xd, mbmi->segment_id, r); +#endif // CONFIG_SKIP_TXFM_OPT if (!seg->segid_preskip) mbmi->segment_id = read_intra_segment_id( @@ -1590,7 +1755,11 @@ mbmi->mrl_index = (cm->seq_params.enable_mrls && av1_is_directional_mode(mbmi->mode)) +#if CONFIG_EXT_DIR + ? read_mrl_index(ec_ctx, r, xd->neighbors[0], xd->neighbors[1]) +#else ? 
read_mrl_index(ec_ctx, r) +#endif // CONFIG_EXT_DIR : 0; } @@ -1649,13 +1818,13 @@ static int read_mv_component_low_precision(aom_reader *r, nmv_component *mvcomp, MvSubpelPrecision precision) { int offset, mag; - const int sign = aom_read_symbol(r, mvcomp->sign_cdf, 2, ACCT_STR); + const int sign = aom_read_symbol(r, mvcomp->sign_cdf, 2, ACCT_INFO("sign")); const int num_mv_classes = MV_CLASSES - (precision <= MV_PRECISION_FOUR_PEL) - (precision <= MV_PRECISION_8_PEL); int mv_class = aom_read_symbol( r, mvcomp->classes_cdf[av1_get_mv_class_context(precision)], - num_mv_classes, ACCT_STR); + num_mv_classes, ACCT_INFO("mv_class")); if (precision <= MV_PRECISION_FOUR_PEL && mv_class >= MV_CLASS_1) mv_class += (precision == MV_PRECISION_FOUR_PEL ? 1 : 2); @@ -1673,7 +1842,8 @@ const int n = (mv_class == MV_CLASS_0) ? 1 : mv_class; offset = 0; for (int i = start_lsb; i < n; ++i) - offset |= aom_read_symbol(r, mvcomp->bits_cdf[i], 2, ACCT_STR) << i; + offset |= aom_read_symbol(r, mvcomp->bits_cdf[i], 2, ACCT_INFO("offset")) + << i; const int base = mv_class ? (1 << mv_class) : 0; mag = (offset + base); // int mv data } @@ -1705,19 +1875,21 @@ #endif int mag, d, fr, hp; - const int sign = aom_read_symbol(r, mvcomp->sign_cdf, 2, ACCT_STR); + const int sign = aom_read_symbol(r, mvcomp->sign_cdf, 2, ACCT_INFO("sign")); const int mv_class = #if CONFIG_ADAPTIVE_MVD is_adaptive_mvd - ? aom_read_symbol(r, mvcomp->amvd_classes_cdf, MV_CLASSES, ACCT_STR) + ? 
aom_read_symbol(r, mvcomp->amvd_classes_cdf, MV_CLASSES, + ACCT_INFO("mv_class", "amvd_classes_cdf")) : #endif // CONFIG_ADAPTIVE_MVD #if CONFIG_FLEX_MVRES aom_read_symbol( r, mvcomp->classes_cdf[av1_get_mv_class_context(precision)], - MV_CLASSES, ACCT_STR); + MV_CLASSES, ACCT_INFO("mv_class", "classes_cdf")); #else - aom_read_symbol(r, mvcomp->classes_cdf, MV_CLASSES, ACCT_STR); + aom_read_symbol(r, mvcomp->classes_cdf, MV_CLASSES, + ACCT_INFO("mv_class", "classes_cdf")); #endif const int class0 = mv_class == MV_CLASS_0; @@ -1729,13 +1901,15 @@ #endif // CONFIG_ADAPTIVE_MVD // Integer part if (class0) { - d = aom_read_symbol(r, mvcomp->class0_cdf, CLASS0_SIZE, ACCT_STR); + d = aom_read_symbol(r, mvcomp->class0_cdf, CLASS0_SIZE, + ACCT_INFO("class0_cdf")); mag = 0; } else { const int n = mv_class + CLASS0_BITS - 1; // number of bits d = 0; for (int i = 0; i < n; ++i) - d |= aom_read_symbol(r, mvcomp->bits_cdf[i], 2, ACCT_STR) << i; + d |= aom_read_symbol(r, mvcomp->bits_cdf[i], 2, ACCT_INFO("bits_cdf")) + << i; mag = CLASS0_SIZE << (mv_class + 2); } #if CONFIG_ADAPTIVE_MVD @@ -1771,17 +1945,19 @@ #if CONFIG_FLEX_MVRES fr = aom_read_symbol( r, class0 ? mvcomp->class0_fp_cdf[d][0] : mvcomp->fp_cdf[0], 2, - ACCT_STR) + ACCT_INFO("class0_fp_cdf")) << 1; - fr += precision > MV_PRECISION_HALF_PEL - ? aom_read_symbol(r, - class0 ? mvcomp->class0_fp_cdf[d][1 + (fr >> 1)] - : mvcomp->fp_cdf[1 + (fr >> 1)], - 2, ACCT_STR) - : 1; + fr += + precision > MV_PRECISION_HALF_PEL + ? aom_read_symbol(r, + class0 ? mvcomp->class0_fp_cdf[d][1 + (fr >> 1)] + : mvcomp->fp_cdf[1 + (fr >> 1)], + 2, ACCT_INFO(class0 ? "class0_fp_cdf" : "fp_cdf")) + : 1; #else fr = aom_read_symbol(r, class0 ? mvcomp->class0_fp_cdf[d] : mvcomp->fp_cdf, - MV_FP_SIZE, ACCT_STR); + MV_FP_SIZE, + ACCT_INFO(class0 ? "class0_fp_cdf" : "fp_cdf")); #endif // CONFIG_FLEX_MVRES #if CONFIG_FLEX_MVRES @@ -1790,9 +1966,9 @@ #else hp = usehp #endif - ? aom_read_symbol(r, - class0 ? 
mvcomp->class0_hp_cdf : mvcomp->hp_cdf, - 2, ACCT_STR) + ? aom_read_symbol( + r, class0 ? mvcomp->class0_hp_cdf : mvcomp->hp_cdf, 2, + ACCT_INFO(class0 ? "class0_hp_cdf" : "hp_cdf")) : 1; } else { fr = 3; @@ -1825,12 +2001,14 @@ #endif // IMPROVED_AMVD && CONFIG_JOINT_MVD const MV_JOINT_TYPE joint_type = #if CONFIG_ADAPTIVE_MVD - is_adaptive_mvd ? (MV_JOINT_TYPE)aom_read_symbol(r, ctx->amvd_joints_cdf, - MV_JOINTS, ACCT_STR) - : + is_adaptive_mvd + ? (MV_JOINT_TYPE)aom_read_symbol( + r, ctx->amvd_joints_cdf, MV_JOINTS, + ACCT_INFO("joint_type", "amvd_joints_cdf")) + : #endif // CONFIG_ADAPTIVE_MVD - (MV_JOINT_TYPE)aom_read_symbol(r, ctx->joints_cdf, - MV_JOINTS, ACCT_STR); + (MV_JOINT_TYPE)aom_read_symbol(r, ctx->joints_cdf, MV_JOINTS, + ACCT_INFO("joint_type", "joints_cdf")); if (mv_joint_vertical(joint_type)) diff.row = read_mv_component(r, &ctx->comps[0], #if CONFIG_ADAPTIVE_MVD @@ -1879,7 +2057,7 @@ if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT) { const int ctx = av1_get_reference_mode_context(cm, xd); const REFERENCE_MODE mode = (REFERENCE_MODE)aom_read_symbol( - r, xd->tile_ctx->comp_inter_cdf[ctx], 2, ACCT_STR); + r, xd->tile_ctx->comp_inter_cdf[ctx], 2, ACCT_INFO()); return mode; // SINGLE_REFERENCE or COMPOUND_REFERENCE } else { assert(cm->current_frame.reference_mode == SINGLE_REFERENCE); @@ -1893,7 +2071,7 @@ const int n_refs = ref_frames_info->num_total_refs; for (int i = 0; i < n_refs - 1; i++) { const int bit = aom_read_symbol( - r, av1_get_pred_cdf_single_ref(xd, i, n_refs), 2, ACCT_STR); + r, av1_get_pred_cdf_single_ref(xd, i, n_refs), 2, ACCT_INFO()); if (bit) { ref_frame[0] = i; return; @@ -1924,7 +2102,7 @@ : aom_read_symbol(r, av1_get_pred_cdf_compound_ref( xd, i, n_bits, bit_type, n_refs), - 2, ACCT_STR); + 2, ACCT_INFO()); if (bit) { ref_frame[n_bits++] = i; #if CONFIG_ALLOW_SAME_REF_COMPOUND @@ -1968,7 +2146,8 @@ is_tip_allowed_bsize(bsize)) { #endif // CONFIG_EXT_RECUR_PARTITIONS const int tip_ctx = get_tip_ctx(xd); - if 
(aom_read_symbol(r, xd->tile_ctx->tip_cdf[tip_ctx], 2, ACCT_STR)) { + if (aom_read_symbol(r, xd->tile_ctx->tip_cdf[tip_ctx], 2, + ACCT_INFO("tip_cdf"))) { ref_frame[0] = TIP_FRAME; } } @@ -2015,7 +2194,8 @@ } else { const int ctx = av1_get_pred_context_switchable_interp(xd, 0); const InterpFilter filter = (InterpFilter)aom_read_symbol( - r, ec_ctx->switchable_interp_cdf[ctx], SWITCHABLE_FILTERS, ACCT_STR); + r, ec_ctx->switchable_interp_cdf[ctx], SWITCHABLE_FILTERS, + ACCT_INFO("switchable_interp_cdf")); mbmi->interp_fltr = filter; } } @@ -2040,6 +2220,10 @@ mbmi->bawp_flag = 0; #endif +#if CONFIG_REFINEMV + mbmi->refinemv_flag = 0; +#endif // CONFIG_REFINEMV + FRAME_CONTEXT *ec_ctx = xd->tile_ctx; #if CONFIG_AIMC @@ -2074,7 +2258,11 @@ // Parsing reference line index mbmi->mrl_index = (cm->seq_params.enable_mrls && av1_is_directional_mode(mbmi->mode)) +#if CONFIG_EXT_DIR + ? read_mrl_index(ec_ctx, r, xd->neighbors[0], xd->neighbors[1]) +#else ? read_mrl_index(ec_ctx, r) +#endif // CONFIG_EXT_DIR : 0; if (!cm->seq_params.monochrome && xd->is_chroma_ref) { @@ -2153,6 +2341,9 @@ allow_hp = MV_SUBPEL_NONE; } #endif +#if CONFIG_CWG_D067_IMPROVED_WARP + (void)ref_warp_model; +#endif // CONFIG_CWG_D067_IMPROVED_WARP #if CONFIG_JOINT_MVD int first_ref_dist = 0; int sec_ref_dist = 0; @@ -2202,6 +2393,28 @@ } #if CONFIG_WARPMV case WARPMV: { +#if CONFIG_CWG_D067_IMPROVED_WARP + mbmi->mv[0] = ref_mv[0]; + if (mbmi->warpmv_with_mvd_flag) { + nmv_context *const nmvc = &ec_ctx->nmvc; + read_mv(r, &mv[0].as_mv, +#if CONFIG_FLEX_MVRES + ref_mv[0].as_mv, +#else + &ref_mv[0].as_mv, +#endif +#if CONFIG_ADAPTIVE_MVD + is_adaptive_mvd, +#endif // CONFIG_ADAPTIVE_MVD + nmvc, +#if CONFIG_FLEX_MVRES + precision); +#else + allow_hp); +#endif + } + +#else assert(ref_warp_model); mbmi->mv[0] = get_mv_from_wrl(xd, ref_warp_model, @@ -2211,6 +2424,8 @@ 1, 0, #endif bsize, xd->mi_col, xd->mi_row); +#endif // CONFIG_CWG_D067_IMPROVED_WARP + break; } #endif // CONFIG_WARPMV @@ -2437,10 +2652,10 
@@ static int read_is_inter_block(AV1_COMMON *const cm, MACROBLOCKD *const xd, int segment_id, aom_reader *r -#if CONFIG_CONTEXT_DERIVATION +#if CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT , const int skip_txfm -#endif // CONFIG_CONTEXT_DERIVATION +#endif // CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT ) { if (segfeature_active(&cm->seg, segment_id, SEG_LVL_GLOBALMV)) { return 1; @@ -2448,11 +2663,12 @@ const int ctx = av1_get_intra_inter_context(xd); FRAME_CONTEXT *ec_ctx = xd->tile_ctx; const int is_inter = -#if CONFIG_CONTEXT_DERIVATION - aom_read_symbol(r, ec_ctx->intra_inter_cdf[skip_txfm][ctx], 2, ACCT_STR); +#if CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT + aom_read_symbol(r, ec_ctx->intra_inter_cdf[skip_txfm][ctx], 2, + ACCT_INFO()); #else - aom_read_symbol(r, ec_ctx->intra_inter_cdf[ctx], 2, ACCT_STR); -#endif // CONFIG_CONTEXT_DERIVATION + aom_read_symbol(r, ec_ctx->intra_inter_cdf[ctx], 2, ACCT_INFO()); +#endif // CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT return is_inter; } @@ -2489,6 +2705,22 @@ } #endif // DEC_MISMATCH_DEBUG +#if CONFIG_REFINEMV +// This function read the refinemv_flag ( if require) from the bitstream +static void read_refinemv_flag(AV1_COMMON *const cm, MACROBLOCKD *xd, + aom_reader *r, BLOCK_SIZE bsize) { + MB_MODE_INFO *const mbmi = xd->mi[0]; + mbmi->refinemv_flag = get_default_refinemv_flag(cm, mbmi); + int signal_refinemv = switchable_refinemv_flag(cm, mbmi); + if (signal_refinemv) { + const int refinemv_ctx = av1_get_refinemv_context(cm, xd, bsize); + mbmi->refinemv_flag = + aom_read_symbol(r, xd->tile_ctx->refinemv_flag_cdf[refinemv_ctx], + REFINEMV_NUM_MODES, ACCT_INFO("refinemv_flag")); + } +} +#endif // CONFIG_REFINEMV + #if CONFIG_FLEX_MVRES MvSubpelPrecision av1_read_pb_mv_precision(AV1_COMMON *const cm, MACROBLOCKD *const xd, @@ -2505,8 +2737,9 @@ cm->features.most_probable_fr_mv_precision); const int mpp_flag_context = av1_get_mpp_flag_context(cm, xd); - const int mpp_flag = aom_read_symbol( 
- r, xd->tile_ctx->pb_mv_mpp_flag_cdf[mpp_flag_context], 2, ACCT_STR); + const int mpp_flag = + aom_read_symbol(r, xd->tile_ctx->pb_mv_mpp_flag_cdf[mpp_flag_context], 2, + ACCT_INFO("mpp_flag")); if (mpp_flag) return mbmi->most_probable_pb_mv_precision; const PRECISION_SET *precision_def = &av1_mv_precision_sets[mbmi->mb_precision_set]; @@ -2515,7 +2748,7 @@ r, xd->tile_ctx->pb_mv_precision_cdf[down_ctx] [max_precision - MV_PRECISION_HALF_PEL], - nsymbs, ACCT_STR); + nsymbs, ACCT_INFO("down")); return av1_get_precision_from_index(mbmi, down); } #endif // CONFIG_FLEX_MVRES @@ -2561,6 +2794,10 @@ mbmi->bawp_flag = 0; #endif +#if CONFIG_REFINEMV + mbmi->refinemv_flag = 0; +#endif // CONFIG_REFINEMV + av1_collect_neighbors_ref_counts(xd); read_ref_frames(cm, xd, r, mbmi->segment_id, mbmi->ref_frame); @@ -2573,6 +2810,7 @@ xd->valid_num_warp_candidates); #endif // CONFIG_WARP_REF_LIST +#if !CONFIG_SEP_COMP_DRL av1_find_mv_refs( cm, xd, mbmi, ref_frame, dcb->ref_mv_count, xd->ref_mv_stack, xd->weight, ref_mvs, /*global_mvs=*/NULL @@ -2586,32 +2824,42 @@ ref_frame < INTER_REFS_PER_FRAME ? 
MAX_WARP_REF_CANDIDATES : 0, xd->valid_num_warp_candidates #endif // CONFIG_WARP_REF_LIST - ); +#endif // !CONFIG_SEP_COMP_DRL #if CONFIG_C076_INTER_MOD_CTX av1_find_mode_ctx(cm, xd, inter_mode_ctx, ref_frame); #endif // CONFIG_C076_INTER_MOD_CTX +#if CONFIG_SEP_COMP_DRL + mbmi->ref_mv_idx[0] = 0; + mbmi->ref_mv_idx[1] = 0; +#else mbmi->ref_mv_idx = 0; +#endif // CONFIG_SEP_COMP_DRL + +#if CONFIG_CWP + mbmi->cwp_idx = CWP_EQUAL; +#endif // CONFIG_CWP +#if CONFIG_IMPROVED_JMVD && CONFIG_JOINT_MVD + mbmi->jmvd_scale_mode = 0; +#endif // CONFIG_IMPROVED_JMVD && CONFIG_JOINT_MVD + #if CONFIG_WARP_REF_LIST mbmi->warp_ref_idx = 0; mbmi->max_num_warp_candidates = 0; #endif // CONFIG_WARP_REF_LIST #if CONFIG_WARPMV +#if CONFIG_CWG_D067_IMPROVED_WARP + mbmi->warpmv_with_mvd_flag = 0; +#endif // CONFIG_CWG_D067_IMPROVED_WARP mbmi->motion_mode = SIMPLE_TRANSLATION; WARP_CANDIDATE warp_param_stack[MAX_WARP_REF_CANDIDATES]; - WarpedMotionParams ref_warp_model; + WarpedMotionParams ref_warp_model = default_warp_params; #endif // CONFIG_WARPMV if (mbmi->skip_mode) { assert(is_compound); -#if CONFIG_SKIP_MODE_ENHANCEMENT && CONFIG_OPTFLOW_REFINEMENT - mbmi->mode = - (cm->features.opfl_refine_type ? NEAR_NEARMV_OPTFLOW : NEAR_NEARMV); -#else - mbmi->mode = NEAR_NEARMV; -#endif // CONFIG_SKIP_MODE_ENHANCEMENT && CONFIG_OPTFLOW_REFINEMENT #if CONFIG_SKIP_MODE_ENHANCEMENT read_drl_idx(cm->features.max_drl_bits, @@ -2619,12 +2867,52 @@ ec_ctx, dcb, mbmi, r); #endif // CONFIG_SKIP_MODE_ENHANCEMENT -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SEP_COMP_DRL + av1_find_mv_refs( + cm, xd, mbmi, ref_frame, dcb->ref_mv_count, xd->ref_mv_stack, + xd->weight, ref_mvs, /*global_mvs=*/NULL +#if !CONFIG_C076_INTER_MOD_CTX + , + inter_mode_ctx +#endif // !CONFIG_C076_INTER_MOD_CTX +#if CONFIG_WARP_REF_LIST + , + xd->warp_param_stack, + ref_frame < SINGLE_REF_FRAMES ? 
MAX_WARP_REF_CANDIDATES : 0, + xd->valid_num_warp_candidates +#endif // CONFIG_WARP_REF_LIST + ); +#endif // CONFIG_SEP_COMP_DRL + +#if CONFIG_SKIP_MODE_ENHANCEMENT +#if CONFIG_SEP_COMP_DRL + mbmi->ref_frame[0] = + xd->skip_mvp_candidate_list.ref_frame0[get_ref_mv_idx(mbmi, 0)]; + mbmi->ref_frame[1] = + xd->skip_mvp_candidate_list.ref_frame1[get_ref_mv_idx(mbmi, 1)]; +#else mbmi->ref_frame[0] = xd->skip_mvp_candidate_list.ref_frame0[mbmi->ref_mv_idx]; mbmi->ref_frame[1] = xd->skip_mvp_candidate_list.ref_frame1[mbmi->ref_mv_idx]; -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif +#endif // CONFIG_SKIP_MODE_ENHANCEMENT + +#if CONFIG_REFINEMV && !CONFIG_CWP + mbmi->refinemv_flag = get_default_refinemv_flag(cm, mbmi); +#endif // CONFIG_REFINEMV + +#if CONFIG_SKIP_MODE_ENHANCEMENT && CONFIG_OPTFLOW_REFINEMENT + mbmi->mode = (cm->features.opfl_refine_type +#if CONFIG_CWP + && !cm->features.enable_cwp +#endif // CONFIG_CWP + ? NEAR_NEARMV_OPTFLOW + : NEAR_NEARMV); +#else + mbmi->mode = NEAR_NEARMV; +#endif // CONFIG_SKIP_MODE_ENHANCEMENT && CONFIG_OPTFLOW_REFINEMENT + } else { if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) || segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_GLOBALMV)) { @@ -2646,12 +2934,29 @@ #endif // CONFIG_WARPMV ); +#if CONFIG_SEP_COMP_DRL + av1_find_mv_refs( + cm, xd, mbmi, ref_frame, dcb->ref_mv_count, xd->ref_mv_stack, + xd->weight, ref_mvs, /*global_mvs=*/NULL +#if !CONFIG_C076_INTER_MOD_CTX + , + inter_mode_ctx +#endif // !CONFIG_C076_INTER_MOD_CTX +#if CONFIG_WARP_REF_LIST + , + xd->warp_param_stack, + ref_frame < SINGLE_REF_FRAMES ? 
MAX_WARP_REF_CANDIDATES : 0, + xd->valid_num_warp_candidates +#endif // CONFIG_WARP_REF_LIST + ); +#endif // CONFIG_SEP_COMP_DRL + #if CONFIG_WARPMV #if CONFIG_BAWP if (cm->features.enable_bawp && av1_allow_bawp(mbmi, xd->mi_row, xd->mi_col)) { - mbmi->bawp_flag = - aom_read_symbol(r, xd->tile_ctx->bawp_cdf, 2, ACCT_STR); + mbmi->bawp_flag = aom_read_symbol(r, xd->tile_ctx->bawp_cdf, 2, + ACCT_INFO("bawp_flag")); } #endif @@ -2671,12 +2976,15 @@ av1_count_overlappable_neighbors(cm, xd); mbmi->motion_mode = read_motion_mode(cm, xd, mbmi, r); int is_warpmv_warp_causal = - (mbmi->motion_mode == WARPED_CAUSAL && mbmi->mode == WARPMV); + ((mbmi->motion_mode == WARPED_CAUSAL) && mbmi->mode == WARPMV); if (mbmi->motion_mode == WARP_DELTA || is_warpmv_warp_causal) { - mbmi->max_num_warp_candidates = - (mbmi->mode == GLOBALMV || mbmi->mode == NEARMV) - ? 1 - : MAX_WARP_REF_CANDIDATES; + mbmi->max_num_warp_candidates = (mbmi->mode == GLOBALMV || +#if CONFIG_CWG_D067_IMPROVED_WARP + mbmi->mode == AMVDNEWMV || +#endif // CONFIG_CWG_D067_IMPROVED_WARP + mbmi->mode == NEARMV) + ? 1 + : MAX_WARP_REF_CANDIDATES; if (is_warpmv_warp_causal) { mbmi->max_num_warp_candidates = MAX_WARP_REF_CANDIDATES; } @@ -2691,6 +2999,14 @@ } #endif // CONFIG_WARPMV +#if CONFIG_CWG_D067_IMPROVED_WARP + if (allow_warpmv_with_mvd_coding(cm, mbmi)) { + read_warpmv_with_mvd_flag(xd->tile_ctx, mbmi, r); + } else { + mbmi->warpmv_with_mvd_flag = 0; + } +#endif // CONFIG_CWG_D067_IMPROVED_WARP + #if CONFIG_IMPROVED_JMVD && CONFIG_JOINT_MVD mbmi->jmvd_scale_mode = read_jmvd_scale_mode(xd, r, mbmi); #endif // CONFIG_IMPROVED_JMVD && CONFIG_JOINT_MVD @@ -2729,23 +3045,68 @@ mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]); } +#if CONFIG_CWG_D067_IMPROVED_WARP + if (mbmi->mode == WARPMV) { + ref_mv[0] = get_mv_from_wrl(xd, &ref_warp_model, + !mbmi->warpmv_with_mvd_flag + ? 
MV_PRECISION_ONE_EIGHTH_PEL + : mbmi->pb_mv_precision, + bsize, xd->mi_col, xd->mi_row); + + } else { +#endif // CONFIG_CWG_D067_IMPROVED_WARP +#if CONFIG_SEP_COMP_DRL + if (has_second_drl(mbmi)) + ref_mv[0] = + xd->ref_mv_stack[mbmi->ref_frame[0]][get_ref_mv_idx(mbmi, 0)].this_mv; + else + ref_mv[0] = xd->ref_mv_stack[ref_frame][get_ref_mv_idx(mbmi, 0)].this_mv; +#else ref_mv[0] = xd->ref_mv_stack[ref_frame][mbmi->ref_mv_idx].this_mv; +#endif // CONFIG_SEP_COMP_DRL +#if CONFIG_CWG_D067_IMPROVED_WARP + } +#endif // CONFIG_CWG_D067_IMPROVED_WARP if (is_compound && mbmi->mode != GLOBAL_GLOBALMV) { +#if CONFIG_SEP_COMP_DRL + if (has_second_drl(mbmi)) + ref_mv[1] = + xd->ref_mv_stack[mbmi->ref_frame[1]][get_ref_mv_idx(mbmi, 1)].this_mv; + else + ref_mv[1] = xd->ref_mv_stack[ref_frame][get_ref_mv_idx(mbmi, 1)].comp_mv; +#else ref_mv[1] = xd->ref_mv_stack[ref_frame][mbmi->ref_mv_idx].comp_mv; -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif +#if CONFIG_SKIP_MODE_ENHANCEMENT if (mbmi->skip_mode) { +#if CONFIG_SEP_COMP_DRL + ref_mv[0] = + xd->skip_mvp_candidate_list.ref_mv_stack[get_ref_mv_idx(mbmi, 0)] + .this_mv; + ref_mv[1] = + xd->skip_mvp_candidate_list.ref_mv_stack[get_ref_mv_idx(mbmi, 1)] + .comp_mv; +#else ref_mv[0] = xd->skip_mvp_candidate_list.ref_mv_stack[mbmi->ref_mv_idx].this_mv; ref_mv[1] = xd->skip_mvp_candidate_list.ref_mv_stack[mbmi->ref_mv_idx].comp_mv; +#endif // CONFIG_SEP_COMP_DRL } -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT } if (mbmi->skip_mode) { #if CONFIG_SKIP_MODE_ENHANCEMENT && CONFIG_OPTFLOW_REFINEMENT +#if CONFIG_CWP + assert(mbmi->mode == + (cm->features.opfl_refine_type && !cm->features.enable_cwp + ? NEAR_NEARMV_OPTFLOW + : NEAR_NEARMV)); +#else // CONFIG_CWP assert(mbmi->mode == (cm->features.opfl_refine_type ? 
NEAR_NEARMV_OPTFLOW : NEAR_NEARMV)); +#endif // CONFIG_CWP #else assert(mbmi->mode == NEAR_NEARMV); #endif // CONFIG_SKIP_MODE_ENHANCEMENT && CONFIG_OPTFLOW_REFINEMENT @@ -2773,7 +3134,8 @@ #if CONFIG_BAWP && !CONFIG_WARPMV if (cm->features.enable_bawp && av1_allow_bawp(mbmi, xd->mi_row, xd->mi_col)) - mbmi->bawp_flag = aom_read_symbol(r, xd->tile_ctx->bawp_cdf, 2, ACCT_STR); + mbmi->bawp_flag = + aom_read_symbol(r, xd->tile_ctx->bawp_cdf, 2, ACCT_INFO("bawp_flag")); #endif #if CONFIG_EXTENDED_WARP_PREDICTION @@ -2812,8 +3174,8 @@ if (cm->seq_params.enable_interintra_compound && !mbmi->skip_mode && is_interintra_allowed(mbmi)) { const int bsize_group = size_group_lookup[bsize]; - const int interintra = - aom_read_symbol(r, ec_ctx->interintra_cdf[bsize_group], 2, ACCT_STR); + const int interintra = aom_read_symbol( + r, ec_ctx->interintra_cdf[bsize_group], 2, ACCT_INFO("interintra")); assert(mbmi->ref_frame[1] == NONE_FRAME); if (interintra) { const INTERINTRA_MODE interintra_mode = @@ -2824,15 +3186,17 @@ mbmi->angle_delta[PLANE_TYPE_UV] = 0; mbmi->filter_intra_mode_info.use_filter_intra = 0; if (av1_is_wedge_used(bsize)) { - mbmi->use_wedge_interintra = aom_read_symbol( - r, ec_ctx->wedge_interintra_cdf[bsize], 2, ACCT_STR); + mbmi->use_wedge_interintra = + aom_read_symbol(r, ec_ctx->wedge_interintra_cdf[bsize], 2, + ACCT_INFO("use_wedge_interintra")); if (mbmi->use_wedge_interintra) { #if CONFIG_WEDGE_MOD_EXT mbmi->interintra_wedge_index = read_wedge_mode(r, ec_ctx, bsize); assert(mbmi->interintra_wedge_index != -1); #else mbmi->interintra_wedge_index = (int8_t)aom_read_symbol( - r, ec_ctx->wedge_idx_cdf[bsize], MAX_WEDGE_TYPES, ACCT_STR); + r, ec_ctx->wedge_idx_cdf[bsize], MAX_WEDGE_TYPES, + ACCT_INFO("interintra_wedge_index")); #endif } } @@ -2859,6 +3223,12 @@ mbmi->motion_mode = read_motion_mode(cm, xd, mbmi, r); #endif // CONFIG_EXTENDED_WARP_PREDICTION +#if CONFIG_REFINEMV + if (!mbmi->skip_mode) { + read_refinemv_flag(cm, xd, r, bsize); + } +#endif // 
CONFIG_REFINEMV + // init mbmi->comp_group_idx = 0; mbmi->interinter_comp.type = COMPOUND_AVERAGE; @@ -2867,6 +3237,9 @@ #if CONFIG_OPTFLOW_REFINEMENT mbmi->mode < NEAR_NEARMV_OPTFLOW && #endif // CONFIG_OPTFLOW_REFINEMENT +#if CONFIG_REFINEMV + (!mbmi->refinemv_flag || !switchable_refinemv_flag(cm, mbmi)) && +#endif // CONFIG_REFINEMV #if IMPROVED_AMVD && CONFIG_JOINT_MVD !is_joint_amvd_coding_mode(mbmi->mode) && #endif // IMPROVED_AMVD && CONFIG_JOINT_MVD @@ -2878,7 +3251,8 @@ if (masked_compound_used) { const int ctx_comp_group_idx = get_comp_group_idx_context(cm, xd); mbmi->comp_group_idx = (uint8_t)aom_read_symbol( - r, ec_ctx->comp_group_idx_cdf[ctx_comp_group_idx], 2, ACCT_STR); + r, ec_ctx->comp_group_idx_cdf[ctx_comp_group_idx], 2, + ACCT_INFO("comp_group_idx")); } if (mbmi->comp_group_idx == 0) { @@ -2892,9 +3266,9 @@ // compound_diffwtd, wedge if (is_interinter_compound_used(COMPOUND_WEDGE, bsize)) { mbmi->interinter_comp.type = - COMPOUND_WEDGE + aom_read_symbol(r, - ec_ctx->compound_type_cdf[bsize], - MASKED_COMPOUND_TYPES, ACCT_STR); + COMPOUND_WEDGE + + aom_read_symbol(r, ec_ctx->compound_type_cdf[bsize], + MASKED_COMPOUND_TYPES, ACCT_INFO("comp_type")); } else { mbmi->interinter_comp.type = COMPOUND_DIFFWTD; } @@ -2905,17 +3279,41 @@ mbmi->interinter_comp.wedge_index = read_wedge_mode(r, ec_ctx, bsize); assert(mbmi->interinter_comp.wedge_index != -1); #else - mbmi->interinter_comp.wedge_index = (int8_t)aom_read_symbol( - r, ec_ctx->wedge_idx_cdf[bsize], MAX_WEDGE_TYPES, ACCT_STR); + mbmi->interinter_comp.wedge_index = + (int8_t)aom_read_symbol(r, ec_ctx->wedge_idx_cdf[bsize], + MAX_WEDGE_TYPES, ACCT_INFO("wedge_index")); #endif // CONFIG_WEDGE_MOD_EXT - mbmi->interinter_comp.wedge_sign = (int8_t)aom_read_bit(r, ACCT_STR); + mbmi->interinter_comp.wedge_sign = + (int8_t)aom_read_bit(r, ACCT_INFO("wedge_sign")); } else { assert(mbmi->interinter_comp.type == COMPOUND_DIFFWTD); mbmi->interinter_comp.mask_type = - aom_read_literal(r, 
MAX_DIFFWTD_MASK_BITS, ACCT_STR); + aom_read_literal(r, MAX_DIFFWTD_MASK_BITS, ACCT_INFO("mask_type")); } } } +#if CONFIG_CWP && CONFIG_SKIP_MODE_ENHANCEMENT + mbmi->cwp_idx = CWP_EQUAL; + if (cm->features.enable_cwp) { + if (is_cwp_allowed(mbmi) && !mbmi->skip_mode) + mbmi->cwp_idx = read_cwp_idx(xd, r, cm, mbmi); + if (is_cwp_allowed(mbmi) && mbmi->skip_mode) + mbmi->cwp_idx = +#if CONFIG_SEP_COMP_DRL + xd->skip_mvp_candidate_list.ref_mv_stack[mbmi->ref_mv_idx[0]].cwp_idx; +#else + xd->skip_mvp_candidate_list.ref_mv_stack[mbmi->ref_mv_idx].cwp_idx; +#endif // CONFIG_SEP_COMP_DRL + } +#if CONFIG_REFINEMV + if (mbmi->skip_mode) { + mbmi->refinemv_flag = + (mbmi->cwp_idx == CWP_EQUAL && is_refinemv_allowed_skip_mode(cm, mbmi)) + ? 1 + : 0; + } +#endif // CONFIG_REFINEMV +#endif // CONFIG_CWP && CONFIG_SKIP_MODE_ENHANCEMENT read_mb_interp_filter(xd, features->interp_filter, cm, mbmi, r); @@ -2946,7 +3344,12 @@ } if (mbmi->motion_mode == WARP_EXTEND) { +#if CONFIG_SEP_COMP_DRL + CANDIDATE_MV *neighbor = + &xd->ref_mv_stack[ref_frame][get_ref_mv_idx(mbmi, 0)]; +#else CANDIDATE_MV *neighbor = &xd->ref_mv_stack[ref_frame][mbmi->ref_mv_idx]; +#endif POSITION base_pos = { 0, 0 }; if (!get_extend_base_pos(cm, xd, mbmi, neighbor->row_offset, neighbor->col_offset, &base_pos)) { @@ -3006,14 +3409,14 @@ if (xd->tree_type != LUMA_PART) xd->cfl.store_y = store_cfl_required(cm, xd); -#if CONFIG_REF_MV_BANK && !CONFIG_BVP_IMPROVEMENT +#if CONFIG_REF_MV_BANK && !CONFIG_IBC_BV_IMPROVEMENT #if CONFIG_IBC_SR_EXT if (cm->seq_params.enable_refmvbank && !is_intrabc_block(mbmi, xd->tree_type)) #else if (cm->seq_params.enable_refmvbank) #endif // CONFIG_IBC_SR_EXT av1_update_ref_mv_bank(cm, xd, mbmi); -#endif // CONFIG_REF_MV_BANK && !CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_REF_MV_BANK && !CONFIG_IBC_BV_IMPROVEMENT #if DEC_MISMATCH_DEBUG dec_dump_logs(cm, mi, mi_row, mi_col, mode_ctx); @@ -3045,10 +3448,70 @@ mbmi->bawp_flag = 0; #endif +#if CONFIG_REFINEMV + mbmi->refinemv_flag = 0; 
+#endif // CONFIG_REFINEMV + mbmi->segment_id = read_inter_segment_id(cm, xd, 1, r); mbmi->skip_mode = read_skip_mode(cm, xd, mbmi->segment_id, r); + mbmi->fsc_mode[PLANE_TYPE_Y] = 0; + mbmi->fsc_mode[PLANE_TYPE_UV] = 0; + +#if CONFIG_CWP + mbmi->cwp_idx = CWP_EQUAL; +#endif // CONFIG_CWP + +#if CONFIG_WARP_REF_LIST + mbmi->warp_ref_idx = 0; + mbmi->max_num_warp_candidates = 0; +#if CONFIG_CWG_D067_IMPROVED_WARP + mbmi->warpmv_with_mvd_flag = 0; +#endif // CONFIG_CWG_D067_IMPROVED_WARP +#endif // CONFIG_WARP_REF_LIST +#if CONFIG_NEW_CONTEXT_MODELING + mbmi->use_intrabc[0] = 0; + mbmi->use_intrabc[1] = 0; +#endif // CONFIG_NEW_CONTEXT_MODELING + +#if CONFIG_SKIP_TXFM_OPT + if (!mbmi->skip_mode) { + inter_block = read_is_inter_block(cm, xd, mbmi->segment_id, r); + } + +#if CONFIG_IBC_SR_EXT + if (!inter_block && av1_allow_intrabc(cm) && xd->tree_type != CHROMA_PART) { +#if CONFIG_NEW_CONTEXT_MODELING + mbmi->use_intrabc[0] = 0; + mbmi->use_intrabc[1] = 0; + const int intrabc_ctx = get_intrabc_ctx(xd); + mbmi->use_intrabc[xd->tree_type == CHROMA_PART] = + aom_read_symbol(r, xd->tile_ctx->intrabc_cdf[intrabc_ctx], 2, + ACCT_INFO("use_intrabc", "chroma")); +#else + mbmi->use_intrabc[xd->tree_type == CHROMA_PART] = aom_read_symbol( + r, ec_ctx->intrabc_cdf, 2, ACCT_INFO("use_intrabc", "chroma")); +#endif // CONFIG_NEW_CONTEXT_MODELING + } +#endif // CONFIG_IBC_SR_EXT + + if (inter_block +#if CONFIG_IBC_SR_EXT + || (!inter_block && is_intrabc_block(mbmi, xd->tree_type)) +#endif // CONFIG_IBC_SR_EXT + ) { +#if !CONFIG_SKIP_MODE_ENHANCEMENT + if (mbmi->skip_mode) + mbmi->skip_txfm[xd->tree_type == CHROMA_PART] = 1; + else +#endif // !CONFIG_SKIP_MODE_ENHANCEMENT + mbmi->skip_txfm[xd->tree_type == CHROMA_PART] = + read_skip_txfm(cm, xd, mbmi->segment_id, r); + } else { + mbmi->skip_txfm[xd->tree_type == CHROMA_PART] = 0; + } +#else #if !CONFIG_SKIP_MODE_ENHANCEMENT if (mbmi->skip_mode) mbmi->skip_txfm[xd->tree_type == CHROMA_PART] = 1; @@ -3056,17 +3519,8 @@ #endif // 
!CONFIG_SKIP_MODE_ENHANCEMENT mbmi->skip_txfm[xd->tree_type == CHROMA_PART] = read_skip_txfm(cm, xd, mbmi->segment_id, r); +#endif // CONFIG_SKIP_TXFM_OPT - mbmi->fsc_mode[PLANE_TYPE_Y] = 0; - mbmi->fsc_mode[PLANE_TYPE_UV] = 0; -#if CONFIG_WARP_REF_LIST - mbmi->warp_ref_idx = 0; - mbmi->max_num_warp_candidates = 0; -#endif // CONFIG_WARP_REF_LIST -#if CONFIG_NEW_CONTEXT_MODELING - mbmi->use_intrabc[0] = 0; - mbmi->use_intrabc[1] = 0; -#endif // CONFIG_NEW_CONTEXT_MODELING if (!cm->seg.segid_preskip) mbmi->segment_id = read_inter_segment_id(cm, xd, 0, r); @@ -3078,6 +3532,7 @@ read_delta_q_params(cm, xd, r); +#if !CONFIG_SKIP_TXFM_OPT if (!mbmi->skip_mode) inter_block = read_is_inter_block(cm, xd, mbmi->segment_id, r @@ -3086,6 +3541,7 @@ mbmi->skip_txfm[xd->tree_type == CHROMA_PART] #endif // CONFIG_CONTEXT_DERIVATION ); +#endif // !CONFIG_SKIP_TXFM_OPT mbmi->current_qindex = xd->current_base_qindex; @@ -3100,6 +3556,17 @@ mbmi->ref_frame[1] = NONE_FRAME; mbmi->palette_mode_info.palette_size[0] = 0; mbmi->palette_mode_info.palette_size[1] = 0; +#if CONFIG_NEW_CONTEXT_MODELING + mbmi->use_intrabc[0] = 0; + mbmi->use_intrabc[1] = 0; + const int intrabc_ctx = get_intrabc_ctx(xd); + mbmi->use_intrabc[xd->tree_type == CHROMA_PART] = + aom_read_symbol(r, xd->tile_ctx->intrabc_cdf[intrabc_ctx], 2, + ACCT_INFO("use_intrabc", "chroma")); +#else + mbmi->use_intrabc[xd->tree_type == CHROMA_PART] = aom_read_symbol( + r, xd->tile_ctx->intrabc_cdf, 2, ACCT_INFO("use_intrabc", "chroma")); +#endif // CONFIG_NEW_CONTEXT_MODELING read_intrabc_info(cm, dcb, r); if (is_intrabc_block(mbmi, xd->tree_type)) return; } @@ -3160,31 +3627,33 @@ MACROBLOCKD *const xd = &dcb->xd; MB_MODE_INFO *const mi = xd->mi[0]; mi->use_intrabc[xd->tree_type == CHROMA_PART] = 0; - +#if CONFIG_CWG_D067_IMPROVED_WARP + mi->warpmv_with_mvd_flag = 0; +#endif // CONFIG_CWG_D067_IMPROVED_WARP if (xd->tree_type == SHARED_PART) mi->sb_type[PLANE_TYPE_UV] = mi->sb_type[PLANE_TYPE_Y]; if (frame_is_intra_only(cm)) { 
read_intra_frame_mode_info(cm, dcb, r); -#if CONFIG_BVP_IMPROVEMENT && CONFIG_REF_MV_BANK +#if CONFIG_IBC_BV_IMPROVEMENT && CONFIG_REF_MV_BANK if (cm->seq_params.enable_refmvbank) { MB_MODE_INFO *const mbmi = xd->mi[0]; if (is_intrabc_block(mbmi, xd->tree_type)) av1_update_ref_mv_bank(cm, xd, mbmi); } -#endif // CONFIG_BVP_IMPROVEMENT && CONFIG_REF_MV_BANK +#endif // CONFIG_IBC_BV_IMPROVEMENT && CONFIG_REF_MV_BANK if (cm->seq_params.order_hint_info.enable_ref_frame_mvs) intra_copy_frame_mvs(cm, xd->mi_row, xd->mi_col, x_inside_boundary, y_inside_boundary); } else { read_inter_frame_mode_info(pbi, dcb, r); -#if CONFIG_BVP_IMPROVEMENT && CONFIG_REF_MV_BANK +#if CONFIG_IBC_BV_IMPROVEMENT && CONFIG_REF_MV_BANK if (cm->seq_params.enable_refmvbank) { MB_MODE_INFO *const mbmi = xd->mi[0]; if (is_inter_block(mbmi, xd->tree_type)) av1_update_ref_mv_bank(cm, xd, mbmi); } -#endif // CONFIG_BVP_IMPROVEMENT && CONFIG_REF_MV_BANK +#endif // CONFIG_IBC_BV_IMPROVEMENT && CONFIG_REF_MV_BANK #if CONFIG_WARP_REF_LIST MB_MODE_INFO *const mbmi_tmp = xd->mi[0];
diff --git a/av1/decoder/decodemv.h b/av1/decoder/decodemv.h index ac2e813..208f5c5 100644 --- a/av1/decoder/decodemv.h +++ b/av1/decoder/decodemv.h
@@ -34,7 +34,12 @@ uint16_t *eob, aom_reader *r); void av1_read_tx_type(const AV1_COMMON *const cm, MACROBLOCKD *xd, int blk_row, - int blk_col, TX_SIZE tx_size, aom_reader *r); + int blk_col, TX_SIZE tx_size, aom_reader *r +#if CONFIG_ATC_DCTX_ALIGNED + , + const int plane, const int eob, const int dc_skip +#endif // CONFIG_ATC_DCTX_ALIGNED +); #if CONFIG_CROSS_CHROMA_TX void av1_read_cctx_type(const AV1_COMMON *const cm, MACROBLOCKD *xd,
diff --git a/av1/decoder/decoder.c b/av1/decoder/decoder.c index 56b05f5..4bb23aa 100644 --- a/av1/decoder/decoder.c +++ b/av1/decoder/decoder.c
@@ -486,8 +486,37 @@ update_subgop_stats(cm, &pbi->subgop_stats, cm->cur_frame->order_hint, pbi->enable_subgop_stats); } - +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + if (cm->seq_params.order_hint_info.enable_order_hint && + cm->seq_params.enable_frame_output_order && cm->show_frame && + !cm->show_existing_frame) { + // Refresh the reference slots of output frames in the output queue. + if (pbi->num_output_frames > 0) { + decrease_ref_count(pbi->output_frames[0], pool); + } + // Add the currently decoded frame into the output queue. + pbi->output_frames[0] = cm->cur_frame; + pbi->num_output_frames = 1; + // Add the next frames (showable_frame == 1) into the output queue. + int successive_output = 1; + for (int k = 1; k <= REF_FRAMES && successive_output > 0; k++) { + unsigned int next_disp_order = cm->cur_frame->display_order_hint + k; + successive_output = 0; + for (int i = 0; i < REF_FRAMES; i++) { + if (cm->ref_frame_map[i]->display_order_hint == next_disp_order && + cm->ref_frame_map[i]->showable_frame == 1) { + pbi->output_frames[k] = cm->ref_frame_map[i]; + pbi->num_output_frames++; + successive_output++; + } + } + } + } else if ((!cm->seq_params.order_hint_info.enable_order_hint || + !cm->seq_params.enable_frame_output_order) && + (cm->show_existing_frame || cm->show_frame)) { +#else // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT if (cm->show_existing_frame || cm->show_frame) { +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT if (pbi->output_all_layers) { // Append this frame to the output queue if (pbi->num_output_frames >= MAX_NUM_SPATIAL_LAYERS) { @@ -645,7 +674,15 @@ // TODO(rachelbarker): What should this do? 
int av1_get_frame_to_show(AV1Decoder *pbi, YV12_BUFFER_CONFIG *frame) { if (pbi->num_output_frames == 0) return -1; - - *frame = pbi->output_frames[pbi->num_output_frames - 1]->buf; +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + const size_t out_frame_idx = + (pbi->common.seq_params.order_hint_info.enable_order_hint && + pbi->common.seq_params.enable_frame_output_order) + ? 0 + : pbi->num_output_frames - 1; +#else // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + const size_t out_frame_idx = pbi->num_output_frames - 1; +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + *frame = pbi->output_frames[out_frame_idx]->buf; return 0; }
diff --git a/av1/decoder/decoder.h b/av1/decoder/decoder.h index b4f734b..11dc3f7 100644 --- a/av1/decoder/decoder.h +++ b/av1/decoder/decoder.h
@@ -62,6 +62,13 @@ * with appropriate offset for the current superblock, for each plane. */ tran_low_t *dqcoeff_block[MAX_MB_PLANE]; +#if CONFIG_INSPECTION + // dqcoeff_block gets clobbered before the inspect callback happens, so keep a + // copy here. + tran_low_t *dqcoeff_block_copy[MAX_MB_PLANE]; + tran_low_t *qcoeff_block[MAX_MB_PLANE]; + tran_low_t *dequant_values[MAX_MB_PLANE]; +#endif /*! * cb_offset[p] is the offset into the dqcoeff_block[p] for the current coding * block, for each plane 'p'. @@ -76,6 +83,13 @@ * with appropriate offset for the current superblock, for each plane. */ eob_info *eob_data[MAX_MB_PLANE]; +#if CONFIG_ATC_DCTX_ALIGNED + /*! + * Pointer to 'bob_data' inside 'td->cb_buffer_base' or 'pbi->cb_buffer_base' + * with appropriate offset for the current superblock, for each plane. + */ + eob_info *bob_data[MAX_MB_PLANE]; +#endif // CONFIG_ATC_DCTX_ALIGNED /*! * txb_offset[p] is the offset into the eob_data[p] for the current coding * block, for each plane 'p'. @@ -273,7 +287,11 @@ // Note: The saved buffers are released at the start of the next time the // application calls aom_codec_decode(). int output_all_layers; +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + RefCntBuffer *output_frames[REF_FRAMES]; // Use only for single layer +#else // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT RefCntBuffer *output_frames[MAX_NUM_SPATIAL_LAYERS]; +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT size_t num_output_frames; // How many frames are queued up so far? // In order to properly support random-access decoding, we need @@ -296,7 +314,10 @@ int sequence_header_ready; int sequence_header_changed; #if CONFIG_INSPECTION + // Inspection callback at the end of each frame. aom_inspect_cb inspect_cb; + // Inspection callback at the end of each superblock. 
+ aom_inspect_cb inspect_sb_cb; void *inspect_ctx; #endif int operating_point; @@ -408,16 +429,15 @@ } } -#define ACCT_STR __func__ static INLINE int av1_read_uniform(aom_reader *r, int n) { const int l = get_unsigned_bits(n); const int m = (1 << l) - n; - const int v = aom_read_literal(r, l - 1, ACCT_STR); + const int v = aom_read_literal(r, l - 1, ACCT_INFO("v")); assert(l != 0); if (v < m) return v; else - return (v << 1) - m + aom_read_literal(r, 1, ACCT_STR); + return (v << 1) - m + aom_read_literal(r, 1, ACCT_INFO()); } typedef void (*palette_visitor_fn_t)(MACROBLOCKD *const xd, int plane,
diff --git a/av1/decoder/decodetxb.c b/av1/decoder/decodetxb.c index 91046a3..cb43098 100644 --- a/av1/decoder/decodetxb.c +++ b/av1/decoder/decodetxb.c
@@ -20,24 +20,22 @@ #include "av1/common/reconintra.h" #include "av1/decoder/decodemv.h" -#define ACCT_STR __func__ - static int read_golomb(MACROBLOCKD *xd, aom_reader *r) { int x = 1; int length = 0; #if CONFIG_BYPASS_IMPROVEMENT - length = aom_read_unary(r, 21, ACCT_STR); + length = aom_read_unary(r, 21, ACCT_INFO("length")); if (length > 20) { aom_internal_error(xd->error_info, AOM_CODEC_CORRUPT_FRAME, "Invalid length in read_golomb"); } x = 1 << length; - x += aom_read_literal(r, length, ACCT_STR); + x += aom_read_literal(r, length, ACCT_INFO()); #else int i = 0; while (!i) { - i = aom_read_bit(r, ACCT_STR); + i = aom_read_bit(r, ACCT_INFO()); ++length; if (length > 20) { aom_internal_error(xd->error_info, AOM_CODEC_CORRUPT_FRAME, @@ -47,7 +45,7 @@ } for (i = 0; i < length - 1; ++i) { x <<= 1; - x += aom_read_bit(r, ACCT_STR); + x += aom_read_bit(r, ACCT_INFO()); } #endif // CONFIG_BYPASS_IMPROVEMENT @@ -73,42 +71,45 @@ static INLINE void read_coeffs_reverse_2d( aom_reader *r, -#if !CONFIG_ATC_COEFCODING +#if !CONFIG_ATC TX_SIZE tx_size, -#endif // !CONFIG_ATC_COEFCODING +#endif // !CONFIG_ATC int start_si, int end_si, const int16_t *scan, int bwl, uint8_t *levels, -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC base_lf_cdf_arr base_lf_cdf, br_cdf_arr br_lf_cdf, int plane, -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC base_cdf_arr base_cdf, br_cdf_arr br_cdf) { for (int c = end_si; c >= start_si; --c) { const int pos = scan[c]; -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC int level = 0; const int row = pos >> bwl; const int col = pos - (row << bwl); int limits = get_lf_limits(row, col, 0, plane); if (limits) { const int coeff_ctx = get_lower_levels_ctx_lf_2d(levels, pos, bwl); - level += - aom_read_symbol(r, base_lf_cdf[coeff_ctx], LF_BASE_SYMBOLS, ACCT_STR); + level += aom_read_symbol(r, base_lf_cdf[coeff_ctx], LF_BASE_SYMBOLS, + ACCT_INFO("level", "base_lf_cdf")); if (level > LF_NUM_BASE_LEVELS) { const int br_ctx = get_br_lf_ctx_2d(levels, pos, bwl); 
aom_cdf_prob *cdf = br_lf_cdf[br_ctx]; for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) { - const int k = aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_STR); + const int k = + aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_INFO("k", "br_lf_cdf")); level += k; if (k < BR_CDF_SIZE - 1) break; } } } else { const int coeff_ctx = get_lower_levels_ctx_2d(levels, pos, bwl); - level += aom_read_symbol(r, base_cdf[coeff_ctx], 4, ACCT_STR); + level += aom_read_symbol(r, base_cdf[coeff_ctx], 4, + ACCT_INFO("level", "base_cdf")); if (level > NUM_BASE_LEVELS) { const int br_ctx = get_br_ctx_2d(levels, pos, bwl); aom_cdf_prob *cdf = br_cdf[br_ctx]; for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) { - const int k = aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_STR); + const int k = + aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_INFO("k", "br_cdf")); level += k; if (k < BR_CDF_SIZE - 1) break; } @@ -117,62 +118,67 @@ #else const int coeff_ctx = get_lower_levels_ctx_2d(levels, pos, bwl, tx_size); const int nsymbs = 4; - int level = aom_read_symbol(r, base_cdf[coeff_ctx], nsymbs, ACCT_STR); + int level = + aom_read_symbol(r, base_cdf[coeff_ctx], nsymbs, ACCT_INFO("level")); if (level > NUM_BASE_LEVELS) { const int br_ctx = get_br_ctx_2d(levels, pos, bwl); aom_cdf_prob *cdf = br_cdf[br_ctx]; for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) { - const int k = aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_STR); + const int k = + aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_INFO("k", "br_cdf")); level += k; if (k < BR_CDF_SIZE - 1) break; } } -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC levels[get_padded_idx(pos, bwl)] = level; } } static INLINE void read_coeffs_reverse(aom_reader *r, -#if !CONFIG_ATC_COEFCODING +#if !CONFIG_ATC TX_SIZE tx_size, -#endif // !CONFIG_ATC_COEFCODING +#endif // !CONFIG_ATC TX_CLASS tx_class, int start_si, int end_si, const int16_t *scan, int bwl, uint8_t *levels, -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC base_lf_cdf_arr 
base_lf_cdf, br_cdf_arr br_lf_cdf, int plane, -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC base_cdf_arr base_cdf, br_cdf_arr br_cdf) { for (int c = end_si; c >= start_si; --c) { const int pos = scan[c]; -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC int level = 0; const int row = pos >> bwl; const int col = pos - (row << bwl); int limits = get_lf_limits(row, col, tx_class, plane); if (limits) { const int coeff_ctx = get_lower_levels_lf_ctx(levels, pos, bwl, tx_class); - level += - aom_read_symbol(r, base_lf_cdf[coeff_ctx], LF_BASE_SYMBOLS, ACCT_STR); + level += aom_read_symbol(r, base_lf_cdf[coeff_ctx], LF_BASE_SYMBOLS, + ACCT_INFO("level", "base_lf_cdf")); if (level > LF_NUM_BASE_LEVELS) { const int br_ctx = get_br_lf_ctx(levels, pos, bwl, tx_class); aom_cdf_prob *cdf = br_lf_cdf[br_ctx]; for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) { - const int k = aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_STR); + const int k = + aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_INFO("k", "br_lf_cdf")); level += k; if (k < BR_CDF_SIZE - 1) break; } } } else { const int coeff_ctx = get_lower_levels_ctx(levels, pos, bwl, tx_class); - level += aom_read_symbol(r, base_cdf[coeff_ctx], 4, ACCT_STR); + level += aom_read_symbol(r, base_cdf[coeff_ctx], 4, + ACCT_INFO("level", "base_cdf")); if (level > NUM_BASE_LEVELS) { const int br_ctx = get_br_ctx(levels, pos, bwl, tx_class); aom_cdf_prob *cdf = br_cdf[br_ctx]; for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) { - const int k = aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_STR); + const int k = + aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_INFO("k", "br_cdf")); level += k; if (k < BR_CDF_SIZE - 1) break; } @@ -182,17 +188,19 @@ const int coeff_ctx = get_lower_levels_ctx(levels, pos, bwl, tx_size, tx_class); const int nsymbs = 4; - int level = aom_read_symbol(r, base_cdf[coeff_ctx], nsymbs, ACCT_STR); + int level = aom_read_symbol(r, base_cdf[coeff_ctx], nsymbs, + ACCT_INFO("level", "base_cdf")); if (level > 
NUM_BASE_LEVELS) { const int br_ctx = get_br_ctx(levels, pos, bwl, tx_class); aom_cdf_prob *cdf = br_cdf[br_ctx]; for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) { - const int k = aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_STR); + const int k = + aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_INFO("br_cdf", "k")); level += k; if (k < BR_CDF_SIZE - 1) break; } } -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC levels[get_padded_idx(pos, bwl)] = level; } } @@ -206,12 +214,14 @@ const int pos = scan[c]; const int coeff_ctx = get_upper_levels_ctx_2d(levels, pos, bwl); const int nsymbs = 4; - int level = aom_read_symbol(r, base_cdf[coeff_ctx], nsymbs, ACCT_STR); + int level = aom_read_symbol(r, base_cdf[coeff_ctx], nsymbs, + ACCT_INFO("level", "base_cdf")); if (level > NUM_BASE_LEVELS) { const int br_ctx = get_br_ctx_skip(levels, pos, bwl); aom_cdf_prob *cdf = br_cdf[br_ctx]; for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) { - const int k = aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_STR); + const int k = + aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_INFO("k", "br_cdf")); level += k; if (k < BR_CDF_SIZE - 1) break; } @@ -220,6 +230,98 @@ } } +#if CONFIG_ATC_DCTX_ALIGNED +// Decode the end-of-block syntax. 
+static INLINE void decode_eob(DecoderCodingBlock *dcb, aom_reader *const r, + const int plane, const TX_SIZE tx_size) { + MACROBLOCKD *const xd = &dcb->xd; + const PLANE_TYPE plane_type = get_plane_type(plane); + FRAME_CONTEXT *const ec_ctx = xd->tile_ctx; + const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size); + eob_info *eob_data = dcb->eob_data[plane] + dcb->txb_offset[plane]; + uint16_t *const eob = &(eob_data->eob); + eob_info *bob_data = dcb->bob_data[plane] + dcb->txb_offset[plane]; + uint16_t *const bob = &(bob_data->eob); + + int eob_extra = 0; + int eob_pt = 1; + const int eob_multi_size = txsize_log2_minus4[tx_size]; + switch (eob_multi_size) { + case 0: + eob_pt = aom_read_symbol(r, ec_ctx->eob_flag_cdf16[plane_type], + EOB_MAX_SYMS - 6, + ACCT_INFO("eob_pt", "eob_multi_size:0")) + + 1; + break; + case 1: + eob_pt = aom_read_symbol(r, ec_ctx->eob_flag_cdf32[plane_type], + EOB_MAX_SYMS - 5, + ACCT_INFO("eob_pt", "eob_multi_size:1")) + + 1; + break; + case 2: + eob_pt = aom_read_symbol(r, ec_ctx->eob_flag_cdf64[plane_type], + EOB_MAX_SYMS - 4, + ACCT_INFO("eob_pt", "eob_multi_size:2")) + + 1; + break; + case 3: + eob_pt = aom_read_symbol(r, ec_ctx->eob_flag_cdf128[plane_type], + EOB_MAX_SYMS - 3, + ACCT_INFO("eob_pt", "eob_multi_size:3")) + + 1; + break; + case 4: + eob_pt = aom_read_symbol(r, ec_ctx->eob_flag_cdf256[plane_type], + EOB_MAX_SYMS - 2, + ACCT_INFO("eob_pt", "eob_multi_size:4")) + + 1; + break; + case 5: + eob_pt = aom_read_symbol(r, ec_ctx->eob_flag_cdf512[plane_type], + EOB_MAX_SYMS - 1, + ACCT_INFO("eob_pt", "eob_multi_size:5")) + + 1; + break; + case 6: + default: + eob_pt = + aom_read_symbol(r, ec_ctx->eob_flag_cdf1024[plane_type], EOB_MAX_SYMS, + ACCT_INFO("eob_pt", "eob_multi_size:6")) + + 1; + break; + } + const int eob_offset_bits = av1_eob_offset_bits[eob_pt]; + if (eob_offset_bits > 0) { + const int eob_ctx = eob_pt - 3; + int bit = + aom_read_symbol(r, ec_ctx->eob_extra_cdf[txs_ctx][plane_type][eob_ctx], + 2, 
ACCT_INFO("eob_extra_cdf")); + if (bit) { + eob_extra += (1 << (eob_offset_bits - 1)); + } +#if CONFIG_BYPASS_IMPROVEMENT + eob_extra += + aom_read_literal(r, eob_offset_bits - 1, ACCT_INFO("eob_extra")); +#else + for (int i = 1; i < eob_offset_bits; i++) { + bit = aom_read_bit(r, ACCT_INFO("eob_offset_bits")); + if (bit) { + eob_extra += (1 << (eob_offset_bits - 1 - i)); + } + } +#endif // CONFIG_BYPASS_IMPROVEMENT + } + *eob = rec_eob_pos(eob_pt, eob_extra); + *bob = *eob; // escape character +#if CONFIG_CONTEXT_DERIVATION + if (plane == AOM_PLANE_U) { + xd->eob_u = *eob; + } +#endif // CONFIG_CONTEXT_DERIVATION +} +#endif // CONFIG_ATC_DCTX_ALIGNED + uint8_t av1_read_sig_txtype(const AV1_COMMON *const cm, DecoderCodingBlock *dcb, aom_reader *const r, const int blk_row, const int blk_col, const int plane, @@ -229,6 +331,10 @@ FRAME_CONTEXT *const ec_ctx = xd->tile_ctx; const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size); +#if CONFIG_ATC_DCTX_ALIGNED + const int is_inter = is_inter_block(xd->mi[0], xd->tree_type); +#endif // CONFIG_ATC_DCTX_ALIGNED + eob_info *eob_data = dcb->eob_data[plane] + dcb->txb_offset[plane]; uint16_t *const eob = &(eob_data->eob); uint16_t *const max_scan_line = &(eob_data->max_scan_line); @@ -250,15 +356,16 @@ int all_zero; if (plane == AOM_PLANE_Y || plane == AOM_PLANE_U) { all_zero = aom_read_symbol(r, ec_ctx->txb_skip_cdf[txs_ctx][txb_skip_ctx], - 2, ACCT_STR); + 2, ACCT_INFO("all_zero", "plane_y_or_u")); } else { txb_skip_ctx += (xd->eob_u_flag ? 
V_TXB_SKIP_CONTEXT_OFFSET : 0); - all_zero = - aom_read_symbol(r, ec_ctx->v_txb_skip_cdf[txb_skip_ctx], 2, ACCT_STR); + all_zero = aom_read_symbol(r, ec_ctx->v_txb_skip_cdf[txb_skip_ctx], 2, + ACCT_INFO("all_zero", "plane_v")); } #else - const int all_zero = aom_read_symbol( - r, ec_ctx->txb_skip_cdf[txs_ctx][txb_ctx->txb_skip_ctx], 2, ACCT_STR); + const int all_zero = + aom_read_symbol(r, ec_ctx->txb_skip_cdf[txs_ctx][txb_ctx->txb_skip_ctx], + 2, ACCT_INFO("all_zero")); #endif // CONFIG_CONTEXT_DERIVATION #if CONFIG_INSPECTION @@ -276,7 +383,7 @@ } #endif // CONFIG_CONTEXT_DERIVATION -#if CONFIG_CROSS_CHROMA_TX +#if CONFIG_CROSS_CHROMA_TX && !CONFIG_ATC_DCTX_ALIGNED if (plane == AOM_PLANE_U && is_cctx_allowed(cm, xd)) { if (!all_zero) { av1_read_cctx_type(cm, xd, blk_row, blk_col, tx_size, r); @@ -291,7 +398,7 @@ CCTX_NONE); } } -#endif // CONFIG_CROSS_CHROMA_TX +#endif // CONFIG_CROSS_CHROMA_TX && !CONFIG_ATC_DCTX_ALIGNED if (all_zero) { *max_scan_line = 0; @@ -300,9 +407,33 @@ } return 0; } +#if CONFIG_ATC_DCTX_ALIGNED + decode_eob(dcb, r, plane, tx_size); + av1_read_tx_type(cm, xd, blk_row, blk_col, tx_size, r, plane, *eob, + is_inter ? 0 : *eob); + +#if CONFIG_CROSS_CHROMA_TX + if (plane == AOM_PLANE_U && is_cctx_allowed(cm, xd)) { + const int skip_cctx = is_inter ? 
0 : (*eob == 1); + if (!all_zero && !skip_cctx) { + av1_read_cctx_type(cm, xd, blk_row, blk_col, tx_size, r); + } else { + int row_offset, col_offset; +#if CONFIG_EXT_RECUR_PARTITIONS + get_chroma_mi_offsets(xd, &row_offset, &col_offset); +#else + get_chroma_mi_offsets(xd, tx_size, &row_offset, &col_offset); +#endif // CONFIG_EXT_RECUR_PARTITIONS + update_cctx_array(xd, blk_row, blk_col, row_offset, col_offset, tx_size, + CCTX_NONE); + } + } +#endif // CONFIG_CROSS_CHROMA_TX +#else if (plane == AOM_PLANE_Y) { // only y plane's tx_type is transmitted av1_read_tx_type(cm, xd, blk_row, blk_col, tx_size, r); } +#endif // CONFIG_ATC_DCTX_ALIGNED return 1; } @@ -324,6 +455,18 @@ const int shift = av1_get_tx_scale(tx_size); const int bwl = get_txb_bwl(tx_size); const int width = get_txb_wide(tx_size); + const int height = get_txb_high(tx_size); +#if CONFIG_INSPECTION + tran_low_t *const tcoeffs_copy = + dcb->dqcoeff_block_copy[plane] + dcb->cb_offset[plane]; + tran_low_t *const quant_coeffs = + dcb->qcoeff_block[plane] + dcb->cb_offset[plane]; + tran_low_t *const dequant_values = + dcb->dequant_values[plane] + dcb->cb_offset[plane]; + memset(tcoeffs_copy, 0, sizeof(tran_low_t) * width * height); + memset(quant_coeffs, 0, sizeof(tran_low_t) * width * height); + memset(dequant_values, 0, sizeof(tran_low_t) * width * height); +#endif // CONFIG_INSPECTION int cul_level = 0; int dc_val = 0; uint8_t levels_buf[TX_PAD_2D]; @@ -333,12 +476,21 @@ eob_info *eob_data = dcb->eob_data[plane] + dcb->txb_offset[plane]; eob_data->max_scan_line = 0; eob_data->eob = av1_get_max_eob(tx_size); +#if CONFIG_ATC_DCTX_ALIGNED + eob_info *bob_data = dcb->bob_data[plane] + dcb->txb_offset[plane]; + bob_data->max_scan_line = 0; +#endif // CONFIG_ATC_DCTX_ALIGNED const TX_TYPE tx_type = av1_get_tx_type(xd, plane_type, blk_row, blk_col, tx_size, cm->features.reduced_tx_set_used); const qm_val_t *iqmatrix = av1_get_iqmatrix(&cm->quant_params, xd, plane, tx_size, tx_type); +#if CONFIG_INSPECTION + for 
(int c = 0; c < width * height; c++) { + dequant_values[c] = get_dqv(dequant, c, iqmatrix); + } +#endif // CONFIG_INSPECTION const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type); const int16_t *const scan = scan_order->scan; @@ -347,8 +499,34 @@ memset(signs_buf, 0, sizeof(*signs_buf) * TX_PAD_2D); base_cdf_arr base_cdf = ec_ctx->coeff_base_cdf_idtx; br_cdf_arr br_cdf = ec_ctx->coeff_br_cdf_idtx; +#if CONFIG_ATC_DCTX_ALIGNED + const int bob = av1_get_max_eob(tx_size) - bob_data->eob; + { + const int pos = scan[bob]; + const int coeff_ctx_bob = get_lower_levels_ctx_bob(bwl, height, bob); + const int nsymbs_bob = 3; + aom_cdf_prob *cdf_bob = ec_ctx->coeff_base_bob_cdf[coeff_ctx_bob]; + int level = aom_read_symbol(r, cdf_bob, nsymbs_bob, + ACCT_INFO("level", "cdf_bob")) + + 1; + if (level > NUM_BASE_LEVELS) { + const int br_ctx = get_br_ctx_skip(levels, pos, bwl); + aom_cdf_prob *cdf = br_cdf[br_ctx]; + for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) { + const int k = + aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_INFO("k", "br_cdf")); + level += k; + if (k < BR_CDF_SIZE - 1) break; + } + } + levels[get_padded_idx_left(pos, bwl)] = level; + } + read_coeffs_forward_2d(r, bob + 1, eob_data->eob - 1, scan, bwl, levels, + base_cdf, br_cdf); +#else read_coeffs_forward_2d(r, 0, eob_data->eob - 1, scan, bwl, levels, base_cdf, br_cdf); +#endif // CONFIG_ATC_DCTX_ALIGNED } for (int c = eob_data->eob - 1; c >= 0; --c) { @@ -358,8 +536,8 @@ if (level) { eob_data->max_scan_line = AOMMAX(eob_data->max_scan_line, pos); int idtx_sign_ctx = get_sign_ctx_skip(signs, levels, pos, bwl); - sign = - aom_read_symbol(r, ec_ctx->idtx_sign_cdf[idtx_sign_ctx], 2, ACCT_STR); + sign = aom_read_symbol(r, ec_ctx->idtx_sign_cdf[idtx_sign_ctx], 2, + ACCT_INFO("sign")); signs[get_padded_idx(pos, bwl)] = sign > 0 ? 
-1 : 1; if (level >= MAX_BASE_BR_RANGE) { level += read_golomb(xd, r); @@ -381,6 +559,10 @@ dq_coeff = -dq_coeff; } tcoeffs[pos] = clamp(dq_coeff, min_value, max_value); +#if CONFIG_INSPECTION + tcoeffs_copy[pos] = tcoeffs[pos]; + quant_coeffs[pos] = sign ? -level : level; +#endif // CONFIG_INSPECTION } } cul_level = AOMMIN(COEFF_CONTEXT_MASK, cul_level); @@ -399,13 +581,14 @@ int q_index; const int pos = scan[0]; int ctx_idx = get_base_ctx_ph(levels, pos, bwl, tx_class); - q_index = aom_read_symbol(r, base_cdf_ph[ctx_idx], 4, ACCT_STR); + q_index = aom_read_symbol(r, base_cdf_ph[ctx_idx], 4, ACCT_INFO("q_index")); if (q_index > NUM_BASE_LEVELS) { ctx_idx = get_par_br_ctx(levels, pos, bwl, tx_class); aom_cdf_prob *cdf_br = br_cdf_ph[ctx_idx]; for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) { - const int k = aom_read_symbol(r, cdf_br, BR_CDF_SIZE, ACCT_STR); + const int k = + aom_read_symbol(r, cdf_br, BR_CDF_SIZE, ACCT_INFO("k", "cdf_br")); q_index += k; if (k < BR_CDF_SIZE - 1) break; } @@ -436,15 +619,32 @@ const int bwl = get_txb_bwl(tx_size); const int width = get_txb_wide(tx_size); const int height = get_txb_high(tx_size); +#if CONFIG_INSPECTION + tran_low_t *const tcoeffs_copy = + dcb->dqcoeff_block_copy[plane] + dcb->cb_offset[plane]; + tran_low_t *const quant_coeffs = + dcb->qcoeff_block[plane] + dcb->cb_offset[plane]; + tran_low_t *const dequant_values = + dcb->dequant_values[plane] + dcb->cb_offset[plane]; + memset(tcoeffs_copy, 0, sizeof(tran_low_t) * width * height); + memset(quant_coeffs, 0, sizeof(tran_low_t) * width * height); + memset(dequant_values, 0, sizeof(tran_low_t) * width * height); +#endif // CONFIG_INSPECTION int cul_level = 0; int dc_val = 0; uint8_t levels_buf[TX_PAD_2D]; uint8_t *const levels = set_levels(levels_buf, width); +#if !CONFIG_ATC_DCTX_ALIGNED eob_info *eob_data = dcb->eob_data[plane] + dcb->txb_offset[plane]; uint16_t *const eob = &(eob_data->eob); uint16_t *const max_scan_line = &(eob_data->max_scan_line); 
*max_scan_line = 0; *eob = 0; +#else + eob_info *eob_data = dcb->eob_data[plane] + dcb->txb_offset[plane]; + uint16_t *const eob = &(eob_data->eob); + uint16_t *const max_scan_line = &(eob_data->max_scan_line); +#endif // !CONFIG_ATC_DCTX_ALIGNED #if DEBUG_EXTQUANT fprintf(cm->fDecCoeffLog, @@ -459,8 +659,14 @@ const TX_CLASS tx_class = tx_type_to_class[get_primary_tx_type(tx_type)]; const qm_val_t *iqmatrix = av1_get_iqmatrix(&cm->quant_params, xd, plane, tx_size, tx_type); +#if CONFIG_INSPECTION + for (int c = 0; c < width * height; c++) { + dequant_values[c] = get_dqv(dequant, c, iqmatrix); + } +#endif // CONFIG_INSPECTION const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type); const int16_t *const scan = scan_order->scan; +#if !CONFIG_ATC_DCTX_ALIGNED int eob_extra = 0; int eob_pt = 1; @@ -470,44 +676,44 @@ case 0: eob_pt = aom_read_symbol(r, ec_ctx->eob_flag_cdf16[plane_type][eob_multi_ctx], - 5, ACCT_STR) + + 5, ACCT_INFO("eob_pt", "eob_multi_size:0")) + 1; break; case 1: eob_pt = aom_read_symbol(r, ec_ctx->eob_flag_cdf32[plane_type][eob_multi_ctx], - 6, ACCT_STR) + + 6, ACCT_INFO("eob_pt", "eob_multi_size:1")) + 1; break; case 2: eob_pt = aom_read_symbol(r, ec_ctx->eob_flag_cdf64[plane_type][eob_multi_ctx], - 7, ACCT_STR) + + 7, ACCT_INFO("eob_pt", "eob_multi_size:2")) + 1; break; case 3: eob_pt = aom_read_symbol(r, ec_ctx->eob_flag_cdf128[plane_type][eob_multi_ctx], - 8, ACCT_STR) + + 8, ACCT_INFO("eob_pt", "eob_multi_size:3")) + 1; break; case 4: eob_pt = aom_read_symbol(r, ec_ctx->eob_flag_cdf256[plane_type][eob_multi_ctx], - 9, ACCT_STR) + + 9, ACCT_INFO("eob_pt", "eob_multi_size:4")) + 1; break; case 5: eob_pt = aom_read_symbol(r, ec_ctx->eob_flag_cdf512[plane_type][eob_multi_ctx], - 10, ACCT_STR) + + 10, ACCT_INFO("eob_pt", "eob_multi_size:5")) + 1; break; case 6: default: eob_pt = aom_read_symbol( r, ec_ctx->eob_flag_cdf1024[plane_type][eob_multi_ctx], 11, - ACCT_STR) + + ACCT_INFO("eob_pt", "eob_multi_size:6")) + 1; break; } @@ -515,17 
+721,19 @@ const int eob_offset_bits = av1_eob_offset_bits[eob_pt]; if (eob_offset_bits > 0) { const int eob_ctx = eob_pt - 3; - int bit = aom_read_symbol( - r, ec_ctx->eob_extra_cdf[txs_ctx][plane_type][eob_ctx], 2, ACCT_STR); + int bit = + aom_read_symbol(r, ec_ctx->eob_extra_cdf[txs_ctx][plane_type][eob_ctx], + 2, ACCT_INFO("eob_offset_bits")); if (bit) { eob_extra += (1 << (eob_offset_bits - 1)); } #if CONFIG_BYPASS_IMPROVEMENT - eob_extra += aom_read_literal(r, eob_offset_bits - 1, ACCT_STR); + eob_extra += + aom_read_literal(r, eob_offset_bits - 1, ACCT_INFO("eob_extra")); #else for (int i = 1; i < eob_offset_bits; i++) { - bit = aom_read_bit(r, ACCT_STR); + bit = aom_read_bit(r, ACCT_INFO("eob_offset_bits")); if (bit) { eob_extra += (1 << (eob_offset_bits - 1 - i)); } @@ -539,10 +747,15 @@ xd->eob_u = *eob; } #endif // CONFIG_CONTEXT_DERIVATION +#endif // !CONFIG_ATC_DCTX_ALIGNED // read sec_tx_type here // Only y plane's sec_tx_type is transmitted - if ((plane == AOM_PLANE_Y) && (cm->seq_params.enable_ist)) { + if ((plane == AOM_PLANE_Y) && (cm->seq_params.enable_ist) +#if CONFIG_ATC_DCTX_ALIGNED + && (*eob != 1) +#endif // CONFIG_ATC_DCTX_ALIGNED + ) { av1_read_sec_tx_type(cm, xd, blk_row, blk_col, tx_size, eob, r); } // @@ -560,7 +773,7 @@ const int c = *eob - 1; const int pos = scan[c]; const int coeff_ctx = get_lower_levels_ctx_eob(bwl, height, c); -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC int level = 0; const int row = pos >> bwl; const int col = pos - (row << bwl); @@ -568,12 +781,15 @@ if (limits) { aom_cdf_prob *cdf = ec_ctx->coeff_base_lf_eob_cdf[txs_ctx][plane_type][coeff_ctx]; - level += aom_read_symbol(r, cdf, LF_BASE_SYMBOLS - 1, ACCT_STR) + 1; + level += aom_read_symbol(r, cdf, LF_BASE_SYMBOLS - 1, + ACCT_INFO("level", "coeff_base_lf_eob_cdf")) + + 1; if (level > LF_NUM_BASE_LEVELS) { const int br_ctx = get_br_ctx_lf_eob(pos, tx_class); cdf = ec_ctx->coeff_br_lf_cdf[plane_type][br_ctx]; for (int idx = 0; idx < COEFF_BASE_RANGE; idx += 
BR_CDF_SIZE - 1) { - const int k = aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_STR); + const int k = aom_read_symbol(r, cdf, BR_CDF_SIZE, + ACCT_INFO("k", "coeff_br_lf_cdf")); level += k; if (k < BR_CDF_SIZE - 1) break; } @@ -581,12 +797,15 @@ } else { aom_cdf_prob *cdf = ec_ctx->coeff_base_eob_cdf[txs_ctx][plane_type][coeff_ctx]; - level += aom_read_symbol(r, cdf, 3, ACCT_STR) + 1; + level += + aom_read_symbol(r, cdf, 3, ACCT_INFO("level", "coeff_base_eob_cdf")) + + 1; if (level > NUM_BASE_LEVELS) { const int br_ctx = 0; /* get_lf_ctx_eob */ cdf = ec_ctx->coeff_br_cdf[plane_type][br_ctx]; for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) { - const int k = aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_STR); + const int k = aom_read_symbol(r, cdf, BR_CDF_SIZE, + ACCT_INFO("k", "coeff_br_cdf")); level += k; if (k < BR_CDF_SIZE - 1) break; } @@ -596,17 +815,20 @@ const int nsymbs = 3; aom_cdf_prob *cdf = ec_ctx->coeff_base_eob_cdf[txs_ctx][plane_type][coeff_ctx]; - int level = aom_read_symbol(r, cdf, nsymbs, ACCT_STR) + 1; + int level = aom_read_symbol(r, cdf, nsymbs, + ACCT_INFO("level", "coeff_base_eob_cdf")) + + 1; if (level > NUM_BASE_LEVELS) { const int br_ctx = get_br_ctx_eob(pos, bwl, tx_class); cdf = ec_ctx->coeff_br_cdf[AOMMIN(txs_ctx, TX_32X32)][plane_type][br_ctx]; for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) { - const int k = aom_read_symbol(r, cdf, BR_CDF_SIZE, ACCT_STR); + const int k = aom_read_symbol(r, cdf, BR_CDF_SIZE, + ACCT_INFO("k", "coeff_br_cdf")); level += k; if (k < BR_CDF_SIZE - 1) break; } } -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC levels[get_padded_idx(pos, bwl)] = level; } #if CONFIG_PAR_HIDING @@ -617,21 +839,21 @@ bool is_hidden = false; #endif // CONFIG_PAR_HIDING if (*eob > 1) { -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC base_lf_cdf_arr base_lf_cdf = ec_ctx->coeff_base_lf_cdf[txs_ctx][plane_type]; br_cdf_arr br_lf_cdf = ec_ctx->coeff_br_lf_cdf[plane_type]; -#endif // CONFIG_ATC_COEFCODING 
+#endif // CONFIG_ATC base_cdf_arr base_cdf = ec_ctx->coeff_base_cdf[txs_ctx][plane_type]; br_cdf_arr br_cdf = -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC ec_ctx->coeff_br_cdf[plane_type]; #else ec_ctx->coeff_br_cdf[AOMMIN(txs_ctx, TX_32X32)][plane_type]; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC #if CONFIG_PAR_HIDING if (tx_class == TX_CLASS_2D) { -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC read_coeffs_reverse_2d(r, 1, *eob - 2, scan, bwl, levels, base_lf_cdf, br_lf_cdf, plane, base_cdf, br_cdf); if (enable_parity_hiding) { @@ -675,9 +897,9 @@ read_coeffs_reverse(r, tx_size, tx_class, 0, 0, scan, bwl, levels, base_cdf, br_cdf); } -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC } else { -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC read_coeffs_reverse(r, tx_class, 1, *eob - 2, scan, bwl, levels, base_lf_cdf, br_lf_cdf, plane, base_cdf, br_cdf); if (enable_parity_hiding) { @@ -721,11 +943,11 @@ read_coeffs_reverse(r, tx_size, tx_class, 0, 0, scan, bwl, levels, base_cdf, br_cdf); } -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC } #else if (tx_class == TX_CLASS_2D) { -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC read_coeffs_reverse_2d(r, 1, *eob - 2, scan, bwl, levels, base_lf_cdf, br_lf_cdf, plane, base_cdf, br_cdf); read_coeffs_reverse(r, tx_class, 0, 0, scan, bwl, levels, base_lf_cdf, @@ -735,15 +957,15 @@ base_cdf, br_cdf); read_coeffs_reverse(r, tx_size, tx_class, 0, 0, scan, bwl, levels, base_cdf, br_cdf); -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC } else { -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC read_coeffs_reverse(r, tx_class, 0, *eob - 2, scan, bwl, levels, base_lf_cdf, br_lf_cdf, plane, base_cdf, br_cdf); #else read_coeffs_reverse(r, tx_size, tx_class, 0, *eob - 2, scan, bwl, levels, base_cdf, br_cdf); -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC } #endif // CONFIG_PAR_HIDING } @@ -764,35 +986,37 @@ #if CONFIG_CONTEXT_DERIVATION if (plane == AOM_PLANE_Y || plane == AOM_PLANE_U) { sign = aom_read_symbol( - r, 
ec_ctx->dc_sign_cdf[plane_type][dc_sign_ctx], 2, ACCT_STR); + r, ec_ctx->dc_sign_cdf[plane_type][dc_sign_ctx], 2, + ACCT_INFO("sign", "dc_sign_cdf", "plane_y_or_u")); } else { int32_t tmp_sign = 0; if (c < xd->eob_u) tmp_sign = xd->tmp_sign[0]; - sign = aom_read_symbol( - r, ec_ctx->v_dc_sign_cdf[tmp_sign][dc_sign_ctx], 2, ACCT_STR); + sign = + aom_read_symbol(r, ec_ctx->v_dc_sign_cdf[tmp_sign][dc_sign_ctx], + 2, ACCT_INFO("sign", "v_dc_sign_cdf", "plane_v")); } if (plane == AOM_PLANE_U) xd->tmp_sign[0] = (sign ? 2 : 1); #else sign = aom_read_symbol(r, ec_ctx->dc_sign_cdf[plane_type][dc_sign_ctx], - 2, ACCT_STR); + 2, ACCT_INFO("sign", "dc_sign_cdf")); #endif // CONFIG_CONTEXT_DERIVATION } else { #if CONFIG_CONTEXT_DERIVATION if (plane == AOM_PLANE_Y || plane == AOM_PLANE_U) - sign = aom_read_bit(r, ACCT_STR); + sign = aom_read_bit(r, ACCT_INFO("sign", "plane_y_or_u")); else { int32_t tmp_sign = 0; if (c < xd->eob_u) tmp_sign = xd->tmp_sign[pos]; - sign = - aom_read_symbol(r, ec_ctx->v_ac_sign_cdf[tmp_sign], 2, ACCT_STR); + sign = aom_read_symbol(r, ec_ctx->v_ac_sign_cdf[tmp_sign], 2, + ACCT_INFO("sign", "v_ac_sign_cdf", "plane_v")); } if (plane == AOM_PLANE_U) xd->tmp_sign[pos] = (sign ? 2 : 1); #else - sign = aom_read_bit(r, ACCT_STR); + sign = aom_read_bit(r, ACCT_INFO("sign")); #endif // CONFIG_CONTEXT_DERIVATION } #if CONFIG_PAR_HIDING -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC if (is_hidden && c == 0) { if (level >= (MAX_BASE_BR_RANGE << 1)) { level += (read_golomb(xd, r) << 1); @@ -821,9 +1045,9 @@ level += read_golomb(xd, r); } } -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC #else -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC const int row = pos >> bwl; const int col = pos - (row << bwl); int limits = get_lf_limits(row, col, tx_class, plane); @@ -840,7 +1064,7 @@ if (level >= MAX_BASE_BR_RANGE) { level += read_golomb(xd, r); } -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC #endif // CONFIG_PAR_HIDING if (c == 0) dc_val = sign ? 
-level : level; @@ -860,6 +1084,10 @@ dq_coeff = -dq_coeff; } tcoeffs[pos] = clamp(dq_coeff, min_value, max_value); +#if CONFIG_INSPECTION + tcoeffs_copy[pos] = tcoeffs[pos]; + quant_coeffs[pos] = sign ? -level : level; +#endif // CONFIG_INSPECTION } } #if DEBUG_EXTQUANT
diff --git a/av1/decoder/detokenize.c b/av1/decoder/detokenize.c index 1fb2975..b71fd40 100644 --- a/av1/decoder/detokenize.c +++ b/av1/decoder/detokenize.c
@@ -17,8 +17,6 @@ #include "av1/common/blockd.h" #include "av1/decoder/detokenize.h" -#define ACCT_STR __func__ - #include "av1/common/common.h" #include "av1/common/entropy.h" #include "av1/common/idct.h" @@ -33,13 +31,13 @@ int rows = param->rows; int cols = param->cols; -#if CONFIG_NEW_COLOR_MAP_CODING +#if CONFIG_PALETTE_IMPROVEMENTS IdentityRowCdf identity_row_cdf = param->identity_row_cdf; int prev_identity_row_flag = 0; for (int y = 0; y < rows; y++) { const int ctx = y == 0 ? 2 : prev_identity_row_flag; - int identity_row_flag = - aom_read_symbol(r, identity_row_cdf[ctx], 2, ACCT_STR); + int identity_row_flag = aom_read_symbol(r, identity_row_cdf[ctx], 2, + ACCT_INFO("identity_row_flag")); for (int x = 0; x < cols; x++) { if (identity_row_flag && x > 0) { color_map[y * plane_block_width + x] = @@ -50,8 +48,9 @@ const int color_ctx = av1_get_palette_color_index_context( color_map, plane_block_width, y, x, n, color_order, NULL, identity_row_flag, prev_identity_row_flag); - const int color_idx = aom_read_symbol( - r, color_map_cdf[n - PALETTE_MIN_SIZE][color_ctx], n, ACCT_STR); + const int color_idx = + aom_read_symbol(r, color_map_cdf[n - PALETTE_MIN_SIZE][color_ctx], + n, ACCT_INFO("color_idx")); assert(color_idx >= 0 && color_idx < n); color_map[y * plane_block_width + x] = color_order[color_idx]; } @@ -68,8 +67,9 @@ for (int j = AOMMIN(i, cols - 1); j >= AOMMAX(0, i - rows + 1); --j) { const int color_ctx = av1_get_palette_color_index_context( color_map, plane_block_width, (i - j), j, n, color_order, NULL); - const int color_idx = aom_read_symbol( - r, color_map_cdf[n - PALETTE_MIN_SIZE][color_ctx], n, ACCT_STR); + const int color_idx = + aom_read_symbol(r, color_map_cdf[n - PALETTE_MIN_SIZE][color_ctx], n, + ACCT_INFO("color_idx")); assert(color_idx >= 0 && color_idx < n); color_map[(i - j) * plane_block_width + j] = color_order[color_idx]; } @@ -98,10 +98,10 @@ xd->plane[plane].color_index_map + xd->color_index_map_offset[plane]; params.map_cdf = plane ? 
xd->tile_ctx->palette_uv_color_index_cdf : xd->tile_ctx->palette_y_color_index_cdf; -#if CONFIG_NEW_COLOR_MAP_CODING +#if CONFIG_PALETTE_IMPROVEMENTS params.identity_row_cdf = plane ? xd->tile_ctx->identity_row_cdf_uv : xd->tile_ctx->identity_row_cdf_y; -#endif // CONFIG_NEW_COLOR_MAP_CODING +#endif // CONFIG_PALETTE_IMPROVEMENTS const MB_MODE_INFO *const mbmi = xd->mi[0]; params.n_colors = mbmi->palette_mode_info.palette_size[plane]; av1_get_block_dimensions(mbmi->sb_type[plane > 0], plane, xd,
diff --git a/av1/decoder/inspection.c b/av1/decoder/inspection.c index 75e06cc..2cba1fb 100644 --- a/av1/decoder/inspection.c +++ b/av1/decoder/inspection.c
@@ -11,6 +11,7 @@ */ #include "av1/decoder/decoder.h" #include "av1/decoder/inspection.h" +#include "av1/common/blockd.h" #include "av1/common/enums.h" #include "av1/common/cdef.h" @@ -19,6 +20,12 @@ fd->mi_rows = mi_rows; fd->mi_grid = (insp_mi_data *)aom_malloc(sizeof(insp_mi_data) * fd->mi_rows * fd->mi_cols); + fd->max_sb_rows = + (mi_rows + (1 << MIN_MIB_SIZE_LOG2) - 1) / (1 << MIN_MIB_SIZE_LOG2); + fd->max_sb_cols = + (mi_cols + (1 << MIN_MIB_SIZE_LOG2) - 1) / (1 << MIN_MIB_SIZE_LOG2); + fd->sb_grid = (insp_sb_data *)aom_calloc(sizeof(insp_sb_data), + fd->max_sb_rows * fd->max_sb_cols); } void ifd_init(insp_frame_data *fd, int frame_width, int frame_height) { @@ -30,6 +37,70 @@ void ifd_clear(insp_frame_data *fd) { aom_free(fd->mi_grid); fd->mi_grid = NULL; + for (int i = 0; i < fd->max_sb_rows; i++) { + for (int j = 0; j < fd->max_sb_cols; j++) { + insp_sb_data *sb = &fd->sb_grid[i * fd->max_sb_cols + j]; + // Note: NULL checking happens within av1_free_ptree_recursive + av1_free_ptree_recursive(sb->partition_tree_luma); + av1_free_ptree_recursive(sb->partition_tree_chroma); + } + } + aom_free(fd->sb_grid); + fd->sb_grid = NULL; +} + +PARTITION_TREE *copy_partition_tree(PARTITION_TREE *orig, + PARTITION_TREE *parent) { + PARTITION_TREE *copy = av1_alloc_ptree_node(NULL, 0); + memcpy(copy, orig, sizeof(PARTITION_TREE)); + copy->parent = parent; + for (size_t i = 0; i < sizeof(copy->sub_tree) / sizeof(copy->sub_tree[0]); + i++) { + if (copy->sub_tree[i] != NULL) { + copy->sub_tree[i] = copy_partition_tree(orig->sub_tree[i], copy); + } + } + return copy; +} + +int ifd_inspect_superblock(insp_frame_data *fd, void *decoder) { + struct AV1Decoder *pbi = (struct AV1Decoder *)decoder; + AV1_COMMON *const cm = &pbi->common; + const CommonModeInfoParams *const mi_params = &cm->mi_params; + if (fd->mi_rows != mi_params->mi_rows || fd->mi_cols != mi_params->mi_cols) { + ifd_clear(fd); + ifd_init_mi_rc(fd, mi_params->mi_rows, mi_params->mi_cols); + } + + int sb_size = 
cm->seq_params.sb_size; + int sb_width = mi_size_wide[sb_size]; + int sb_height = mi_size_high[sb_size]; + + int sb_row = pbi->td.dcb.xd.sbi->mi_row / sb_height; + int sb_col = pbi->td.dcb.xd.sbi->mi_col / sb_width; + + PARTITION_TREE *luma_tree = pbi->td.dcb.xd.sbi->ptree_root[0]; + PARTITION_TREE *chroma_tree = pbi->td.dcb.xd.sbi->ptree_root[1]; + insp_sb_data *sb = &fd->sb_grid[sb_row * fd->max_sb_cols + sb_col]; + sb->partition_tree_luma = copy_partition_tree(luma_tree, NULL); + // Semi-decoupled partitioning is enabled only for intra-frames. + int use_sdp = (frame_is_intra_only(cm) && !cm->seq_params.monochrome && + cm->seq_params.enable_sdp); + if (chroma_tree != NULL && use_sdp) { + sb->partition_tree_chroma = copy_partition_tree(chroma_tree, NULL); + } else { + // For consistency, use a copy of the luma tree when SDP is not enabled for + // the frame. + sb->partition_tree_chroma = copy_partition_tree(luma_tree, NULL); + } + sb->has_separate_chroma_partition_tree = use_sdp; + + for (int i = 0; i < MAX_MB_PLANE; i++) { + memcpy(sb->dqcoeff[i], pbi->td.dcb.dqcoeff_block_copy[i], MAX_SB_SQUARE); + memcpy(sb->qcoeff[i], pbi->td.dcb.qcoeff_block[i], MAX_SB_SQUARE); + memcpy(sb->dequant_values[i], pbi->td.dcb.dequant_values[i], MAX_SB_SQUARE); + } + return 1; } /* TODO(negge) This function may be called by more than one thread when using @@ -39,7 +110,9 @@ AV1_COMMON *const cm = &pbi->common; const CommonModeInfoParams *const mi_params = &cm->mi_params; const CommonQuantParams *quant_params = &cm->quant_params; - + fd->recon_frame_buffer = cm->cur_frame->buf; + fd->predicted_frame_buffer = cm->predicted_pixels; + fd->prefiltered_frame_buffer = cm->prefiltered_pixels; if (fd->mi_rows != mi_params->mi_rows || fd->mi_cols != mi_params->mi_cols) { ifd_clear(fd); ifd_init_mi_rc(fd, mi_params->mi_rows, mi_params->mi_cols); @@ -49,6 +122,7 @@ fd->show_frame = cm->show_frame; fd->frame_type = cm->current_frame.frame_type; fd->base_qindex = quant_params->base_qindex; + 
fd->superblock_size = cm->seq_params.sb_size; // Set width and height of the first tile until generic support can be added TileInfo tile_info; av1_tile_set_row(&tile_info, cm, 0); @@ -57,6 +131,11 @@ fd->tile_mi_rows = tile_info.mi_row_end - tile_info.mi_row_start; fd->delta_q_present_flag = cm->delta_q_info.delta_q_present_flag; fd->delta_q_res = cm->delta_q_info.delta_q_res; + fd->bit_depth = cm->seq_params.bit_depth; + fd->width = cm->width; + fd->height = cm->height; + fd->render_width = cm->render_width; + fd->render_height = cm->render_height; #if CONFIG_ACCOUNTING fd->accounting = &pbi->accounting; #endif @@ -101,14 +180,15 @@ // Block Size mi->sb_type = mbmi->sb_type[0]; + mi->sb_type_chroma = mbmi->sb_type[1]; // Skip Flag + // TODO(comc): Check handling of skip_txfm vs tx_skip. mi->skip = mbmi->skip_txfm[0]; mi->filter[0] = mbmi->interp_fltr; mi->filter[1] = mbmi->interp_fltr; mi->dual_filter_type = mi->filter[0] * 3 + mi->filter[1]; // Transform - // TODO(anyone): extract tx type info from mbmi->txk_type[]. 
const BLOCK_SIZE bsize = mbmi->sb_type[0]; const int c = i % mi_size_wide[bsize]; const int r = j % mi_size_high[bsize]; @@ -120,19 +200,19 @@ if (skip_not_transform && mi->skip) mi->tx_size = -1; - if (mi->skip) { - const int tx_type_row = j - j % tx_size_high_unit[mi->tx_size]; - const int tx_type_col = i - i % tx_size_wide_unit[mi->tx_size]; - const int tx_type_map_idx = - tx_type_row * mi_params->mi_stride + tx_type_col; - mi->tx_type = mi_params->tx_type_map[tx_type_map_idx]; - } else { - mi->tx_type = 0; - } + const int tx_type_row = j - j % tx_size_high_unit[mi->tx_size]; + const int tx_type_col = i - i % tx_size_wide_unit[mi->tx_size]; + const int tx_type_map_idx = + tx_type_row * mi_params->mi_stride + tx_type_col; + mi->tx_type = mi_params->tx_type_map[tx_type_map_idx]; + + bool skip = mbmi->tx_skip[av1_get_txk_type_index(bsize, r, c)]; + mi->skip |= skip; if (skip_not_transform && - (mi->skip || mbmi->tx_skip[av1_get_txk_type_index(bsize, r, c)])) + (mi->skip || mbmi->tx_skip[av1_get_txk_type_index(bsize, r, c)])) { mi->tx_type = -1; + } mi->cdef_level = cm->cdef_info.cdef_strengths[mbmi->cdef_strength] / CDEF_SEC_STRENGTHS;
diff --git a/av1/decoder/inspection.h b/av1/decoder/inspection.h index 19d4f5f..cb8175f 100644 --- a/av1/decoder/inspection.h +++ b/av1/decoder/inspection.h
@@ -16,6 +16,7 @@ extern "C" { #endif // __cplusplus +#include "av1/common/blockd.h" #include "av1/common/seg_common.h" #if CONFIG_ACCOUNTING #include "av1/decoder/accounting.h" @@ -32,6 +33,12 @@ int16_t col; }; +typedef struct insp_pixel_data insp_pixel_data; + +struct insp_pixel_data { + int16_t samples[MAX_SB_SIZE][MAX_SB_SIZE]; +}; + typedef struct insp_mi_data insp_mi_data; struct insp_mi_data { @@ -40,6 +47,7 @@ int16_t mode; int16_t uv_mode; int16_t sb_type; + int16_t sb_type_chroma; int16_t skip; int16_t segment_id; int16_t dual_filter_type; @@ -58,6 +66,19 @@ int16_t uv_palette; }; +typedef struct insp_sb_data insp_sb_data; + +struct insp_sb_data { + PARTITION_TREE *partition_tree_luma; + PARTITION_TREE *partition_tree_chroma; + bool has_separate_chroma_partition_tree; + int16_t prediction_samples[MAX_SB_SIZE][MAX_SB_SIZE]; + int16_t recon_samples[MAX_SB_SIZE][MAX_SB_SIZE]; + tran_low_t dqcoeff[MAX_MB_PLANE][MAX_SB_SQUARE]; + tran_low_t qcoeff[MAX_MB_PLANE][MAX_SB_SQUARE]; + tran_low_t dequant_values[MAX_MB_PLANE][MAX_SB_SQUARE]; +}; + typedef struct insp_frame_data insp_frame_data; struct insp_frame_data { @@ -65,6 +86,9 @@ Accounting *accounting; #endif insp_mi_data *mi_grid; + insp_sb_data *sb_grid; + int max_sb_rows; + int max_sb_cols; int16_t frame_number; int show_frame; int frame_type; @@ -82,10 +106,21 @@ int delta_q_present_flag; int delta_q_res; int show_existing_frame; + int superblock_size; + // Points to the same underlying allocations as the decoder + YV12_BUFFER_CONFIG recon_frame_buffer; + YV12_BUFFER_CONFIG predicted_frame_buffer; + YV12_BUFFER_CONFIG prefiltered_frame_buffer; + int bit_depth; + int render_width; + int render_height; + int width; + int height; }; void ifd_init(insp_frame_data *fd, int frame_width, int frame_height); void ifd_clear(insp_frame_data *fd); +int ifd_inspect_superblock(insp_frame_data *fd, void *decoder); int ifd_inspect(insp_frame_data *fd, void *decoder, int skip_not_transform); #ifdef __cplusplus
diff --git a/av1/encoder/bitstream.c b/av1/encoder/bitstream.c index 5a695bc..252b2ab 100644 --- a/av1/encoder/bitstream.c +++ b/av1/encoder/bitstream.c
@@ -145,27 +145,61 @@ // 0 -> 0 10 -> 1 110 -> 2 111 -> 3 // Also use the number of reference MVs for a frame type to reduce the // number of bits written if there are less than 4 valid DRL indices. -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SEP_COMP_DRL + if (has_second_drl(mbmi)) { + if (mbmi->mode == NEAR_NEWMV) + max_drl_bits = AOMMIN(max_drl_bits, SEP_COMP_DRL_SIZE); + else + assert(mbmi->mode == NEAR_NEARMV); + } + +#if CONFIG_SKIP_MODE_ENHANCEMENT + if (mbmi->skip_mode) + assert(mbmi->ref_mv_idx[0] < + mbmi_ext_frame->skip_mvp_candidate_list.ref_mv_count); + else +#endif // CONFIG_SKIP_MODE_ENHANCEMENT + assert(mbmi->ref_mv_idx[0] < mbmi_ext_frame->ref_mv_count[0]); + if (has_second_drl(mbmi)) + assert(mbmi->ref_mv_idx[1] < mbmi_ext_frame->ref_mv_count[1]); + assert(mbmi->ref_mv_idx[0] < max_drl_bits + 1); + if (has_second_drl(mbmi)) assert(mbmi->ref_mv_idx[1] < max_drl_bits + 1); + for (int ref = 0; ref < 1 + has_second_drl(mbmi); ref++) { + for (int idx = 0; idx < max_drl_bits; ++idx) { + aom_cdf_prob *drl_cdf = +#if CONFIG_SKIP_MODE_ENHANCEMENT + mbmi->skip_mode ? ec_ctx->skip_drl_cdf[AOMMIN(idx, 2)] + : av1_get_drl_cdf(ec_ctx, mbmi_ext_frame->weight[ref], + mode_ctx, idx); +#else + av1_get_drl_cdf(ec_ctx, mbmi_ext_frame->weight[ref], mode_ctx, idx); +#endif // CONFIG_SKIP_MODE_ENHANCEMENT + aom_write_symbol(w, mbmi->ref_mv_idx[ref] != idx, drl_cdf, 2); + if (mbmi->ref_mv_idx[ref] == idx) break; + } + } +#else +#if CONFIG_SKIP_MODE_ENHANCEMENT if (mbmi->skip_mode) assert(mbmi->ref_mv_idx < mbmi_ext_frame->skip_mvp_candidate_list.ref_mv_count); else -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT assert(mbmi->ref_mv_idx < mbmi_ext_frame->ref_mv_count); - assert(mbmi->ref_mv_idx < max_drl_bits + 1); for (int idx = 0; idx < max_drl_bits; ++idx) { aom_cdf_prob *drl_cdf = -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT mbmi->skip_mode ? 
ec_ctx->skip_drl_cdf[AOMMIN(idx, 2)] : av1_get_drl_cdf(ec_ctx, mbmi_ext_frame->weight, mode_ctx, idx); #else av1_get_drl_cdf(ec_ctx, mbmi_ext_frame->weight, mode_ctx, idx); -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT aom_write_symbol(w, mbmi->ref_mv_idx != idx, drl_cdf, 2); if (mbmi->ref_mv_idx == idx) break; } +#endif // CONFIG_SEP_COMP_DRL } #if CONFIG_WARP_REF_LIST static void write_warp_ref_idx(FRAME_CONTEXT *ec_ctx, const MB_MODE_INFO *mbmi, @@ -184,6 +218,16 @@ if (mbmi->warp_ref_idx == bit_idx) break; } } +#if CONFIG_CWG_D067_IMPROVED_WARP +static void write_warpmv_with_mvd_flag(FRAME_CONTEXT *ec_ctx, + const MB_MODE_INFO *mbmi, + aom_writer *w) { + aom_write_symbol( + w, mbmi->warpmv_with_mvd_flag, + ec_ctx->warpmv_with_mvd_flag_cdf[mbmi->sb_type[PLANE_TYPE_Y]], 2); +} +#endif // CONFIG_CWG_D067_IMPROVED_WARP + #endif // CONFIG_WARP_REF_LIST #if CONFIG_IMPROVED_JMVD && CONFIG_JOINT_MVD @@ -208,6 +252,24 @@ } #endif // CONFIG_IMPROVED_JMVD && CONFIG_JOINT_MVD +#if CONFIG_CWP +// Write the index for the weighting factor of compound weighted prediction +static AOM_INLINE void write_cwp_idx(MACROBLOCKD *xd, aom_writer *w, + const AV1_COMMON *const cm, + const MB_MODE_INFO *const mbmi) { + const int8_t final_idx = get_cwp_coding_idx(mbmi->cwp_idx, 1, cm, mbmi); + + int bit_cnt = 0; + const int ctx = 0; + for (int idx = 0; idx < MAX_CWP_NUM - 1; ++idx) { + aom_write_symbol(w, final_idx != idx, + xd->tile_ctx->cwp_idx_cdf[ctx][bit_cnt], 2); + if (final_idx == idx) break; + ++bit_cnt; + } +} +#endif // CONFIG_CWP + static AOM_INLINE void write_inter_compound_mode(MACROBLOCKD *xd, aom_writer *w, PREDICTION_MODE mode, #if CONFIG_OPTFLOW_REFINEMENT @@ -397,10 +459,10 @@ static AOM_INLINE void write_is_inter(const AV1_COMMON *cm, const MACROBLOCKD *xd, int segment_id, aom_writer *w, const int is_inter -#if CONFIG_CONTEXT_DERIVATION +#if CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT , const int skip_txfm -#endif // 
CONFIG_CONTEXT_DERIVATION +#endif // CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT ) { if (segfeature_active(&cm->seg, segment_id, SEG_LVL_GLOBALMV)) { assert(is_inter); @@ -408,11 +470,11 @@ } const int ctx = av1_get_intra_inter_context(xd); FRAME_CONTEXT *ec_ctx = xd->tile_ctx; -#if CONFIG_CONTEXT_DERIVATION +#if CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT aom_write_symbol(w, is_inter, ec_ctx->intra_inter_cdf[skip_txfm][ctx], 2); #else aom_write_symbol(w, is_inter, ec_ctx->intra_inter_cdf[ctx], 2); -#endif // CONFIG_CONTEXT_DERIVATION +#endif // CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT } #if CONFIG_WEDGE_MOD_EXT @@ -464,7 +526,11 @@ #if !CONFIG_WARPMV write_warp_ref_idx(xd->tile_ctx, mbmi, w); #endif // !CONFIG_WARPMV - if (!allow_warp_parameter_signaling(mbmi)) { + if (!allow_warp_parameter_signaling( +#if CONFIG_CWG_D067_IMPROVED_WARP + cm, +#endif // CONFIG_CWG_D067_IMPROVED_WARP + mbmi)) { return; } #endif // CONFIG_WARP_REF_LIST @@ -501,7 +567,11 @@ const MB_MODE_INFO_EXT_FRAME *mbmi_ext_frame, aom_writer *w) { const BLOCK_SIZE bsize = mbmi->sb_type[PLANE_TYPE_Y]; const int allowed_motion_modes = +#if CONFIG_SEP_COMP_DRL + motion_mode_allowed(cm, xd, mbmi_ext_frame->ref_mv_stack[0], mbmi); +#else motion_mode_allowed(cm, xd, mbmi_ext_frame->ref_mv_stack, mbmi); +#endif // CONFIG_SEP_COMP_DRL assert((allowed_motion_modes & (1 << mbmi->motion_mode)) != 0); assert((cm->features.enabled_motion_modes & (1 << mbmi->motion_mode)) != 0); @@ -662,7 +732,7 @@ } } -#if CONFIG_NEW_COLOR_MAP_CODING +#if CONFIG_PALETTE_IMPROVEMENTS static AOM_INLINE void pack_map_tokens(aom_writer *w, const TokenExtra **tp, int n, int cols, int rows) { const TokenExtra *p = *tp; @@ -693,7 +763,7 @@ } *tp = p; } -#endif // CONFIG_NEW_COLOR_MAP_CODING +#endif // CONFIG_PALETTE_IMPROVEMENTS static AOM_INLINE void av1_write_coeffs_txb_facade( aom_writer *w, AV1_COMMON *cm, MACROBLOCK *const x, MACROBLOCKD *xd, @@ -1016,7 +1086,12 @@ #if CONFIG_OPTFLOW_REFINEMENT // Sharp 
filter is always used whenever optical flow refinement is applied. int mb_interp_filter = - (mbmi->mode >= NEAR_NEARMV_OPTFLOW || use_opfl_refine_all(cm, mbmi)) + (mbmi->mode >= NEAR_NEARMV_OPTFLOW || use_opfl_refine_all(cm, mbmi) + +#if CONFIG_REFINEMV + || mbmi->refinemv_flag +#endif // CONFIG_REFINEMV + ) ? MULTITAP_SHARP : cm->features.interp_filter; #else @@ -1029,10 +1104,20 @@ } if (cm->features.interp_filter == SWITCHABLE) { #if CONFIG_OPTFLOW_REFINEMENT - if (mbmi->mode >= NEAR_NEARMV_OPTFLOW || use_opfl_refine_all(cm, mbmi)) { + if (mbmi->mode >= NEAR_NEARMV_OPTFLOW || use_opfl_refine_all(cm, mbmi) +#if CONFIG_REFINEMV + || mbmi->refinemv_flag +#endif // CONFIG_REFINEMV + ) { +#if CONFIG_REFINEMV + assert(IMPLIES(mbmi->mode >= NEAR_NEARMV_OPTFLOW || + use_opfl_refine_all(cm, mbmi) || mbmi->refinemv_flag, + mbmi->interp_fltr == MULTITAP_SHARP)); +#else assert(IMPLIES( mbmi->mode >= NEAR_NEARMV_OPTFLOW || use_opfl_refine_all(cm, mbmi), mbmi->interp_fltr == MULTITAP_SHARP)); +#endif // CONFIG_REFINEMV return; } #endif // CONFIG_OPTFLOW_REFINEMENT @@ -1202,16 +1287,23 @@ } void av1_write_tx_type(const AV1_COMMON *const cm, const MACROBLOCKD *xd, - TX_TYPE tx_type, TX_SIZE tx_size, aom_writer *w) { + TX_TYPE tx_type, TX_SIZE tx_size, aom_writer *w +#if CONFIG_ATC_DCTX_ALIGNED + , + const int plane, const int eob, const int dc_skip) { + if (plane != PLANE_TYPE_Y || dc_skip) return; +#else +) { +#endif // CONFIG_ATC_DCTX_ALIGNED MB_MODE_INFO *mbmi = xd->mi[0]; -#if CONFIG_ATC_NEWTXSETS +#if CONFIG_ATC PREDICTION_MODE intra_dir; if (mbmi->filter_intra_mode_info.use_filter_intra) intra_dir = fimode_to_intradir[mbmi->filter_intra_mode_info.filter_intra_mode]; else intra_dir = mbmi->mode; -#endif // CONFIG_ATC_NEWTXSETS +#endif // CONFIG_ATC const FeatureFlags *const features = &cm->features; const int is_inter = is_inter_block(mbmi, xd->tree_type); if (get_ext_tx_types(tx_size, is_inter, features->reduced_tx_set_used) > 1 && @@ -1228,7 +1320,7 @@ // eset == 0 
should correspond to a set with only DCT_DCT and there // is no need to send the tx_type assert(eset > 0); -#if CONFIG_ATC_NEWTXSETS +#if CONFIG_ATC const int size_info = av1_size_class[tx_size]; if (!is_inter) { const int mode_info = av1_md_class[intra_dir]; @@ -1240,25 +1332,33 @@ } #else assert(av1_ext_tx_used[tx_set_type][get_primary_tx_type(tx_type)]); -#endif // CONFIG_ATC_NEWTXSETS +#endif // CONFIG_ATC if (is_inter) { +#if CONFIG_ATC_DCTX_ALIGNED + const int eob_tx_ctx = get_lp2tx_ctx(tx_size, get_txb_bwl(tx_size), eob); + aom_write_symbol( + w, av1_ext_tx_ind[tx_set_type][tx_type], + ec_ctx->inter_ext_tx_cdf[eset][eob_tx_ctx][square_tx_size], + av1_num_ext_tx_set[tx_set_type]); +#else aom_write_symbol(w, av1_ext_tx_ind[tx_set_type][tx_type], ec_ctx->inter_ext_tx_cdf[eset][square_tx_size], av1_num_ext_tx_set[tx_set_type]); +#endif // CONFIG_ATC_DCTX_ALIGNED } else { if (mbmi->fsc_mode[xd->tree_type == CHROMA_PART]) { return; } -#if !CONFIG_ATC_NEWTXSETS +#if !CONFIG_ATC PREDICTION_MODE intra_dir; if (mbmi->filter_intra_mode_info.use_filter_intra) intra_dir = fimode_to_intradir[mbmi->filter_intra_mode_info.filter_intra_mode]; else intra_dir = mbmi->mode; -#endif // !CONFIG_ATC_NEWTXSETS +#endif // !CONFIG_ATC aom_write_symbol( -#if CONFIG_ATC_NEWTXSETS +#if CONFIG_ATC w, av1_tx_type_to_idx(get_primary_tx_type(tx_type), tx_set_type, intra_dir, size_info), @@ -1276,7 +1376,7 @@ w, av1_ext_tx_ind_intra[tx_set_type][get_primary_tx_type(tx_type)], ec_ctx->intra_ext_tx_cdf[eset][square_tx_size][intra_dir], av1_num_ext_tx_set_intra[tx_set_type]); -#endif // CONFIG_ATC_NEWTXSETS +#endif // CONFIG_ATC } } } @@ -1347,9 +1447,19 @@ INTRA_MODES); } #endif // !CONFIG_AIMC -static AOM_INLINE void write_mrl_index(FRAME_CONTEXT *ec_ctx, uint8_t mrl_index, - aom_writer *w) { +static AOM_INLINE void write_mrl_index(FRAME_CONTEXT *ec_ctx, +#if CONFIG_EXT_DIR + const MB_MODE_INFO *neighbor0, + const MB_MODE_INFO *neighbor1, +#endif // CONFIG_EXT_DIR + uint8_t mrl_index, 
aom_writer *w) { +#if CONFIG_EXT_DIR + int ctx = get_mrl_index_ctx(neighbor0, neighbor1); + aom_cdf_prob *mrl_cdf = ec_ctx->mrl_index_cdf[ctx]; + aom_write_symbol(w, mrl_index, mrl_cdf, MRL_LINE_NUMBER); +#else aom_write_symbol(w, mrl_index, ec_ctx->mrl_index_cdf, MRL_LINE_NUMBER); +#endif // CONFIG_EXT_DIR } static AOM_INLINE void write_fsc_mode(uint8_t fsc_mode, aom_writer *w, @@ -1645,7 +1755,11 @@ #endif // CONFIG_AIMC // Encoding reference line index if (cm->seq_params.enable_mrls && av1_is_directional_mode(mode)) { - write_mrl_index(ec_ctx, mbmi->mrl_index, w); + write_mrl_index(ec_ctx, +#if CONFIG_EXT_DIR + xd->neighbors[0], xd->neighbors[1], +#endif // CONFIG_EXT_DIR + mbmi->mrl_index, w); } } @@ -1705,19 +1819,39 @@ static INLINE int_mv get_ref_mv_from_stack( int ref_idx, const MV_REFERENCE_FRAME *ref_frame, int ref_mv_idx, - const MB_MODE_INFO_EXT_FRAME *mbmi_ext_frame) { + const MB_MODE_INFO_EXT_FRAME *mbmi_ext_frame +#if CONFIG_SEP_COMP_DRL + , + const MB_MODE_INFO *mbmi +#endif // CONFIG_SEP_COMP_DRL +) { const int8_t ref_frame_type = av1_ref_frame_type(ref_frame); +#if CONFIG_SEP_COMP_DRL + const CANDIDATE_MV *curr_ref_mv_stack = + has_second_drl(mbmi) ? mbmi_ext_frame->ref_mv_stack[ref_idx] + : mbmi_ext_frame->ref_mv_stack[0]; +#else const CANDIDATE_MV *curr_ref_mv_stack = mbmi_ext_frame->ref_mv_stack; +#endif // CONFIG_SEP_COMP_DRL if (is_inter_ref_frame(ref_frame[1])) { assert(ref_idx == 0 || ref_idx == 1); +#if CONFIG_SEP_COMP_DRL + return ref_idx && !has_second_drl(mbmi) + ? curr_ref_mv_stack[ref_mv_idx].comp_mv +#else return ref_idx ? 
curr_ref_mv_stack[ref_mv_idx].comp_mv - : curr_ref_mv_stack[ref_mv_idx].this_mv; +#endif // CONFIG_SEP_COMP_DRL + : curr_ref_mv_stack[ref_mv_idx].this_mv; } assert(ref_idx == 0); #if CONFIG_TIP +#if CONFIG_SEP_COMP_DRL + if (ref_mv_idx < mbmi_ext_frame->ref_mv_count[0]) { +#else if (ref_mv_idx < mbmi_ext_frame->ref_mv_count) { +#endif // CONFIG_SEP_COMP_DRL return curr_ref_mv_stack[ref_mv_idx].this_mv; } else if (is_tip_ref_frame(ref_frame_type)) { int_mv zero_mv; @@ -1736,13 +1870,42 @@ static INLINE int_mv get_ref_mv(const MACROBLOCK *x, int ref_idx) { const MACROBLOCKD *xd = &x->e_mbd; const MB_MODE_INFO *mbmi = xd->mi[0]; +#if CONFIG_SEP_COMP_DRL + const int ref_mv_idx = get_ref_mv_idx(mbmi, ref_idx); +#else const int ref_mv_idx = mbmi->ref_mv_idx; +#endif // CONFIG_SEP_COMP_DRL assert(IMPLIES(have_nearmv_newmv_in_inter_mode(mbmi->mode), has_second_ref(mbmi))); return get_ref_mv_from_stack(ref_idx, mbmi->ref_frame, ref_mv_idx, +#if CONFIG_SEP_COMP_DRL + x->mbmi_ext_frame, mbmi); +#else x->mbmi_ext_frame); +#endif // CONFIG_SEP_COMP_DRL } +#if CONFIG_REFINEMV +// This function write the refinemv_flag ( if require) to the bitstream +static void write_refinemv_flag(const AV1_COMMON *const cm, + MACROBLOCKD *const xd, aom_writer *w, + BLOCK_SIZE bsize) { + const MB_MODE_INFO *const mbmi = xd->mi[0]; + int signal_refinemv = switchable_refinemv_flag(cm, mbmi); + + if (signal_refinemv) { + const int refinemv_ctx = av1_get_refinemv_context(cm, xd, bsize); + assert(mbmi->refinemv_flag < REFINEMV_NUM_MODES); + aom_write_symbol(w, mbmi->refinemv_flag, + xd->tile_ctx->refinemv_flag_cdf[refinemv_ctx], + REFINEMV_NUM_MODES); + + } else { + assert(mbmi->refinemv_flag == get_default_refinemv_flag(cm, mbmi)); + } +} +#endif // CONFIG_REFINEMV + #if CONFIG_FLEX_MVRES static void write_pb_mv_precision(const AV1_COMMON *const cm, MACROBLOCKD *const xd, aom_writer *w) { @@ -1823,6 +1986,39 @@ write_skip_mode(cm, xd, segment_id, mbmi, w); +#if CONFIG_SKIP_TXFM_OPT + if 
(!mbmi->skip_mode) { + write_is_inter(cm, xd, mbmi->segment_id, w, is_inter); + +#if CONFIG_IBC_SR_EXT + if (!is_inter && av1_allow_intrabc(cm) && xd->tree_type != CHROMA_PART) { + const int use_intrabc = is_intrabc_block(mbmi, xd->tree_type); + if (xd->tree_type == CHROMA_PART) assert(use_intrabc == 0); +#if CONFIG_NEW_CONTEXT_MODELING + const int intrabc_ctx = get_intrabc_ctx(xd); + aom_write_symbol(w, use_intrabc, ec_ctx->intrabc_cdf[intrabc_ctx], 2); +#else + aom_write_symbol(w, use_intrabc, ec_ctx->intrabc_cdf, 2); +#endif // CONFIG_NEW_CONTEXT_MODELING + } +#endif // CONFIG_IBC_SR_EXT + } + + int skip = 0; + if (is_inter +#if CONFIG_IBC_SR_EXT + || (!is_inter && is_intrabc_block(mbmi, xd->tree_type)) +#endif // CONFIG_IBC_SR_EXT + ) { +#if CONFIG_SKIP_MODE_ENHANCEMENT + skip = write_skip(cm, xd, segment_id, mbmi, w); +#else + assert(IMPLIES(mbmi->skip_mode, + mbmi->skip_txfm[xd->tree_type == CHROMA_PART])); + skip = mbmi->skip_mode ? 1 : write_skip(cm, xd, segment_id, mbmi, w); +#endif // !CONFIG_SKIP_MODE_ENHANCEMENT + } +#else #if CONFIG_SKIP_MODE_ENHANCEMENT const int skip = write_skip(cm, xd, segment_id, mbmi, w); #else @@ -1831,6 +2027,7 @@ const int skip = mbmi->skip_mode ? 1 : write_skip(cm, xd, segment_id, mbmi, w); #endif // !CONFIG_SKIP_MODE_ENHANCEMENT +#endif // CONFIG_SKIP_TXFM_OPT write_inter_segment_id(cpi, w, seg, segp, skip, 0); write_cdef(cm, xd, w, skip); @@ -1841,13 +2038,31 @@ write_delta_q_params(cpi, skip, w); +#if CONFIG_REFINEMV + assert(IMPLIES(mbmi->refinemv_flag, + mbmi->skip_mode ? 
is_refinemv_allowed_skip_mode(cm, mbmi) + : is_refinemv_allowed(cm, mbmi, bsize))); + if (mbmi->refinemv_flag && switchable_refinemv_flag(cm, mbmi)) { + assert(mbmi->interinter_comp.type == COMPOUND_AVERAGE); + assert(mbmi->comp_group_idx == 0); + assert(mbmi->bawp_flag == 0); + } +#if CONFIG_CWP + assert(IMPLIES(mbmi->refinemv_flag, mbmi->cwp_idx == CWP_EQUAL)); +#endif // CONFIG_CWP +#endif // CONFIG_REFINEMV #if CONFIG_WARPMV // Just for debugging purpose if (mbmi->mode == WARPMV) { assert(mbmi->skip_mode == 0); assert(mbmi->motion_mode == WARP_DELTA || mbmi->motion_mode == WARPED_CAUSAL); +#if CONFIG_SEP_COMP_DRL + assert(get_ref_mv_idx(mbmi, 0) == 0); + assert(get_ref_mv_idx(mbmi, 1) == 0); +#else assert(mbmi->ref_mv_idx == 0); +#endif // CONFIG_SEP_COMP_DRL assert(!is_tip_ref_frame(mbmi->ref_frame[0])); assert(is_inter); assert(!have_drl_index(mode)); @@ -1860,6 +2075,7 @@ } #endif // CONFIG_WARPMV +#if !CONFIG_SKIP_TXFM_OPT if (!mbmi->skip_mode) write_is_inter(cm, xd, mbmi->segment_id, w, is_inter #if CONFIG_CONTEXT_DERIVATION @@ -1867,6 +2083,7 @@ skip #endif // CONFIG_CONTEXT_DERIVATION ); +#endif // !CONFIG_SKIP_TXFM_OPT #if CONFIG_SKIP_MODE_ENHANCEMENT if (mbmi->skip_mode) { @@ -1881,6 +2098,13 @@ #if CONFIG_IBC_SR_EXT if (!is_inter && av1_allow_intrabc(cm) && xd->tree_type != CHROMA_PART) { +#if CONFIG_NEW_CONTEXT_MODELING + const int use_intrabc = is_intrabc_block(mbmi, xd->tree_type); + const int intrabc_ctx = get_intrabc_ctx(xd); + aom_write_symbol(w, use_intrabc, ec_ctx->intrabc_cdf[intrabc_ctx], 2); +#else + aom_write_symbol(w, use_intrabc, ec_ctx->intrabc_cdf, 2); +#endif // CONFIG_NEW_CONTEXT_MODELING write_intrabc_info(xd, mbmi_ext_frame, w); if (is_intrabc_block(mbmi, xd->tree_type)) return; } @@ -1941,11 +2165,19 @@ #endif write_motion_mode(cm, xd, mbmi, mbmi_ext_frame, w); int is_warpmv_warp_causal = - (mbmi->motion_mode == WARPED_CAUSAL && mbmi->mode == WARPMV); + ((mbmi->motion_mode == WARPED_CAUSAL) && mbmi->mode == WARPMV); if 
(mbmi->motion_mode == WARP_DELTA || is_warpmv_warp_causal) write_warp_ref_idx(xd->tile_ctx, mbmi, w); #endif // CONFIG_WARPMV +#if CONFIG_CWG_D067_IMPROVED_WARP + if (allow_warpmv_with_mvd_coding(cm, mbmi)) { + write_warpmv_with_mvd_flag(xd->tile_ctx, mbmi, w); + } else { + assert(mbmi->warpmv_with_mvd_flag == 0); + } +#endif // CONFIG_CWG_D067_IMPROVED_WARP + #if CONFIG_IMPROVED_JMVD && CONFIG_JOINT_MVD write_jmvd_scale_mode(xd, w, mbmi); #endif // CONFIG_IMPROVED_JMVD && CONFIG_JOINT_MVD @@ -1963,7 +2195,14 @@ #endif // IMPROVED_AMVD mbmi_ext_frame->mode_context, ec_ctx, mbmi, mbmi_ext_frame, w); else +#if CONFIG_SEP_COMP_DRL + { + assert(get_ref_mv_idx(mbmi, 0) == 0); + assert(get_ref_mv_idx(mbmi, 1) == 0); + } +#else assert(mbmi->ref_mv_idx == 0); +#endif // CONFIG_SEP_COMP_DRL #if CONFIG_FLEX_MVRES if (is_pb_mv_precision_active(cm, mbmi, bsize)) { write_pb_mv_precision(cm, xd, w); @@ -1971,65 +2210,19 @@ #endif // CONFIG_FLEX_MVRES } - if (have_newmv_in_each_reference(mode)) { - for (ref = 0; ref < 1 + is_compound; ++ref) { - nmv_context *nmvc = &ec_ctx->nmvc; - const int_mv ref_mv = get_ref_mv(x, ref); - - av1_encode_mv(cpi, w, -#if CONFIG_FLEX_MVRES - mbmi->mv[ref].as_mv, ref_mv.as_mv, -#else - &mbmi->mv[ref].as_mv, &ref_mv.as_mv, -#endif - nmvc, -#if CONFIG_FLEX_MVRES - pb_mv_precision); -#else - allow_hp); -#endif - } - } else if (mode == NEAR_NEWMV -#if CONFIG_OPTFLOW_REFINEMENT - || mode == NEAR_NEWMV_OPTFLOW -#endif // CONFIG_OPTFLOW_REFINEMENT -#if CONFIG_JOINT_MVD - || (is_joint_mvd_coding_mode(mode) && jmvd_base_ref_list == 1) -#endif // CONFIG_JOINT_MVD - ) { +#if CONFIG_CWG_D067_IMPROVED_WARP + if (mbmi->mode == WARPMV && mbmi->warpmv_with_mvd_flag) { nmv_context *nmvc = &ec_ctx->nmvc; - const int_mv ref_mv = get_ref_mv(x, 1); - - av1_encode_mv(cpi, w, -#if CONFIG_FLEX_MVRES - mbmi->mv[1].as_mv, ref_mv.as_mv, -#else - &mbmi->mv[1].as_mv, &ref_mv.as_mv, -#endif - nmvc, -#if CONFIG_FLEX_MVRES - pb_mv_precision); -#else - allow_hp); -#endif - - } else 
if (mode == NEW_NEARMV -#if CONFIG_OPTFLOW_REFINEMENT - || mode == NEW_NEARMV_OPTFLOW -#endif // CONFIG_OPTFLOW_REFINEMENT -#if CONFIG_JOINT_MVD - || (is_joint_mvd_coding_mode(mode) && jmvd_base_ref_list == 0) -#endif // CONFIG_JOINT_MVD - ) { - nmv_context *nmvc = &ec_ctx->nmvc; - const int_mv ref_mv = get_ref_mv(x, 0); - + WarpedMotionParams ref_warp_model = + x->mbmi_ext_frame->warp_param_stack[mbmi->warp_ref_idx].wm_params; + const int_mv ref_mv = + get_mv_from_wrl(xd, &ref_warp_model, mbmi->pb_mv_precision, bsize, + xd->mi_col, xd->mi_row); av1_encode_mv(cpi, w, #if CONFIG_FLEX_MVRES mbmi->mv[0].as_mv, ref_mv.as_mv, - #else - &mbmi->mv[0].as_mv, &ref_mv.as_mv, + &mbmi->mv[ref].as_mv, &ref_mv.as_mv, #endif nmvc, #if CONFIG_FLEX_MVRES @@ -2038,6 +2231,81 @@ allow_hp); #endif } + + else { +#endif // CONFIG_CWG_D067_IMPROVED_WARP + + if (have_newmv_in_each_reference(mode)) { + for (ref = 0; ref < 1 + is_compound; ++ref) { + nmv_context *nmvc = &ec_ctx->nmvc; + const int_mv ref_mv = get_ref_mv(x, ref); + + av1_encode_mv(cpi, w, +#if CONFIG_FLEX_MVRES + mbmi->mv[ref].as_mv, ref_mv.as_mv, +#else + &mbmi->mv[ref].as_mv, &ref_mv.as_mv, +#endif + nmvc, +#if CONFIG_FLEX_MVRES + pb_mv_precision); +#else + allow_hp); +#endif + } + } else if (mode == NEAR_NEWMV +#if CONFIG_OPTFLOW_REFINEMENT + || mode == NEAR_NEWMV_OPTFLOW +#endif // CONFIG_OPTFLOW_REFINEMENT +#if CONFIG_JOINT_MVD + || (is_joint_mvd_coding_mode(mode) && jmvd_base_ref_list == 1) +#endif // CONFIG_JOINT_MVD + ) { + nmv_context *nmvc = &ec_ctx->nmvc; + const int_mv ref_mv = get_ref_mv(x, 1); + + av1_encode_mv(cpi, w, +#if CONFIG_FLEX_MVRES + mbmi->mv[1].as_mv, ref_mv.as_mv, +#else + &mbmi->mv[1].as_mv, &ref_mv.as_mv, +#endif + nmvc, +#if CONFIG_FLEX_MVRES + pb_mv_precision); +#else + allow_hp); +#endif + + } else if (mode == NEW_NEARMV +#if CONFIG_OPTFLOW_REFINEMENT + || mode == NEW_NEARMV_OPTFLOW +#endif // CONFIG_OPTFLOW_REFINEMENT +#if CONFIG_JOINT_MVD + || (is_joint_mvd_coding_mode(mode) && 
jmvd_base_ref_list == 0) +#endif // CONFIG_JOINT_MVD + ) { + nmv_context *nmvc = &ec_ctx->nmvc; + const int_mv ref_mv = get_ref_mv(x, 0); + + av1_encode_mv(cpi, w, +#if CONFIG_FLEX_MVRES + mbmi->mv[0].as_mv, ref_mv.as_mv, + +#else + &mbmi->mv[0].as_mv, &ref_mv.as_mv, +#endif + nmvc, +#if CONFIG_FLEX_MVRES + pb_mv_precision); +#else + allow_hp); +#endif + } + +#if CONFIG_CWG_D067_IMPROVED_WARP + } +#endif // CONFIG_CWG_D067_IMPROVED_WARP #if CONFIG_BAWP && !CONFIG_WARPMV if (cm->features.enable_bawp && av1_allow_bawp(mbmi, xd->mi_row, xd->mi_col)) { @@ -2083,6 +2351,12 @@ if (mbmi->ref_frame[1] != INTRA_FRAME) write_motion_mode(cm, xd, mbmi, w); #endif // CONFIG_EXTENDED_WARP_PREDICTION +#if CONFIG_REFINEMV + if (!mbmi->skip_mode) { + write_refinemv_flag(cm, xd, w, bsize); + } +#endif // CONFIG_REFINEMV + // First write idx to indicate current compound inter prediction mode // group Group A (0): dist_wtd_comp, compound_average Group B (1): // interintra, compound_diffwtd, wedge @@ -2091,6 +2365,9 @@ #if CONFIG_OPTFLOW_REFINEMENT && mbmi->mode < NEAR_NEARMV_OPTFLOW #endif // CONFIG_OPTFLOW_REFINEMENT +#if CONFIG_REFINEMV + && (!mbmi->refinemv_flag || !switchable_refinemv_flag(cm, mbmi)) +#endif // CONFIG_REFINEMV #if IMPROVED_AMVD && CONFIG_JOINT_MVD && !is_joint_amvd_coding_mode(mbmi->mode) #endif // IMPROVED_AMVD && CONFIG_JOINT_MVD @@ -2139,17 +2416,25 @@ } } } +#if CONFIG_CWP + if (cm->features.enable_cwp && is_cwp_allowed(mbmi) && !mbmi->skip_mode) + write_cwp_idx(xd, w, cm, mbmi); +#endif // CONFIG_CWP write_mb_interp_filter(cm, xd, w); } } -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT static void write_intrabc_drl_idx(int max_ref_bv_num, FRAME_CONTEXT *ec_ctx, const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT_FRAME *mbmi_ext_frame, aom_writer *w) { assert(!mbmi->skip_mode); +#if CONFIG_SEP_COMP_DRL + assert(mbmi->intrabc_drl_idx < mbmi_ext_frame->ref_mv_count[0]); +#else assert(mbmi->intrabc_drl_idx < mbmi_ext_frame->ref_mv_count); +#endif 
assert(mbmi->intrabc_drl_idx < max_ref_bv_num); (void)mbmi_ext_frame; @@ -2161,7 +2446,7 @@ ++bit_cnt; } } -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT static AOM_INLINE void write_intrabc_info( MACROBLOCKD *xd, const MB_MODE_INFO_EXT_FRAME *mbmi_ext_frame, @@ -2170,12 +2455,14 @@ int use_intrabc = is_intrabc_block(mbmi, xd->tree_type); if (xd->tree_type == CHROMA_PART) assert(use_intrabc == 0); FRAME_CONTEXT *ec_ctx = xd->tile_ctx; +#if !CONFIG_SKIP_TXFM_OPT #if CONFIG_NEW_CONTEXT_MODELING const int intrabc_ctx = get_intrabc_ctx(xd); aom_write_symbol(w, use_intrabc, ec_ctx->intrabc_cdf[intrabc_ctx], 2); #else aom_write_symbol(w, use_intrabc, ec_ctx->intrabc_cdf, 2); #endif // CONFIG_NEW_CONTEXT_MODELING +#endif // !CONFIG_SKIP_TXFM_OPT if (use_intrabc) { assert(mbmi->mode == DC_PRED); @@ -2185,9 +2472,13 @@ assert(mbmi->pb_mv_precision == MV_PRECISION_ONE_PEL); #endif +#if CONFIG_SEP_COMP_DRL + int_mv dv_ref = mbmi_ext_frame->ref_mv_stack[0][0].this_mv; +#else int_mv dv_ref = mbmi_ext_frame->ref_mv_stack[0].this_mv; +#endif -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT aom_write_symbol(w, mbmi->intrabc_mode, ec_ctx->intrabc_mode_cdf, 2); write_intrabc_drl_idx(MAX_REF_BV_STACK_SIZE, ec_ctx, mbmi, mbmi_ext_frame, w); @@ -2196,7 +2487,7 @@ av1_encode_dv(w, &mbmi->mv[0].as_mv, &dv_ref.as_mv, &ec_ctx->ndvc); #else av1_encode_dv(w, &mbmi->mv[0].as_mv, &dv_ref.as_mv, &ec_ctx->ndvc); -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT } } @@ -2212,8 +2503,25 @@ if (seg->segid_preskip && seg->update_map) write_segment_id(cpi, mbmi, w, seg, segp, 0); - const int skip = write_skip(cm, xd, mbmi->segment_id, mbmi, w); +#if CONFIG_SKIP_TXFM_OPT + if (av1_allow_intrabc(cm) && xd->tree_type != CHROMA_PART) { + const int use_intrabc = is_intrabc_block(mbmi, xd->tree_type); + if (xd->tree_type == CHROMA_PART) assert(use_intrabc == 0); +#if CONFIG_NEW_CONTEXT_MODELING + const int intrabc_ctx = get_intrabc_ctx(xd); + 
aom_write_symbol(w, use_intrabc, ec_ctx->intrabc_cdf[intrabc_ctx], 2); +#else + aom_write_symbol(w, use_intrabc, ec_ctx->intrabc_cdf, 2); +#endif // CONFIG_NEW_CONTEXT_MODELING + } + int skip = 0; + if (is_intrabc_block(mbmi, xd->tree_type)) { + skip = write_skip(cm, xd, mbmi->segment_id, mbmi, w); + } +#else + const int skip = write_skip(cm, xd, mbmi->segment_id, mbmi, w); +#endif // CONFIG_SKIP_TXFM_OPT if (!seg->segid_preskip && seg->update_map) write_segment_id(cpi, mbmi, w, seg, segp, skip); @@ -2550,11 +2858,11 @@ av1_get_block_dimensions(mbmi->sb_type[plane], plane, xd, NULL, NULL, &rows, &cols); assert(*tok < tok_end); -#if CONFIG_NEW_COLOR_MAP_CODING +#if CONFIG_PALETTE_IMPROVEMENTS pack_map_tokens(w, tok, palette_size_plane, cols, rows); #else pack_map_tokens(w, tok, palette_size_plane, rows * cols); -#endif // CONFIG_NEW_COLOR_MAP_CODING +#endif // CONFIG_PALETTE_IMPROVEMENTS } } @@ -2597,7 +2905,8 @@ write_tokens_b(cpi, w, tok, tok_end); } #if CONFIG_PC_WIENER - else { + else if (!is_global_intrabc_allowed(cm) && !cm->features.coded_lossless) { + // Assert only when LR is enabled. 
assert(1 == av1_get_txk_skip(cm, xd->mi_row, xd->mi_col, 0, 0, 0)); } #endif // CONFIG_PC_WIENER @@ -2614,36 +2923,28 @@ const PARTITION_TREE *ptree_luma, #endif // CONFIG_EXT_RECUR_PARTITIONS aom_writer *w) { - if (!is_partition_point(bsize)) { - return; - } - const int plane = xd->tree_type == CHROMA_PART; - if (bsize == BLOCK_8X8 && plane > 0) { - return; - } +#if !CONFIG_EXT_RECUR_PARTITIONS + if (!is_partition_point(bsize)) return; + if (bsize == BLOCK_8X8 && plane > 0) return; +#endif // !CONFIG_EXT_RECUR_PARTITIONS #if CONFIG_EXT_RECUR_PARTITIONS const int ssx = cm->seq_params.subsampling_x; const int ssy = cm->seq_params.subsampling_y; - if (is_luma_chroma_share_same_partition(xd->tree_type, ptree_luma, bsize)) { - assert(p == - sdp_chroma_part_from_luma(bsize, ptree_luma->partition, ssx, ssy)); - return; - } - - PARTITION_TYPE implied_partition; - const bool is_part_implied = is_partition_implied_at_boundary( - &cm->mi_params, xd->tree_type, ssx, ssy, mi_row, mi_col, bsize, - &ptree->chroma_ref_info, &implied_partition); - if (is_part_implied) { - assert(p == implied_partition); + const PARTITION_TYPE derived_partition = + av1_get_normative_forced_partition_type( + &cm->mi_params, xd->tree_type, ssx, ssy, mi_row, mi_col, bsize, + ptree_luma, &ptree->chroma_ref_info); + if (derived_partition != PARTITION_INVALID) { + assert(p == derived_partition); return; } #endif // CONFIG_EXT_RECUR_PARTITIONS const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize); FRAME_CONTEXT *ec_ctx = xd->tile_ctx; + #if CONFIG_EXT_RECUR_PARTITIONS const bool do_split = p != PARTITION_NONE; aom_write_symbol(w, do_split, ec_ctx->do_split_cdf[plane][ctx], 2); @@ -2658,20 +2959,42 @@ ec_ctx->do_square_split_cdf[plane][square_split_ctx], 2); } if (do_square_split) { + assert(p == PARTITION_SPLIT); return; } RECT_PART_TYPE rect_type = get_rect_part_type(p); if (rect_type_implied_by_bsize(bsize, xd->tree_type) == RECT_INVALID) { - aom_write_symbol(w, rect_type, 
ec_ctx->rect_type_cdf[plane][ctx], 2); + aom_write_symbol(w, rect_type, ec_ctx->rect_type_cdf[plane][ctx], + NUM_RECT_PARTS); } - const bool disable_ext_part = !cm->seq_params.enable_ext_partitions; const bool ext_partition_allowed = - !disable_ext_part && + cm->seq_params.enable_ext_partitions && is_ext_partition_allowed(bsize, rect_type, xd->tree_type); if (ext_partition_allowed) { const bool do_ext_partition = (p >= PARTITION_HORZ_3); aom_write_symbol(w, do_ext_partition, ec_ctx->do_ext_partition_cdf[plane][rect_type][ctx], 2); +#if CONFIG_UNEVEN_4WAY + if (do_ext_partition) { + const bool uneven_4way_partition_allowed = + is_uneven_4way_partition_allowed(bsize, rect_type, xd->tree_type); + if (uneven_4way_partition_allowed) { + const bool do_uneven_4way_partition = (p >= PARTITION_HORZ_4A); + aom_write_symbol( + w, do_uneven_4way_partition, + ec_ctx->do_uneven_4way_partition_cdf[plane][rect_type][ctx], 2); + if (do_uneven_4way_partition) { + const UNEVEN_4WAY_PART_TYPE uneven_4way_type = + (p == PARTITION_HORZ_4A || p == PARTITION_VERT_4A) ? 
UNEVEN_4A + : UNEVEN_4B; + aom_write_symbol( + w, uneven_4way_type, + ec_ctx->uneven_4way_partition_type_cdf[plane][rect_type][ctx], + NUM_UNEVEN_4WAY_PARTS); + } + } + } +#endif // CONFIG_UNEVEN_4WAY } #else // CONFIG_EXT_RECUR_PARTITIONS const int hbs_w = mi_size_wide[bsize] / 2; @@ -2728,13 +3051,18 @@ assert(bsize < BLOCK_SIZES_ALL); const int hbs_w = mi_size_wide[bsize] / 2; const int hbs_h = mi_size_high[bsize] / 2; -#if !CONFIG_H_PARTITION +#if CONFIG_UNEVEN_4WAY + const int ebs_w = mi_size_wide[bsize] / 8; + const int ebs_h = mi_size_high[bsize] / 8; +#endif // CONFIG_UNEVEN_4WAY +#if !CONFIG_EXT_RECUR_PARTITIONS const int qbs_w = mi_size_wide[bsize] / 4; const int qbs_h = mi_size_high[bsize] / 4; -#endif // !CONFIG_H_PARTITION +#endif // !CONFIG_EXT_RECUR_PARTITIONS assert(ptree); const PARTITION_TYPE partition = ptree->partition; const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition); + if (subsize == BLOCK_INVALID) return; if (mi_row >= mi_params->mi_rows || mi_col >= mi_params->mi_cols) return; @@ -2743,7 +3071,12 @@ get_partition_plane_end(xd->tree_type, av1_num_planes(cm)); for (int plane = plane_start; plane < plane_end; ++plane) { int rcol0, rcol1, rrow0, rrow1; +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + if ((cm->rst_info[plane].frame_restoration_type != RESTORE_NONE || + cm->rst_info[plane].frame_cross_restoration_type != RESTORE_NONE) && +#else if (cm->rst_info[plane].frame_restoration_type != RESTORE_NONE && +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER av1_loop_restoration_corners_in_sb(cm, plane, mi_row, mi_col, bsize, &rcol0, &rcol1, &rrow0, &rrow1)) { const int rstride = cm->rst_info[plane].horz_units_per_tile; @@ -2762,9 +3095,12 @@ #if CONFIG_EXT_RECUR_PARTITIONS write_partition(cm, xd, mi_row, mi_col, partition, bsize, ptree, ptree_luma, w); - if (!is_luma_chroma_share_same_partition(xd->tree_type, ptree_luma, bsize)) { + const int track_ptree_luma = + is_luma_chroma_share_same_partition(xd->tree_type, ptree_luma, bsize); + 
if (!track_ptree_luma) { ptree_luma = NULL; } + assert(IMPLIES(track_ptree_luma, ptree_luma)); #else write_partition(cm, xd, mi_row, mi_col, partition, bsize, w); #endif // CONFIG_EXT_RECUR_PARTITIONS @@ -2805,7 +3141,96 @@ #endif break; #if CONFIG_EXT_RECUR_PARTITIONS -#if CONFIG_H_PARTITION +#if CONFIG_UNEVEN_4WAY + case PARTITION_HORZ_4A: { + const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_HORZ); + const BLOCK_SIZE bsize_med = + get_partition_subsize(bsize_big, PARTITION_HORZ); + assert(subsize == get_partition_subsize(bsize_med, PARTITION_HORZ)); + write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[0], + get_partition_subtree_const(ptree_luma, 0), mi_row, mi_col, + subsize); + if (mi_row + ebs_h >= mi_params->mi_rows) break; + write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[1], + get_partition_subtree_const(ptree_luma, 1), mi_row + ebs_h, + mi_col, bsize_med); + if (mi_row + 3 * ebs_h >= mi_params->mi_rows) break; + write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[2], + get_partition_subtree_const(ptree_luma, 2), + mi_row + 3 * ebs_h, mi_col, bsize_big); + if (mi_row + 7 * ebs_h >= mi_params->mi_rows) break; + write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[3], + get_partition_subtree_const(ptree_luma, 3), + mi_row + 7 * ebs_h, mi_col, subsize); + break; + } + case PARTITION_HORZ_4B: { + const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_HORZ); + const BLOCK_SIZE bsize_med = + get_partition_subsize(bsize_big, PARTITION_HORZ); + assert(subsize == get_partition_subsize(bsize_med, PARTITION_HORZ)); + write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[0], + get_partition_subtree_const(ptree_luma, 0), mi_row, mi_col, + subsize); + if (mi_row + ebs_h >= mi_params->mi_rows) break; + write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[1], + get_partition_subtree_const(ptree_luma, 1), mi_row + ebs_h, + mi_col, bsize_big); + if (mi_row + 5 * ebs_h >= mi_params->mi_rows) break; + 
write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[2], + get_partition_subtree_const(ptree_luma, 2), + mi_row + 5 * ebs_h, mi_col, bsize_med); + if (mi_row + 7 * ebs_h >= mi_params->mi_rows) break; + write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[3], + get_partition_subtree_const(ptree_luma, 3), + mi_row + 7 * ebs_h, mi_col, subsize); + break; + } + case PARTITION_VERT_4A: { + const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_VERT); + const BLOCK_SIZE bsize_med = + get_partition_subsize(bsize_big, PARTITION_VERT); + assert(subsize == get_partition_subsize(bsize_med, PARTITION_VERT)); + write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[0], + get_partition_subtree_const(ptree_luma, 0), mi_row, mi_col, + subsize); + if (mi_col + ebs_w >= mi_params->mi_cols) break; + write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[1], + get_partition_subtree_const(ptree_luma, 1), mi_row, + mi_col + ebs_w, bsize_med); + if (mi_col + 3 * ebs_w >= mi_params->mi_cols) break; + write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[2], + get_partition_subtree_const(ptree_luma, 2), mi_row, + mi_col + 3 * ebs_w, bsize_big); + if (mi_col + 7 * ebs_w >= mi_params->mi_cols) break; + write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[3], + get_partition_subtree_const(ptree_luma, 3), mi_row, + mi_col + 7 * ebs_w, subsize); + break; + } + case PARTITION_VERT_4B: { + const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_VERT); + const BLOCK_SIZE bsize_med = + get_partition_subsize(bsize_big, PARTITION_VERT); + assert(subsize == get_partition_subsize(bsize_med, PARTITION_VERT)); + write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[0], + get_partition_subtree_const(ptree_luma, 0), mi_row, mi_col, + subsize); + if (mi_col + ebs_w >= mi_params->mi_cols) break; + write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[1], + get_partition_subtree_const(ptree_luma, 1), mi_row, + mi_col + ebs_w, bsize_big); + if 
(mi_col + 5 * ebs_w >= mi_params->mi_cols) break; + write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[2], + get_partition_subtree_const(ptree_luma, 2), mi_row, + mi_col + 5 * ebs_w, bsize_med); + if (mi_col + 7 * ebs_w >= mi_params->mi_cols) break; + write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[3], + get_partition_subtree_const(ptree_luma, 3), mi_row, + mi_col + 7 * ebs_w, subsize); + break; + } +#endif // CONFIG_UNEVEN_4WAY case PARTITION_HORZ_3: case PARTITION_VERT_3: for (int i = 0; i < 4; ++i) { @@ -2829,36 +3254,6 @@ this_mi_col, this_bsize); } break; -#else - case PARTITION_HORZ_3: - write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[0], - get_partition_subtree_const(ptree_luma, 0), mi_row, mi_col, - subsize); - if (mi_row + qbs_h >= mi_params->mi_rows) break; - write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[1], - get_partition_subtree_const(ptree_luma, 1), mi_row, - mi_row + qbs_h, mi_col, - get_partition_subsize(bsize, PARTITION_HORZ)); - if (mi_row + 3 * qbs_h >= mi_params->mi_rows) break; - write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[2], - get_partition_subtree_const(ptree_luma, 2), mi_row, - mi_row + 3 * qbs_h, mi_col, subsize); - break; - case PARTITION_VERT_3: - write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[0], - get_partition_subtree_const(ptree_luma, 0), mi_row, mi_col, - subsize); - if (mi_col + qbs_w >= mi_params->mi_cols) break; - write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[1], - get_partition_subtree_const(ptree_luma, 1), mi_row, - mi_col + qbs_w, - get_partition_subsize(bsize, PARTITION_VERT)); - if (mi_col + 3 * qbs_w >= mi_params->mi_cols) break; - write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[2], - get_partition_subtree_const(ptree_luma, 2), mi_row, - mi_col + 3 * qbs_w, subsize); - break; -#endif // CONFIG_H_PARTITION case PARTITION_SPLIT: write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[0], get_partition_subtree_const(ptree_luma, 0), 
mi_row, mi_col, @@ -2873,7 +3268,7 @@ get_partition_subtree_const(ptree_luma, 3), mi_row + hbs_h, mi_col + hbs_w, subsize); break; -#else +#else // CONFIG_EXT_RECUR_PARTITIONS case PARTITION_SPLIT: write_modes_sb(cpi, tile, w, tok, tok_end, ptree->sub_tree[0], mi_row, mi_col, subsize); @@ -3024,11 +3419,25 @@ if (!cm->seq_params.enable_restoration) return; if (is_global_intrabc_allowed(cm)) return; const int num_planes = av1_num_planes(cm); +#if CONFIG_FLEXIBLE_RU_SIZE + int luma_none = 1, chroma_none = 1; +#else int all_none = 1, chroma_none = 1; +#endif // CONFIG_FLEXIBLE_RU_SIZE for (int p = 0; p < num_planes; ++p) { RestorationInfo *rsi = &cm->rst_info[p]; +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + if (rsi->frame_restoration_type != RESTORE_NONE || + rsi->frame_cross_restoration_type != RESTORE_NONE) { + if (p == 0) assert(rsi->frame_cross_restoration_type == RESTORE_NONE); +#else if (rsi->frame_restoration_type != RESTORE_NONE) { +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER +#if CONFIG_FLEXIBLE_RU_SIZE + luma_none &= p > 0; +#else all_none = 0; +#endif // CONFIG_FLEXIBLE_RU_SIZE chroma_none &= p == 0; } #if CONFIG_LR_FLEX_SYNTAX @@ -3046,11 +3455,17 @@ aom_wb_write_bit(wb, 0); } else { aom_wb_write_bit(wb, 1); + int tools_count = cm->features.lr_tools_count[p]; for (int i = 1; i < RESTORE_SWITCHABLE_TYPES; ++i) { if (!(plane_lr_tools_disable_mask & (1 << i))) { - aom_wb_write_bit(wb, ((sw_lr_tools_disable_mask >> i) & 1)); + const int disable_tool = (sw_lr_tools_disable_mask >> i) & 1; + aom_wb_write_bit(wb, disable_tool); plane_lr_tools_disable_mask |= (sw_lr_tools_disable_mask & (1 << i)); + tools_count -= disable_tool; + // if tools_count becomes 2 break from the loop since we + // do not allow any other tool to be disabled. + if (tools_count == 2) break; } } av1_set_lr_tools(plane_lr_tools_disable_mask, p, &cm->features); @@ -3107,7 +3522,29 @@ ? 
NUM_WIENERNS_CLASS_INIT_LUMA : NUM_WIENERNS_CLASS_INIT_CHROMA)); #endif // CONFIG_WIENER_NONSEP + +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + if (p > 0) { + aom_wb_write_bit(wb, rsi->frame_cross_restoration_type != RESTORE_NONE); + } +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER } +#if CONFIG_FLEXIBLE_RU_SIZE + int size = cm->rst_info[0].max_restoration_unit_size; + if (!luma_none) { + aom_wb_write_bit(wb, cm->rst_info[0].restoration_unit_size == size >> 1); + if (cm->rst_info[0].restoration_unit_size != size >> 1) + aom_wb_write_bit(wb, cm->rst_info[0].restoration_unit_size == size); + } + if (!chroma_none) { + size = cm->rst_info[1].max_restoration_unit_size; + aom_wb_write_bit(wb, cm->rst_info[1].restoration_unit_size == size >> 1); + if (cm->rst_info[1].restoration_unit_size != size >> 1) + aom_wb_write_bit(wb, cm->rst_info[1].restoration_unit_size == size); + assert(cm->rst_info[2].restoration_unit_size == + cm->rst_info[1].restoration_unit_size); + } +#else if (!all_none) { #if CONFIG_BLOCK_256 assert(cm->sb_size == BLOCK_64X64 || cm->sb_size == BLOCK_128X128 || @@ -3161,6 +3598,7 @@ cm->rst_info[1].restoration_unit_size); } } +#endif // CONFIG_FLEXIBLE_RU_SIZE } static AOM_INLINE void write_wiener_filter(MACROBLOCKD *xd, int wiener_win, @@ -3313,8 +3751,9 @@ aom_write_literal(wb, match, 1); if (match) break; } - assert( - IMPLIES(!match, ref == bank->bank_size_for_class[wiener_class_id] - 1)); + assert(IMPLIES( + !match, + ref == AOMMAX(0, bank->bank_size_for_class[wiener_class_id] - 1))); return exact_match; } #endif // CONFIG_LR_MERGE_COEFFS @@ -3323,7 +3762,12 @@ MACROBLOCKD *xd, int plane, const WienerNonsepInfo *wienerns_info, WienerNonsepInfoBank *bank, aom_writer *wb) { const WienernsFilterParameters *nsfilter_params = +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + get_wienerns_parameters(xd->current_base_qindex, plane != AOM_PLANE_Y, + wienerns_info->is_cross_filter); +#else get_wienerns_parameters(xd->current_base_qindex, plane != AOM_PLANE_Y); 
+#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER int skip_filter_write_for_class[WIENERNS_MAX_CLASSES] = { 0 }; int ref_for_class[WIENERNS_MAX_CLASSES] = { 0 }; #if CONFIG_LR_MERGE_COEFFS @@ -3344,7 +3788,6 @@ for (int c_id = 0; c_id < num_classes; ++c_id) { if (skip_filter_write_for_class[c_id]) continue; const int ref = ref_for_class[c_id]; - const WienerNonsepInfo *ref_wienerns_info = av1_constref_from_wienerns_bank(bank, ref, c_id); const int16_t *wienerns_info_nsfilter = @@ -3437,13 +3880,25 @@ aom_writer *const w, int plane, FRAME_COUNTS *counts) { const RestorationInfo *rsi = cm->rst_info + plane; RestorationType frame_rtype = rsi->frame_restoration_type; +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + RestorationType frame_cross_rtype = rsi->frame_cross_restoration_type; + RestorationType unit_cross_rtype = rui->cross_restoration_type; + assert(frame_rtype != RESTORE_NONE || frame_cross_rtype != RESTORE_NONE); +#else assert(frame_rtype != RESTORE_NONE); +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER (void)counts; assert(!cm->features.all_lossless); const int wiener_win = (plane > 0) ? 
WIENER_WIN_CHROMA : WIENER_WIN; RestorationType unit_rtype = rui->restoration_type; +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + WienerNonsepInfo *info = (WienerNonsepInfo *)&rui->wienerns_info; + info->is_cross_filter = 0; + info = (WienerNonsepInfo *)&rui->wienerns_cross_info; + info->is_cross_filter = 1; +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER #if CONFIG_LR_FLEX_SYNTAX assert(((cm->features.lr_tools_disable_mask[plane] >> rui->restoration_type) & 1) == 0); @@ -3533,6 +3988,19 @@ } #endif // CONFIG_PC_WIENER } +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + if (frame_cross_rtype == RESTORE_WIENER_NONSEP) { + aom_write_symbol(w, unit_cross_rtype != RESTORE_NONE, + xd->tile_ctx->wienerns_restore_cdf, 2); +#if CONFIG_ENTROPY_STATS + ++counts->wienerns_restore[unit_cross_rtype != RESTORE_NONE]; +#endif // CONFIG_ENTROPY_STATS + if (unit_cross_rtype != RESTORE_NONE) { + write_wienerns_filter(xd, plane, &rui->wienerns_cross_info, + &xd->wienerns_cross_info[plane], w); + } + } +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER } static AOM_INLINE void encode_loopfilter(AV1_COMMON *cm, @@ -4378,7 +4846,7 @@ aom_wb_write_bit(wb, uv_neq_y); if (uv_neq_y) { for (int i = 1; i < RESTORE_SWITCHABLE_TYPES; ++i) { - if (DEF_UV_LR_TOOLS_DISABLE_MASK | (1 << i)) continue; + if (DEF_UV_LR_TOOLS_DISABLE_MASK & (1 << i)) continue; aom_wb_write_bit(wb, (seq_params->lr_tools_disable_mask[1] >> i) & 1); } } @@ -4392,6 +4860,10 @@ aom_wb_write_bit(wb, seq_params->enable_refmvbank); #endif // CONFIG_REF_MV_BANK aom_wb_write_bit(wb, seq_params->explicit_ref_frame_map); +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + // 0 : show_existing_frame, 1: implicit derviation + aom_wb_write_bit(wb, seq_params->enable_frame_output_order); +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT // A bit is sent here to indicate if the max number of references is 7. If // this bit is 0, then two more bits are sent to indicate the exact number // of references allowed (range: 3 to 6). 
@@ -4416,6 +4888,12 @@ #if CONFIG_BAWP aom_wb_write_bit(wb, seq_params->enable_bawp); #endif // CONFIG_BAWP +#if CONFIG_CWP + aom_wb_write_bit(wb, seq_params->enable_cwp); +#endif // CONFIG_CWP +#if CONFIG_D071_IMP_MSK_BLD + aom_wb_write_bit(wb, seq_params->enable_imp_msk_bld); +#endif // CONFIG_D071_IMP_MSK_BLD aom_wb_write_bit(wb, seq_params->enable_fsc); #if CONFIG_CCSO aom_wb_write_bit(wb, seq_params->enable_ccso); @@ -4426,6 +4904,9 @@ #if CONFIG_ORIP aom_wb_write_bit(wb, seq_params->enable_orip); #endif +#if CONFIG_IDIF + aom_wb_write_bit(wb, seq_params->enable_idif); +#endif // CONFIG_IDIF #if CONFIG_OPTFLOW_REFINEMENT if (seq_params->order_hint_info.enable_order_hint) aom_wb_write_literal(wb, seq_params->enable_opfl_refine, 2); @@ -4435,6 +4916,10 @@ aom_wb_write_bit(wb, seq_params->enable_adaptive_mvd); #endif // CONFIG_ADAPTIVE_MVD +#if CONFIG_REFINEMV + aom_wb_write_bit(wb, seq_params->enable_refinemv); +#endif // CONFIG_REFINEMV + #if CONFIG_FLEX_MVRES aom_wb_write_bit(wb, seq_params->enable_flex_mvres); #endif // CONFIG_FLEX_MVRES @@ -4449,22 +4934,41 @@ #if CONFIG_EXT_RECUR_PARTITIONS aom_wb_write_bit(wb, seq_params->enable_ext_partitions); #endif // CONFIG_EXT_RECUR_PARTITIONS +#if CONFIG_IMPROVED_GLOBAL_MOTION + if (seq_params->reduced_still_picture_hdr) { + assert(seq_params->enable_global_motion == 0); + } else { + aom_wb_write_bit(wb, seq_params->enable_global_motion); + } +#endif // CONFIG_IMPROVED_GLOBAL_MOTION } static AOM_INLINE void write_global_motion_params( const WarpedMotionParams *params, const WarpedMotionParams *ref_params, #if !CONFIG_FLEX_MVRES struct aom_write_bit_buffer *wb, int allow_hp) { +#if CONFIG_IMPROVED_GLOBAL_MOTION + (void)allow_hp; +#endif // CONFIG_IMPROVED_GLOBAL_MOTION #else struct aom_write_bit_buffer *wb, MvSubpelPrecision precision) { const int precision_loss = get_gm_precision_loss(precision); -#endif +#if CONFIG_IMPROVED_GLOBAL_MOTION + (void)precision_loss; +#endif // CONFIG_IMPROVED_GLOBAL_MOTION +#endif // 
!CONFIG_FLEX_MVRES const TransformationType type = params->wmtype; aom_wb_write_bit(wb, type != IDENTITY); if (type != IDENTITY) { aom_wb_write_bit(wb, type == ROTZOOM); - if (type != ROTZOOM) aom_wb_write_bit(wb, type == TRANSLATION); + if (type != ROTZOOM) { +#if CONFIG_IMPROVED_GLOBAL_MOTION + assert(type == AFFINE); +#else + aom_wb_write_bit(wb, type == TRANSLATION); +#endif // !CONFIG_IMPROVED_GLOBAL_MOTION + } } if (type >= ROTZOOM) { @@ -4492,6 +4996,10 @@ } if (type >= TRANSLATION) { +#if CONFIG_IMPROVED_GLOBAL_MOTION + const int trans_prec_diff = GM_TRANS_PREC_DIFF; + const int trans_max = GM_TRANS_MAX; +#else #if CONFIG_FLEX_MVRES const int trans_bits = (type == TRANSLATION) ? GM_ABS_TRANS_ONLY_BITS - precision_loss @@ -4506,14 +5014,16 @@ const int trans_prec_diff = (type == TRANSLATION) ? GM_TRANS_ONLY_PREC_DIFF + !allow_hp : GM_TRANS_PREC_DIFF; -#endif +#endif // CONFIG_FLEX_MVRES + const int trans_max = (1 << trans_bits); +#endif // CONFIG_IMPROVED_GLOBAL_MOTION aom_wb_write_signed_primitive_refsubexpfin( - wb, (1 << trans_bits) + 1, SUBEXPFIN_K, + wb, trans_max + 1, SUBEXPFIN_K, (ref_params->wmmat[0] >> trans_prec_diff), (params->wmmat[0] >> trans_prec_diff)); aom_wb_write_signed_primitive_refsubexpfin( - wb, (1 << trans_bits) + 1, SUBEXPFIN_K, + wb, trans_max + 1, SUBEXPFIN_K, (ref_params->wmmat[1] >> trans_prec_diff), (params->wmmat[1] >> trans_prec_diff)); } @@ -4522,11 +5032,88 @@ static AOM_INLINE void write_global_motion(AV1_COMP *cpi, struct aom_write_bit_buffer *wb) { AV1_COMMON *const cm = &cpi->common; + int num_total_refs = cm->ref_frames_info.num_total_refs; +#if CONFIG_IMPROVED_GLOBAL_MOTION + assert(cm->cur_frame->num_ref_frames == num_total_refs); +#endif // CONFIG_IMPROVED_GLOBAL_MOTION int frame; - for (frame = 0; frame < cm->ref_frames_info.num_total_refs; ++frame) { + +#if CONFIG_IMPROVED_GLOBAL_MOTION + const SequenceHeader *const seq_params = &cm->seq_params; + if (!seq_params->enable_global_motion) { + return; + } + + bool 
use_global_motion = false; + for (frame = 0; frame < num_total_refs; ++frame) { + if (cm->global_motion[frame].wmtype != IDENTITY) { + use_global_motion = true; + break; + } + } + + aom_wb_write_bit(wb, use_global_motion); + if (!use_global_motion) { + return; + } + + int our_ref = cpi->gm_info.base_model_our_ref; + int their_ref = cpi->gm_info.base_model_their_ref; + aom_wb_write_primitive_quniform(wb, num_total_refs + 1, our_ref); + if (our_ref >= num_total_refs) { + // Special case: Use IDENTITY model + // Nothing more to code + assert(their_ref == -1); + } else { + RefCntBuffer *buf = get_ref_frame_buf(cm, our_ref); + assert(buf); + int their_num_refs = buf->num_ref_frames; + if (their_num_refs == 0) { + // Special case: if an intra/key frame is used as a ref, use an + // IDENTITY model + // Nothing more to code + assert(their_ref == -1); + } else { + aom_wb_write_primitive_quniform(wb, their_num_refs, their_ref); + } + } +#endif // CONFIG_IMPROVED_GLOBAL_MOTION + + for (frame = 0; frame < num_total_refs; ++frame) { +#if CONFIG_IMPROVED_GLOBAL_MOTION + int temporal_distance; + if (seq_params->order_hint_info.enable_order_hint) { + const RefCntBuffer *const ref_buf = get_ref_frame_buf(cm, frame); +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + const int ref_order_hint = ref_buf->display_order_hint; + const int cur_order_hint = cm->cur_frame->display_order_hint; +#else + const int ref_order_hint = ref_buf->order_hint; + const int cur_order_hint = cm->cur_frame->order_hint; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + temporal_distance = get_relative_dist(&seq_params->order_hint_info, + cur_order_hint, ref_order_hint); + } else { + temporal_distance = 1; + } + + if (temporal_distance == 0) { + // Don't code global motion for frames at the same temporal instant + assert(cm->global_motion[frame].wmtype == IDENTITY); + continue; + } + + WarpedMotionParams ref_params_; + av1_scale_warp_model(&cm->base_global_motion_model, + cm->base_global_motion_distance, &ref_params_, 
+ temporal_distance); + WarpedMotionParams *ref_params = &ref_params_; +#else const WarpedMotionParams *ref_params = cm->prev_frame ? &cm->prev_frame->global_motion[frame] : &default_warp_params; +#endif // CONFIG_IMPROVED_GLOBAL_MOTION + write_global_motion_params(&cm->global_motion[frame], ref_params, wb, #if !CONFIG_FLEX_MVRES cm->features.allow_high_precision_mv); @@ -4610,11 +5197,7 @@ } } aom_wb_write_bit(wb, features->disable_cdf_update); -#if DS_FRAME_LEVEL - if (current_frame->frame_type == KEY_FRAME) { - aom_wb_write_literal(wb, features->ds_filter_type, 2); - } -#endif // DS_FRAME_LEVEL + if (seq_params->force_screen_content_tools == 2) { aom_wb_write_bit(wb, features->allow_screen_content_tools); } else { @@ -4734,11 +5317,11 @@ if (features->allow_global_intrabc) { aom_wb_write_bit(wb, features->allow_local_intrabc); } -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT aom_wb_write_primitive_quniform( wb, MAX_MAX_DRL_BITS - MIN_MAX_DRL_BITS + 1, features->max_drl_bits - MIN_MAX_DRL_BITS); -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT } #endif // CONFIG_IBC_SR_EXT } else { @@ -4753,11 +5336,11 @@ if (features->allow_global_intrabc) { aom_wb_write_bit(wb, features->allow_local_intrabc); } -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT aom_wb_write_primitive_quniform( wb, MAX_MAX_DRL_BITS - MIN_MAX_DRL_BITS + 1, features->max_drl_bits - MIN_MAX_DRL_BITS); -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT } #endif // CONFIG_IBC_SR_EXT } else if (current_frame->frame_type == INTER_FRAME || @@ -4997,6 +5580,15 @@ aom_wb_write_bit(wb, features->enable_bawp); #endif // CONFIG_BAWP +#if CONFIG_CWG_D067_IMPROVED_WARP + if (!frame_is_intra_only(cm) && + (features->enabled_motion_modes & (1 << WARP_DELTA)) != 0) { + aom_wb_write_bit(wb, features->allow_warpmv_mode); + } else { + assert(IMPLIES(!frame_is_intra_only(cm), !features->allow_warpmv_mode)); + } +#endif // CONFIG_CWG_D067_IMPROVED_WARP + 
aom_wb_write_bit(wb, features->reduced_tx_set_used); if (!frame_is_intra_only(cm)) write_global_motion(cpi, wb); @@ -5881,7 +6473,16 @@ } const int write_frame_header = +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + (cpi->num_tg > 1 || + (encode_show_existing_frame(cm) && + (!cm->seq_params.order_hint_info.enable_order_hint || + !cm->seq_params.enable_frame_output_order)) || + (encode_show_existing_frame(cm) && + cm->cur_frame->frame_type == KEY_FRAME) +#else // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT (cpi->num_tg > 1 || encode_show_existing_frame(cm) +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT #if CONFIG_TIP || (cm->features.tip_frame_mode == TIP_FRAME_AS_OUTPUT) #endif // CONFIG_TIP @@ -5907,7 +6508,19 @@ data += fh_info.total_length; } +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + // When enable_frame_output_order == 1, the OBU packet of show_existing_frame + // is not signaled for non-error-resilient mode. + // For error-resilienet mode, still an OBU is signaled. + if ((cm->seq_params.order_hint_info.enable_order_hint && + cm->seq_params.enable_frame_output_order && cm->show_existing_frame && + !cm->features.error_resilient_mode) || + ((!cm->seq_params.order_hint_info.enable_order_hint || + !cm->seq_params.enable_frame_output_order) && + encode_show_existing_frame(cm)) +#else // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT if (encode_show_existing_frame(cm) +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT #if CONFIG_TIP || (cm->features.tip_frame_mode == TIP_FRAME_AS_OUTPUT) #endif // CONFIG_TIP @@ -5916,7 +6529,7 @@ } else { // Since length_field is determined adaptively after frame header // encoding, saved_wb must be adjusted accordingly. - saved_wb.bit_buffer += length_field; + if (saved_wb.bit_buffer) saved_wb.bit_buffer += length_field; // Each tile group obu will be preceded by 4-byte size of the tile group // obu
diff --git a/av1/encoder/bitstream.h b/av1/encoder/bitstream.h index 7257870..7148835 100644 --- a/av1/encoder/bitstream.h +++ b/av1/encoder/bitstream.h
@@ -49,7 +49,12 @@ aom_writer *w); void av1_write_tx_type(const AV1_COMMON *const cm, const MACROBLOCKD *xd, - TX_TYPE tx_type, TX_SIZE tx_size, aom_writer *w); + TX_TYPE tx_type, TX_SIZE tx_size, aom_writer *w +#if CONFIG_ATC_DCTX_ALIGNED + , + const int plane, const int eob, const int dc_skip +#endif // CONFIG_ATC_DCTX_ALIGNED +); #if CONFIG_CROSS_CHROMA_TX void av1_write_cctx_type(const AV1_COMMON *const cm, const MACROBLOCKD *xd,
diff --git a/av1/encoder/block.h b/av1/encoder/block.h index 3051f9d..ac9663e 100644 --- a/av1/encoder/block.h +++ b/av1/encoder/block.h
@@ -113,6 +113,10 @@ tran_low_t *coeff; //! Location of the end of qcoeff (end of block). uint16_t *eobs; +#if CONFIG_ATC_DCTX_ALIGNED + //! Location of the beginning of qcoeff (beginning of block). + uint16_t *bobs; +#endif // CONFIG_ATC_DCTX_ALIGNED //! Contexts used to code the transform coefficients. uint8_t *txb_entropy_ctx; //! A buffer containing the source frame. @@ -154,7 +158,7 @@ //! Cost to skip txfm for the current AOM_PLANE_V txfm block. int v_txb_skip_cost[V_TXB_SKIP_CONTEXTS][2]; #endif // CONFIG_CONTEXT_DERIVATION -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC //! Cost for encoding the base_eob level of a low-frequency coefficient int base_lf_eob_cost[SIG_COEF_CONTEXTS_EOB][LF_BASE_SYMBOLS - 1]; //! Cost for encoding the base level of a low-frequency coefficient @@ -162,7 +166,7 @@ //! Cost for encoding an increment to the low-frequency coefficient int lps_lf_cost[LF_LEVEL_CONTEXTS] [COEFF_BASE_RANGE + 1 + COEFF_BASE_RANGE + 1]; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC #if CONFIG_PAR_HIDING //! Cost for encoding the base level of a parity-hidden coefficient int base_ph_cost[COEFF_BASE_PH_CONTEXTS][4]; @@ -202,13 +206,24 @@ //! Cost for encoding an increment to the coefficient for IDTX blocks int lps_cost_skip[IDTX_LEVEL_CONTEXTS] [COEFF_BASE_RANGE + 1 + COEFF_BASE_RANGE + 1]; +#if CONFIG_ATC_DCTX_ALIGNED + /*! \brief Cost for encoding the base_bob of a level for IDTX blocks. + * + * Decoder uses base_bob to derive the base_level as base_bob := base_bob+1. + */ + int base_bob_cost[SIG_COEF_CONTEXTS_BOB][3]; +#endif // CONFIG_ATC_DCTX_ALIGNED } LV_MAP_COEFF_COST; /*! \brief Costs for encoding the eob. */ typedef struct { //! eob_cost. +#if CONFIG_ATC_DCTX_ALIGNED + int eob_cost[EOB_MAX_SYMS]; +#else int eob_cost[2][11]; +#endif // CONFIG_ATC_DCTX_ALIGNED } LV_MAP_EOB_COST; /*! \brief Stores the transforms coefficients for the whole superblock. @@ -218,6 +233,10 @@ tran_low_t tcoeff[MAX_MB_PLANE][MAX_SB_SQUARE]; //! 
Where the transformed coefficients end. uint16_t eobs[MAX_MB_PLANE][MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)]; +#if CONFIG_ATC_DCTX_ALIGNED + //! Where the transformed coefficients begin. + uint16_t bobs[MAX_MB_PLANE][MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)]; +#endif // CONFIG_ATC_DCTX_ALIGNED /*! \brief Transform block entropy contexts. * * Each element is used as a bit field. @@ -241,7 +260,7 @@ //! Global mvs int_mv global_mvs[INTER_REFS_PER_FRAME]; //! skip_mvp_candidate_list is the MVP list for skip mode. -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT SKIP_MODE_MVP_LIST skip_mvp_candidate_list; #endif @@ -265,14 +284,23 @@ * memory. */ typedef struct { +#if CONFIG_SEP_COMP_DRL + //! \copydoc MB_MODE_INFO_EXT::ref_mv_stack + CANDIDATE_MV ref_mv_stack[2][USABLE_REF_MV_STACK_SIZE]; + //! \copydoc MB_MODE_INFO_EXT::weight + uint16_t weight[2][USABLE_REF_MV_STACK_SIZE]; + //! \copydoc MB_MODE_INFO_EXT::ref_mv_count + uint8_t ref_mv_count[2]; +#else //! \copydoc MB_MODE_INFO_EXT::ref_mv_stack CANDIDATE_MV ref_mv_stack[USABLE_REF_MV_STACK_SIZE]; //! \copydoc MB_MODE_INFO_EXT::weight uint16_t weight[USABLE_REF_MV_STACK_SIZE]; //! \copydoc MB_MODE_INFO_EXT::ref_mv_count uint8_t ref_mv_count; +#endif // CONFIG_SEP_COMP_DRL //! skip_mvp_candidate_list is the MVP list for skip mode. -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT SKIP_MODE_MVP_LIST skip_mvp_candidate_list; #endif // TODO(Ravi/Remya): Reduce the buffer size of global_mvs @@ -339,6 +367,10 @@ int rate; //! Location of the end of non-zero entries. uint16_t eob; +#if CONFIG_ATC_DCTX_ALIGNED + //! Location of the first of non-zero entries. + uint16_t bob; +#endif // CONFIG_ATC_DCTX_ALIGNED //! Transform type used on the current block. TX_TYPE tx_type; //! Unknown usage @@ -390,11 +422,19 @@ //! Current interpolation filter. InterpFilter interp_fltr; //! Refmv index in the drl. 
+#if CONFIG_SEP_COMP_DRL + int ref_mv_idx[2]; +#else int ref_mv_idx; +#endif // CONFIG_SEP_COMP_DRL //! Whether the predictors are GLOBALMV. int is_global[2]; //! Current parameters for interinter mode. INTERINTER_COMPOUND_DATA interinter_comp; +#if CONFIG_CWP + //! Index for compound weighted prediction parameters. + int cwp_idx; +#endif // CONFIG_CWP } COMP_RD_STATS; /*! \brief Contains buffers used to speed up rdopt for obmc. @@ -495,19 +535,37 @@ /*!\cond */ #if CONFIG_BLOCK_256 + #define BLOCK_256_COUNT 1 #define BLOCK_128_COUNT 3 #define BLOCK_64_COUNT 7 + +#if CONFIG_UNEVEN_4WAY +#define BLOCK_32_COUNT 31 +#define BLOCK_16_COUNT 63 +#define BLOCK_8_COUNT 64 +#else #define BLOCK_32_COUNT 15 #define BLOCK_16_COUNT 31 -#define BLOCK_8_COUNT 127 -#define BLOCK_4_COUNT 128 +#define BLOCK_8_COUNT 63 +#endif // CONFIG_UNEVEN_4WAY + +#define BLOCK_4_COUNT 64 + #else #define BLOCK_128_COUNT 1 #define BLOCK_64_COUNT 3 + +#if CONFIG_UNEVEN_4WAY +#define BLOCK_32_COUNT 15 +#define BLOCK_16_COUNT 31 +#define BLOCK_8_COUNT 32 +#else #define BLOCK_32_COUNT 7 #define BLOCK_16_COUNT 15 #define BLOCK_8_COUNT 31 +#endif // CONFIG_UNEVEN_4WAY + #define BLOCK_4_COUNT 32 #endif // CONFIG_BLOCK_256 @@ -768,6 +826,15 @@ /*! Cost for sending do_ext_partition token. */ int do_ext_partition_cost[PARTITION_STRUCTURE_NUM][NUM_RECT_PARTS] [PARTITION_CONTEXTS][2]; +#if CONFIG_UNEVEN_4WAY + /*! Cost for sending do_uneven_4way_partition token. */ + int do_uneven_4way_partition_cost[PARTITION_STRUCTURE_NUM][NUM_RECT_PARTS] + [PARTITION_CONTEXTS][2]; + /*! Cost for sending uneven_4way_partition_type token. */ + int uneven_4way_partition_type_cost[PARTITION_STRUCTURE_NUM][NUM_RECT_PARTS] + [PARTITION_CONTEXTS] + [NUM_UNEVEN_4WAY_PARTS]; +#endif // CONFIG_UNEVEN_4WAY //! Cost for coding the partition. int partition_cost[PARTITION_STRUCTURE_NUM][PARTITION_CONTEXTS] [ALL_PARTITION_TYPES]; @@ -799,7 +866,11 @@ [2 * MAX_ANGLE_DELTA + 1]; //! 
mrl_index_cost +#if CONFIG_EXT_DIR + int mrl_index_cost[MRL_INDEX_CONTEXTS][MRL_LINE_NUMBER]; +#else int mrl_index_cost[MRL_LINE_NUMBER]; +#endif // CONFIG_EXT_DIR //! Cost of signaling the forward skip coding mode int fsc_cost[FSC_MODE_CONTEXTS][FSC_BSIZE_CONTEXTS][FSC_MODES]; #if CONFIG_IMPROVED_CFL @@ -834,12 +905,12 @@ #else int intrabc_cost[2]; #endif // CONFIG_NEW_CONTEXT_MODELING -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT //! intrabc_mode_cost int intrabc_mode_cost[2]; //! intrabc_drl_idx_cost int intrabc_drl_idx_cost[MAX_REF_BV_STACK_SIZE - 1][2]; -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT //! palette_y_size_cost int palette_y_size_cost[PALATTE_BSIZE_CTXS][PALETTE_SIZES]; @@ -855,12 +926,12 @@ int palette_y_mode_cost[PALATTE_BSIZE_CTXS][PALETTE_Y_MODE_CONTEXTS][2]; //! palette_uv_mode_cost int palette_uv_mode_cost[PALETTE_UV_MODE_CONTEXTS][2]; -#if CONFIG_NEW_COLOR_MAP_CODING +#if CONFIG_PALETTE_IMPROVEMENTS //! palette_y_row_flag_cost int palette_y_row_flag_cost[PALETTE_ROW_FLAG_CONTEXTS][2]; //! palette_uv_row_flag_cost int palette_uv_row_flag_cost[PALETTE_ROW_FLAG_CONTEXTS][2]; -#endif // CONFIG_NEW_COLOR_MAP_CODING +#endif // CONFIG_PALETTE_IMPROVEMENTS /**@}*/ /***************************************************************************** @@ -887,10 +958,10 @@ int pb_block_mv_precision_costs[MV_PREC_DOWN_CONTEXTS][FLEX_MV_COSTS_SIZE] [NUM_MV_PRECISIONS]; #endif -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT //! skip_drl_mode_cost int skip_drl_mode_cost[3][2]; -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT /**@}*/ /***************************************************************************** @@ -924,11 +995,11 @@ ****************************************************************************/ /**@{*/ //! 
intra_inter_cost -#if CONFIG_CONTEXT_DERIVATION +#if CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT int intra_inter_cost[INTRA_INTER_SKIP_TXFM_CONTEXTS][INTRA_INTER_CONTEXTS][2]; #else int intra_inter_cost[INTRA_INTER_CONTEXTS][2]; -#endif // CONFIG_CONTEXT_DERIVATION +#endif // CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT //! inter_compound_mode_cost #if CONFIG_OPTFLOW_REFINEMENT /*! use_optflow_cost */ @@ -941,6 +1012,10 @@ int inter_compound_mode_cost[INTER_COMPOUND_MODE_CONTEXTS] [INTER_COMPOUND_MODES]; #endif // CONFIG_OPTFLOW_REFINEMENT +#if CONFIG_CWP + //! cwp_idx_cost for compound weighted prediction + int cwp_idx_cost[MAX_CWP_CONTEXTS][MAX_CWP_NUM - 1][2]; +#endif // CONFIG_CWP #if CONFIG_IMPROVED_JMVD && CONFIG_JOINT_MVD //! jmvd_scale_mode_cost for JOINT_NEWMV int jmvd_scale_mode_cost[JOINT_NEWMV_SCALE_FACTOR_CNT]; @@ -997,11 +1072,20 @@ int warped_causal_warpmv_cost[BLOCK_SIZES_ALL][2]; #endif // CONFIG_WARPMV +#if CONFIG_REFINEMV + //! refinemv_flag_cost + int refinemv_flag_cost[NUM_REFINEMV_CTX][REFINEMV_NUM_MODES]; +#endif // CONFIG_REFINEMV + //! warp_delta_param_cost int warp_delta_param_cost[2][WARP_DELTA_NUM_SYMBOLS]; #if CONFIG_WARP_REF_LIST //! warp_ref_idx_cost int warp_ref_idx_cost[3][WARP_REF_CONTEXTS][2]; +#if CONFIG_CWG_D067_IMPROVED_WARP + //! warpmv_with_mvd_flag_cost + int warpmv_with_mvd_flag_cost[BLOCK_SIZES_ALL][2]; +#endif // CONFIG_CWG_D067_IMPROVED_WARP #endif // CONFIG_WARP_REF_LIST //! warp_extend_cost int warp_extend_cost[WARP_EXTEND_CTXS1][WARP_EXTEND_CTXS2][2]; @@ -1045,7 +1129,12 @@ int txfm_partition_cost[TXFM_PARTITION_CONTEXTS][2]; #endif // CONFIG_NEW_TX_PARTITION //! inter_tx_type_costs +#if CONFIG_ATC_DCTX_ALIGNED + int inter_tx_type_costs[EXT_TX_SETS_INTER][EOB_TX_CTXS][EXT_TX_SIZES] + [TX_TYPES]; +#else int inter_tx_type_costs[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES]; +#endif // CONFIG_ATC_DCTX_ALIGNED //! 
intra_tx_type_costs int intra_tx_type_costs[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES] [TX_TYPES]; @@ -1144,7 +1233,7 @@ int *amvd_nmv_cost[2]; #endif // CONFIG_ADAPTIVE_MVD -#if CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT /*! Costs for coding the zero components of dv cost. */ int *dv_joint_cost; @@ -1226,7 +1315,7 @@ } IntraBCMvCosts; #endif -#if CONFIG_BVCOST_UPDATE && !CONFIG_FLEX_MVRES +#if CONFIG_IBC_BV_IMPROVEMENT && !CONFIG_FLEX_MVRES /*! \brief Holds mv costs for intrabc. */ typedef struct { @@ -1395,7 +1484,7 @@ //! multipliers for motion search. #if CONFIG_FLEX_MVRES IntraBCMvCosts dv_costs; -#elif CONFIG_BVCOST_UPDATE +#elif CONFIG_IBC_BV_IMPROVEMENT IntraBCMVCosts dv_costs; #endif
diff --git a/av1/encoder/compound_type.c b/av1/encoder/compound_type.c index 9b7c1dc..5cf2f22 100644 --- a/av1/encoder/compound_type.c +++ b/av1/encoder/compound_type.c
@@ -38,6 +38,10 @@ // Check if interp filter matches with previous case if (st->interp_fltr != mi->interp_fltr) return 0; +#if CONFIG_CWP + if (st->cwp_idx != mi->cwp_idx) return 0; +#endif // CONFIG_CWP + const MACROBLOCKD *const xd = &x->e_mbd; // Match MV and reference indices for (int i = 0; i < 2; ++i) { @@ -87,6 +91,10 @@ int32_t *comp_model_rate, int64_t *comp_model_dist, int *comp_rs2, int *match_index) { +#if CONFIG_CWP + if (mbmi->cwp_idx != CWP_EQUAL) return 0; +#endif // CONFIG_CWP + for (int j = 0; j < x->comp_rd_stats_idx; ++j) { if (is_comp_rd_match(cpi, x, &x->comp_rd_stats[j], mbmi, comp_rate, comp_dist, comp_model_rate, comp_model_dist, @@ -904,19 +912,23 @@ COMPOUND_TYPE *valid_comp_types) { int valid_type_count = 0; int comp_type, valid_check; -#if CONFIG_OPTFLOW_REFINEMENT +#if CONFIG_OPTFLOW_REFINEMENT || CONFIG_REFINEMV MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = xd->mi[0]; const PREDICTION_MODE this_mode = mbmi->mode; // For implementation simplicity, set compound type to COMPOUND_AVERAGE for // now to avoid compound type RD search. In practice, dist_wtd will always // be applied instead. - if (this_mode >= NEAR_NEARMV_OPTFLOW) { + if (this_mode >= NEAR_NEARMV_OPTFLOW +#if CONFIG_REFINEMV + || (mbmi->refinemv_flag && switchable_refinemv_flag(&cpi->common, mbmi)) +#endif // CONFIG_REFINEMV + ) { *try_average_and_distwtd_comp = 0; valid_comp_types[0] = COMPOUND_AVERAGE; return 1; } -#endif // CONFIG_OPTFLOW_REFINEMENT +#endif // CONFIG_OPTFLOW_REFINEMENT || CONFIG_REFINEMV int8_t enable_masked_type[MASKED_COMPOUND_TYPES] = { 0, 0 }; const int try_average_comp = (mode_search_mask & (1 << COMPOUND_AVERAGE)); @@ -976,6 +988,9 @@ COMPOUND_TYPE cur_type) { mbmi->interinter_comp.type = cur_type; mbmi->comp_group_idx = (cur_type >= COMPOUND_WEDGE); +#if CONFIG_CWP + mbmi->cwp_idx = (cur_type == COMPOUND_AVERAGE) ? 
mbmi->cwp_idx : CWP_EQUAL; +#endif // CONFIG_CWP } // When match is found, populate the compound type data @@ -1010,6 +1025,9 @@ best_type_stats->comp_best_model_rd = comp_model_rd_cur; best_type_stats->best_compound_data = mbmi->interinter_comp; best_type_stats->best_compmode_interinter_cost = rs2; +#if CONFIG_CWP + best_type_stats->cwp_idx = mbmi->cwp_idx; +#endif // CONFIG_CWP } // Updates best_mv for masked compound types @@ -1034,6 +1052,9 @@ MACROBLOCK *x, const MB_MODE_INFO *const mbmi, const int32_t *comp_rate, const int64_t *comp_dist, const int32_t *comp_model_rate, const int64_t *comp_model_dist, const int_mv *cur_mv, const int *comp_rs2) { +#if CONFIG_CWP + if (mbmi->cwp_idx != CWP_EQUAL) return; +#endif // CONFIG_CWP const int offset = x->comp_rd_stats_idx; if (offset < MAX_COMP_RD_STATS) { COMP_RD_STATS *const rd_stats = x->comp_rd_stats + offset; @@ -1046,7 +1067,15 @@ memcpy(rd_stats->ref_frames, mbmi->ref_frame, sizeof(rd_stats->ref_frames)); rd_stats->mode = mbmi->mode; rd_stats->interp_fltr = mbmi->interp_fltr; +#if CONFIG_SEP_COMP_DRL + rd_stats->ref_mv_idx[0] = mbmi->ref_mv_idx[0]; + rd_stats->ref_mv_idx[1] = mbmi->ref_mv_idx[1]; +#else rd_stats->ref_mv_idx = mbmi->ref_mv_idx; +#endif // CONFIG_SEP_COMP_DRL +#if CONFIG_CWP + rd_stats->cwp_idx = mbmi->cwp_idx; +#endif // CONFIG_CWP const MACROBLOCKD *const xd = &x->e_mbd; for (int i = 0; i < 2; ++i) { const WarpedMotionParams *const wm = @@ -1324,6 +1353,10 @@ best_type_stats.best_compmode_interinter_cost = 0; best_type_stats.comp_best_model_rd = INT64_MAX; +#if CONFIG_CWP + best_type_stats.cwp_idx = CWP_EQUAL; +#endif // CONFIG_CWP + int tmp_rate_mv; const int num_pix = 1 << num_pels_log2_lookup[bsize]; const int mask_len = 2 * num_pix * sizeof(uint8_t); @@ -1369,6 +1402,11 @@ av1_zero_array(masked_type_cost, COMPOUND_TYPES); else #endif // CONFIG_OPTFLOW_REFINEMENT +#if CONFIG_REFINEMV + if (mbmi->refinemv_flag && switchable_refinemv_flag(cm, mbmi)) + av1_zero_array(masked_type_cost, 
COMPOUND_TYPES); + else +#endif // CONFIG_REFINEMV // Populates masked_type_cost local array for the 4 compound types calc_masked_type_cost(&x->mode_costs, bsize, comp_group_idx_ctx, masked_compound_used, masked_type_cost); @@ -1384,6 +1422,9 @@ #if CONFIG_OPTFLOW_REFINEMENT this_mode < NEAR_NEARMV_OPTFLOW && #endif // CONFIG_OPTFLOW_REFINEMENT +#if CONFIG_REFINEMV + (!mbmi->refinemv_flag || !switchable_refinemv_flag(cm, mbmi)) && +#endif // CONFIG_REFINEMV cpi->sf.inter_sf.reuse_compound_type_decision) { return populate_reuse_comp_type_data(x, mbmi, &best_type_stats, cur_mv, comp_rate, comp_dist, comp_rs2, @@ -1396,6 +1437,7 @@ // Loop over valid compound types for (int i = 0; i < valid_type_count; i++) { cur_type = valid_comp_types[i]; + comp_model_rd_cur = INT64_MAX; tmp_rate_mv = *rate_mv; best_rd_cur = INT64_MAX; @@ -1404,6 +1446,13 @@ if (cur_type < COMPOUND_WEDGE) { update_mbmi_for_compound_type(mbmi, cur_type); rs2 = masked_type_cost[cur_type]; + +#if CONFIG_CWP + if (cm->features.enable_cwp && is_cwp_allowed(mbmi) && !mbmi->skip_mode) { + rs2 += av1_get_cwp_idx_cost(mbmi->cwp_idx, cm, x); + } +#endif // CONFIG_CWP + const int64_t mode_rd = RDCOST(x->rdmult, rs2 + rd_stats->rate, 0); if (mode_rd < ref_best_rd) { // Reuse data if matching record is found @@ -1506,6 +1555,11 @@ mbmi->interinter_comp = best_type_stats.best_compound_data; memcpy(xd->seg_mask, buffers->tmp_best_mask_buf, mask_len); } +#if CONFIG_CWP + // update best cwp_idx + mbmi->cwp_idx = best_type_stats.cwp_idx; +#endif // CONFIG_CWP + if (have_newmv_in_inter_mode(this_mode)) { mbmi->mv[0].as_int = best_mv[0].as_int; mbmi->mv[1].as_int = best_mv[1].as_int;
diff --git a/av1/encoder/compound_type.h b/av1/encoder/compound_type.h index 7543aa0..eb66e4f 100644 --- a/av1/encoder/compound_type.h +++ b/av1/encoder/compound_type.h
@@ -25,6 +25,10 @@ INTERINTER_COMPOUND_DATA best_compound_data; int64_t comp_best_model_rd; int best_compmode_interinter_cost; +#if CONFIG_CWP + // Index for the weighting factor of compound weighted prediction + int8_t cwp_idx; +#endif // CONFIG_CWP } BEST_COMP_TYPE_STATS; #define IGNORE_MODE -1
diff --git a/av1/encoder/context_tree.c b/av1/encoder/context_tree.c index bce1223..a057bc7 100644 --- a/av1/encoder/context_tree.c +++ b/av1/encoder/context_tree.c
@@ -31,6 +31,7 @@ dst_ctx->mbmi_ext_best = src_ctx->mbmi_ext_best; dst_ctx->num_4x4_blk = src_ctx->num_4x4_blk; + dst_ctx->num_4x4_blk_chroma = src_ctx->num_4x4_blk_chroma; dst_ctx->skippable = src_ctx->skippable; memcpy(dst_ctx->blk_skip, src_ctx->blk_skip, @@ -52,8 +53,11 @@ const int num_pix = src_ctx->num_4x4_blk * 16; if (num_pix <= MAX_PALETTE_SQUARE) { for (int i = 0; i < 2; ++i) { + const int num_blk = + (i == 0) ? src_ctx->num_4x4_blk : src_ctx->num_4x4_blk_chroma; + const int color_map_size = num_blk * 16; memcpy(dst_ctx->color_index_map[i], src_ctx->color_index_map[i], - sizeof(src_ctx->color_index_map[i][0]) * num_pix); + sizeof(src_ctx->color_index_map[i][0]) * color_map_size); } } #endif // CONFIG_EXT_RECUR_PARTITIONS @@ -83,8 +87,9 @@ } } -PICK_MODE_CONTEXT *av1_alloc_pmc(const AV1_COMMON *cm, int mi_row, int mi_col, - BLOCK_SIZE bsize, PC_TREE *parent, +PICK_MODE_CONTEXT *av1_alloc_pmc(const AV1_COMMON *cm, TREE_TYPE tree_type, + int mi_row, int mi_col, BLOCK_SIZE bsize, + PC_TREE *parent, PARTITION_TYPE parent_partition, int index, int subsampling_x, int subsampling_y, PC_TREE_SHARED_BUFFERS *shared_bufs) { @@ -95,7 +100,8 @@ ctx->rd_mode_is_ready = 0; ctx->parent = parent; ctx->index = index; - set_chroma_ref_info(mi_row, mi_col, index, bsize, &ctx->chroma_ref_info, + set_chroma_ref_info(tree_type, mi_row, mi_col, index, bsize, + &ctx->chroma_ref_info, parent ? &parent->chroma_ref_info : NULL, parent ? parent->block_size : BLOCK_INVALID, parent_partition, subsampling_x, subsampling_y); @@ -105,6 +111,19 @@ const int num_pix = block_size_wide[bsize] * block_size_high[bsize]; const int num_blk = num_pix / 16; +#if CONFIG_UNEVEN_4WAY + // Biggest chroma block covering multiple luma blocks is of size 8X16 / 16X8, + // when a 16X32 / 32X16 block uses a HORZ / VERTICAL 4A/4B partition. 
+ const int num_pix_chroma = AOMMAX(num_pix, 16 * 8); +#else + // Biggest chroma block covering multiple luma blocks is of size 8X8, + // when a 16X16 block uses a HORZ_3 / VERTICAL_3 partition. + // However, we don't explicitly need to allocate that minimum, because palette + // is only allowed for bsize >= BLOCK_8X8, and all these block sizes have at + // least 64 pixels. + const int num_pix_chroma = num_pix; +#endif // CONFIG_UNEVEN_4WAY + AOM_CHECK_MEM_ERROR(&error, ctx->blk_skip, aom_calloc(num_blk, sizeof(*ctx->blk_skip))); AOM_CHECK_MEM_ERROR(&error, ctx->tx_type_map, @@ -119,6 +138,7 @@ aom_calloc(num_blk, sizeof(*ctx->cctx_type_map))); #endif // CONFIG_CROSS_CHROMA_TX ctx->num_4x4_blk = num_blk; + ctx->num_4x4_blk_chroma = num_pix_chroma / 16; for (int i = 0; i < num_planes; ++i) { ctx->coeff[i] = shared_bufs->coeff_buf[i]; @@ -126,6 +146,10 @@ ctx->dqcoeff[i] = shared_bufs->dqcoeff_buf[i]; AOM_CHECK_MEM_ERROR(&error, ctx->eobs[i], aom_memalign(32, num_blk * sizeof(*ctx->eobs[i]))); +#if CONFIG_ATC_DCTX_ALIGNED + AOM_CHECK_MEM_ERROR(&error, ctx->bobs[i], + aom_memalign(32, num_blk * sizeof(*ctx->bobs[i]))); +#endif // CONFIG_ATC_DCTX_ALIGNED AOM_CHECK_MEM_ERROR( &error, ctx->txb_entropy_ctx[i], aom_memalign(32, num_blk * sizeof(*ctx->txb_entropy_ctx[i]))); @@ -133,9 +157,10 @@ if (num_pix <= MAX_PALETTE_SQUARE) { for (int i = 0; i < 2; ++i) { + const int color_map_size = (i == 0) ? 
num_pix : num_pix_chroma; AOM_CHECK_MEM_ERROR( &error, ctx->color_index_map[i], - aom_memalign(32, num_pix * sizeof(*ctx->color_index_map[i]))); + aom_memalign(32, color_map_size * sizeof(*ctx->color_index_map[i]))); } } av1_invalid_rd_stats(&ctx->rd_stats); @@ -162,6 +187,10 @@ ctx->dqcoeff[i] = NULL; aom_free(ctx->eobs[i]); ctx->eobs[i] = NULL; +#if CONFIG_ATC_DCTX_ALIGNED + aom_free(ctx->bobs[i]); + ctx->bobs[i] = NULL; +#endif // CONFIG_ATC_DCTX_ALIGNED aom_free(ctx->txb_entropy_ctx[i]); ctx->txb_entropy_ctx[i] = NULL; } @@ -174,8 +203,8 @@ aom_free(ctx); } -PC_TREE *av1_alloc_pc_tree_node(int mi_row, int mi_col, BLOCK_SIZE bsize, - PC_TREE *parent, +PC_TREE *av1_alloc_pc_tree_node(TREE_TYPE tree_type, int mi_row, int mi_col, + BLOCK_SIZE bsize, PC_TREE *parent, PARTITION_TYPE parent_partition, int index, int is_last, int subsampling_x, int subsampling_y) { @@ -196,7 +225,8 @@ av1_invalid_rd_stats(&pc_tree->none_rd); pc_tree->skippable = false; #endif // CONFIG_EXT_RECUR_PARTITIONS - set_chroma_ref_info(mi_row, mi_col, index, bsize, &pc_tree->chroma_ref_info, + set_chroma_ref_info(tree_type, mi_row, mi_col, index, bsize, + &pc_tree->chroma_ref_info, parent ? &parent->chroma_ref_info : NULL, parent ? 
parent->block_size : BLOCK_INVALID, parent_partition, subsampling_x, subsampling_y); @@ -207,19 +237,20 @@ pc_tree->vertical[i] = NULL; } #if CONFIG_EXT_RECUR_PARTITIONS -#if CONFIG_H_PARTITION +#if CONFIG_UNEVEN_4WAY + for (int i = 0; i < 4; ++i) { + pc_tree->horizontal4a[i] = NULL; + pc_tree->horizontal4b[i] = NULL; + pc_tree->vertical4a[i] = NULL; + pc_tree->vertical4b[i] = NULL; + } +#endif // CONFIG_UNEVEN_4WAY for (int i = 0; i < 4; ++i) { pc_tree->horizontal3[i] = NULL; pc_tree->vertical3[i] = NULL; } #else for (int i = 0; i < 3; ++i) { - pc_tree->horizontal3[i] = NULL; - pc_tree->vertical3[i] = NULL; - } -#endif // CONFIG_H_PARTITION -#else - for (int i = 0; i < 3; ++i) { pc_tree->horizontala[i] = NULL; pc_tree->horizontalb[i] = NULL; pc_tree->verticala[i] = NULL; @@ -272,11 +303,45 @@ #endif // CONFIG_EXT_RECUR_PARTITIONS } #if CONFIG_EXT_RECUR_PARTITIONS -#if CONFIG_H_PARTITION + +#if CONFIG_UNEVEN_4WAY + if (!keep_best || (partition != PARTITION_HORZ_4A)) { + for (int i = 0; i < 4; ++i) { + if (pc_tree->horizontal4a[i] != NULL) { + av1_free_pc_tree_recursive(pc_tree->horizontal4a[i], num_planes, 0, 0); + pc_tree->horizontal4a[i] = NULL; + } + } + } + + if (!keep_best || (partition != PARTITION_HORZ_4B)) { + for (int i = 0; i < 4; ++i) { + if (pc_tree->horizontal4b[i] != NULL) { + av1_free_pc_tree_recursive(pc_tree->horizontal4b[i], num_planes, 0, 0); + pc_tree->horizontal4b[i] = NULL; + } + } + } + + if (!keep_best || (partition != PARTITION_VERT_4A)) { + for (int i = 0; i < 4; ++i) { + if (pc_tree->vertical4a[i] != NULL) { + av1_free_pc_tree_recursive(pc_tree->vertical4a[i], num_planes, 0, 0); + pc_tree->vertical4a[i] = NULL; + } + } + } + + if (!keep_best || (partition != PARTITION_VERT_4B)) { + for (int i = 0; i < 4; ++i) { + if (pc_tree->vertical4b[i] != NULL) { + av1_free_pc_tree_recursive(pc_tree->vertical4b[i], num_planes, 0, 0); + pc_tree->vertical4b[i] = NULL; + } + } + } +#endif // CONFIG_UNEVEN_4WAY for (int i = 0; i < 4; ++i) { -#else - for 
(int i = 0; i < 3; ++i) { -#endif // CONFIG_H_PARTITION if ((!keep_best || (partition != PARTITION_HORZ_3)) && pc_tree->horizontal3[i] != NULL) { av1_free_pc_tree_recursive(pc_tree->horizontal3[i], num_planes, 0, 0); @@ -323,13 +388,20 @@ void av1_copy_pc_tree_recursive(const AV1_COMMON *cm, PC_TREE *dst, PC_TREE *src, int ss_x, int ss_y, PC_TREE_SHARED_BUFFERS *shared_bufs, - int num_planes) { + TREE_TYPE tree_type, int num_planes) { // Copy the best partition type. For basic information like bsize and index, // we assume they have been set properly when initializing the dst PC_TREE dst->partitioning = src->partitioning; dst->rd_cost = src->rd_cost; dst->none_rd = src->none_rd; dst->skippable = src->skippable; +#if CONFIG_MVP_IMPROVEMENT + dst->ref_mv_bank = src->ref_mv_bank; +#endif // CONFIG_MVP_IMPROVEMENT +#if WARP_CU_BANK + dst->warp_param_bank = src->warp_param_bank; +#endif // WARP_CU_BANK + const BLOCK_SIZE bsize = dst->block_size; const BLOCK_SIZE subsize = get_partition_subsize(bsize, src->partitioning); const int mi_row = src->mi_row; @@ -341,7 +413,7 @@ if (dst->none) av1_free_pmc(dst->none, num_planes); dst->none = NULL; if (src->none) { - dst->none = av1_alloc_pmc(cm, mi_row, mi_col, bsize, dst, + dst->none = av1_alloc_pmc(cm, tree_type, mi_row, mi_col, bsize, dst, PARTITION_NONE, 0, ss_x, ss_y, shared_bufs); av1_copy_tree_context(dst->none, src->none); } @@ -358,10 +430,11 @@ const int x_idx = (i & 1) * (mi_size_wide[bsize] >> 1); const int y_idx = (i >> 1) * (mi_size_high[bsize] >> 1); dst->split[i] = av1_alloc_pc_tree_node( - mi_row + y_idx, mi_col + x_idx, subsize, dst, PARTITION_SPLIT, - i, i == 3, ss_x, ss_y); + tree_type, mi_row + y_idx, mi_col + x_idx, subsize, dst, + PARTITION_SPLIT, i, i == 3, ss_x, ss_y); av1_copy_pc_tree_recursive(cm, dst->split[i], src->split[i], ss_x, - ss_y, shared_bufs, num_planes); + ss_y, shared_bufs, tree_type, + num_planes); } } } @@ -376,12 +449,12 @@ } if (src->horizontal[i]) { const int this_mi_row = mi_row + i 
* (mi_size_high[bsize] >> 1); - dst->horizontal[i] = - av1_alloc_pc_tree_node(this_mi_row, mi_col, subsize, dst, - PARTITION_HORZ, i, i == 1, ss_x, ss_y); + dst->horizontal[i] = av1_alloc_pc_tree_node( + tree_type, this_mi_row, mi_col, subsize, dst, PARTITION_HORZ, i, + i == 1, ss_x, ss_y); av1_copy_pc_tree_recursive(cm, dst->horizontal[i], src->horizontal[i], ss_x, ss_y, - shared_bufs, num_planes); + shared_bufs, tree_type, num_planes); } } } @@ -396,16 +469,135 @@ } if (src->vertical[i]) { const int this_mi_col = mi_col + i * (mi_size_wide[bsize] >> 1); - dst->vertical[i] = - av1_alloc_pc_tree_node(mi_row, this_mi_col, subsize, dst, - PARTITION_VERT, i, i == 1, ss_x, ss_y); + dst->vertical[i] = av1_alloc_pc_tree_node( + tree_type, mi_row, this_mi_col, subsize, dst, PARTITION_VERT, i, + i == 1, ss_x, ss_y); av1_copy_pc_tree_recursive(cm, dst->vertical[i], src->vertical[i], - ss_x, ss_y, shared_bufs, num_planes); + ss_x, ss_y, shared_bufs, tree_type, + num_planes); } } } break; -#if CONFIG_H_PARTITION +#if CONFIG_UNEVEN_4WAY + // PARTITION_HORZ_4A + case PARTITION_HORZ_4A: + if (is_partition_valid(bsize, PARTITION_HORZ_4A)) { + const int ebh = (mi_size_high[bsize] >> 3); + const int mi_rows[4] = { mi_row, mi_row + ebh, mi_row + ebh * 3, + mi_row + ebh * 7 }; + const BLOCK_SIZE bsize_big = + get_partition_subsize(bsize, PARTITION_HORZ); + const BLOCK_SIZE bsize_med = + get_partition_subsize(bsize_big, PARTITION_HORZ); + assert(subsize == get_partition_subsize(bsize_med, PARTITION_HORZ)); + const BLOCK_SIZE subsizes[4] = { subsize, bsize_med, bsize_big, + subsize }; + for (int i = 0; i < 4; ++i) { + if (dst->horizontal4a[i]) { + av1_free_pc_tree_recursive(dst->horizontal4a[i], num_planes, 0, 0); + dst->horizontal4a[i] = NULL; + } + if (src->horizontal4a[i]) { + dst->horizontal4a[i] = av1_alloc_pc_tree_node( + tree_type, mi_rows[i], mi_col, subsizes[i], dst, + PARTITION_HORZ_4A, i, i == 3, ss_x, ss_y); + av1_copy_pc_tree_recursive(cm, dst->horizontal4a[i], + 
src->horizontal4a[i], ss_x, ss_y, + shared_bufs, tree_type, num_planes); + } + } + } + break; + // PARTITION_HORZ_4B + case PARTITION_HORZ_4B: + if (is_partition_valid(bsize, PARTITION_HORZ_4B)) { + const int ebh = (mi_size_high[bsize] >> 3); + const int mi_rows[4] = { mi_row, mi_row + ebh, mi_row + ebh * 5, + mi_row + ebh * 7 }; + const BLOCK_SIZE bsize_big = + get_partition_subsize(bsize, PARTITION_HORZ); + const BLOCK_SIZE bsize_med = + get_partition_subsize(bsize_big, PARTITION_HORZ); + assert(subsize == get_partition_subsize(bsize_med, PARTITION_HORZ)); + const BLOCK_SIZE subsizes[4] = { subsize, bsize_big, bsize_med, + subsize }; + for (int i = 0; i < 4; ++i) { + if (dst->horizontal4b[i]) { + av1_free_pc_tree_recursive(dst->horizontal4b[i], num_planes, 0, 0); + dst->horizontal4b[i] = NULL; + } + if (src->horizontal4b[i]) { + dst->horizontal4b[i] = av1_alloc_pc_tree_node( + tree_type, mi_rows[i], mi_col, subsizes[i], dst, + PARTITION_HORZ_4B, i, i == 3, ss_x, ss_y); + av1_copy_pc_tree_recursive(cm, dst->horizontal4b[i], + src->horizontal4b[i], ss_x, ss_y, + shared_bufs, tree_type, num_planes); + } + } + } + break; + // PARTITION_VERT_4A + case PARTITION_VERT_4A: + if (is_partition_valid(bsize, PARTITION_VERT_4A)) { + const int ebw = (mi_size_wide[bsize] >> 3); + const int mi_cols[4] = { mi_col, mi_col + ebw, mi_col + ebw * 3, + mi_col + ebw * 7 }; + const BLOCK_SIZE bsize_big = + get_partition_subsize(bsize, PARTITION_VERT); + const BLOCK_SIZE bsize_med = + get_partition_subsize(bsize_big, PARTITION_VERT); + assert(subsize == get_partition_subsize(bsize_med, PARTITION_VERT)); + const BLOCK_SIZE subsizes[4] = { subsize, bsize_med, bsize_big, + subsize }; + for (int i = 0; i < 4; ++i) { + if (dst->vertical4a[i]) { + av1_free_pc_tree_recursive(dst->vertical4a[i], num_planes, 0, 0); + dst->vertical4a[i] = NULL; + } + if (src->vertical4a[i]) { + dst->vertical4a[i] = av1_alloc_pc_tree_node( + tree_type, mi_row, mi_cols[i], subsizes[i], dst, + PARTITION_VERT_4A, i, i 
== 3, ss_x, ss_y); + av1_copy_pc_tree_recursive(cm, dst->vertical4a[i], + src->vertical4a[i], ss_x, ss_y, + shared_bufs, tree_type, num_planes); + } + } + } + break; + // PARTITION_VERT_4B + case PARTITION_VERT_4B: + if (is_partition_valid(bsize, PARTITION_VERT_4B)) { + const int ebw = (mi_size_wide[bsize] >> 3); + const int mi_cols[4] = { mi_col, mi_col + ebw, mi_col + ebw * 5, + mi_col + ebw * 7 }; + const BLOCK_SIZE bsize_big = + get_partition_subsize(bsize, PARTITION_VERT); + const BLOCK_SIZE bsize_med = + get_partition_subsize(bsize_big, PARTITION_VERT); + assert(subsize == get_partition_subsize(bsize_med, PARTITION_VERT)); + const BLOCK_SIZE subsizes[4] = { subsize, bsize_big, bsize_med, + subsize }; + for (int i = 0; i < 4; ++i) { + if (dst->vertical4b[i]) { + av1_free_pc_tree_recursive(dst->vertical4b[i], num_planes, 0, 0); + dst->vertical4b[i] = NULL; + } + if (src->vertical4b[i]) { + dst->vertical4b[i] = av1_alloc_pc_tree_node( + tree_type, mi_row, mi_cols[i], subsizes[i], dst, + PARTITION_VERT_4B, i, i == 3, ss_x, ss_y); + av1_copy_pc_tree_recursive(cm, dst->vertical4b[i], + src->vertical4b[i], ss_x, ss_y, + shared_bufs, tree_type, num_planes); + } + } + } + break; +#endif // CONFIG_UNEVEN_4WAY + // PARTITION_HORZ_3 case PARTITION_HORZ_3: if (is_partition_valid(bsize, PARTITION_HORZ_3)) { @@ -423,11 +615,11 @@ } if (src->horizontal3[i]) { dst->horizontal3[i] = av1_alloc_pc_tree_node( - mi_row + offset_mr, mi_col + offset_mc, this_subsize, dst, - PARTITION_HORZ_3, i, i == 3, ss_x, ss_y); + tree_type, mi_row + offset_mr, mi_col + offset_mc, this_subsize, + dst, PARTITION_HORZ_3, i, i == 3, ss_x, ss_y); av1_copy_pc_tree_recursive(cm, dst->horizontal3[i], src->horizontal3[i], ss_x, ss_y, - shared_bufs, num_planes); + shared_bufs, tree_type, num_planes); } } } @@ -449,65 +641,15 @@ } if (src->vertical3[i]) { dst->vertical3[i] = av1_alloc_pc_tree_node( - mi_row + offset_mr, mi_col + offset_mc, this_subsize, dst, - PARTITION_VERT_3, i, i == 3, ss_x, ss_y); + 
tree_type, mi_row + offset_mr, mi_col + offset_mc, this_subsize, + dst, PARTITION_VERT_3, i, i == 3, ss_x, ss_y); av1_copy_pc_tree_recursive(cm, dst->vertical3[i], src->vertical3[i], - ss_x, ss_y, shared_bufs, num_planes); + ss_x, ss_y, shared_bufs, tree_type, + num_planes); } } } break; -#else - // PARTITION_HORZ_3 - case PARTITION_HORZ_3: - if (is_partition_valid(bsize, PARTITION_HORZ_3)) { - const int mi_rows[3] = { mi_row, mi_row + (mi_size_high[bsize] >> 2), - mi_row + (mi_size_high[bsize] >> 2) * 3 }; - const BLOCK_SIZE subsizes[3] = { - subsize, get_partition_subsize(bsize, PARTITION_HORZ), subsize - }; - - for (int i = 0; i < 3; ++i) { - if (dst->horizontal3[i]) { - av1_free_pc_tree_recursive(dst->horizontal3[i], num_planes, 0, 0); - dst->horizontal3[i] = NULL; - } - if (src->horizontal3[i]) { - dst->horizontal3[i] = - av1_alloc_pc_tree_node(mi_rows[i], mi_col, subsizes[i], dst, - PARTITION_HORZ_3, i, i == 2, ss_x, ss_y); - av1_copy_pc_tree_recursive(cm, dst->horizontal3[i], - src->horizontal3[i], ss_x, ss_y, - shared_bufs, num_planes); - } - } - } - break; - // PARTITION_VERT_3 - case PARTITION_VERT_3: - if (is_partition_valid(bsize, PARTITION_VERT_3)) { - const int mi_cols[3] = { mi_col, mi_col + (mi_size_wide[bsize] >> 2), - mi_col + (mi_size_wide[bsize] >> 2) * 3 }; - const BLOCK_SIZE subsizes[3] = { - subsize, get_partition_subsize(bsize, PARTITION_VERT), subsize - }; - - for (int i = 0; i < 3; ++i) { - if (dst->vertical3[i]) { - av1_free_pc_tree_recursive(dst->vertical3[i], num_planes, 0, 0); - dst->vertical3[i] = NULL; - } - if (src->vertical3[i]) { - dst->vertical3[i] = - av1_alloc_pc_tree_node(mi_row, mi_cols[i], subsizes[i], dst, - PARTITION_VERT_3, i, i == 2, ss_x, ss_y); - av1_copy_pc_tree_recursive(cm, dst->vertical3[i], src->vertical3[i], - ss_x, ss_y, shared_bufs, num_planes); - } - } - } - break; -#endif // CONFIG_H_PARTITION default: assert(0 && "Not a valid partition."); break; } } @@ -647,11 +789,20 @@ if (result) return result; result = 
look_for_counterpart_helper(pc_tree->vertical[0], target); if (result) return result; +#if CONFIG_UNEVEN_4WAY + result = look_for_counterpart_helper(pc_tree->horizontal4a[0], target); + if (result) return result; + result = look_for_counterpart_helper(pc_tree->horizontal4b[0], target); + if (result) return result; + result = look_for_counterpart_helper(pc_tree->vertical4a[0], target); + if (result) return result; + result = look_for_counterpart_helper(pc_tree->vertical4b[0], target); + if (result) return result; +#endif // CONFIG_UNEVEN_4WAY result = look_for_counterpart_helper(pc_tree->horizontal3[0], target); if (result) return result; result = look_for_counterpart_helper(pc_tree->vertical3[0], target); if (result) return result; - return NULL; }
diff --git a/av1/encoder/context_tree.h b/av1/encoder/context_tree.h index 280b3e9..d99cb98 100644 --- a/av1/encoder/context_tree.h +++ b/av1/encoder/context_tree.h
@@ -46,6 +46,9 @@ tran_low_t *qcoeff[MAX_MB_PLANE]; tran_low_t *dqcoeff[MAX_MB_PLANE]; uint16_t *eobs[MAX_MB_PLANE]; +#if CONFIG_ATC_DCTX_ALIGNED + uint16_t *bobs[MAX_MB_PLANE]; +#endif // CONFIG_ATC_DCTX_ALIGNED uint8_t *txb_entropy_ctx[MAX_MB_PLANE]; TX_TYPE *tx_type_map; #if CONFIG_CROSS_CHROMA_TX @@ -53,6 +56,7 @@ #endif // CONFIG_CROSS_CHROMA_TX int num_4x4_blk; + int num_4x4_blk_chroma; // For current partition, only if all Y, U, and V transform blocks' // coefficients are quantized to 0, skippable is set to 1. int skippable; @@ -76,14 +80,15 @@ #if CONFIG_EXT_RECUR_PARTITIONS struct PC_TREE *horizontal[2]; struct PC_TREE *vertical[2]; -#if CONFIG_H_PARTITION +#if CONFIG_UNEVEN_4WAY + struct PC_TREE *horizontal4a[4]; + struct PC_TREE *horizontal4b[4]; + struct PC_TREE *vertical4a[4]; + struct PC_TREE *vertical4b[4]; +#endif // CONFIG_UNEVEN_4WAY struct PC_TREE *horizontal3[4]; struct PC_TREE *vertical3[4]; #else - struct PC_TREE *horizontal3[3]; - struct PC_TREE *vertical3[3]; -#endif // CONFIG_H_PARTITION -#else PICK_MODE_CONTEXT *horizontal[2]; PICK_MODE_CONTEXT *vertical[2]; PICK_MODE_CONTEXT *horizontala[3]; @@ -104,9 +109,9 @@ #if CONFIG_EXT_RECUR_PARTITIONS RD_STATS none_rd; bool skippable; -#if CONFIG_C043_MVP_IMPROVEMENTS +#if CONFIG_MVP_IMPROVEMENT REF_MV_BANK ref_mv_bank; -#endif // CONFIG_C043_MVP_IMPROVEMENTS +#endif // CONFIG_MVP_IMPROVEMENT #if WARP_CU_BANK WARP_PARAM_BANK warp_param_bank; #endif // WARP_CU_BANK @@ -134,8 +139,8 @@ PC_TREE_SHARED_BUFFERS *shared_bufs); void av1_free_shared_coeff_buffer(PC_TREE_SHARED_BUFFERS *shared_bufs); -PC_TREE *av1_alloc_pc_tree_node(int mi_row, int mi_col, BLOCK_SIZE bsize, - PC_TREE *parent, +PC_TREE *av1_alloc_pc_tree_node(TREE_TYPE tree_type, int mi_row, int mi_col, + BLOCK_SIZE bsize, PC_TREE *parent, PARTITION_TYPE parent_partition, int index, int is_last, int subsampling_x, int subsampling_y); @@ -145,11 +150,12 @@ void av1_copy_pc_tree_recursive(const AV1_COMMON *cm, PC_TREE *dst, PC_TREE *src, int 
ss_x, int ss_y, PC_TREE_SHARED_BUFFERS *shared_bufs, - int num_planes); + TREE_TYPE tree_type, int num_planes); #endif // CONFIG_EXT_RECUR_PARTITIONS -PICK_MODE_CONTEXT *av1_alloc_pmc(const AV1_COMMON *cm, int mi_row, int mi_col, - BLOCK_SIZE bsize, PC_TREE *parent, +PICK_MODE_CONTEXT *av1_alloc_pmc(const AV1_COMMON *cm, TREE_TYPE tree_type, + int mi_row, int mi_col, BLOCK_SIZE bsize, + PC_TREE *parent, PARTITION_TYPE parent_partition, int index, int subsampling_x, int subsampling_y, PC_TREE_SHARED_BUFFERS *shared_bufs);
diff --git a/av1/encoder/encode_strategy.c b/av1/encoder/encode_strategy.c index a931f84..06809d7 100644 --- a/av1/encoder/encode_strategy.c +++ b/av1/encoder/encode_strategy.c
@@ -863,6 +863,12 @@ source_buffer->metadata); } } +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + // when enable_frame_output_order == 1, show_existing mechanism is + // used for alt_ref in encoder side internally, but the OBU with + // show_existing_frame == 1 is not signaled in the bitstream. + if (cm->seq_params.enable_frame_output_order) show_existing_alt_ref = 1; +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT set_show_existing_alt_ref(&cpi->gf_group, apply_filtering, oxcf->algo_cfg.enable_overlay, show_existing_alt_ref); @@ -1105,6 +1111,9 @@ AOMMIN(cm->seq_params.num_same_ref_compound, cm->ref_frames_info.num_total_refs); #endif // CONFIG_ALLOW_SAME_REF_COMPOUND +#if CONFIG_IMPROVED_GLOBAL_MOTION + cm->cur_frame->num_ref_frames = cm->ref_frames_info.num_total_refs; +#endif // CONFIG_IMPROVED_GLOBAL_MOTION // ref_frame_flags is defined based on the external flag // max-reference-frames.
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c index e1dca4f..ecd0ee6 100644 --- a/av1/encoder/encodeframe.c +++ b/av1/encoder/encodeframe.c
@@ -572,8 +572,9 @@ : (loop_idx == 0 ? LUMA_PART : CHROMA_PART)); init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row, mi_col, 1); - PC_TREE *const pc_root = av1_alloc_pc_tree_node( - mi_row, mi_col, sb_size, NULL, PARTITION_NONE, 0, 1, ss_x, ss_y); + PC_TREE *const pc_root = + av1_alloc_pc_tree_node(xd->tree_type, mi_row, mi_col, sb_size, NULL, + PARTITION_NONE, 0, 1, ss_x, ss_y); #if CONFIG_EXT_RECUR_PARTITIONS const PARTITION_TREE *template_tree = multi_pass_params ? multi_pass_params->template_tree : NULL; @@ -615,9 +616,9 @@ // First pass SB_FIRST_PASS_STATS sb_fp_stats; av1_backup_sb_state(&sb_fp_stats, cpi, td, tile_data, mi_row, mi_col); -#if CONFIG_C043_MVP_IMPROVEMENTS +#if CONFIG_MVP_IMPROVEMENT REF_MV_BANK stored_mv_bank = td->mb.e_mbd.ref_mv_bank; -#endif // CONFIG_C043_MVP_IMPROVEMENTS +#endif // CONFIG_MVP_IMPROVEMENT #if WARP_CU_BANK WARP_PARAM_BANK stored_warp_bank = td->mb.e_mbd.warp_param_bank; #endif // WARP_CU_BANK @@ -632,9 +633,9 @@ av1_reset_simple_motion_tree_partition(sms_root, sb_size); av1_restore_sb_state(&sb_fp_stats, cpi, td, tile_data, mi_row, mi_col); -#if CONFIG_C043_MVP_IMPROVEMENTS +#if CONFIG_MVP_IMPROVEMENT td->mb.e_mbd.ref_mv_bank = stored_mv_bank; -#endif // CONFIG_C043_MVP_IMPROVEMENTS +#endif // CONFIG_MVP_IMPROVEMENT #if WARP_CU_BANK td->mb.e_mbd.warp_param_bank = stored_warp_bank; #endif // WARP_CU_BANK @@ -666,13 +667,14 @@ case PARTITION_NONE: num_subtrees = 0; break; case PARTITION_HORZ: case PARTITION_VERT: num_subtrees = 2; break; -#if CONFIG_H_PARTITION +#if CONFIG_UNEVEN_4WAY + case PARTITION_HORZ_4A: + case PARTITION_HORZ_4B: + case PARTITION_VERT_4A: + case PARTITION_VERT_4B: num_subtrees = 4; break; +#endif // CONFIG_UNEVEN_4WAY case PARTITION_HORZ_3: case PARTITION_VERT_3: num_subtrees = 4; break; -#else - case PARTITION_HORZ_3: - case PARTITION_VERT_3: num_subtrees = 3; break; -#endif // CONFIG_H_PARTITION case PARTITION_SPLIT: num_subtrees = 4; break; default: assert(0 && "Invalid partition 
type in set_min_none_to_invalid!"); @@ -797,8 +799,9 @@ cm, xd->tree_type, mi_row, mi_col, bsize, xd->sbi->ptree_root[av1_get_sdp_idx(xd->tree_type)]); #endif // CONFIG_EXT_RECUR_PARTITIONS - PC_TREE *const pc_root = av1_alloc_pc_tree_node( - mi_row, mi_col, sb_size, NULL, PARTITION_NONE, 0, 1, ss_x, ss_y); + PC_TREE *const pc_root = + av1_alloc_pc_tree_node(xd->tree_type, mi_row, mi_col, sb_size, NULL, + PARTITION_NONE, 0, 1, ss_x, ss_y); av1_rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size, &dummy_rate, &dummy_dist, 1, #if CONFIG_EXT_RECUR_PARTITIONS @@ -825,8 +828,9 @@ : (loop_idx == 0 ? LUMA_PART : CHROMA_PART)); init_encode_rd_sb(cpi, td, tile_data, sms_root, &dummy_rdc, mi_row, mi_col, 1); - PC_TREE *const pc_root = av1_alloc_pc_tree_node( - mi_row, mi_col, sb_size, NULL, PARTITION_NONE, 0, 1, ss_x, ss_y); + PC_TREE *const pc_root = + av1_alloc_pc_tree_node(xd->tree_type, mi_row, mi_col, sb_size, NULL, + PARTITION_NONE, 0, 1, ss_x, ss_y); #if CONFIG_EXT_RECUR_PARTITIONS av1_reset_ptree_in_sbi(xd->sbi, xd->tree_type); av1_build_partition_tree_fixed_partitioning( @@ -1139,7 +1143,7 @@ mi_row += cm->mib_size) { #if CONFIG_REF_MV_BANK av1_zero(td->mb.e_mbd.ref_mv_bank); -#if !CONFIG_C043_MVP_IMPROVEMENTS +#if !CONFIG_MVP_IMPROVEMENT td->mb.e_mbd.ref_mv_bank_pt = &td->mb.e_mbd.ref_mv_bank; #endif #endif // CONFIG_REF_MV_BANK @@ -1237,8 +1241,13 @@ get_ref_frame_buf(cm, skip_mode_info->ref_frame_idx_1); assert(buf_0 != NULL && buf_1 != NULL); +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + ref_order_hint[0] = buf_0->display_order_hint; + ref_order_hint[1] = buf_1->display_order_hint; +#else ref_order_hint[0] = buf_0->order_hint; ref_order_hint[1] = buf_1->order_hint; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC } static int check_skip_mode_enabled(AV1_COMP *const cpi) { @@ -1247,9 +1256,13 @@ av1_setup_skip_mode_allowed(cm); if (!cm->current_frame.skip_mode_info.skip_mode_allowed) return 0; - // Turn off skip mode if the temporal distances of the 
reference pair to the - // current frame are different by more than 1 frame. + // Turn off skip mode if the temporal distances of the reference pair to the + // current frame are different by more than 1 frame. +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + const int cur_offset = (int)cm->current_frame.display_order_hint; +#else const int cur_offset = (int)cm->current_frame.order_hint; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC int ref_offset[2]; get_skip_mode_ref_offsets(cm, ref_offset); const int cur_to_ref0 = get_relative_dist(&cm->seq_params.order_hint_info, @@ -1291,6 +1304,19 @@ #endif // CONFIG_OPTFLOW_REFINEMENT uint16_t **mc_buf, uint16_t **pre, SubpelParams *subpel_params, int *src_stride) { + +#if CONFIG_REFINEMV + if (inter_pred_params->use_ref_padding) { + tip_common_calc_subpel_params_and_extend( + src_mv, inter_pred_params, xd, mi_x, mi_y, ref, +#if CONFIG_OPTFLOW_REFINEMENT + use_optflow_refinement, +#endif // CONFIG_OPTFLOW_REFINEMENT + mc_buf, pre, subpel_params, src_stride); + return; + } +#endif // CONFIG_REFINEMV + // These are part of the function signature to use this function through a // function pointer. See typedef of 'CalcSubpelParamsFunc'. (void)xd; @@ -1338,11 +1364,29 @@ subpel_params->subpel_y = pos_y & SCALE_SUBPEL_MASK; subpel_params->xs = sf->x_step_q4; subpel_params->ys = sf->y_step_q4; + +#if CONFIG_D071_IMP_MSK_BLD + if (inter_pred_params->border_data.enable_bacp) { + // Get reference block top left coordinate. + subpel_params->x0 = pos_x >> SCALE_SUBPEL_BITS; + subpel_params->y0 = pos_y >> SCALE_SUBPEL_BITS; + // Get reference block bottom right coordinate. 
+ subpel_params->x1 = subpel_params->x0 + inter_pred_params->block_width; + subpel_params->y1 = subpel_params->y0 + inter_pred_params->block_height; + } +#endif // CONFIG_D071_IMP_MSK_BLD + *pre = pre_buf->buf0 + (pos_y >> SCALE_SUBPEL_BITS) * pre_buf->stride + (pos_x >> SCALE_SUBPEL_BITS); } else { int pos_x = inter_pred_params->pix_col << SUBPEL_BITS; int pos_y = inter_pred_params->pix_row << SUBPEL_BITS; + +#if CONFIG_REFINEMV + const int bw = inter_pred_params->original_pu_width; + const int bh = inter_pred_params->original_pu_height; + +#else #if CONFIG_OPTFLOW_REFINEMENT // Use original block size to clamp MV and to extend block boundary const int bw = use_optflow_refinement ? inter_pred_params->orig_block_width @@ -1353,6 +1397,8 @@ const int bw = inter_pred_params->block_width; const int bh = inter_pred_params->block_height; #endif // CONFIG_OPTFLOW_REFINEMENT + +#endif // CONFIG_REFINEMV const MV mv_q4 = tip_clamp_mv_to_umv_border_sb( inter_pred_params, src_mv, bw, bh, #if CONFIG_OPTFLOW_REFINEMENT @@ -1365,6 +1411,18 @@ subpel_params->subpel_y = (mv_q4.row & SUBPEL_MASK) << SCALE_EXTRA_BITS; pos_x += mv_q4.col; pos_y += mv_q4.row; + +#if CONFIG_D071_IMP_MSK_BLD + if (inter_pred_params->border_data.enable_bacp) { + // Get reference block top left coordinate. + subpel_params->x0 = pos_x >> SUBPEL_BITS; + subpel_params->y0 = pos_y >> SUBPEL_BITS; + // Get reference block bottom right coordinate. 
+ subpel_params->x1 = subpel_params->x0 + inter_pred_params->block_width; + subpel_params->y1 = subpel_params->y0 + inter_pred_params->block_height; + } +#endif // CONFIG_D071_IMP_MSK_BLD + *pre = pre_buf->buf0 + (pos_y >> SUBPEL_BITS) * pre_buf->stride + (pos_x >> SUBPEL_BITS); } @@ -1479,6 +1537,19 @@ } #endif // CONFIG_EXTENDED_WARP_PREDICTION +#if CONFIG_CWG_D067_IMPROVED_WARP + features->allow_warpmv_mode = + (features->enabled_motion_modes & (1 << WARP_DELTA)) != 0; + if (features->allow_warpmv_mode && + cpi->sf.inter_sf.prune_warpmv_prob_thresh > 0) { + const FRAME_UPDATE_TYPE update_type = get_frame_update_type(&cpi->gf_group); + if (frame_probs->warped_probs[update_type] < + cpi->sf.inter_sf.prune_warpmv_prob_thresh) { + features->allow_warpmv_mode = 0; + } + } +#endif // CONFIG_CWG_D067_IMPROVED_WARP + int hash_table_created = 0; if (!is_stat_generation_stage(cpi) && av1_use_hash_me(cpi)) { // TODO(any): move this outside of the recoding loop to avoid recalculating @@ -1643,10 +1714,10 @@ start_timing(cpi, av1_setup_motion_field_time); #endif if (features->allow_ref_frame_mvs) av1_setup_motion_field(cm); -#if CONFIG_SMVP_IMPROVEMENT +#if CONFIG_MVP_IMPROVEMENT else av1_setup_ref_frame_sides(cm); -#endif // CONFIG_SMVP_IMPROVEMENT +#endif // CONFIG_MVP_IMPROVEMENT #if CONFIG_COLLECT_COMPONENT_TIMING end_timing(cpi, av1_setup_motion_field_time); #endif @@ -1766,6 +1837,17 @@ (frame_probs->warped_probs[update_type] + new_prob) >> 1; } +#if CONFIG_CWG_D067_IMPROVED_WARP + if (cpi->sf.inter_sf.prune_warpmv_prob_thresh > 0) { + const FRAME_UPDATE_TYPE update_type = get_frame_update_type(&cpi->gf_group); + int sum = 0; + for (i = 0; i < 2; i++) sum += cpi->td.rd_counts.warped_used[i]; + const int new_prob = sum ? 
128 * cpi->td.rd_counts.warped_used[1] / sum : 0; + frame_probs->warped_probs[update_type] = + (frame_probs->warped_probs[update_type] + new_prob) >> 1; + } +#endif // CONFIG_CWG_D067_IMPROVED_WARP + if ((!is_stat_generation_stage(cpi) && av1_use_hash_me(cpi)) || hash_table_created) { av1_hash_table_destroy(&intrabc_hash_info->intrabc_hash_table);
diff --git a/av1/encoder/encodeframe_utils.c b/av1/encoder/encodeframe_utils.c index e7589aa..4258a28 100644 --- a/av1/encoder/encodeframe_utils.c +++ b/av1/encoder/encodeframe_utils.c
@@ -196,13 +196,42 @@ // MB_MODE_INFO_EXT_FRAME to MB_MODE_INFO_EXT. static INLINE void copy_mbmi_ext_frame_to_mbmi_ext( MB_MODE_INFO_EXT *mbmi_ext, - const MB_MODE_INFO_EXT_FRAME *const mbmi_ext_best, uint8_t ref_frame_type) { + const MB_MODE_INFO_EXT_FRAME *const mbmi_ext_best, uint8_t ref_frame_type +#if CONFIG_SEP_COMP_DRL + , + PREDICTION_MODE this_mode +#endif // CONFIG_SEP_COMP_DRL +) { +#if CONFIG_SEP_COMP_DRL + MV_REFERENCE_FRAME rf[2]; + av1_set_ref_frame(rf, ref_frame_type); + if (has_second_drl_by_mode(this_mode, rf)) { + memcpy(mbmi_ext->ref_mv_stack[rf[0]], mbmi_ext_best->ref_mv_stack[0], + sizeof(mbmi_ext->ref_mv_stack[USABLE_REF_MV_STACK_SIZE])); + memcpy(mbmi_ext->weight[rf[0]], mbmi_ext_best->weight[0], + sizeof(mbmi_ext->weight[USABLE_REF_MV_STACK_SIZE])); + mbmi_ext->ref_mv_count[rf[0]] = mbmi_ext_best->ref_mv_count[0]; + memcpy(mbmi_ext->ref_mv_stack[rf[1]], mbmi_ext_best->ref_mv_stack[1], + sizeof(mbmi_ext->ref_mv_stack[USABLE_REF_MV_STACK_SIZE])); + memcpy(mbmi_ext->weight[rf[1]], mbmi_ext_best->weight[1], + sizeof(mbmi_ext->weight[USABLE_REF_MV_STACK_SIZE])); + mbmi_ext->ref_mv_count[rf[1]] = mbmi_ext_best->ref_mv_count[1]; + } else { + memcpy(mbmi_ext->ref_mv_stack[ref_frame_type], + mbmi_ext_best->ref_mv_stack[0], + sizeof(mbmi_ext->ref_mv_stack[USABLE_REF_MV_STACK_SIZE])); + memcpy(mbmi_ext->weight[ref_frame_type], mbmi_ext_best->weight[0], + sizeof(mbmi_ext->weight[USABLE_REF_MV_STACK_SIZE])); + mbmi_ext->ref_mv_count[ref_frame_type] = mbmi_ext_best->ref_mv_count[0]; + } +#else memcpy(mbmi_ext->ref_mv_stack[ref_frame_type], mbmi_ext_best->ref_mv_stack, sizeof(mbmi_ext->ref_mv_stack[USABLE_REF_MV_STACK_SIZE])); memcpy(mbmi_ext->weight[ref_frame_type], mbmi_ext_best->weight, sizeof(mbmi_ext->weight[USABLE_REF_MV_STACK_SIZE])); - mbmi_ext->mode_context[ref_frame_type] = mbmi_ext_best->mode_context; mbmi_ext->ref_mv_count[ref_frame_type] = mbmi_ext_best->ref_mv_count; +#endif // CONFIG_SEP_COMP_DRL + mbmi_ext->mode_context[ref_frame_type] = 
mbmi_ext_best->mode_context; memcpy(mbmi_ext->global_mvs, mbmi_ext_best->global_mvs, sizeof(mbmi_ext->global_mvs)); @@ -240,12 +269,18 @@ assert(mi->sb_type[xd->tree_type == CHROMA_PART] == bsize); *mi_addr = *mi; + mi_addr->chroma_ref_info = ctx->chroma_ref_info; #if CONFIG_C071_SUBBLK_WARPMV if (is_warp_mode(mi->motion_mode)) update_submi(xd, cm, ctx->submic, bsize); #endif // CONFIG_C071_SUBBLK_WARPMV if (xd->tree_type != CHROMA_PART) copy_mbmi_ext_frame_to_mbmi_ext(x->mbmi_ext, &ctx->mbmi_ext_best, - av1_ref_frame_type(ctx->mic.ref_frame)); + av1_ref_frame_type(ctx->mic.ref_frame) +#if CONFIG_SEP_COMP_DRL + , + ctx->mic.mode +#endif // CONFIG_SEP_COMP_DRL + ); memcpy(txfm_info->blk_skip, ctx->blk_skip, sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk); @@ -344,6 +379,9 @@ p[i].qcoeff = ctx->qcoeff[i]; p[i].dqcoeff = ctx->dqcoeff[i]; p[i].eobs = ctx->eobs[i]; +#if CONFIG_ATC_DCTX_ALIGNED + p[i].bobs = ctx->bobs[i]; +#endif // CONFIG_ATC_DCTX_ALIGNED p[i].txb_entropy_ctx = ctx->txb_entropy_ctx[i]; } for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i]; @@ -367,6 +405,12 @@ xd->mi[x_idx + y * mis]->cfl_alpha_signs = mi_addr->cfl_alpha_signs; xd->mi[x_idx + y * mis]->cfl_alpha_idx = mi_addr->cfl_alpha_idx; xd->mi[x_idx + y * mis]->partition = mi_addr->partition; +#if CONFIG_EXT_RECUR_PARTITIONS + xd->mi[x_idx + y * mis]->chroma_mi_row_start = + mi_addr->chroma_mi_row_start; + xd->mi[x_idx + y * mis]->chroma_mi_col_start = + mi_addr->chroma_mi_col_start; +#endif // CONFIG_EXT_RECUR_PARTITIONS xd->mi[x_idx + y * mis] ->palette_mode_info.palette_size[PLANE_TYPE_UV] = mi_addr->palette_mode_info.palette_size[PLANE_TYPE_UV]; @@ -386,12 +430,6 @@ if (dry_run) return; if (mi_addr->ref_frame[0] != INTRA_FRAME) { - if (is_inter_block(mi_addr, xd->tree_type)) { - // TODO(sarahparker): global motion stats need to be handled per-tile - // to be compatible with tile-based threading. 
- update_global_motion_used(mi_addr->mode, bsize, mi_addr, rdc); - } - if (cm->features.interp_filter == SWITCHABLE && !is_warp_mode(mi_addr->motion_mode) && !is_nontrans_global_motion(xd, xd->mi[0])) { @@ -561,10 +599,18 @@ intraonly); #endif // CONFIG_AIMC if (cm->seq_params.enable_mrls && av1_is_directional_mode(mbmi->mode)) { +#if CONFIG_EXT_DIR + int mrl_ctx = get_mrl_index_ctx(xd->neighbors[0], xd->neighbors[1]); + update_cdf(fc->mrl_index_cdf[mrl_ctx], mbmi->mrl_index, MRL_LINE_NUMBER); +#if CONFIG_ENTROPY_STATS + ++counts->mrl_index[mrl_ctx][mbmi->mrl_index]; +#endif // CONFIG_ENTROPY_STATS +#else + update_cdf(fc->mrl_index_cdf, mbmi->mrl_index, MRL_LINE_NUMBER); #if CONFIG_ENTROPY_STATS ++counts->mrl_index[mbmi->mrl_index]; -#endif - update_cdf(fc->mrl_index_cdf, mbmi->mrl_index, MRL_LINE_NUMBER); +#endif // CONFIG_ENTROPY_STATS +#endif // CONFIG_EXT_DIR } if (av1_filter_intra_allowed(cm, mbmi)) { const int use_filter_intra_mode = @@ -1193,21 +1239,31 @@ AVERAGE_CDF(ctx_left->v_dc_sign_cdf, ctx_tr->v_dc_sign_cdf, 2); AVERAGE_CDF(ctx_left->v_ac_sign_cdf, ctx_tr->v_ac_sign_cdf, 2); #endif // CONFIG_CONTEXT_DERIVATION - AVERAGE_CDF(ctx_left->eob_flag_cdf16, ctx_tr->eob_flag_cdf16, 5); - AVERAGE_CDF(ctx_left->eob_flag_cdf32, ctx_tr->eob_flag_cdf32, 6); - AVERAGE_CDF(ctx_left->eob_flag_cdf64, ctx_tr->eob_flag_cdf64, 7); - AVERAGE_CDF(ctx_left->eob_flag_cdf128, ctx_tr->eob_flag_cdf128, 8); - AVERAGE_CDF(ctx_left->eob_flag_cdf256, ctx_tr->eob_flag_cdf256, 9); - AVERAGE_CDF(ctx_left->eob_flag_cdf512, ctx_tr->eob_flag_cdf512, 10); - AVERAGE_CDF(ctx_left->eob_flag_cdf1024, ctx_tr->eob_flag_cdf1024, 11); + AVERAGE_CDF(ctx_left->eob_flag_cdf16, ctx_tr->eob_flag_cdf16, + EOB_MAX_SYMS - 6); + AVERAGE_CDF(ctx_left->eob_flag_cdf32, ctx_tr->eob_flag_cdf32, + EOB_MAX_SYMS - 5); + AVERAGE_CDF(ctx_left->eob_flag_cdf64, ctx_tr->eob_flag_cdf64, + EOB_MAX_SYMS - 4); + AVERAGE_CDF(ctx_left->eob_flag_cdf128, ctx_tr->eob_flag_cdf128, + EOB_MAX_SYMS - 3); + 
AVERAGE_CDF(ctx_left->eob_flag_cdf256, ctx_tr->eob_flag_cdf256, + EOB_MAX_SYMS - 2); + AVERAGE_CDF(ctx_left->eob_flag_cdf512, ctx_tr->eob_flag_cdf512, + EOB_MAX_SYMS - 1); + AVERAGE_CDF(ctx_left->eob_flag_cdf1024, ctx_tr->eob_flag_cdf1024, + EOB_MAX_SYMS); AVERAGE_CDF(ctx_left->coeff_base_eob_cdf, ctx_tr->coeff_base_eob_cdf, 3); -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC_DCTX_ALIGNED + AVERAGE_CDF(ctx_left->coeff_base_bob_cdf, ctx_tr->coeff_base_bob_cdf, 3); +#endif // CONFIG_ATC_DCTX_ALIGNED +#if CONFIG_ATC AVERAGE_CDF(ctx_left->coeff_base_lf_cdf, ctx_tr->coeff_base_lf_cdf, LF_BASE_SYMBOLS); AVERAGE_CDF(ctx_left->coeff_base_lf_eob_cdf, ctx_tr->coeff_base_lf_eob_cdf, LF_BASE_SYMBOLS - 1); AVERAGE_CDF(ctx_left->coeff_br_lf_cdf, ctx_tr->coeff_br_lf_cdf, BR_CDF_SIZE); -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC AVERAGE_CDF(ctx_left->coeff_base_cdf, ctx_tr->coeff_base_cdf, 4); AVERAGE_CDF(ctx_left->idtx_sign_cdf, ctx_tr->idtx_sign_cdf, 2); AVERAGE_CDF(ctx_left->coeff_base_cdf_idtx, ctx_tr->coeff_base_cdf_idtx, 4); @@ -1221,12 +1277,17 @@ AVERAGE_CDF(ctx_left->inter_warp_mode_cdf, ctx_tr->inter_warp_mode_cdf, 2); #endif // CONFIG_WARPMV +#if CONFIG_REFINEMV + AVERAGE_CDF(ctx_left->refinemv_flag_cdf, ctx_tr->refinemv_flag_cdf, + REFINEMV_NUM_MODES); +#endif // CONFIG_REFINEMV + AVERAGE_CDF(ctx_left->drl_cdf[0], ctx_tr->drl_cdf[0], 2); AVERAGE_CDF(ctx_left->drl_cdf[1], ctx_tr->drl_cdf[1], 2); AVERAGE_CDF(ctx_left->drl_cdf[2], ctx_tr->drl_cdf[2], 2); -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT AVERAGE_CDF(ctx_left->skip_drl_cdf, ctx_tr->skip_drl_cdf, 2); -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT #if CONFIG_OPTFLOW_REFINEMENT AVERAGE_CDF(ctx_left->inter_compound_mode_cdf, ctx_tr->inter_compound_mode_cdf, INTER_COMPOUND_REF_TYPES); @@ -1234,6 +1295,9 @@ AVERAGE_CDF(ctx_left->inter_compound_mode_cdf, ctx_tr->inter_compound_mode_cdf, INTER_COMPOUND_MODES); #endif // CONFIG_OPTFLOW_REFINEMENT 
+#if CONFIG_CWP + AVERAGE_CDF(ctx_left->cwp_idx_cdf, ctx_tr->cwp_idx_cdf, 2); +#endif // CONFIG_CWP #if CONFIG_IMPROVED_JMVD AVERAGE_CDF(ctx_left->jmvd_scale_mode_cdf, ctx_tr->jmvd_scale_mode_cdf, JOINT_NEWMV_SCALE_FACTOR_CNT); @@ -1273,6 +1337,10 @@ AVERAGE_CDF(ctx_left->warp_ref_idx_cdf[0], ctx_tr->warp_ref_idx_cdf[0], 2); AVERAGE_CDF(ctx_left->warp_ref_idx_cdf[1], ctx_tr->warp_ref_idx_cdf[1], 2); AVERAGE_CDF(ctx_left->warp_ref_idx_cdf[2], ctx_tr->warp_ref_idx_cdf[2], 2); +#if CONFIG_CWG_D067_IMPROVED_WARP + AVERAGE_CDF(ctx_left->warpmv_with_mvd_flag_cdf, + ctx_tr->warpmv_with_mvd_flag_cdf, 2); +#endif // CONFIG_CWG_D067_IMPROVED_WARP #endif // CONFIG_WARP_REF_LIST AVERAGE_CDF(ctx_left->warp_extend_cdf, ctx_tr->warp_extend_cdf, 2); #else @@ -1316,19 +1384,19 @@ AVERAGE_CDF(ctx_left->comp_group_idx_cdf, ctx_tr->comp_group_idx_cdf, 2); AVERAGE_CDF(ctx_left->skip_mode_cdfs, ctx_tr->skip_mode_cdfs, 2); AVERAGE_CDF(ctx_left->skip_txfm_cdfs, ctx_tr->skip_txfm_cdfs, 2); -#if CONFIG_CONTEXT_DERIVATION +#if CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT AVERAGE_CDF(ctx_left->intra_inter_cdf[0], ctx_tr->intra_inter_cdf[0], 2); AVERAGE_CDF(ctx_left->intra_inter_cdf[1], ctx_tr->intra_inter_cdf[1], 2); #else AVERAGE_CDF(ctx_left->intra_inter_cdf, ctx_tr->intra_inter_cdf, 2); -#endif // CONFIG_CONTEXT_DERIVATION +#endif // CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT avg_nmv(&ctx_left->nmvc, &ctx_tr->nmvc, wt_left, wt_tr); avg_nmv(&ctx_left->ndvc, &ctx_tr->ndvc, wt_left, wt_tr); AVERAGE_CDF(ctx_left->intrabc_cdf, ctx_tr->intrabc_cdf, 2); -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT AVERAGE_CDF(ctx_left->intrabc_mode_cdf, ctx_tr->intrabc_mode_cdf, 2); AVERAGE_CDF(ctx_left->intrabc_drl_idx_cdf, ctx_tr->intrabc_drl_idx_cdf, 2); -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT AVERAGE_CDF(ctx_left->seg.tree_cdf, ctx_tr->seg.tree_cdf, MAX_SEGMENTS); AVERAGE_CDF(ctx_left->seg.pred_cdf, ctx_tr->seg.pred_cdf, 2); 
AVERAGE_CDF(ctx_left->seg.spatial_pred_seg_cdf, @@ -1383,6 +1451,7 @@ AVG_CDF_STRIDE(ctx_left->uv_mode_cdf[0], ctx_tr->uv_mode_cdf[0], UV_INTRA_MODES - 1, CDF_SIZE(UV_INTRA_MODES)); AVERAGE_CDF(ctx_left->uv_mode_cdf[1], ctx_tr->uv_mode_cdf[1], UV_INTRA_MODES); + #if CONFIG_EXT_RECUR_PARTITIONS for (int plane_index = 0; plane_index < PARTITION_STRUCTURE_NUM; plane_index++) { @@ -1411,6 +1480,15 @@ for (RECT_PART_TYPE rect = 0; rect < NUM_RECT_PARTS; rect++) { AVERAGE_CDF(ctx_left->do_ext_partition_cdf[plane_index][rect][i], ctx_tr->do_ext_partition_cdf[plane_index][rect][i], 2); +#if CONFIG_UNEVEN_4WAY + AVERAGE_CDF( + ctx_left->do_uneven_4way_partition_cdf[plane_index][rect][i], + ctx_tr->do_uneven_4way_partition_cdf[plane_index][rect][i], 2); + AVERAGE_CDF( + ctx_left->uneven_4way_partition_type_cdf[plane_index][rect][i], + ctx_tr->uneven_4way_partition_type_cdf[plane_index][rect][i], + NUM_UNEVEN_4WAY_PARTS); +#endif // CONFIG_UNEVEN_4WAY } } } @@ -1466,10 +1544,10 @@ } AVG_CDF_STRIDE(ctx_left->intra_ext_tx_cdf[1], ctx_tr->intra_ext_tx_cdf[1], INTRA_TX_SET1, CDF_SIZE(TX_TYPES)); -#if !(CONFIG_ATC_NEWTXSETS && !CONFIG_ATC_REDUCED_TXSET) +#if !(CONFIG_ATC && !CONFIG_ATC_REDUCED_TXSET) AVG_CDF_STRIDE(ctx_left->intra_ext_tx_cdf[2], ctx_tr->intra_ext_tx_cdf[2], INTRA_TX_SET2, CDF_SIZE(TX_TYPES)); -#endif // !(CONFIG_ATC_NEWTXSETS && !CONFIG_ATC_REDUCED_TXSET) +#endif // !(CONFIG_ATC && !CONFIG_ATC_REDUCED_TXSET) AVG_CDF_STRIDE(ctx_left->inter_ext_tx_cdf[1], ctx_tr->inter_ext_tx_cdf[1], 16, CDF_SIZE(TX_TYPES)); AVG_CDF_STRIDE(ctx_left->inter_ext_tx_cdf[2], ctx_tr->inter_ext_tx_cdf[2], 12, @@ -1590,9 +1668,9 @@ const int alloc_mi_idx = get_alloc_mi_idx(&cm->mi_params, mi_row, mi_col); sb_fp_stats->current_qindex = cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex; -#if CONFIG_C043_MVP_IMPROVEMENTS +#if CONFIG_MVP_IMPROVEMENT sb_fp_stats->ref_mv_bank = td->mb.e_mbd.ref_mv_bank; -#endif // CONFIG_C043_MVP_IMPROVEMENTS +#endif // CONFIG_MVP_IMPROVEMENT #if WARP_CU_BANK 
sb_fp_stats->warp_param_bank = td->mb.e_mbd.warp_param_bank; #endif // WARP_CU_BANK @@ -1626,9 +1704,9 @@ const int alloc_mi_idx = get_alloc_mi_idx(&cm->mi_params, mi_row, mi_col); cm->mi_params.mi_alloc[alloc_mi_idx].current_qindex = sb_fp_stats->current_qindex; -#if CONFIG_C043_MVP_IMPROVEMENTS +#if CONFIG_MVP_IMPROVEMENT x->e_mbd.ref_mv_bank = sb_fp_stats->ref_mv_bank; -#endif // CONFIG_C043_MVP_IMPROVEMENTS +#endif // CONFIG_MVP_IMPROVEMENT #if WARP_CU_BANK x->e_mbd.warp_param_bank = sb_fp_stats->warp_param_bank; #endif // WARP_CU_BANK @@ -1693,7 +1771,7 @@ #else cm->features.allow_high_precision_mv, &x->mv_costs); #endif -#if CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT if (cm->features.allow_intrabc) { #if CONFIG_FLEX_MVRES fill_dv_costs(&x->dv_costs, xd->tile_ctx, &x->mv_costs); @@ -1701,7 +1779,7 @@ av1_fill_dv_costs(xd->tile_ctx, &x->dv_costs); #endif } -#endif // CONFIG_BVCOST_UPDATE +#endif // CONFIG_IBC_BV_IMPROVEMENT break; default: assert(0); }
diff --git a/av1/encoder/encodeframe_utils.h b/av1/encoder/encodeframe_utils.h index 4da6144..3da084f 100644 --- a/av1/encoder/encodeframe_utils.h +++ b/av1/encoder/encodeframe_utils.h
@@ -40,6 +40,19 @@ TXFM_CONTEXT *p_tl; TXFM_CONTEXT ta[MAX_MIB_SIZE]; TXFM_CONTEXT tl[MAX_MIB_SIZE]; +#if CONFIG_MVP_IMPROVEMENT + //! The current level bank, used to restore the level bank in MACROBLOCKD. + REF_MV_BANK curr_level_bank; + //! The best level bank from the rdopt process. + REF_MV_BANK best_level_bank; +#endif // CONFIG_MVP_IMPROVEMENT +#if WARP_CU_BANK + //! The current warp, level bank, used to restore the warp level bank in + //! MACROBLOCKD. + WARP_PARAM_BANK curr_level_warp_bank; + //! The best warp level bank from the rdopt process. + WARP_PARAM_BANK best_level_warp_bank; +#endif // WARP_CU_BANK } RD_SEARCH_MACROBLOCK_CONTEXT; // This struct is used to store the statistics used by sb-level multi-pass @@ -54,9 +67,9 @@ InterModeRdModel inter_mode_rd_models[BLOCK_SIZES_ALL]; int thresh_freq_fact[BLOCK_SIZES_ALL][MB_MODE_COUNT]; int current_qindex; -#if CONFIG_C043_MVP_IMPROVEMENTS +#if CONFIG_MVP_IMPROVEMENT REF_MV_BANK ref_mv_bank; -#endif // CONFIG_C043_MVP_IMPROVEMENTS +#endif // CONFIG_MVP_IMPROVEMENT #if WARP_CU_BANK WARP_PARAM_BANK warp_param_bank; #endif // WARP_CU_BANK @@ -99,10 +112,16 @@ int bsize_at_least_8x8; #endif // !CONFIG_EXT_RECUR_PARTITIONS - // Indicates edge blocks in frame. + // Indicates if at least half of the rows / cols of this block are within the + // frame. int has_rows; int has_cols; + // Indicates if at least 7/8th of the rows / cols of this block are within the + // frame. Used by HORZ/VERT_4A/4B partitions. + int has_7_8th_rows; + int has_7_8th_cols; + // Block size of current partition. BLOCK_SIZE bsize; @@ -130,14 +149,13 @@ // RD cost summed across all blocks of partition type. RD_STATS sum_rdc; +#if !CONFIG_EXT_RECUR_PARTITIONS // Array holding partition type cost. 
int tmp_partition_cost[PARTITION_TYPES]; -#if CONFIG_EXT_RECUR_PARTITIONS - int partition_cost_table[ALL_PARTITION_TYPES]; -#endif +#endif // CONFIG_EXT_RECUR_PARTITIONS // Pointer to partition cost buffer - int *partition_cost; + const int *partition_cost; // RD costs for different partition types. int64_t none_rd; @@ -164,7 +182,24 @@ #if !CONFIG_EXT_RECUR_PARTITIONS int do_square_split; #endif // !CONFIG_EXT_RECUR_PARTITIONS - int prune_rect_part[NUM_RECT_PARTS]; +#if CONFIG_EXT_RECUR_PARTITIONS + bool prune_partition_none; + bool ext_partition_allowed; + bool partition_3_allowed[NUM_RECT_PARTS]; + bool prune_partition_3[NUM_RECT_PARTS]; +#if CONFIG_UNEVEN_4WAY + bool partition_4a_allowed[NUM_RECT_PARTS]; + bool partition_4b_allowed[NUM_RECT_PARTS]; + bool prune_partition_4a[NUM_RECT_PARTS]; + bool prune_partition_4b[NUM_RECT_PARTS]; +#endif // CONFIG_UNEVEN_4WAY + PARTITION_TYPE forced_partition; + // Pointer to an array that traces out the current best partition boundary. + // Used by prune_part_h_with_partition_boundary and + // prune_part_4_with_partition_boundary. + bool *partition_boundaries; +#endif // CONFIG_EXT_RECUR_PARTITIONS + bool prune_rect_part[NUM_RECT_PARTS]; int is_block_splittable; // Chroma subsampling in x and y directions. @@ -178,19 +213,6 @@ bool found_best_partition; } PartitionSearchState; -static AOM_INLINE void update_global_motion_used(PREDICTION_MODE mode, - BLOCK_SIZE bsize, - const MB_MODE_INFO *mbmi, - RD_COUNTS *rdc) { - if (mode == GLOBALMV || mode == GLOBAL_GLOBALMV) { - const int num_4x4s = mi_size_wide[bsize] * mi_size_high[bsize]; - int ref; - for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) { - rdc->global_motion_used[mbmi->ref_frame[ref]] += num_4x4s; - } - } -} - #if CONFIG_WEDGE_MOD_EXT static AOM_INLINE void update_wedge_mode_cdf(FRAME_CONTEXT *fc, const BLOCK_SIZE bsize,
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c index a78753b..33b8cc2 100644 --- a/av1/encoder/encodemb.c +++ b/av1/encoder/encodemb.c
@@ -466,6 +466,32 @@ } #endif // CONFIG_CROSS_CHROMA_TX +#if CONFIG_ATC_DCTX_ALIGNED +// Finds and sets the first position (BOB) index. +// To make sure the BOB value is statistically similar to EOB +// for arithmetic coding efficiency performs a simple rotation. +void set_bob(MACROBLOCK *x, int plane, int block, TX_SIZE tx_size, + TX_TYPE tx_type) { + const struct macroblock_plane *const p = &x->plane[plane]; + const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type); + const int block_offset = BLOCK_OFFSET(block); + tran_low_t *const qcoeff = p->qcoeff + block_offset; + uint16_t *const eob = &p->eobs[block]; + uint16_t *const bob_ptr = &p->bobs[block]; + int bob = 0; + for (int c = 0; c < *eob; ++c) { + const int pos = scan_order->scan[c]; + const tran_low_t v = qcoeff[pos]; + const tran_low_t level = abs(v); + if (level != 0) { + break; + } + bob++; + } + *bob_ptr = av1_get_max_eob(tx_size) - bob; +} +#endif // CONFIG_ATC_DCTX_ALIGNED + void av1_quant(MACROBLOCK *x, int plane, int block, TxfmParam *txfm_param, QUANT_PARAM *qparam) { const struct macroblock_plane *const p = &x->plane[plane]; @@ -487,6 +513,10 @@ } } +#if CONFIG_ATC_DCTX_ALIGNED + set_bob(x, plane, block, txfm_param->tx_size, txfm_param->tx_type); +#endif // CONFIG_ATC_DCTX_ALIGNED + #if CONFIG_CONTEXT_DERIVATION MACROBLOCKD *const xd = &x->e_mbd; const int16_t *const scan = scan_order->scan; @@ -687,10 +717,17 @@ } #endif #if CONFIG_CROSS_CHROMA_TX +#if CONFIG_ATC_DCTX_ALIGNED + const int skip_cctx = is_inter ? 0 : (p->eobs[block] == 1); +#endif // CONFIG_ATC_DCTX_ALIGNED // Since eob can be updated here, make sure cctx_type is always CCTX_NONE // when eob of U is 0. if (is_cctx_allowed(cm, xd) && plane == AOM_PLANE_U && +#if CONFIG_ATC_DCTX_ALIGNED + (p->eobs[block] == 0 || skip_cctx)) { +#else p->eobs[block] == 0) { +#endif // CONFIG_ATC_DCTX_ALIGNED // In dry run, cctx type will not be referenced by neighboring blocks, so // there is no need to fill in the whole chroma region. 
In addition, // ctx->cctx_type_map size in dry run may not be aligned with actual @@ -709,6 +746,9 @@ &p->eobs[block]); #endif // CONFIG_CROSS_CHROMA_TX && CCTX_C2_DROPPED p->eobs[block] = 0; +#if CONFIG_ATC_DCTX_ALIGNED + p->bobs[block] = 0; +#endif // CONFIG_ATC_DCTX_ALIGNED p->txb_entropy_ctx[block] = 0; } @@ -733,7 +773,13 @@ .buf[(blk_row * pd_c1->dst.stride + blk_col) << MI_SIZE_LOG2]; int eob_c1 = p_c1->eobs[block]; int eob_c2 = x->plane[AOM_PLANE_V].eobs[block]; +#if CONFIG_ATC_DCTX_ALIGNED + const int is_inter = is_inter_block(mbmi, xd->tree_type); + const int skip_cctx = is_inter ? 0 : (p->eobs[block] == 1); + recon_with_cctx = (eob_c1 || eob_c2) && !skip_cctx; +#else recon_with_cctx = eob_c1 || eob_c2; +#endif // CONFIG_ATC_DCTX_ALIGNED max_chroma_eob = AOMMAX(eob_c1, eob_c2); if (recon_with_cctx) { av1_inv_cross_chroma_tx_block(dqcoeff_c1, dqcoeff, tx_size, cctx_type); @@ -751,8 +797,14 @@ av1_inverse_transform_block( xd, dqcoeff, plane, tx_type, tx_size, dst, pd->dst.stride, #if CONFIG_CROSS_CHROMA_TX +#if CONFIG_ATC_DCTX_ALIGNED + (plane == 0 || !is_cctx_allowed(cm, xd) || !recon_with_cctx) + ? p->eobs[block] + : max_chroma_eob, +#else (plane == 0 || !is_cctx_allowed(cm, xd)) ? 
p->eobs[block] : max_chroma_eob, +#endif // CONFIG_ATC_DCTX_ALIGNED #else p->eobs[block], #endif @@ -816,11 +868,20 @@ &pd_c1->dst .buf[(blk_row * pd_c1->dst.stride + blk_col) << MI_SIZE_LOG2]; mismatch_record_block_tx(dst_c1, pd_c1->dst.stride, - cm->current_frame.order_hint, AOM_PLANE_U, - pixel_c, pixel_r, blk_w, blk_h); +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + cm->current_frame.display_order_hint, +#else + cm->current_frame.order_hint, +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + AOM_PLANE_U, pixel_c, pixel_r, blk_w, blk_h); } #endif // CONFIG_CROSS_CHROMA_TX - mismatch_record_block_tx(dst, pd->dst.stride, cm->current_frame.order_hint, + mismatch_record_block_tx(dst, pd->dst.stride, +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + cm->current_frame.display_order_hint, +#else + cm->current_frame.order_hint, +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC plane, pixel_c, pixel_r, blk_w, blk_h); } #endif // CONFIG_MISMATCH_DEBUG @@ -1136,6 +1197,9 @@ tran_low_t *dqcoeff = p->dqcoeff + BLOCK_OFFSET(block); PLANE_TYPE plane_type = get_plane_type(plane); uint16_t *eob = &p->eobs[block]; +#if CONFIG_ATC_DCTX_ALIGNED + uint16_t *bob_code = &p->bobs[block]; +#endif // CONFIG_ATC_DCTX_ALIGNED const int dst_stride = pd->dst.stride; uint16_t *dst = &pd->dst.buf[(blk_row * dst_stride + blk_col) << MI_SIZE_LOG2]; @@ -1155,9 +1219,14 @@ mi_to_pixel_loc(&pixel_c, &pixel_r, xd->mi_col, xd->mi_row, blk_col, blk_row, pd->subsampling_x, pd->subsampling_y); } - mismatch_record_block_pre( - pd->dst.buf, pd->dst.stride, cm->current_frame.order_hint, plane, - pixel_c, pixel_r, tx_size_wide[tx_size], tx_size_high[tx_size]); + mismatch_record_block_pre(pd->dst.buf, pd->dst.stride, +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + cm->current_frame.display_order_hint, +#else + cm->current_frame.order_hint, +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + plane, pixel_c, pixel_r, tx_size_wide[tx_size], + tx_size_high[tx_size]); } #endif // CONFIG_MISMATCH_DEBUG @@ -1175,6 +1244,9 @@ if (plane == 0 && 
is_blk_skip(x->txfm_search_info.blk_skip, plane, blk_row * bw + blk_col)) { *eob = 0; +#if CONFIG_ATC_DCTX_ALIGNED + *bob_code = 0; +#endif // CONFIG_ATC_DCTX_ALIGNED p->txb_entropy_ctx[block] = 0; #if DEBUG_EXTQUANT if (args->dry_run == OUTPUT_ENABLED) { @@ -1260,6 +1332,43 @@ av1_dropout_qcoeff(x, plane, block, tx_size, tx_type, cm->quant_params.base_qindex); } +#if CONFIG_ATC_DCTX_ALIGNED + // make sure recon is correct at the encoder + if (*eob == 1 && tx_type != 0 && plane == 0) { + xd->tx_type_map[blk_row * xd->tx_type_map_stride + blk_col] = DCT_DCT; + tx_type = av1_get_tx_type(xd, plane_type, blk_row, blk_col, tx_size, + cm->features.reduced_tx_set_used); + av1_setup_xform(cm, x, plane, tx_size, tx_type, +#if CONFIG_CROSS_CHROMA_TX + CCTX_NONE, +#endif // CONFIG_CROSS_CHROMA_TX + &txfm_param); + av1_setup_quant(tx_size, use_trellis, quant_idx, + cpi->oxcf.q_cfg.quant_b_adapt, &quant_param); + av1_setup_qmatrix(&cm->quant_params, xd, plane, tx_size, tx_type, + &quant_param); + av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, + &txfm_param, &quant_param); + if (quant_param.use_optimize_b && do_trellis) { + TXB_CTX txb_ctx; + get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx, + mbmi->fsc_mode[xd->tree_type == CHROMA_PART]); + av1_optimize_b(args->cpi, x, plane, block, tx_size, tx_type, +#if CONFIG_CROSS_CHROMA_TX + CCTX_NONE, +#endif // CONFIG_CROSS_CHROMA_TX + &txb_ctx, &dummy_rate_cost); + } + if (do_dropout && !fsc_mode +#if CONFIG_PAR_HIDING + && !enable_parity_hiding +#endif // CONFIG_PAR_HIDING + ) { + av1_dropout_qcoeff(x, plane, block, tx_size, tx_type, + cm->quant_params.base_qindex); + } + } +#endif // CONFIG_ATC_DCTX_ALIGNED #if CONFIG_PAR_HIDING if (!quant_param.use_optimize_b && enable_parity_hiding) { parity_hiding_trellis_off(cpi, x, plane, block, tx_size, tx_type); @@ -1312,7 +1421,12 @@ mi_to_pixel_loc(&pixel_c, &pixel_r, xd->mi_col, xd->mi_row, blk_col, blk_row, pd->subsampling_x, pd->subsampling_y); } - 
mismatch_record_block_tx(dst, pd->dst.stride, cm->current_frame.order_hint, + mismatch_record_block_tx(dst, pd->dst.stride, +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + cm->current_frame.display_order_hint, +#else + cm->current_frame.order_hint, +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC plane, pixel_c, pixel_r, blk_w, blk_h); } #endif // CONFIG_MISMATCH_DEBUG @@ -1323,11 +1437,7 @@ if (plane == AOM_PLANE_Y && xd->cfl.store_y && xd->tree_type == SHARED_PART) { #if CONFIG_ADAPTIVE_DS_FILTER cfl_store_tx(xd, blk_row, blk_col, tx_size, -#if DS_FRAME_LEVEL - cm->features.ds_filter_type); -#else cm->seq_params.enable_cfl_ds_filter); -#endif // DS_FRAME_LEVEL #else cfl_store_tx(xd, blk_row, blk_col, tx_size); #endif // CONFIG_ADAPTIVE_DS_FILTER @@ -1402,13 +1512,21 @@ xd->mi[0]->chroma_ref_info.mi_row_chroma_base, blk_col, blk_row, pd_c1->subsampling_x, pd_c1->subsampling_y); mismatch_record_block_pre(pd_c1->dst.buf, pd_c1->dst.stride, - cm->current_frame.order_hint, AOM_PLANE_U, - pixel_c, pixel_r, tx_size_wide[tx_size], - tx_size_high[tx_size]); +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + cm->current_frame.display_order_hint, +#else + cm->current_frame.order_hint, +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + AOM_PLANE_U, pixel_c, pixel_r, + tx_size_wide[tx_size], tx_size_high[tx_size]); mismatch_record_block_pre(pd_c2->dst.buf, pd_c2->dst.stride, - cm->current_frame.order_hint, AOM_PLANE_V, - pixel_c, pixel_r, tx_size_wide[tx_size], - tx_size_high[tx_size]); +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + cm->current_frame.display_order_hint, +#else + cm->current_frame.order_hint, +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + AOM_PLANE_V, pixel_c, pixel_r, + tx_size_wide[tx_size], tx_size_high[tx_size]); } #endif // CONFIG_MISMATCH_DEBUG @@ -1447,6 +1565,14 @@ (INTRA_BLOCK_OPT_TYPE == DROPOUT_OPT || INTRA_BLOCK_OPT_TYPE == TRELLIS_DROPOUT_OPT)); for (int plane = AOM_PLANE_U; plane <= AOM_PLANE_V; plane++) { +#if CONFIG_ATC_DCTX_ALIGNED + int skip_cctx = 
!is_inter_block(xd->mi[0], xd->tree_type) && *eob_c1 == 1; + if (plane == AOM_PLANE_V && skip_cctx) { + update_cctx_array(xd, blk_row, blk_col, 0, 0, + args->dry_run ? TX_4X4 : tx_size, CCTX_NONE); + cctx_type = av1_get_cctx_type(xd, blk_row, blk_col); + } +#endif // CONFIG_ATC_DCTX_ALIGNED // Since eob can be updated here, make sure cctx_type is always CCTX_NONE // when eob of U is 0. if (plane == AOM_PLANE_V && *eob_c1 == 0) { @@ -1486,6 +1612,33 @@ av1_dropout_qcoeff(x, plane, block, tx_size, tx_type, cm->quant_params.base_qindex); } +#if CONFIG_ATC_DCTX_ALIGNED + skip_cctx = !is_inter_block(xd->mi[0], xd->tree_type) && *eob_c1 == 1; + if (plane == AOM_PLANE_V && skip_cctx) { + update_cctx_array(xd, blk_row, blk_col, 0, 0, + args->dry_run ? TX_4X4 : tx_size, CCTX_NONE); + cctx_type = av1_get_cctx_type(xd, blk_row, blk_col); + av1_setup_qmatrix(&cm->quant_params, xd, plane, tx_size, tx_type, + &quant_param); + av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, + &txfm_param, &quant_param); + if (quant_param.use_optimize_b && do_trellis) { + const ENTROPY_CONTEXT *a = + &args->ta[blk_col + (plane - AOM_PLANE_U) * MAX_MIB_SIZE]; + const ENTROPY_CONTEXT *l = + &args->tl[blk_row + (plane - AOM_PLANE_U) * MAX_MIB_SIZE]; + TXB_CTX txb_ctx; + get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx, + xd->mi[0]->fsc_mode[xd->tree_type == CHROMA_PART]); + av1_optimize_b(args->cpi, x, plane, block, tx_size, tx_type, cctx_type, + &txb_ctx, &dummy_rate_cost); + } + if (do_dropout) { + av1_dropout_qcoeff(x, plane, block, tx_size, tx_type, + cm->quant_params.base_qindex); + } + } +#endif // CONFIG_ATC_DCTX_ALIGNED } if (*eob_c1 || *eob_c2) { @@ -1522,11 +1675,19 @@ xd->mi[0]->chroma_ref_info.mi_row_chroma_base, blk_col, blk_row, pd_c1->subsampling_x, pd_c1->subsampling_y); mismatch_record_block_tx(dst_c1, pd_c1->dst.stride, - cm->current_frame.order_hint, AOM_PLANE_U, pixel_c, - pixel_r, blk_w, blk_h); +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + 
cm->current_frame.display_order_hint, +#else + cm->current_frame.order_hint, +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + AOM_PLANE_U, pixel_c, pixel_r, blk_w, blk_h); mismatch_record_block_tx(dst_c2, pd_c2->dst.stride, - cm->current_frame.order_hint, AOM_PLANE_V, pixel_c, - pixel_r, blk_w, blk_h); +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + cm->current_frame.display_order_hint, +#else + cm->current_frame.order_hint, +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + AOM_PLANE_V, pixel_c, pixel_r, blk_w, blk_h); } #endif // CONFIG_MISMATCH_DEBUG
diff --git a/av1/encoder/encodemb.h b/av1/encoder/encodemb.h index 3d0a27c..f6be3e4 100644 --- a/av1/encoder/encodemb.h +++ b/av1/encoder/encodemb.h
@@ -105,6 +105,12 @@ CctxType cctx_type); #endif // CONFIG_CROSS_CHROMA_TX +#if CONFIG_ATC_DCTX_ALIGNED +// This function sets the first position index in a TU. +void set_bob(MACROBLOCK *x, int plane, int block, TX_SIZE tx_size, + TX_TYPE tx_type); +#endif // CONFIG_ATC_DCTX_ALIGNED + void av1_quant(MACROBLOCK *x, int plane, int block, TxfmParam *txfm_param, QUANT_PARAM *qparam);
diff --git a/av1/encoder/encodemv.c b/av1/encoder/encodemv.c index 369054d..6e7a524 100644 --- a/av1/encoder/encodemv.c +++ b/av1/encoder/encodemv.c
@@ -875,15 +875,31 @@ int_mv av1_get_ref_mv_from_stack( int ref_idx, const MV_REFERENCE_FRAME *ref_frame, int ref_mv_idx, - const MB_MODE_INFO_EXT *mbmi_ext) { + const MB_MODE_INFO_EXT *mbmi_ext +#if CONFIG_SEP_COMP_DRL + , + const MB_MODE_INFO *mbmi +#endif // CONFIG_SEP_COMP_DRL + ) { const int8_t ref_frame_type = av1_ref_frame_type(ref_frame); +#if CONFIG_SEP_COMP_DRL const CANDIDATE_MV *curr_ref_mv_stack = - mbmi_ext->ref_mv_stack[ref_frame_type]; + has_second_drl(mbmi) ? mbmi_ext->ref_mv_stack[ref_frame[ref_idx]] + : mbmi_ext->ref_mv_stack[ref_frame_type]; +#else + const CANDIDATE_MV *curr_ref_mv_stack = + mbmi_ext->ref_mv_stack[ref_frame_type]; +#endif // CONFIG_SEP_COMP_DRL if (is_inter_ref_frame(ref_frame[1])) { assert(ref_idx == 0 || ref_idx == 1); - return ref_idx ? curr_ref_mv_stack[ref_mv_idx].comp_mv - : curr_ref_mv_stack[ref_mv_idx].this_mv; +#if CONFIG_SEP_COMP_DRL + return ref_idx && !has_second_drl(mbmi) + ? curr_ref_mv_stack[ref_mv_idx].comp_mv +#else + return ref_idx ? 
curr_ref_mv_stack[ref_mv_idx].comp_mv +#endif // CONFIG_SEP_COMP_DRL + : curr_ref_mv_stack[ref_mv_idx].this_mv; } assert(ref_idx == 0); @@ -910,8 +926,14 @@ if (have_nearmv_newmv_in_inter_mode(mbmi->mode)) { assert(has_second_ref(mbmi)); } - return av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame, - mbmi->ref_mv_idx, x->mbmi_ext); +#if CONFIG_SEP_COMP_DRL + const int ref_mv_idx = get_ref_mv_idx(mbmi, ref_idx); + return av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame, ref_mv_idx, + x->mbmi_ext, mbmi); +#else + return av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame, mbmi->ref_mv_idx, + x->mbmi_ext); +#endif // CONFIG_SEP_COMP_DRL } /** @@ -927,6 +949,9 @@ */ #if CONFIG_FLEX_MVRES int_mv av1_find_best_ref_mv_from_stack(const MB_MODE_INFO_EXT *mbmi_ext, +#if CONFIG_SEP_COMP_DRL + const MB_MODE_INFO *mbmi, +#endif // CONFIG_SEP_COMP_DRL MV_REFERENCE_FRAME ref_frame, MvSubpelPrecision precision) { #else @@ -941,7 +966,11 @@ int range = AOMMIN(mbmi_ext->ref_mv_count[ref_frame], MAX_REF_MV_STACK_SIZE); for (int i = 0; i < range; i++) { - mv = av1_get_ref_mv_from_stack(0, ref_frames, i, mbmi_ext); +#if CONFIG_SEP_COMP_DRL + mv = av1_get_ref_mv_from_stack(0, ref_frames, i, mbmi_ext, mbmi); +#else + mv = av1_get_ref_mv_from_stack(0, ref_frames, i, mbmi_ext); +#endif // CONFIG_SEP_COMP_DRL if (mv.as_int != 0 && mv.as_int != INVALID_MV) { found_ref_mv = true; break; @@ -969,7 +998,16 @@ int_mv mv; const int ref_idx = 0; MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, NONE_FRAME }; - mv = av1_get_ref_mv_from_stack(ref_idx, ref_frames, 0, mbmi_ext); +#if CONFIG_SEP_COMP_DRL + // this function is not called in this software. + MB_MODE_INFO mbmi; + mbmi.skip_mode = 0; + mbmi.mode = NEWMV; + mbmi.ref_frame[0] = ref_frame; + mv = av1_get_ref_mv_from_stack(ref_idx, ref_frames, 0, mbmi_ext, &mbmi); +#else + mv = av1_get_ref_mv_from_stack(ref_idx, ref_frames, 0, mbmi_ext); +#endif // CONFIG_SEP_COMP_DRL #if CONFIG_FLEX_MVRES lower_mv_precision(&mv.as_mv, precision); #else
diff --git a/av1/encoder/encodemv.h b/av1/encoder/encodemv.h index 39cdf7d..119107d 100644 --- a/av1/encoder/encodemv.h +++ b/av1/encoder/encodemv.h
@@ -57,12 +57,20 @@ int_mv av1_get_ref_mv_from_stack(int ref_idx, const MV_REFERENCE_FRAME *ref_frame, int ref_mv_idx, - const MB_MODE_INFO_EXT *mbmi_ext); + const MB_MODE_INFO_EXT *mbmi_ext +#if CONFIG_SEP_COMP_DRL + , + const MB_MODE_INFO *mbmi +#endif // CONFIG_SEP_COMP_DRL +); #if CONFIG_FLEX_MVRES int_mv av1_find_first_ref_mv_from_stack(const MB_MODE_INFO_EXT *mbmi_ext, MV_REFERENCE_FRAME ref_frame, MvSubpelPrecision precision); int_mv av1_find_best_ref_mv_from_stack(const MB_MODE_INFO_EXT *mbmi_ext, +#if CONFIG_SEP_COMP_DRL + const MB_MODE_INFO *mbmi, +#endif // CONFIG_SEP_COMP_DRL MV_REFERENCE_FRAME ref_frame, MvSubpelPrecision precision); #else
diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c index 543f671..378e9d6 100644 --- a/av1/encoder/encoder.c +++ b/av1/encoder/encoder.c
@@ -204,6 +204,9 @@ av1_init_me_luts(); av1_rc_init_minq_luts(); av1_init_wedge_masks(); +#if CONFIG_CWP + init_cwp_masks(); +#endif // CONFIG_CWP } static void update_reference_segmentation_map(AV1_COMP *cpi) { @@ -371,6 +374,10 @@ ? DEFAULT_EXPLICIT_ORDER_HINT_BITS - 1 : -1; seq->explicit_ref_frame_map = oxcf->ref_frm_cfg.explicit_ref_frame_map; +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + // Set 0 for multi-layer coding + seq->enable_frame_output_order = oxcf->ref_frm_cfg.enable_frame_output_order; +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT seq->max_reference_frames = oxcf->ref_frm_cfg.max_reference_frames; #if CONFIG_ALLOW_SAME_REF_COMPOUND seq->num_same_ref_compound = SAME_REF_COMPOUND_PRUNE; @@ -414,6 +421,12 @@ #if CONFIG_BAWP seq->enable_bawp = tool_cfg->enable_bawp; #endif // CONFIG_BAWP +#if CONFIG_CWP + seq->enable_cwp = tool_cfg->enable_cwp; +#endif // CONFIG_CWP +#if CONFIG_D071_IMP_MSK_BLD + seq->enable_imp_msk_bld = tool_cfg->enable_imp_msk_bld; +#endif // CONFIG_D071_IMP_MSK_BLD #if CONFIG_EXTENDED_WARP_PREDICTION seq->seq_enabled_motion_modes = oxcf->motion_mode_cfg.seq_enabled_motion_modes; @@ -434,6 +447,9 @@ #if CONFIG_ORIP seq->enable_orip = oxcf->intra_mode_cfg.enable_orip; #endif +#if CONFIG_IDIF + seq->enable_idif = oxcf->intra_mode_cfg.enable_idif; +#endif // CONFIG_IDIF seq->enable_ist = oxcf->txfm_cfg.enable_ist; #if CONFIG_CROSS_CHROMA_TX seq->enable_cctx = oxcf->txfm_cfg.enable_cctx; @@ -451,6 +467,9 @@ #if CONFIG_JOINT_MVD seq->enable_joint_mvd = tool_cfg->enable_joint_mvd; #endif // CONFIG_JOINT_MVD +#if CONFIG_REFINEMV + seq->enable_refinemv = tool_cfg->enable_refinemv; +#endif // CONFIG_REFINEMV set_bitstream_level_tier(seq, cm, frm_dim_cfg->width, frm_dim_cfg->height, oxcf->input_cfg.init_framerate); @@ -495,6 +514,13 @@ #if CONFIG_PAR_HIDING seq->enable_parity_hiding = tool_cfg->enable_parity_hiding; #endif // CONFIG_PAR_HIDING +#if CONFIG_IMPROVED_GLOBAL_MOTION + // TODO(rachelbarker): Check if 
cpi->sf.gm_sf.gm_search_type is set by this + // point, and set to 0 if cpi->sf.gm_sf.gm_search_type == GM_DISABLE_SEARCH + // if possible + seq->enable_global_motion = + tool_cfg->enable_global_motion && !seq->reduced_still_picture_hdr; +#endif // CONFIG_IMPROVED_GLOBAL_MOTION } static void init_config(struct AV1_COMP *cpi, AV1EncoderConfig *oxcf) { @@ -575,7 +601,7 @@ // set sb size before allocations const BLOCK_SIZE sb_size = av1_select_sb_size(cpi); set_sb_size(cm, sb_size); - cpi->td.sb_size = sb_size; + cpi->td.sb_size = cm->sb_size; alloc_compressor_data(cpi); av1_update_film_grain_parameters(cpi, oxcf); @@ -1469,7 +1495,14 @@ PSNR_STATS psnr; const uint32_t in_bit_depth = cpi->oxcf.input_cfg.input_bit_depth; const uint32_t bit_depth = cpi->td.mb.e_mbd.bd; +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + // To match the PSNR results between encoder log and VMAF results, + // the same reference sources (unfiltered source) need to be used. + aom_calc_highbd_psnr(cpi->unfiltered_source, &cpi->common.cur_frame->buf, + &psnr, +#else // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT aom_calc_highbd_psnr(cpi->source, &cpi->common.cur_frame->buf, &psnr, +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT bit_depth, in_bit_depth); for (i = 0; i < 4; ++i) { @@ -1622,9 +1655,13 @@ dst += CFL_BUF_LINE; } } - +#if CONFIG_CFL_IMPROVEMENTS +static int64_t compute_sad(const uint16_t *src, uint16_t *src2, int width, + int height, int round_offset, int src2_stride) { +#else static int compute_sad(const uint16_t *src, uint16_t *src2, int width, int height, int round_offset, int src2_stride) { +#endif // CONFIG_CFL_IMPROVEMENTS int sad = round_offset; for (int j = 0; j < height; ++j) { for (int i = 0; i < width; ++i) { @@ -1633,7 +1670,11 @@ src += CFL_BUF_LINE; src2 += src2_stride; } +#if CONFIG_CFL_IMPROVEMENTS + return sad; +#else return (sad / (height * width)); +#endif // CONFIG_CFL_IMPROVEMENTS } static void cfl_predict_hbd_pre_analysis(const int16_t *ac_buf_q3, @@ -1688,11 
+1729,7 @@ } } -#if DS_FRAME_LEVEL -void av1_set_downsample_filter_options(AV1_COMP *cpi, FeatureFlags *features) { -#else void av1_set_downsample_filter_options(AV1_COMP *cpi) { -#endif // DS_FRAME_LEVE AV1_COMMON *cm = &cpi->common; const uint16_t *src = cpi->unfiltered_source->y_buffer; uint16_t *src_chroma_u = cpi->unfiltered_source->u_buffer; @@ -1707,14 +1744,34 @@ const int subsampling_x = cpi->unfiltered_source->subsampling_x; const int subsampling_y = cpi->unfiltered_source->subsampling_y; +#if CONFIG_ADPTIVE_DS_422 + if (subsampling_x == 0 && subsampling_y == 0) { + cm->seq_params.enable_cfl_ds_filter = + 0; // For 4:4:4 chroma format, downsampling filter is not used. There + // is a redundant that the filter index is still signalled for + // 4:4:4. Should we remove the index signalling for 4:4:4 with this + // MR? + return; + } +#endif // CONFIG_ADPTIVE_DS_422 + +#if CONFIG_CFL_IMPROVEMENTS + const int blk_w = 16; + const int blk_h = 16; +#else const int blk_w = 32; const int blk_h = 32; +#endif // CONFIG_CFL_IMPROVEMENTS uint16_t recon_buf_q3[CFL_BUF_SQUARE]; uint16_t dc_buf_q3[CFL_BUF_SQUARE]; // Q3 AC contributions (reconstructed luma pixels - tx block avg) int16_t ac_buf_q3[CFL_BUF_SQUARE]; +#if CONFIG_CFL_IMPROVEMENTS + int64_t cost[3] = { 0, 0, 0 }; +#else int cost[3] = { 0, 0, 0 }; +#endif // CONFIG_CFL_IMPROVEMENTS for (int filter_type = 0; filter_type < 3; ++filter_type) { for (int comp = 0; comp < 2; comp++) { for (int r = 2; r + blk_h <= height - 2; r += blk_h) { @@ -1730,7 +1787,14 @@ } int alpha = 0; +#if CONFIG_ADPTIVE_DS_422 + if (subsampling_x == 1 && subsampling_y == 0) { + cfl_adaptive_luma_subsampling_422_hbd_c( + this_src, stride, recon_buf_q3, blk_w, blk_h, filter_type); + } else if (filter_type == 1) { +#else if (filter_type == 1) { +#endif // CONFIG_ADPTIVE_DS_422 cfl_luma_subsampling_420_hbd_121_c(this_src, stride, recon_buf_q3, blk_w, blk_h); } else if (filter_type == 2) { @@ -1740,6 +1804,30 @@ 
cfl_luma_subsampling_420_hbd_c(this_src, stride, recon_buf_q3, blk_w, blk_h); } +#if CONFIG_ADPTIVE_DS_422 + cfl_derive_block_implicit_scaling_factor( + recon_buf_q3, this_src_chroma, blk_w >> subsampling_x, + blk_h >> subsampling_y, CFL_BUF_LINE, chroma_stride, &alpha); + subtract_average_c( + recon_buf_q3, ac_buf_q3, blk_w >> subsampling_x, + blk_h >> subsampling_y, 4, + (blk_w >> subsampling_x) * (blk_h >> subsampling_y)); + cfl_predict_hbd_dc(this_src_chroma - chroma_stride, dc_buf_q3, + chroma_stride, blk_w >> subsampling_x, + blk_h >> subsampling_y); + cfl_predict_hbd_pre_analysis(ac_buf_q3, dc_buf_q3, CFL_BUF_LINE, + alpha, bd, blk_w >> subsampling_x, + blk_h >> subsampling_y); +#if CONFIG_CFL_IMPROVEMENTS + int64_t filter_cost = + compute_sad(dc_buf_q3, this_src_chroma, blk_w >> 1, blk_h >> 1, 2, + chroma_stride); +#else + int filter_cost = + compute_sad(dc_buf_q3, this_src_chroma, blk_w >> subsampling_x, + blk_h >> subsampling_y, 2, chroma_stride); +#endif // CONFIG_CFL_IMPROVEMENTS +#else cfl_derive_block_implicit_scaling_factor( recon_buf_q3, this_src_chroma, blk_w >> 1, blk_h >> 1, CFL_BUF_LINE, chroma_stride, &alpha); @@ -1749,23 +1837,29 @@ chroma_stride, blk_w >> 1, blk_h >> 1); cfl_predict_hbd_pre_analysis(ac_buf_q3, dc_buf_q3, CFL_BUF_LINE, alpha, bd, blk_w >> 1, blk_h >> 1); +#if CONFIG_CFL_IMPROVEMENTS + int64_t filter_cost = + compute_sad(dc_buf_q3, this_src_chroma, blk_w >> 1, blk_h >> 1, 2, + chroma_stride); +#else int filter_cost = compute_sad(dc_buf_q3, this_src_chroma, blk_w >> 1, blk_h >> 1, 2, chroma_stride); +#endif // CONFIG_CFL_IMPROVEMENTS +#endif // CONFIG_ADPTIVE_DS_422 cost[filter_type] = cost[filter_type] + filter_cost; } } } } - +#if CONFIG_CFL_IMPROVEMENTS + int64_t min_cost = INT64_MAX; +#else int min_cost = INT_MAX; +#endif // CONFIG_CFL_IMPROVEMENTS for (int i = 0; i < 3; ++i) { if (cost[i] < min_cost) { min_cost = cost[i]; -#if DS_FRAME_LEVEL - features->ds_filter_type = i; -#else cm->seq_params.enable_cfl_ds_filter = i; 
-#endif // DS_FRAME_LEVEL } } } @@ -1911,6 +2005,29 @@ } } +#define COUPLED_CHROMA_FROM_LUMA_RESTORATION 0 +#if !CONFIG_FLEXIBLE_RU_SIZE +static void set_restoration_unit_size(int width, int height, int sx, int sy, + RestorationInfo *rst) { + (void)width; + (void)height; + (void)sx; + (void)sy; +#if COUPLED_CHROMA_FROM_LUMA_RESTORATION + int s = AOMMIN(sx, sy); +#else + int s = 0; +#endif // !COUPLED_CHROMA_FROM_LUMA_RESTORATION + + if (width * height > 352 * 288) + rst[0].restoration_unit_size = RESTORATION_UNITSIZE_MAX; + else + rst[0].restoration_unit_size = (RESTORATION_UNITSIZE_MAX >> 1); + rst[1].restoration_unit_size = rst[0].restoration_unit_size >> s; + rst[2].restoration_unit_size = rst[1].restoration_unit_size; +} +#endif // !CONFIG_FLEXIBLE_RU_SIZE + static void init_ref_frame_bufs(AV1_COMP *cpi) { AV1_COMMON *const cm = &cpi->common; int i; @@ -2049,11 +2166,15 @@ const int frame_width = cm->superres_upscaled_width; const int frame_height = cm->superres_upscaled_height; - av1_set_restoration_unit_size( - frame_width, frame_height, seq_params->subsampling_x, - seq_params->subsampling_y, cm->rst_info, cm->sb_size); + set_restoration_unit_size(frame_width, frame_height, + seq_params->subsampling_x, + seq_params->subsampling_y, cm->rst_info); for (int i = 0; i < num_planes; ++i) cm->rst_info[i].frame_restoration_type = RESTORE_NONE; +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + for (int i = 0; i < num_planes; ++i) + cm->rst_info[i].frame_cross_restoration_type = RESTORE_NONE; +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER av1_alloc_restoration_buffers(cm); if (!is_stat_generation_stage(cpi)) alloc_util_frame_buffers(cpi); @@ -2094,6 +2215,30 @@ set_ref_ptrs(cm, xd, 0, 0); } +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER +static void save_pre_filter_frame(AV1_COMP *cpi, AV1_COMMON *cm) { + (void)cpi; + YV12_BUFFER_CONFIG *frame = &cm->cur_frame->buf; + YV12_BUFFER_CONFIG *pre_filter_frame = &cm->pre_rst_frame; + + const SequenceHeader *const seq_params = 
&cm->seq_params; + + const int frame_width = frame->crop_widths[0]; + const int frame_height = frame->crop_heights[0]; + + if (aom_realloc_frame_buffer( + pre_filter_frame, frame_width, frame_height, + seq_params->subsampling_x, seq_params->subsampling_y, + AOM_RESTORATION_FRAME_BORDER, cm->features.byte_alignment, NULL, NULL, + NULL) < 0) + aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR, + "Failed to allocate restoration dst buffer"); + + const int num_planes = av1_num_planes(cm); + aom_yv12_copy_frame(frame, pre_filter_frame, num_planes); +} +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + /*!\brief Select and apply cdef filters and switchable restoration filters * * \ingroup high_level_algo @@ -2235,9 +2380,32 @@ if (use_restoration) { av1_loop_restoration_save_boundary_lines(&cm->cur_frame->buf, cm, 1); av1_pick_filter_restoration(cpi->source, cpi); +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + save_pre_filter_frame(cpi, cm); + if (num_workers > 1) + av1_loop_restoration_filter_frame_mt( + &cm->cur_frame->buf, cm, 0, mt_info->workers, num_workers, + &mt_info->lr_row_sync, &cpi->lr_ctxt); + else + av1_loop_restoration_filter_frame(&cm->cur_frame->buf, cm, 0, + &cpi->lr_ctxt); + + // restore luma component of the frame + aom_yv12_copy_y(&cm->pre_rst_frame, &cm->cur_frame->buf); + av1_pick_cross_filter_restoration(cpi->source, cpi); + // restore chroma components of the frame + aom_yv12_copy_u(&cm->pre_rst_frame, &cm->cur_frame->buf); + aom_yv12_copy_v(&cm->pre_rst_frame, &cm->cur_frame->buf); +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER if (cm->rst_info[0].frame_restoration_type != RESTORE_NONE || cm->rst_info[1].frame_restoration_type != RESTORE_NONE || - cm->rst_info[2].frame_restoration_type != RESTORE_NONE) { + cm->rst_info[2].frame_restoration_type != RESTORE_NONE +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + || cm->rst_info[0].frame_cross_restoration_type != RESTORE_NONE || + cm->rst_info[1].frame_cross_restoration_type != RESTORE_NONE || + 
cm->rst_info[2].frame_cross_restoration_type != RESTORE_NONE +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + ) { if (num_workers > 1) av1_loop_restoration_filter_frame_mt( &cm->cur_frame->buf, cm, 0, mt_info->workers, num_workers, @@ -2250,6 +2418,11 @@ cm->rst_info[0].frame_restoration_type = RESTORE_NONE; cm->rst_info[1].frame_restoration_type = RESTORE_NONE; cm->rst_info[2].frame_restoration_type = RESTORE_NONE; +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + cm->rst_info[0].frame_cross_restoration_type = RESTORE_NONE; + cm->rst_info[1].frame_cross_restoration_type = RESTORE_NONE; + cm->rst_info[2].frame_cross_restoration_type = RESTORE_NONE; +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER } #if CONFIG_COLLECT_COMPONENT_TIMING end_timing(cpi, loop_restoration_time); @@ -2724,13 +2897,6 @@ loop = 0; } - if (allow_recode && !cpi->sf.gm_sf.gm_disable_recode && - av1_recode_loop_test_global_motion(cm->global_motion, - cpi->td.rd_counts.global_motion_used, - gm_info->params_cost)) { - loop = 1; - } - if (loop) { ++loop_count; @@ -2837,6 +3003,11 @@ cm->rst_info[0].frame_restoration_type = RESTORE_NONE; cm->rst_info[1].frame_restoration_type = RESTORE_NONE; cm->rst_info[2].frame_restoration_type = RESTORE_NONE; +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + cm->rst_info[0].frame_cross_restoration_type = RESTORE_NONE; + cm->rst_info[1].frame_cross_restoration_type = RESTORE_NONE; + cm->rst_info[2].frame_cross_restoration_type = RESTORE_NONE; +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER for (int i = 0; i < INTER_REFS_PER_FRAME; ++i) { cm->global_motion[i] = default_warp_params; @@ -2967,6 +3138,11 @@ cm->rst_info[0].frame_restoration_type = RESTORE_NONE; cm->rst_info[1].frame_restoration_type = RESTORE_NONE; cm->rst_info[2].frame_restoration_type = RESTORE_NONE; +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + cm->rst_info[0].frame_cross_restoration_type = RESTORE_NONE; + cm->rst_info[1].frame_cross_restoration_type = RESTORE_NONE; + 
cm->rst_info[2].frame_cross_restoration_type = RESTORE_NONE; +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER } #if CONFIG_TIP @@ -3218,13 +3394,8 @@ } #endif // CONFIG_IBC_SR_EXT #if CONFIG_ADAPTIVE_DS_FILTER -#if DS_FRAME_LEVEL - if (cm->current_frame.frame_type == KEY_FRAME) { - av1_set_downsample_filter_options(cpi, features); -#else - if (cpi->common.current_frame.absolute_poc == 0) { + if (cpi->common.current_frame.frame_type == KEY_FRAME) { av1_set_downsample_filter_options(cpi); -#endif // DS_FRAME_LEVEL } #endif // CONFIG_ADAPTIVE_DS_FILTER // frame type has been decided outside of this function call @@ -3243,11 +3414,20 @@ features->allow_warped_motion = oxcf->motion_mode_cfg.allow_warped_motion && frame_might_allow_warped_motion(cm); #endif // !CONFIG_EXTENDED_WARP_PREDICTION - +#if CONFIG_CWG_D067_IMPROVED_WARP + features->allow_warpmv_mode = features->enabled_motion_modes; +#endif // CONFIG_CWG_D067_IMPROVED_WARP // temporal set of frame level enable_bawp flag. #if CONFIG_BAWP features->enable_bawp = seq_params->enable_bawp; #endif +#if CONFIG_CWP + features->enable_cwp = seq_params->enable_cwp; +#endif // CONFIG_CWP + +#if CONFIG_D071_IMP_MSK_BLD + features->enable_imp_msk_bld = seq_params->enable_imp_msk_bld; +#endif // CONFIG_D071_IMP_MSK_BLD cpi->last_frame_type = current_frame->frame_type; @@ -3715,7 +3895,11 @@ #endif cpi->bytes += frame_bytes; if (cm->show_frame) { +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + const YV12_BUFFER_CONFIG *orig = cpi->unfiltered_source; +#else // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT const YV12_BUFFER_CONFIG *orig = cpi->source; +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT const YV12_BUFFER_CONFIG *recon = &cpi->common.cur_frame->buf; double y, u, v, frame_all; @@ -3814,10 +3998,18 @@ aom_usec_timer_mark(&cmptimer); cpi->time_compress_data += aom_usec_timer_elapsed(&cmptimer); #endif // CONFIG_INTERNAL_STATS +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + if (cpi->b_calculate_psnr && *size > 0) { + if 
((cm->showable_frame && cm->seq_params.enable_frame_output_order) || + (cm->show_existing_frame && + !cm->seq_params.enable_frame_output_order) || + (!is_stat_generation_stage(cpi) && cm->show_frame)) { +#else // Note *size = 0 indicates a dropeed frame for which psnr is not calculated if (cpi->b_calculate_psnr && *size > 0) { if (cm->show_existing_frame || (!is_stat_generation_stage(cpi) && cm->show_frame)) { +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT generate_psnr_packet(cpi); } }
diff --git a/av1/encoder/encoder.h b/av1/encoder/encoder.h index a0a16b0..e365151 100644 --- a/av1/encoder/encoder.h +++ b/av1/encoder/encoder.h
@@ -288,6 +288,12 @@ */ bool enable_orip; #endif +#if CONFIG_IDIF + /*! + * Flag to indicate if IDIF should be enabled + */ + bool enable_idif; +#endif /*! * Flag to indicate if IBP should be enabled */ @@ -678,6 +684,10 @@ // Indicates if one-sided compound should be enabled. bool enable_onesided_comp; bool explicit_ref_frame_map; +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + // Indicates if the implicit frame order derivation is enabled. + bool enable_frame_output_order; +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT } RefFrameCfg; typedef struct { @@ -864,6 +874,10 @@ // Indicates if joint mvd coding should be enabled. bool enable_joint_mvd; #endif // CONFIG_JOINT_MVD +#if CONFIG_REFINEMV + // Indicates if refineMV mode should be enabled. + bool enable_refinemv; +#endif // CONFIG_REFINEMV #if CONFIG_TIP // enable temporal interpolated prediction int enable_tip; @@ -872,6 +886,14 @@ // enable block adaptive weighted prediction int enable_bawp; #endif // CONFIG_BAWP +#if CONFIG_CWP + // enable compound weighted prediction + int enable_cwp; +#endif // CONFIG_CWP +#if CONFIG_D071_IMP_MSK_BLD + // enable implicit masked blending + bool enable_imp_msk_bld; +#endif // CONFIG_D071_IMP_MSK_BLD // When enabled, video mode should be used even for single frame input. bool force_video_mode; // Indicates if the error resiliency features should be enabled. 
@@ -1220,7 +1242,11 @@ unsigned int uv_mode[CFL_ALLOWED_TYPES][INTRA_MODES][UV_INTRA_MODES]; #endif unsigned int fsc_mode[FSC_MODE_CONTEXTS][FSC_BSIZE_CONTEXTS][FSC_MODES]; +#if CONFIG_EXT_DIR + unsigned int mrl_index[MRL_INDEX_CONTEXTS][MRL_LINE_NUMBER]; +#else unsigned int mrl_index[MRL_LINE_NUMBER]; +#endif // CONFIG_EXT_DIR #if CONFIG_IMPROVED_CFL unsigned int cfl_index[CFL_TYPE_COUNT]; #endif @@ -1243,6 +1269,13 @@ unsigned int rect_type[PARTITION_STRUCTURE_NUM][PARTITION_CONTEXTS][2]; unsigned int do_ext_partition[PARTITION_STRUCTURE_NUM][NUM_RECT_PARTS] [PARTITION_CONTEXTS][2]; +#if CONFIG_UNEVEN_4WAY + unsigned int do_uneven_4way_partition[PARTITION_STRUCTURE_NUM][NUM_RECT_PARTS] + [PARTITION_CONTEXTS][2]; + unsigned int uneven_4way_partition_type[PARTITION_STRUCTURE_NUM] + [NUM_RECT_PARTS][PARTITION_CONTEXTS] + [NUM_UNEVEN_4WAY_PARTS]; +#endif // CONFIG_UNEVEN_4WAY #else unsigned int partition[PARTITION_STRUCTURE_NUM][PARTITION_CONTEXTS] [EXT_PARTITION_TYPES]; @@ -1268,6 +1301,17 @@ unsigned int coeff_lps[TX_SIZES][PLANE_TYPES][BR_CDF_SIZE - 1][LEVEL_CONTEXTS] [2]; unsigned int eob_flag[TX_SIZES][PLANE_TYPES][EOB_COEF_CONTEXTS][2]; +#if CONFIG_ATC_DCTX_ALIGNED + unsigned int coeff_base_bob_multi[TOKEN_CDF_Q_CTXS][SIG_COEF_CONTEXTS_BOB] + [NUM_BASE_LEVELS + 1]; + unsigned int eob_multi16[TOKEN_CDF_Q_CTXS][PLANE_TYPES][EOB_MAX_SYMS - 6]; + unsigned int eob_multi32[TOKEN_CDF_Q_CTXS][PLANE_TYPES][EOB_MAX_SYMS - 5]; + unsigned int eob_multi64[TOKEN_CDF_Q_CTXS][PLANE_TYPES][EOB_MAX_SYMS - 4]; + unsigned int eob_multi128[TOKEN_CDF_Q_CTXS][PLANE_TYPES][EOB_MAX_SYMS - 3]; + unsigned int eob_multi256[TOKEN_CDF_Q_CTXS][PLANE_TYPES][EOB_MAX_SYMS - 2]; + unsigned int eob_multi512[TOKEN_CDF_Q_CTXS][PLANE_TYPES][EOB_MAX_SYMS - 1]; + unsigned int eob_multi1024[TOKEN_CDF_Q_CTXS][PLANE_TYPES][EOB_MAX_SYMS]; +#else unsigned int eob_multi16[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][5]; unsigned int eob_multi32[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][6]; unsigned int 
eob_multi64[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][7]; @@ -1275,7 +1319,8 @@ unsigned int eob_multi256[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][9]; unsigned int eob_multi512[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][10]; unsigned int eob_multi1024[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][11]; -#if CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC_DCTX_ALIGNED +#if CONFIG_ATC unsigned int coeff_lps_lf[PLANE_TYPES][BR_CDF_SIZE - 1][LF_LEVEL_CONTEXTS][2]; unsigned int coeff_base_lf_multi[TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES] [LF_SIG_COEF_CONTEXTS][LF_BASE_SYMBOLS]; @@ -1289,7 +1334,7 @@ #else unsigned int coeff_lps_multi[TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES] [LEVEL_CONTEXTS][BR_CDF_SIZE]; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC #if CONFIG_PAR_HIDING unsigned int coeff_base_ph_multi[TOKEN_CDF_Q_CTXS][COEFF_BASE_PH_CONTEXTS] [NUM_BASE_LEVELS + 2]; @@ -1304,9 +1349,9 @@ unsigned int inter_single_mode[INTER_SINGLE_MODE_CONTEXTS] [INTER_SINGLE_MODES]; unsigned int drl_mode[3][DRL_MODE_CONTEXTS][2]; -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT unsigned int skip_drl_mode[3][2]; -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT #if CONFIG_OPTFLOW_REFINEMENT unsigned int use_optflow[INTER_COMPOUND_MODE_CONTEXTS][2]; unsigned int inter_compound_mode[INTER_COMPOUND_MODE_CONTEXTS] @@ -1338,12 +1383,15 @@ unsigned int motion_mode[BLOCK_SIZES_ALL][MOTION_MODES]; unsigned int obmc[BLOCK_SIZES_ALL][2]; #endif // CONFIG_EXTENDED_WARP_PREDICTION -#if CONFIG_CONTEXT_DERIVATION +#if CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT unsigned int intra_inter[INTRA_INTER_SKIP_TXFM_CONTEXTS][INTRA_INTER_CONTEXTS] [2]; #else unsigned int intra_inter[INTRA_INTER_CONTEXTS][2]; -#endif +#endif // CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT +#if CONFIG_CWP + int8_t cwp_idx[MAX_CWP_NUM - 1][2]; +#endif // CONFIG_CWP #if CONFIG_BAWP unsigned int bawp[2]; #endif // CONFIG_BAWP @@ -1360,7 +1408,7 @@ #else unsigned int intrabc[2]; #endif // 
CONFIG_NEW_CONTEXT_MODELING -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT unsigned int intrabc_mode[2]; unsigned int intrabc_drl_idx[MAX_REF_BV_STACK_SIZE - 1][2]; #endif @@ -1380,7 +1428,12 @@ unsigned int delta_lf_multi[FRAME_LF_COUNT][DELTA_LF_PROBS][2]; unsigned int delta_lf[DELTA_LF_PROBS][2]; +#if CONFIG_ATC_DCTX_ALIGNED + unsigned int inter_ext_tx[EXT_TX_SETS_INTER][EOB_TX_CTXS][EXT_TX_SIZES] + [TX_TYPES]; +#else unsigned int inter_ext_tx[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES]; +#endif // CONFIG_ATC_DCTX_ALIGNED unsigned int intra_ext_tx[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES] [TX_TYPES]; #if CONFIG_CROSS_CHROMA_TX @@ -1625,8 +1678,6 @@ typedef struct RD_COUNTS { int64_t comp_pred_diff[REFERENCE_MODES]; - // Stores number of 4x4 blocks using global motion per reference frame. - int global_motion_used[INTER_REFS_PER_FRAME]; int compound_ref_used_flag; int skip_mode_used_flag; int tx_type_used[TX_SIZES_ALL][TX_TYPES]; @@ -1944,19 +1995,6 @@ */ typedef struct { /*! - * Array to store the cost for signalling each global motion model. - * gmtype_cost[i] stores the cost of signalling the ith Global Motion model. - */ - int type_cost[TRANS_TYPES]; - - /*! - * Array to store the cost for signalling a particular global motion model for - * each reference frame. gmparams_cost[i] stores the cost of signalling global - * motion for the ith reference frame. - */ - int params_cost[INTER_REFS_PER_FRAME]; - - /*! * Flag to indicate if global motion search needs to be rerun. */ bool search_done; @@ -2007,6 +2045,25 @@ * the y co-ordinate of the ith corner point detected. */ int src_corners[2 * MAX_CORNERS]; + +#if CONFIG_IMPROVED_GLOBAL_MOTION + /*! 
+ * \brief Error ratio for each selected global motion model + * + * This is used to help decide which models will actually be used, + * because that decision has to be deferred until we actually select a + * base model to use + */ + double erroradvantage[INTER_REFS_PER_FRAME]; + + /** + * \name Reference path for selected base model + */ + /**@{*/ + int base_model_our_ref; /*!< which of our ref frames to copy from */ + int base_model_their_ref; /*!< which model to copy from that frame */ + /**@}*/ +#endif // CONFIG_IMPROVED_GLOBAL_MOTION } GlobalMotionInfo; /*! @@ -2743,7 +2800,7 @@ /*! * Tables to calculate IntraBC MV cost. */ -#if !CONFIG_FLEX_MVRES && !CONFIG_BVCOST_UPDATE +#if !CONFIG_FLEX_MVRES && !CONFIG_IBC_BV_IMPROVEMENT IntraBCMVCosts dv_costs; #endif @@ -2896,6 +2953,14 @@ * Number of frames left to be encoded, is 0 if limit is not set. */ int frames_left; + + /*! + * Indicates if a valid global motion model has been found in the different + * frame update types of a GF group. + * valid_gm_model_found[i] indicates if valid global motion model has been + * found in the frame update type with enum value equal to i + */ + int valid_gm_model_found[FRAME_UPDATE_TYPES]; } AV1_COMP; /*! @@ -3077,11 +3142,7 @@ int av1_convert_sect5obus_to_annexb(uint8_t *buffer, size_t *input_size); -#if DS_FRAME_LEVEL -void av1_set_downsample_filter_options(AV1_COMP *cpi, FeatureFlags *features); -#else void av1_set_downsample_filter_options(AV1_COMP *cpi); -#endif // DS_FRAME_LEVEl // Set screen content options. // This function estimates whether to use screen content tools, by counting @@ -3289,8 +3350,18 @@ // frame. An exception can be made for a forward keyframe since it has no // previous dependencies. 
static INLINE int encode_show_existing_frame(const AV1_COMMON *cm) {
-  return cm->show_existing_frame && (!cm->features.error_resilient_mode ||
-                                     cm->current_frame.frame_type == KEY_FRAME);
+  if (!cm->show_existing_frame) return 0;
+
+#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+  // When enable_frame_output_order == 1, show_existing_frame can be equal to 1
+  // only for a forward key frame
+  if (cm->seq_params.enable_frame_output_order)
+    return (!cm->features.error_resilient_mode &&
+            cm->current_frame.frame_type == KEY_FRAME);
+  else
+#endif  // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT
+    return (!cm->features.error_resilient_mode ||
+            cm->current_frame.frame_type == KEY_FRAME);
 }
 
 // Get index into the 'cpi->mbmi_ext_info.frame_base' array for the given
@@ -3358,6 +3429,16 @@
   return AOMMIN(max_allowed_refs_for_given_speed, max_reference_frames);
 }
 
+#if CONFIG_SEP_COMP_DRL
+/*!\brief Return whether the current coding block has two separate DRLs,
+ * the mode info is used as input */
+static INLINE int has_second_drl_by_mode(const PREDICTION_MODE mode,
+                                         const MV_REFERENCE_FRAME *ref_frame) {
+  return (mode == NEAR_NEARMV || mode == NEAR_NEWMV) &&
+         !is_tip_ref_frame(ref_frame[0]);
+}
+#endif  // CONFIG_SEP_COMP_DRL
+
 // Enforce the number of references for each arbitrary frame based on user
 // options and speed.
 static AOM_INLINE void enforce_max_ref_frames(AV1_COMP *cpi,
diff --git a/av1/encoder/encoder_alloc.h b/av1/encoder/encoder_alloc.h index 8ae62cd..2b6e702 100644 --- a/av1/encoder/encoder_alloc.h +++ b/av1/encoder/encoder_alloc.h
@@ -80,7 +80,7 @@ #endif // CONFIG_EXT_RECUR_PARTITIONS cpi->td.firstpass_ctx = - av1_alloc_pmc(cm, 0, 0, BLOCK_16X16, NULL, PARTITION_NONE, 0, + av1_alloc_pmc(cm, SHARED_PART, 0, 0, BLOCK_16X16, NULL, PARTITION_NONE, 0, cm->seq_params.subsampling_x, cm->seq_params.subsampling_y, &cpi->td.shared_coeff_buf); }
diff --git a/av1/encoder/encoder_utils.c b/av1/encoder/encoder_utils.c index c6cdc29..9b6842d 100644 --- a/av1/encoder/encoder_utils.c +++ b/av1/encoder/encoder_utils.c
@@ -740,9 +740,6 @@ return AOMMIN(cm->width, cm->height) > 480 ? BLOCK_128X128 : BLOCK_64X64; } #if CONFIG_BLOCK_256 - if (cm->features.allow_intrabc) { - return BLOCK_128X128; - } return AOMMIN(oxcf->frm_dim_cfg.width, oxcf->frm_dim_cfg.height) >= 720 ? BLOCK_256X256 : BLOCK_128X128; @@ -779,9 +776,9 @@ const int frame_width = cm->superres_upscaled_width; const int frame_height = cm->superres_upscaled_height; - av1_set_restoration_unit_size( - frame_width, frame_height, seq_params->subsampling_x, - seq_params->subsampling_y, cm->rst_info, seq_params->sb_size); + set_restoration_unit_size(frame_width, frame_height, + seq_params->subsampling_x, + seq_params->subsampling_y, cm->rst_info); if (old_restoration_unit_size != cm->rst_info[0].restoration_unit_size) { for (int i = 0; i < num_planes; ++i) @@ -1006,28 +1003,6 @@ cpi->sf.part_sf.fixed_partition_size = fixed_partition_block_size_orig; } -#define GM_RECODE_LOOP_NUM4X4_FACTOR 192 -int av1_recode_loop_test_global_motion(WarpedMotionParams *const global_motion, - const int *const global_motion_used, - int *const gm_params_cost) { - int i; - int recode = 0; - for (i = 0; i < INTER_REFS_PER_FRAME; ++i) { - if (global_motion[i].wmtype != IDENTITY && - global_motion_used[i] * GM_RECODE_LOOP_NUM4X4_FACTOR < - gm_params_cost[i]) { - global_motion[i] = default_warp_params; - assert(global_motion[i].wmtype == IDENTITY); - gm_params_cost[i] = 0; - recode = 1; - // TODO(sarahparker): The earlier condition for recoding here was: - // "recode |= (rdc->global_motion_used[i] > 0);". Can we bring something - // similar to that back to speed up global motion? - } - } - return recode; -} - static void fix_interp_filter(InterpFilter *const interp_filter, const FRAME_COUNTS *const counts) { if (*interp_filter == SWITCHABLE) {
diff --git a/av1/encoder/encoder_utils.h b/av1/encoder/encoder_utils.h index 37f5cef..5399797 100644 --- a/av1/encoder/encoder_utils.h +++ b/av1/encoder/encoder_utils.h
@@ -921,7 +921,11 @@ cpi->sf.inter_sf.prune_obmc_prob_thresh > 0) { av1_copy(frame_probs->obmc_probs, default_obmc_probs); } - if (cpi->sf.inter_sf.prune_warped_prob_thresh > 0) { + if (cpi->sf.inter_sf.prune_warped_prob_thresh > 0 +#if CONFIG_CWG_D067_IMPROVED_WARP + || cpi->sf.inter_sf.prune_warpmv_prob_thresh > 0 +#endif // CONFIG_CWG_D067_IMPROVED_WARP + ) { av1_copy(frame_probs->warped_probs, default_warped_probs); } } @@ -1071,10 +1075,6 @@ void av1_determine_sc_tools_with_encoding(AV1_COMP *cpi, const int q_orig); -int av1_recode_loop_test_global_motion(WarpedMotionParams *const global_motion, - const int *const global_motion_used, - int *const gm_params_cost); - void av1_set_size_dependent_vars(AV1_COMP *cpi, int *q, int *bottom_index, int *top_index); @@ -1136,31 +1136,6 @@ } av1_calculate_tile_rows(cm, mi_params->mi_rows, tiles); } - -#define COUPLED_CHROMA_FROM_LUMA_RESTORATION 0 -static AOM_INLINE void av1_set_restoration_unit_size(int width, int height, - int sx, int sy, - RestorationInfo *rst, - BLOCK_SIZE sb_size) { - (void)width; - (void)height; - (void)sx; - (void)sy; -#if COUPLED_CHROMA_FROM_LUMA_RESTORATION - int s = AOMMIN(sx, sy); -#else - int s = 0; -#endif // !COUPLED_CHROMA_FROM_LUMA_RESTORATION - - if (width * height > 352 * 288) - rst[0].restoration_unit_size = RESTORATION_UNITSIZE_MAX; - else - rst[0].restoration_unit_size = - AOMMAX((RESTORATION_UNITSIZE_MAX >> 1), block_size_wide[sb_size]); - rst[1].restoration_unit_size = rst[0].restoration_unit_size >> s; - rst[2].restoration_unit_size = rst[1].restoration_unit_size; -} - #ifdef __cplusplus } // extern "C" #endif
diff --git a/av1/encoder/encodetxb.c b/av1/encoder/encodetxb.c index 1c36d25..f1f51e6 100644 --- a/av1/encoder/encodetxb.c +++ b/av1/encoder/encodetxb.c
@@ -174,11 +174,16 @@ #if CONFIG_ENTROPY_STATS void av1_update_eob_context(int cdf_idx, int eob, TX_SIZE tx_size, - TX_CLASS tx_class, PLANE_TYPE plane, - FRAME_CONTEXT *ec_ctx, FRAME_COUNTS *counts, - uint8_t allow_update_cdf) { +#if !CONFIG_ATC_DCTX_ALIGNED + TX_CLASS tx_class, +#endif // !CONFIG_ATC_DCTX_ALIGNED + PLANE_TYPE plane, FRAME_CONTEXT *ec_ctx, + FRAME_COUNTS *counts, uint8_t allow_update_cdf) { #else -void av1_update_eob_context(int eob, TX_SIZE tx_size, TX_CLASS tx_class, +void av1_update_eob_context(int eob, TX_SIZE tx_size, +#if !CONFIG_ATC_DCTX_ALIGNED + TX_CLASS tx_class, +#endif // !CONFIG_ATC_DCTX_ALIGNED PLANE_TYPE plane, FRAME_CONTEXT *ec_ctx, uint8_t allow_update_cdf) { #endif @@ -187,6 +192,67 @@ TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size); const int eob_multi_size = txsize_log2_minus4[tx_size]; +#if CONFIG_ATC_DCTX_ALIGNED + switch (eob_multi_size) { + case 0: +#if CONFIG_ENTROPY_STATS + ++counts->eob_multi16[cdf_idx][plane][eob_pt - 1]; +#endif + if (allow_update_cdf) + update_cdf(ec_ctx->eob_flag_cdf16[plane], eob_pt - 1, EOB_MAX_SYMS - 6); + break; + case 1: +#if CONFIG_ENTROPY_STATS + ++counts->eob_multi32[cdf_idx][plane][eob_pt - 1]; +#endif + if (allow_update_cdf) + update_cdf(ec_ctx->eob_flag_cdf32[plane], eob_pt - 1, EOB_MAX_SYMS - 5); + break; + case 2: +#if CONFIG_ENTROPY_STATS + ++counts->eob_multi64[cdf_idx][plane][eob_pt - 1]; +#endif + if (allow_update_cdf) + update_cdf(ec_ctx->eob_flag_cdf64[plane], eob_pt - 1, EOB_MAX_SYMS - 4); + break; + case 3: +#if CONFIG_ENTROPY_STATS + ++counts->eob_multi128[cdf_idx][plane][eob_pt - 1]; +#endif + if (allow_update_cdf) { + update_cdf(ec_ctx->eob_flag_cdf128[plane], eob_pt - 1, + EOB_MAX_SYMS - 3); + } + break; + case 4: +#if CONFIG_ENTROPY_STATS + ++counts->eob_multi256[cdf_idx][plane][eob_pt - 1]; +#endif + if (allow_update_cdf) { + update_cdf(ec_ctx->eob_flag_cdf256[plane], eob_pt - 1, + EOB_MAX_SYMS - 2); + } + break; + case 5: +#if CONFIG_ENTROPY_STATS + 
++counts->eob_multi512[cdf_idx][plane][eob_pt - 1]; +#endif + if (allow_update_cdf) { + update_cdf(ec_ctx->eob_flag_cdf512[plane], eob_pt - 1, + EOB_MAX_SYMS - 1); + } + break; + case 6: + default: +#if CONFIG_ENTROPY_STATS + ++counts->eob_multi1024[cdf_idx][plane][eob_pt - 1]; +#endif + if (allow_update_cdf) { + update_cdf(ec_ctx->eob_flag_cdf1024[plane], eob_pt - 1, EOB_MAX_SYMS); + } + break; + } +#else const int eob_multi_ctx = (tx_class == TX_CLASS_2D) ? 0 : 1; switch (eob_multi_size) { @@ -249,6 +315,7 @@ } break; } +#endif // CONFIG_ATC_DCTX_ALIGNED if (av1_eob_offset_bits[eob_pt] > 0) { int eob_ctx = eob_pt - 3; @@ -263,13 +330,21 @@ } static int get_eob_cost(int eob, const LV_MAP_EOB_COST *txb_eob_costs, - const LV_MAP_COEFF_COST *txb_costs, TX_CLASS tx_class) { + const LV_MAP_COEFF_COST *txb_costs +#if !CONFIG_ATC_DCTX_ALIGNED + , + TX_CLASS tx_class +#endif // !CONFIG_ATC_DCTX_ALIGNED +) { int eob_extra; const int eob_pt = get_eob_pos_token(eob, &eob_extra); int eob_cost = 0; +#if CONFIG_ATC_DCTX_ALIGNED + eob_cost = txb_eob_costs->eob_cost[eob_pt - 1]; +#else const int eob_multi_ctx = (tx_class == TX_CLASS_2D) ? 0 : 1; eob_cost = txb_eob_costs->eob_cost[eob_multi_ctx][eob_pt - 1]; - +#endif // CONFIG_ATC_DCTX_ALIGNED if (av1_eob_offset_bits[eob_pt] > 0) { const int eob_ctx = eob_pt - 3; const int eob_shift = av1_eob_offset_bits[eob_pt] - 1; @@ -301,7 +376,7 @@ return 0; } -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC // Golomb cost of coding bypass coded level values in the // low-frequency region. 
static INLINE int get_golomb_cost_lf(int abs_qc) { @@ -342,7 +417,7 @@ } return coeff_lps[base_range] + golomb_bits; } -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC static INLINE int get_br_cost_with_diff(tran_low_t level, const int *coeff_lps, int *diff) { @@ -374,23 +449,23 @@ const int coeff_idx, const int bwl, const int height, const int scan_idx, const int is_eob, -#if !CONFIG_ATC_COEFCODING +#if !CONFIG_ATC const TX_SIZE tx_size, -#endif // !CONFIG_ATC_COEFCODING +#endif // !CONFIG_ATC const TX_CLASS tx_class -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC , const int plane) { #else ) { -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC if (is_eob) { if (scan_idx == 0) return 0; if (scan_idx <= (height << bwl) / 8) return 1; if (scan_idx <= (height << bwl) / 4) return 2; return 3; } -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC int stats = 0; const int row = coeff_idx >> bwl; const int col = coeff_idx - (row << bwl); @@ -407,11 +482,20 @@ const int stats = get_nz_mag(levels + get_padded_idx(coeff_idx, bwl), bwl, tx_class); return get_nz_map_ctx_from_stats(stats, coeff_idx, bwl, tx_size, tx_class); -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC } static INLINE int get_nz_map_ctx_skip(const uint8_t *const levels, +#if CONFIG_ATC_DCTX_ALIGNED + const int height, const int scan_idx, + const int is_bob, const int coeff_idx, + const int bwl) { + if (is_bob) { + return get_lower_levels_ctx_bob(bwl, height, scan_idx); + } +#else const int coeff_idx, const int bwl) { +#endif // CONFIG_ATC_DCTX_ALIGNED const int stats = get_nz_mag_skip(levels + get_padded_idx_left(coeff_idx, bwl), bwl); return get_nz_map_ctx_from_stats_skip(stats, coeff_idx, bwl); @@ -492,39 +576,114 @@ const int16_t *const scan, const uint16_t eob, const TX_SIZE tx_size, const TX_CLASS tx_class, int8_t *const coeff_contexts -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC , const int plane) { #else ) { -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC const int bwl = get_txb_bwl(tx_size); 
const int height = get_txb_high(tx_size); for (int i = 0; i < eob; ++i) { const int pos = scan[i]; coeff_contexts[pos] = get_nz_map_ctx(levels, pos, bwl, height, i, i == eob - 1, -#if !CONFIG_ATC_COEFCODING +#if !CONFIG_ATC tx_size, -#endif // !CONFIG_ATC_COEFCODING +#endif // !CONFIG_ATC tx_class -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC , plane); #else ); -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC } } +#if CONFIG_ATC_DCTX_ALIGNED +// Encodes the EOB syntax in the bitstream. +static INLINE void code_eob(MACROBLOCK *const x, aom_writer *w, int plane, + TX_SIZE tx_size, const int eob) { + MACROBLOCKD *xd = &x->e_mbd; + FRAME_CONTEXT *ec_ctx = xd->tile_ctx; + const PLANE_TYPE plane_type = get_plane_type(plane); + const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size); + + int eob_extra; + const int eob_pt = get_eob_pos_token(eob, &eob_extra); + const int eob_multi_size = txsize_log2_minus4[tx_size]; + switch (eob_multi_size) { + case 0: + aom_write_symbol(w, eob_pt - 1, ec_ctx->eob_flag_cdf16[plane_type], + EOB_MAX_SYMS - 6); + break; + case 1: + aom_write_symbol(w, eob_pt - 1, ec_ctx->eob_flag_cdf32[plane_type], + EOB_MAX_SYMS - 5); + break; + case 2: + aom_write_symbol(w, eob_pt - 1, ec_ctx->eob_flag_cdf64[plane_type], + EOB_MAX_SYMS - 4); + break; + case 3: + aom_write_symbol(w, eob_pt - 1, ec_ctx->eob_flag_cdf128[plane_type], + EOB_MAX_SYMS - 3); + break; + case 4: + aom_write_symbol(w, eob_pt - 1, ec_ctx->eob_flag_cdf256[plane_type], + EOB_MAX_SYMS - 2); + break; + case 5: + aom_write_symbol(w, eob_pt - 1, ec_ctx->eob_flag_cdf512[plane_type], + EOB_MAX_SYMS - 1); + break; + default: + aom_write_symbol(w, eob_pt - 1, ec_ctx->eob_flag_cdf1024[plane_type], + EOB_MAX_SYMS); + break; + } + const int eob_offset_bits = av1_eob_offset_bits[eob_pt]; + if (eob_offset_bits > 0) { + const int eob_ctx = eob_pt - 3; + int eob_shift = eob_offset_bits - 1; + int bit = (eob_extra & (1 << eob_shift)) ? 
1 : 0; + aom_write_symbol(w, bit, + ec_ctx->eob_extra_cdf[txs_ctx][plane_type][eob_ctx], 2); +#if CONFIG_BYPASS_IMPROVEMENT + // Zero out top bit; write (eob_offset_bits - 1) lsb bits. + eob_extra &= (1 << (eob_offset_bits - 1)) - 1; + aom_write_literal(w, eob_extra, eob_offset_bits - 1); +#else + for (int i = 1; i < eob_offset_bits; i++) { + eob_shift = eob_offset_bits - 1 - i; + bit = (eob_extra & (1 << eob_shift)) ? 1 : 0; + aom_write_bit(w, bit); + } +#endif + } +} +#endif // CONFIG_ATC_DCTX_ALIGNED + void av1_get_nz_map_contexts_skip_c(const uint8_t *const levels, const int16_t *const scan, +#if CONFIG_ATC_DCTX_ALIGNED + const uint16_t bob, +#endif // CONFIG_ATC_DCTX_ALIGNED const uint16_t eob, const TX_SIZE tx_size, int8_t *const coeff_contexts) { const int bwl = get_txb_bwl(tx_size); +#if CONFIG_ATC_DCTX_ALIGNED + const int height = get_txb_high(tx_size); + for (int i = bob; i < eob; ++i) { + const int pos = scan[i]; + coeff_contexts[pos] = + get_nz_map_ctx_skip(levels, height, i, bob == i, pos, bwl); +#else for (int i = 0; i < eob; ++i) { const int pos = scan[i]; coeff_contexts[pos] = get_nz_map_ctx_skip(levels, pos, bwl); +#endif // CONFIG_ATC_DCTX_ALIGNED } } @@ -545,6 +704,10 @@ const uint16_t *eob_txb = cb_coef_buff->eobs[plane] + txb_offset; const uint16_t eob = eob_txb[block]; +#if CONFIG_ATC_DCTX_ALIGNED + const uint16_t *bob_txb = cb_coef_buff->bobs[plane] + txb_offset; + const uint16_t bob_code = bob_txb[block]; +#endif // CONFIG_ATC_DCTX_ALIGNED const uint8_t *entropy_ctx = cb_coef_buff->entropy_ctx[plane] + txb_offset; #if CONFIG_CONTEXT_DERIVATION @@ -563,6 +726,12 @@ const TX_TYPE tx_type = av1_get_tx_type(xd, plane_type, blk_row, blk_col, tx_size, cm->features.reduced_tx_set_used); +#if CONFIG_ATC_DCTX_ALIGNED + const int is_inter = is_inter_block(xd->mi[0], xd->tree_type); + const int is_fsc = (xd->mi[0]->fsc_mode[xd->tree_type == CHROMA_PART] && + plane == PLANE_TYPE_Y) || + use_inter_fsc(cm, plane, tx_type, is_inter); +#endif // 
CONFIG_ATC_DCTX_ALIGNED #if CONFIG_CROSS_CHROMA_TX && CCTX_C2_DROPPED if (plane == AOM_PLANE_V && is_cctx_allowed(cm, xd)) { @@ -585,17 +754,32 @@ aom_write_symbol(w, eob == 0, ec_ctx->txb_skip_cdf[txs_ctx][txb_skip_ctx], 2); #endif // CONFIG_CONTEXT_DERIVATION -#if CONFIG_CROSS_CHROMA_TX +#if CONFIG_CROSS_CHROMA_TX && !CONFIG_ATC_DCTX_ALIGNED if (plane == AOM_PLANE_U && is_cctx_allowed(cm, xd)) { CctxType cctx_type = av1_get_cctx_type(xd, blk_row, blk_col); if (eob > 0) av1_write_cctx_type(cm, xd, cctx_type, tx_size, w); } -#endif // CONFIG_CROSS_CHROMA_TX +#endif // CONFIG_CROSS_CHROMA_TX && !CONFIG_ATC_DCTX_ALIGNED if (eob == 0) return 0; +#if CONFIG_ATC_DCTX_ALIGNED + int esc_eob = is_fsc ? bob_code : eob; + const int dc_skip = (eob == 1) && !is_inter; + code_eob(x, w, plane, tx_size, esc_eob); + av1_write_tx_type(cm, xd, tx_type, tx_size, w, plane, esc_eob, dc_skip); +#if CONFIG_CROSS_CHROMA_TX + if (plane == AOM_PLANE_U && is_cctx_allowed(cm, xd)) { + const int skip_cctx = is_inter ? 
0 : (eob == 1); + CctxType cctx_type = av1_get_cctx_type(xd, blk_row, blk_col); + if (eob > 0 && !skip_cctx) + av1_write_cctx_type(cm, xd, cctx_type, tx_size, w); + } +#endif // CONFIG_CROSS_CHROMA_TX +#else if (plane == 0) { // Only y plane's tx_type is transmitted av1_write_tx_type(cm, xd, tx_type, tx_size, w); } +#endif return 1; } @@ -625,15 +809,39 @@ const int bwl = get_txb_bwl(tx_size); DECLARE_ALIGNED(16, int8_t, coeff_contexts[MAX_TX_SQUARE]); +#if CONFIG_ATC_DCTX_ALIGNED + const int txb_offset = + x->mbmi_ext_frame->cb_offset[plane] / (TX_SIZE_W_MIN * TX_SIZE_H_MIN); + const uint16_t *bob_txb = cb_coef_buff->bobs[plane] + txb_offset; + const int bob_code = bob_txb[block]; + int bob = av1_get_max_eob(tx_size) - bob_code; + av1_get_nz_map_contexts_skip_c(levels, scan, bob, eob, tx_size, + coeff_contexts); +#else av1_get_nz_map_contexts_skip(levels, scan, eob, tx_size, coeff_contexts); +#endif // CONFIG_ATC_DCTX_ALIGNED +#if CONFIG_ATC_DCTX_ALIGNED + for (int c = bob; c < eob; ++c) { +#else for (int c = 0; c < eob; ++c) { +#endif // CONFIG_ATC_DCTX_ALIGNED const int pos = scan[c]; const int coeff_ctx = coeff_contexts[pos]; const tran_low_t v = tcoeff[pos]; const tran_low_t level = abs(v); +#if CONFIG_ATC_DCTX_ALIGNED + if (c == bob) { + aom_write_symbol(w, AOMMIN(level, 3) - 1, + ec_ctx->coeff_base_bob_cdf[coeff_ctx], 3); + } else { + aom_write_symbol(w, AOMMIN(level, 3), + ec_ctx->coeff_base_cdf_idtx[coeff_ctx], 4); + } +#else aom_write_symbol(w, AOMMIN(level, 3), ec_ctx->coeff_base_cdf_idtx[coeff_ctx], 4); +#endif // CONFIG_ATC_DCTX_ALIGNED if (level > NUM_BASE_LEVELS) { // level is above 1. 
const int base_range = level - 1 - NUM_BASE_LEVELS; @@ -705,8 +913,11 @@ const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size); FRAME_CONTEXT *ec_ctx = xd->tile_ctx; #if CONFIG_PC_WIENER - assert((eob == 0) == - av1_get_txk_skip(cm, xd->mi_row, xd->mi_col, plane, blk_row, blk_col)); + if (!is_global_intrabc_allowed(cm) && !cm->features.coded_lossless) { + // Assert only when LR is enabled. + assert((eob == 0) == av1_get_txk_skip(cm, xd->mi_row, xd->mi_col, plane, + blk_row, blk_col)); + } #endif // CONFIG_PC_WIENER if (eob == 0) return; @@ -719,7 +930,7 @@ fprintf(cm->fEncCoeffLog, "\nblk_row=%d,blk_col=%d,plane=%d,tx_size=%d", blk_row, blk_col, plane, tx_size); #endif - +#if !CONFIG_ATC_DCTX_ALIGNED int eob_extra; const int eob_pt = get_eob_pos_token(eob, &eob_extra); const int eob_multi_size = txsize_log2_minus4[tx_size]; @@ -775,10 +986,17 @@ } #endif } +#else + const TX_CLASS tx_class = tx_type_to_class[get_primary_tx_type(tx_type)]; +#endif // !CONFIG_ATC_DCTX_ALIGNED // write sec_tx_type here // Only y plane's sec_tx_type is transmitted +#if CONFIG_ATC_DCTX_ALIGNED + if ((plane == AOM_PLANE_Y) && (cm->seq_params.enable_ist) && eob != 1) { +#else if ((plane == AOM_PLANE_Y) && (cm->seq_params.enable_ist)) { +#endif // CONFIG_ATC_DCTX_ALIGNED av1_write_sec_tx_type(cm, xd, tx_type, tx_size, eob, w); } @@ -800,10 +1018,10 @@ const int16_t *const scan = scan_order->scan; DECLARE_ALIGNED(16, int8_t, coeff_contexts[MAX_TX_SQUARE]); av1_get_nz_map_contexts(levels, scan, eob, tx_size, tx_class, coeff_contexts -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC , plane -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC ); const int bwl = get_txb_bwl(tx_size); @@ -822,7 +1040,7 @@ const tran_low_t v = tcoeff[pos]; const tran_low_t level = abs(v); -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC if (c == eob - 1) { const int row = pos >> bwl; const int col = pos - (row << bwl); @@ -860,9 +1078,9 @@ aom_write_symbol(w, AOMMIN(level, 3), 
ec_ctx->coeff_base_cdf[txs_ctx][plane_type][coeff_ctx], 4); -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC } -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC const int row = pos >> bwl; const int col = pos - (row << bwl); int limits = get_lf_limits(row, col, tx_class, plane); @@ -903,7 +1121,7 @@ if (k < BR_CDF_SIZE - 1) break; } } -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC } #if CONFIG_PAR_HIDING @@ -929,7 +1147,7 @@ const tran_low_t v = tcoeff[pos]; const tran_low_t level = abs(v); -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC if (c == eob - 1) { const int row = pos >> bwl; const int col = pos - (row << bwl); @@ -967,9 +1185,9 @@ aom_write_symbol(w, AOMMIN(level, 3), ec_ctx->coeff_base_cdf[txs_ctx][plane_type][coeff_ctx], 4); -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC } -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC const int row = pos >> bwl; const int col = pos - (row << bwl); int limits = get_lf_limits(row, col, tx_class, plane); @@ -1010,7 +1228,7 @@ if (k < BR_CDF_SIZE - 1) break; } } -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC } #endif // CONFIG_PAR_HIDING @@ -1059,7 +1277,7 @@ #endif // CONFIG_CONTEXT_DERIVATION } #if CONFIG_PAR_HIDING -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC if (is_hidden && c == 0) { int q_index = level >> 1; if (q_index > COEFF_BASE_RANGE + NUM_BASE_LEVELS) @@ -1086,9 +1304,9 @@ if (level > COEFF_BASE_RANGE + NUM_BASE_LEVELS) write_golomb(w, level - COEFF_BASE_RANGE - 1 - NUM_BASE_LEVELS); } -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC #else -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC const int pos = scan[c]; const int row = pos >> bwl; const int col = pos - (row << bwl); @@ -1103,7 +1321,7 @@ #else if (level > COEFF_BASE_RANGE + NUM_BASE_LEVELS) write_golomb(w, level - COEFF_BASE_RANGE - 1 - NUM_BASE_LEVELS); -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC #endif // CONFIG_PAR_HIDING } } @@ -1202,8 +1420,17 @@ int get_cctx_type_cost(const AV1_COMMON *cm, const MACROBLOCK *x, const 
MACROBLOCKD *xd, int plane, TX_SIZE tx_size, int block, CctxType cctx_type) { +#if CONFIG_ATC_DCTX_ALIGNED + const int skip_cctx = is_inter_block(xd->mi[0], xd->tree_type) + ? 0 + : (x->plane[plane].eobs[block] == 1); +#endif // CONFIG_ATC_DCTX_ALIGNED if (plane == AOM_PLANE_U && x->plane[plane].eobs[block] && - is_cctx_allowed(cm, xd)) { + is_cctx_allowed(cm, xd) +#if CONFIG_ATC_DCTX_ALIGNED + && !skip_cctx +#endif // CONFIG_ATC_DCTX_ALIGNED + ) { const TX_SIZE square_tx_size = txsize_sqr_map[tx_size]; int above_cctx, left_cctx; #if CONFIG_EXT_RECUR_PARTITIONS @@ -1222,7 +1449,12 @@ // TODO(angiebird): use this function whenever it's possible static int get_tx_type_cost(const MACROBLOCK *x, const MACROBLOCKD *xd, int plane, TX_SIZE tx_size, TX_TYPE tx_type, - int reduced_tx_set_used, int eob) { + int reduced_tx_set_used, int eob +#if CONFIG_ATC_DCTX_ALIGNED + , + int bob_code, int is_fsc +#endif // CONFIG_ATC_DCTX_ALIGNED +) { if (plane > 0) return 0; const TX_SIZE square_tx_size = txsize_sqr_map[tx_size]; @@ -1239,8 +1471,18 @@ get_ext_tx_set(tx_size, is_inter, reduced_tx_set_used); if (is_inter) { if (ext_tx_set > 0) +#if CONFIG_ATC_DCTX_ALIGNED + { + const int esc_eob = is_fsc ? 
bob_code : eob; + const int eob_tx_ctx = + get_lp2tx_ctx(tx_size, get_txb_bwl(tx_size), esc_eob); + return x->mode_costs.inter_tx_type_costs[ext_tx_set][eob_tx_ctx] + [square_tx_size][tx_type]; + } +#else return x->mode_costs .inter_tx_type_costs[ext_tx_set][square_tx_size][tx_type]; +#endif // CONFIG_ATC_DCTX_ALIGNED } else { if (ext_tx_set > 0) { PREDICTION_MODE intra_dir; @@ -1250,6 +1492,20 @@ else intra_dir = mbmi->mode; TX_TYPE primary_tx_type = get_primary_tx_type(tx_type); +#if CONFIG_ATC_DCTX_ALIGNED + int tx_type_cost = 0; + if (eob != 1) { + tx_type_cost = + x->mode_costs.intra_tx_type_costs[ext_tx_set][square_tx_size] + [intra_dir][primary_tx_type]; + } + if (block_signals_sec_tx_type(xd, tx_size, tx_type, eob) && + xd->enable_ist) { + tx_type_cost += + x->mode_costs.stx_flag_cost[square_tx_size] + [get_secondary_tx_type(tx_type)]; + } +#else int tx_type_cost = x->mode_costs.intra_tx_type_costs[ext_tx_set][square_tx_size] [intra_dir][primary_tx_type]; @@ -1259,6 +1515,7 @@ x->mode_costs.stx_flag_cost[square_tx_size] [get_secondary_tx_type(tx_type)]; } +#endif // CONFIG_ATC_DCTX_ALIGNED return tx_type_cost; } } @@ -1304,9 +1561,9 @@ } static AOM_FORCE_INLINE int warehouse_efficients_txb_skip( -#if CONFIG_CROSS_CHROMA_TX +#if CONFIG_CROSS_CHROMA_TX || CONFIG_ATC_DCTX_ALIGNED const AV1_COMMON *cm, -#endif // CONFIG_CROSS_CHROMA_TX +#endif // CONFIG_CROSS_CHROMA_TX || CONFIG_ATC_DCTX_ALIGNED const MACROBLOCK *x, const int plane, const int block, const TX_SIZE tx_size, const TXB_CTX *const txb_ctx, const struct macroblock_plane *p, const int eob, @@ -1329,22 +1586,61 @@ int8_t signs_buf[TX_PAD_2D]; int8_t *const signs = set_signs(signs_buf, width); av1_txb_init_levels_signs(qcoeff, width, height, levels_buf, signs_buf); - cost += get_tx_type_cost(x, xd, plane, tx_size, tx_type, reduced_tx_set_used, - eob); +#if CONFIG_ATC_DCTX_ALIGNED + const int bob_code = p->bobs[block]; + const int bob = av1_get_max_eob(tx_size) - bob_code; + const int is_inter = 
is_inter_block(xd->mi[0], xd->tree_type); + const int is_fsc = (xd->mi[0]->fsc_mode[xd->tree_type == CHROMA_PART] && + plane == PLANE_TYPE_Y) || + use_inter_fsc(cm, plane, tx_type, is_inter); +#endif // CONFIG_ATC_DCTX_ALIGNED + cost += + get_tx_type_cost(x, xd, plane, tx_size, tx_type, reduced_tx_set_used, eob +#if CONFIG_ATC_DCTX_ALIGNED + , + bob_code, is_fsc +#endif // CONFIG_ATC_DCTX_ALIGNED + ); + +#if CONFIG_ATC_DCTX_ALIGNED + const int eob_multi_size = txsize_log2_minus4[tx_size]; + const LV_MAP_EOB_COST *const eob_costs = + &x->coeff_costs.eob_costs[eob_multi_size][PLANE_TYPE_Y]; + cost += get_eob_cost(bob_code, eob_costs, coeff_costs); +#endif // CONFIG_ATC_DCTX_ALIGNED + #if CONFIG_CROSS_CHROMA_TX cost += get_cctx_type_cost(cm, x, xd, plane, tx_size, block, cctx_type); #endif // CONFIG_CROSS_CHROMA_TX DECLARE_ALIGNED(16, int8_t, coeff_contexts[MAX_TX_SQUARE]); +#if CONFIG_ATC_DCTX_ALIGNED + av1_get_nz_map_contexts_skip_c(levels, scan, bob, eob, tx_size, + coeff_contexts); +#else av1_get_nz_map_contexts_skip(levels, scan, eob, tx_size, coeff_contexts); +#endif // CONFIG_ATC_DCTX_ALIGNED const int(*lps_cost)[COEFF_BASE_RANGE + 1 + COEFF_BASE_RANGE + 1] = coeff_costs->lps_cost_skip; const int(*base_cost)[8] = coeff_costs->idtx_base_cost; + +#if CONFIG_ATC_DCTX_ALIGNED + for (int c = bob; c < eob; c++) { +#else for (int c = 0; c < eob; c++) { +#endif // CONFIG_ATC_DCTX_ALIGNED const int pos = scan[c]; const int coeff_ctx = coeff_contexts[pos]; const tran_low_t v = qcoeff[pos]; const int level = abs(v); +#if CONFIG_ATC_DCTX_ALIGNED + if (c == bob) { + cost += coeff_costs->base_bob_cost[coeff_ctx][AOMMIN(level, 3) - 1]; + } else { + cost += base_cost[coeff_ctx][AOMMIN(level, 3)]; + } +#else cost += base_cost[coeff_ctx][AOMMIN(level, 3)]; +#endif // CONFIG_ATC_DCTX_ALIGNED if (v) { if (level > NUM_BASE_LEVELS) { const int ctx = get_br_ctx_skip(levels, pos, bwl); @@ -1352,7 +1648,11 @@ } } } +#if CONFIG_ATC_DCTX_ALIGNED + for (int c = eob - 1; c >= bob; --c) { 
+#else for (int c = eob - 1; c >= 0; --c) { +#endif // CONFIG_ATC_DCTX_ALIGNED const int pos = scan[c]; const tran_low_t v = qcoeff[pos]; const tran_low_t level = abs(v); @@ -1366,9 +1666,9 @@ } static AOM_FORCE_INLINE int warehouse_efficients_txb( -#if CONFIG_CROSS_CHROMA_TX +#if CONFIG_CROSS_CHROMA_TX || CONFIG_ATC_DCTX_ALIGNED const AV1_COMMON *cm, -#endif // CONFIG_CROSS_CHROMA_TX +#endif // CONFIG_CROSS_CHROMA_TX || CONFIG_ATC_DCTX_ALIGNED const MACROBLOCK *x, const int plane, const int block, const TX_SIZE tx_size, const TXB_CTX *const txb_ctx, const struct macroblock_plane *p, const int eob, @@ -1417,27 +1717,43 @@ av1_txb_init_levels(qcoeff, width, height, levels); - cost += get_tx_type_cost(x, xd, plane, tx_size, tx_type, reduced_tx_set_used, - eob); +#if CONFIG_ATC_DCTX_ALIGNED + const int bob_code = p->bobs[block]; + const int is_inter = is_inter_block(xd->mi[0], xd->tree_type); + const int is_fsc = (xd->mi[0]->fsc_mode[xd->tree_type == CHROMA_PART] && + plane == PLANE_TYPE_Y) || + use_inter_fsc(cm, plane, tx_type, is_inter); +#endif // CONFIG_ATC_DCTX_ALIGNED + + cost += + get_tx_type_cost(x, xd, plane, tx_size, tx_type, reduced_tx_set_used, eob +#if CONFIG_ATC_DCTX_ALIGNED + , + bob_code, is_fsc +#endif // CONFIG_ATC_DCTX_ALIGNED + ); #if CONFIG_CROSS_CHROMA_TX cost += get_cctx_type_cost(cm, x, xd, plane, tx_size, block, cctx_type); #endif // CONFIG_CROSS_CHROMA_TX - +#if CONFIG_ATC_DCTX_ALIGNED + cost += get_eob_cost(eob, eob_costs, coeff_costs); +#else cost += get_eob_cost(eob, eob_costs, coeff_costs, tx_class); +#endif // CONFIG_ATC_DCTX_ALIGNED av1_get_nz_map_contexts(levels, scan, eob, tx_size, tx_class, coeff_contexts -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC , plane -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC ); const int(*lps_cost)[COEFF_BASE_RANGE + 1 + COEFF_BASE_RANGE + 1] = coeff_costs->lps_cost; -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC const int(*lps_lf_cost)[COEFF_BASE_RANGE + 1 + COEFF_BASE_RANGE + 1] = 
coeff_costs->lps_lf_cost; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC int c = eob - 1; { const int pos = scan[c]; @@ -1445,7 +1761,7 @@ const int sign = AOMSIGN(v); const int level = (v ^ sign) - sign; const int coeff_ctx = coeff_contexts[pos]; -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC const int row = pos >> bwl; const int col = pos - (row << bwl); int limits = get_lf_limits(row, col, tx_class, plane); @@ -1458,11 +1774,11 @@ } #else cost += coeff_costs->base_eob_cost[coeff_ctx][AOMMIN(level, 3) - 1]; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC if (v) { // sign bit cost -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC if (limits) { if (level > LF_NUM_BASE_LEVELS) { const int ctx = get_br_ctx_lf_eob(pos, tx_class); @@ -1479,7 +1795,7 @@ const int ctx = get_br_ctx_eob(pos, bwl, tx_class); cost += get_br_cost(level, lps_cost[ctx]); } -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC if (c) { #if CONFIG_CONTEXT_DERIVATION if (plane == AOM_PLANE_V) { @@ -1508,16 +1824,16 @@ } } } -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC const int(*base_lf_cost)[LF_BASE_SYMBOLS * 2] = coeff_costs->base_lf_cost; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC const int(*base_cost)[8] = coeff_costs->base_cost; for (c = eob - 2; c >= 1; --c) { const int pos = scan[c]; const int coeff_ctx = coeff_contexts[pos]; const tran_low_t v = qcoeff[pos]; const int level = abs(v); -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC const int row = pos >> bwl; const int col = pos - (row << bwl); int limits = get_lf_limits(row, col, tx_class, plane); @@ -1528,7 +1844,7 @@ } #else cost += base_cost[coeff_ctx][AOMMIN(level, 3)]; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC if (v) { // sign bit cost #if CONFIG_CONTEXT_DERIVATION @@ -1542,7 +1858,7 @@ #else cost += av1_cost_literal(1); #endif // CONFIG_CONTEXT_DERIVATION -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC if (limits) { if (level > LF_NUM_BASE_LEVELS) { const int ctx = get_br_lf_ctx(levels, pos, bwl, tx_class); @@ 
-1559,7 +1875,7 @@ const int ctx = get_br_ctx(levels, pos, bwl, tx_class); cost += get_br_cost(level, lps_cost[ctx]); } -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC } } // c == 0 after previous loop @@ -1595,7 +1911,7 @@ const int coeff_ctx = coeff_contexts[pos]; const int sign = AOMSIGN(v); const int level = (v ^ sign) - sign; -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC const int row = pos >> bwl; const int col = pos - (row << bwl); int limits = get_lf_limits(row, col, tx_class, plane); @@ -1606,7 +1922,7 @@ } #else cost += base_cost[coeff_ctx][AOMMIN(level, 3)]; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC if (v) { // sign bit cost @@ -1622,7 +1938,7 @@ #else cost += coeff_costs->dc_sign_cost[dc_sign_ctx][sign01]; #endif // CONFIG_CONTEXT_DERIVATION -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC if (limits) { if (level > LF_NUM_BASE_LEVELS) { const int ctx = get_br_lf_ctx(levels, pos, bwl, tx_class); @@ -1639,7 +1955,7 @@ const int ctx = get_br_ctx(levels, pos, bwl, tx_class); cost += get_br_cost(level, lps_cost[ctx]); } -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC } } return cost; @@ -1653,7 +1969,10 @@ #if CONFIG_CROSS_CHROMA_TX const CctxType cctx_type, #endif // CONFIG_CROSS_CHROMA_TX - const TX_CLASS tx_class, int reduced_tx_set_used) { +#if !CONFIG_ATC_DCTX_ALIGNED + const TX_CLASS tx_class, +#endif // !CONFIG_ATC_DCTX_ALIGNED + int reduced_tx_set_used) { #if CONFIG_CONTEXT_DERIVATION int txb_skip_ctx = txb_ctx->txb_skip_ctx; if (plane == AOM_PLANE_V) { @@ -1678,8 +1997,21 @@ int cost = coeff_costs->txb_skip_cost[txb_skip_ctx][0]; #endif // CONFIG_CONTEXT_DERIVATION - cost += get_tx_type_cost(x, xd, plane, tx_size, tx_type, reduced_tx_set_used, - eob); +#if CONFIG_ATC_DCTX_ALIGNED + const int bob_code = x->plane[plane].bobs[block]; + const int is_inter = is_inter_block(xd->mi[0], xd->tree_type); + const int is_fsc = (xd->mi[0]->fsc_mode[xd->tree_type == CHROMA_PART] && + plane == PLANE_TYPE_Y) || + use_inter_fsc(cm, plane, tx_type, 
is_inter); +#endif // CONFIG_ATC_DCTX_ALIGNED + + cost += + get_tx_type_cost(x, xd, plane, tx_size, tx_type, reduced_tx_set_used, eob +#if CONFIG_ATC_DCTX_ALIGNED + , + bob_code, is_fsc +#endif // CONFIG_ATC_DCTX_ALIGNED + ); #if CONFIG_CROSS_CHROMA_TX cost += get_cctx_type_cost(cm, x, xd, plane, tx_size, block, cctx_type); #endif // CONFIG_CROSS_CHROMA_TX @@ -1691,7 +2023,11 @@ cost += av1_cost_coeffs_txb_skip_estimate(x, plane, block, tx_size, tx_type); } else { +#if CONFIG_ATC_DCTX_ALIGNED + cost += get_eob_cost(eob, eob_costs, coeff_costs); +#else cost += get_eob_cost(eob, eob_costs, coeff_costs, tx_class); +#endif // CONFIG_ATC_DCTX_ALIGNED cost += av1_cost_coeffs_txb_estimate(x, plane, block, tx_size, tx_type); } return cost; @@ -1816,9 +2152,9 @@ get_primary_tx_type(tx_type) == IDTX && plane == PLANE_TYPE_Y) || use_inter_fsc(cm, plane, tx_type, is_inter_block(mbmi, xd->tree_type))) { return warehouse_efficients_txb_skip( -#if CONFIG_CROSS_CHROMA_TX +#if CONFIG_CROSS_CHROMA_TX || CONFIG_ATC_DCTX_ALIGNED cm, -#endif // CONFIG_CROSS_CHROMA_TX +#endif // CONFIG_CROSS_CHROMA_TX || CONFIG_ATC_DCTX_ALIGNED x, plane, block, tx_size, txb_ctx, p, eob, coeff_costs, xd, tx_type, #if CONFIG_CROSS_CHROMA_TX cctx_type, @@ -1826,9 +2162,9 @@ reduced_tx_set_used); } else { return warehouse_efficients_txb( -#if CONFIG_CROSS_CHROMA_TX +#if CONFIG_CROSS_CHROMA_TX || CONFIG_ATC_DCTX_ALIGNED cm, -#endif // CONFIG_CROSS_CHROMA_TX +#endif // CONFIG_CROSS_CHROMA_TX || CONFIG_ATC_DCTX_ALIGNED x, plane, block, tx_size, txb_ctx, p, eob, plane_type, coeff_costs, xd, tx_type, #if CONFIG_CROSS_CHROMA_TX @@ -1892,7 +2228,9 @@ return skip_cost; } +#if !CONFIG_ATC_DCTX_ALIGNED const TX_CLASS tx_class = tx_type_to_class[get_primary_tx_type(tx_type)]; +#endif // !CONFIG_ATC_DCTX_ALIGNED return warehouse_efficients_txb_laplacian(cm, x, plane, block, tx_size, txb_ctx, eob, plane_type, @@ -1900,20 +2238,23 @@ #if CONFIG_CROSS_CHROMA_TX cctx_type, #endif // CONFIG_CROSS_CHROMA_TX - tx_class, 
reduced_tx_set_used); +#if !CONFIG_ATC_DCTX_ALIGNED + tx_class, +#endif // !CONFIG_ATC_DCTX_ALIGNED + reduced_tx_set_used); } static AOM_FORCE_INLINE int get_two_coeff_cost_simple( -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC int plane, -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC int ci, tran_low_t abs_qc, int coeff_ctx, const LV_MAP_COEFF_COST *txb_costs, int bwl, TX_CLASS tx_class, const uint8_t *levels, int *cost_low) { // this simple version assumes the coeff's scan_idx is not DC (scan_idx != 0) // and not the last (scan_idx != eob - 1) assert(ci > 0); -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC const int row = ci >> bwl; const int col = ci - (row << bwl); int cost = 0; @@ -1926,9 +2267,9 @@ } #else int cost = txb_costs->base_cost[coeff_ctx][AOMMIN(abs_qc, 3)]; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC int diff = 0; -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC if (limits) { if (abs_qc <= (LF_BASE_SYMBOLS - 1)) { if (abs_qc == 0) { @@ -1968,10 +2309,10 @@ } #else if (abs_qc <= 3) diff = txb_costs->base_cost[coeff_ctx][abs_qc + 4]; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC if (abs_qc) { cost += av1_cost_literal(1); -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC if (limits) { if (abs_qc > LF_NUM_BASE_LEVELS) { const int br_ctx = get_br_lf_ctx(levels, ci, bwl, tx_class); @@ -1997,7 +2338,7 @@ &brcost_diff); diff += brcost_diff; } -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC } *cost_low = cost - diff; @@ -2012,13 +2353,13 @@ , int32_t *tmp_sign #endif // CONFIG_CONTEXT_DERIVATION -#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING +#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC , int plane -#endif // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING +#endif // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC ) { int cost = 0; -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC const int row = ci >> bwl; const int col = ci - (row << bwl); int limits = get_lf_limits(row, col, tx_class, plane); @@ -2031,7 +2372,7 @@ } #else cost += 
txb_costs->base_eob_cost[coeff_ctx][AOMMIN(abs_qc, 3) - 1]; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC if (abs_qc != 0) { if (ci == 0) { #if CONFIG_CONTEXT_DERIVATION @@ -2052,7 +2393,7 @@ cost += av1_cost_literal(1); #endif // CONFIG_CONTEXT_DERIVATION } -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC if (limits) { if (abs_qc > LF_NUM_BASE_LEVELS) { int br_ctx; @@ -2071,7 +2412,7 @@ br_ctx = get_br_ctx_eob(ci, bwl, tx_class); cost += get_br_cost(abs_qc, txb_costs->lps_cost[br_ctx]); } -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC } return cost; } @@ -2086,14 +2427,14 @@ , int32_t *tmp_sign #endif // CONFIG_CONTEXT_DERIVATION -#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING +#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC , int plane -#endif // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING +#endif // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC ) { int cost = 0; if (is_last) { -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC const int row = ci >> bwl; const int col = ci - (row << bwl); int limits = get_lf_limits(row, col, tx_class, plane); @@ -2106,9 +2447,9 @@ } #else cost += txb_costs->base_eob_cost[coeff_ctx][AOMMIN(abs_qc, 3) - 1]; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC } else { -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC const int row = ci >> bwl; const int col = ci - (row << bwl); int limits = get_lf_limits(row, col, tx_class, plane); @@ -2121,7 +2462,7 @@ } #else cost += txb_costs->base_cost[coeff_ctx][AOMMIN(abs_qc, 3)]; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC } if (abs_qc != 0) { if (ci == 0) { @@ -2143,7 +2484,7 @@ cost += av1_cost_literal(1); #endif // CONFIG_CONTEXT_DERIVATION } -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC const int row = ci >> bwl; const int col = ci - (row << bwl); int limits = get_lf_limits(row, col, tx_class, plane); @@ -2175,7 +2516,7 @@ br_ctx = get_br_ctx(levels, ci, bwl, tx_class); cost += get_br_cost(abs_qc, txb_costs->lps_cost[br_ctx]); } -#endif // CONFIG_ATC_COEFCODING +#endif // 
CONFIG_ATC } return cost; } @@ -2198,9 +2539,9 @@ static INLINE void update_coeff_general( int *accu_rate, int64_t *accu_dist, int si, int eob, -#if !CONFIG_ATC_COEFCODING +#if !CONFIG_ATC TX_SIZE tx_size, -#endif // !CONFIG_ATC_COEFCODING +#endif // !CONFIG_ATC TX_CLASS tx_class, int bwl, int height, int64_t rdmult, int shift, int dc_sign_ctx, const int32_t *dequant, const int16_t *scan, const LV_MAP_COEFF_COST *txb_costs, const tran_low_t *tcoeff, @@ -2210,10 +2551,10 @@ , int32_t *tmp_sign #endif // CONFIG_CONTEXT_DERIVATION -#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING +#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC , int plane -#endif // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING +#endif // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC #if CONFIG_PAR_HIDING , coeff_info *coef_info, bool enable_parity_hiding @@ -2225,17 +2566,17 @@ const int is_last = si == (eob - 1); const int coeff_ctx = get_lower_levels_ctx_general(is_last, si, bwl, height, levels, ci, -#if !CONFIG_ATC_COEFCODING +#if !CONFIG_ATC tx_size, -#endif // !CONFIG_ATC_COEFCODING +#endif // !CONFIG_ATC tx_class -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC , plane -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC ); if (qc == 0) { -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC const int row = ci >> bwl; const int col = ci - (row << bwl); int limits = get_lf_limits(row, col, tx_class, plane); @@ -2246,7 +2587,7 @@ } #else *accu_rate += txb_costs->base_cost[coeff_ctx][0]; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC } else { const int sign = (qc < 0) ? 
1 : 0; const tran_low_t abs_qc = abs(qc); @@ -2261,10 +2602,10 @@ , tmp_sign #endif // CONFIG_CONTEXT_DERIVATION -#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING +#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC , plane -#endif // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING +#endif // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC ); const int64_t rd = RDCOST(rdmult, rate, dist); @@ -2275,7 +2616,7 @@ if (abs_qc == 1) { abs_qc_low = qc_low = dqc_low = 0; dist_low = dist0; -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC const int row = ci >> bwl; const int col = ci - (row << bwl); int limits = get_lf_limits(row, col, tx_class, plane); @@ -2286,7 +2627,7 @@ } #else rate_low = txb_costs->base_cost[coeff_ctx][0]; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC } else { get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low); abs_qc_low = abs_qc - 1; @@ -2298,10 +2639,10 @@ , tmp_sign #endif // CONFIG_CONTEXT_DERIVATION -#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING +#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC , plane -#endif // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING +#endif // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC ); } @@ -2331,9 +2672,9 @@ static AOM_FORCE_INLINE void update_coeff_simple( int *accu_rate, int si, int eob, -#if !CONFIG_ATC_COEFCODING +#if !CONFIG_ATC TX_SIZE tx_size, -#endif // !CONFIG_ATC_COEFCODING +#endif // !CONFIG_ATC TX_CLASS tx_class, int bwl, int64_t rdmult, int shift, const int32_t *dequant, const int16_t *scan, const LV_MAP_COEFF_COST *txb_costs, const tran_low_t *tcoeff, @@ -2343,12 +2684,12 @@ , coeff_info *coef_info, bool enable_parity_hiding #endif // CONFIG_PAR_HIDING -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC , int plane) { #else ) { -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC const int dqv = get_dqv(dequant, scan[si], iqmatrix); (void)eob; // this simple version assumes the coeff's scan_idx is not DC (scan_idx != 0) @@ -2357,7 +2698,7 @@ assert(si > 0); const int ci = scan[si]; const tran_low_t 
qc = qcoeff[ci]; -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC const int row = ci >> bwl; const int col = ci - (row << bwl); @@ -2371,9 +2712,9 @@ #else const int coeff_ctx = get_lower_levels_ctx(levels, ci, bwl, tx_size, tx_class); -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC if (qc == 0) { -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC if (limits) { *accu_rate += txb_costs->base_lf_cost[coeff_ctx][0]; } else { @@ -2381,16 +2722,16 @@ } #else *accu_rate += txb_costs->base_cost[coeff_ctx][0]; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC } else { const tran_low_t abs_qc = abs(qc); const tran_low_t abs_tqc = abs(tcoeff[ci]); const tran_low_t abs_dqc = abs(dqcoeff[ci]); int rate_low = 0; const int rate = get_two_coeff_cost_simple( -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC plane, -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC ci, abs_qc, coeff_ctx, txb_costs, bwl, tx_class, levels, &rate_low); if (abs_dqc < abs_tqc) { *accu_rate += rate; @@ -2440,9 +2781,9 @@ static AOM_FORCE_INLINE void update_coeff_eob( int *accu_rate, int64_t *accu_dist, int *eob, int *nz_num, int *nz_ci, int si, -#if !CONFIG_ATC_COEFCODING +#if !CONFIG_ATC TX_SIZE tx_size, -#endif // !CONFIG_ATC_COEFCODING +#endif // !CONFIG_ATC TX_CLASS tx_class, int bwl, int height, int dc_sign_ctx, int64_t rdmult, int shift, const int32_t *dequant, const int16_t *scan, const LV_MAP_EOB_COST *txb_eob_costs, const LV_MAP_COEFF_COST *txb_costs, @@ -2452,10 +2793,10 @@ , int32_t *tmp_sign #endif // CONFIG_CONTEXT_DERIVATION -#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING +#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC , int plane -#endif // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING +#endif // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC #if CONFIG_PAR_HIDING , coeff_info *coef_info, bool enable_parity_hiding @@ -2465,7 +2806,7 @@ assert(si != *eob - 1); const int ci = scan[si]; const tran_low_t qc = qcoeff[ci]; -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC const int row = ci >> bwl; const 
int col = ci - (row << bwl); int limits = get_lf_limits(row, col, tx_class, plane); @@ -2478,9 +2819,9 @@ #else const int coeff_ctx = get_lower_levels_ctx(levels, ci, bwl, tx_size, tx_class); -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC if (qc == 0) { -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC if (limits) { *accu_rate += txb_costs->base_lf_cost[coeff_ctx][0]; } else { @@ -2488,7 +2829,7 @@ } #else *accu_rate += txb_costs->base_cost[coeff_ctx][0]; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC } else { #if CONFIG_PAR_HIDING int64_t rd_eob_low = INT64_MAX >> 1; @@ -2508,10 +2849,10 @@ , tmp_sign #endif // CONFIG_CONTEXT_DERIVATION -#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING +#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC , plane -#endif // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING +#endif // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC ); int64_t rd = RDCOST(rdmult, *accu_rate + rate, *accu_dist + dist); @@ -2523,7 +2864,7 @@ abs_qc_low = 0; dqc_low = qc_low = 0; dist_low = 0; -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC if (limits) { rate_low = txb_costs->base_lf_cost[coeff_ctx][0]; } else { @@ -2531,7 +2872,7 @@ } #else rate_low = txb_costs->base_cost[coeff_ctx][0]; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC rd_low = RDCOST(rdmult, *accu_rate + rate_low, *accu_dist); } else { get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low); @@ -2544,10 +2885,10 @@ , tmp_sign #endif // CONFIG_CONTEXT_DERIVATION -#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING +#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC , plane -#endif // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING +#endif // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC ); rd_low = RDCOST(rdmult, *accu_rate + rate_low, *accu_dist + dist_low); } @@ -2559,7 +2900,11 @@ const int new_eob = si + 1; const int coeff_ctx_new_eob = get_lower_levels_ctx_eob(bwl, height, si); const int new_eob_cost = +#if CONFIG_ATC_DCTX_ALIGNED + get_eob_cost(new_eob, txb_eob_costs, txb_costs); 
+#else get_eob_cost(new_eob, txb_eob_costs, txb_costs, tx_class); +#endif // CONFIG_ATC_DCTX_ALIGNED int rate_coeff_eob = new_eob_cost + get_coeff_cost_eob(ci, abs_qc, sign, coeff_ctx_new_eob, dc_sign_ctx, txb_costs, bwl, tx_class @@ -2567,10 +2912,10 @@ , tmp_sign #endif // CONFIG_CONTEXT_DERIVATION -#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING +#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC , plane -#endif // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING +#endif // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC ); int64_t dist_new_eob = dist; int64_t rd_new_eob = RDCOST(rdmult, rate_coeff_eob, dist_new_eob); @@ -2587,10 +2932,10 @@ , tmp_sign #endif // CONFIG_CONTEXT_DERIVATION -#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING +#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC , plane -#endif // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING +#endif // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC ); const int64_t dist_new_eob_low = dist_low; const int64_t rd_new_eob_low = @@ -2682,14 +3027,14 @@ tran_low_t level, int bwl, int pos, uint8_t *levels, int dc_sign_ctx, TX_CLASS tx_class, int *rate -#if !CONFIG_ATC_COEFCODING +#if !CONFIG_ATC , TX_SIZE tx_size -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC ) { tran_low_t abslevel = abs(level), q_index = abslevel >> 1; int sign = level < 0; -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC const int row = pos >> bwl; const int col = pos - (row << bwl); int limits = get_lf_limits(row, col, tx_class, 0); @@ -2702,17 +3047,17 @@ #else const int coeff_ctx = get_lower_levels_ctx(levels, pos, bwl, tx_size, tx_class); -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC *rate = get_coeff_cost_general(0, pos, abslevel, level < 0, coeff_ctx, dc_sign_ctx, txb_costs, bwl, tx_class, levels #if CONFIG_CONTEXT_DERIVATION , 0 #endif // CONFIG_CONTEXT_DERIVATION -#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING +#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC , 0 -#endif // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING 
+#endif // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC ); const int base_ctx_ph = get_base_ctx_ph(levels, pos, bwl, tx_class); @@ -2898,10 +3243,10 @@ const qm_val_t *iqmatrix, int dc_sign_ctx, const TX_CLASS tx_class, tran_low_t *qcoeff, tran_low_t *dqcoeff, const tran_low_t *tcoeff, coeff_info *coef_info, int *accu_rate -#if !CONFIG_ATC_COEFCODING +#if !CONFIG_ATC , TX_SIZE tx_size -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC ) { int nzsbb = 0, sum_abs1 = 0; for (int scan_idx = eob - 1; scan_idx > 0; --scan_idx) { @@ -2922,10 +3267,10 @@ const int ratesaving = rate_save(txb_costs, txb_costs_ph, qcoeff[hidepos], bwl, hidepos, levels, dc_sign_ctx, tx_class, &rate_cur -#if !CONFIG_ATC_COEFCODING +#if !CONFIG_ATC , tx_size -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC ); if (!needtune && nzsbb >= PHTHRESH) { @@ -3009,6 +3354,12 @@ const int height = get_txb_high(tx_size); assert(width == (1 << bwl)); const int is_inter = is_inter_block(mbmi, xd->tree_type); +#if CONFIG_ATC_DCTX_ALIGNED + const int bob_code = p->bobs[block]; + const int is_fsc = (xd->mi[0]->fsc_mode[xd->tree_type == CHROMA_PART] && + plane == PLANE_TYPE_Y) || + use_inter_fsc(&cpi->common, plane, tx_type, is_inter); +#endif // CONFIG_ATC_DCTX_ALIGNED const LV_MAP_COEFF_COST *txb_costs = &coeff_costs->coeff_costs[txs_ctx][plane_type]; const int eob_multi_size = txsize_log2_minus4[tx_size]; @@ -3068,7 +3419,11 @@ const int non_skip_cost = txb_costs->txb_skip_cost[txb_ctx->txb_skip_ctx][0]; const int skip_cost = txb_costs->txb_skip_cost[txb_ctx->txb_skip_ctx][1]; #endif // CONFIG_CONTEXT_DERIVATION +#if CONFIG_ATC_DCTX_ALIGNED + const int eob_cost = get_eob_cost(eob, txb_eob_costs, txb_costs); +#else const int eob_cost = get_eob_cost(eob, txb_eob_costs, txb_costs, tx_class); +#endif // CONFIG_ATC_DCTX_ALIGNED int accu_rate = eob_cost; int64_t accu_dist = 0; int si = eob - 1; @@ -3081,9 +3436,9 @@ int nz_ci[3] = { ci, 0, 0 }; if (abs_qc >= 2) { update_coeff_general(&accu_rate, &accu_dist, 
si, eob, -#if !CONFIG_ATC_COEFCODING +#if !CONFIG_ATC tx_size, -#endif // !CONFIG_ATC_COEFCODING +#endif // !CONFIG_ATC tx_class, bwl, height, rdmult, shift, txb_ctx->dc_sign_ctx, dequant, scan, txb_costs, tcoeff, qcoeff, dqcoeff, levels, iqmatrix @@ -3091,10 +3446,10 @@ , xd->tmp_sign #endif // CONFIG_CONTEXT_DERIVATION -#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING +#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC , plane -#endif // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING +#endif // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC #if CONFIG_PAR_HIDING , coef_info, enable_parity_hiding @@ -3111,10 +3466,10 @@ , xd->tmp_sign #endif // CONFIG_CONTEXT_DERIVATION -#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING +#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC , plane -#endif // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING +#endif // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC ); const tran_low_t tqc = tcoeff[ci]; const tran_low_t dqc = dqcoeff[ci]; @@ -3126,9 +3481,9 @@ #if CONFIG_PAR_HIDING for (; si >= 0 && nz_num <= max_nz_num; --si) { update_coeff_eob(&accu_rate, &accu_dist, &eob, &nz_num, nz_ci, si, -#if !CONFIG_ATC_COEFCODING +#if !CONFIG_ATC tx_size, -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC tx_class, bwl, height, txb_ctx->dc_sign_ctx, rdmult, shift, dequant, scan, txb_eob_costs, txb_costs, tcoeff, qcoeff, dqcoeff, levels, sharpness, iqmatrix @@ -3136,16 +3491,16 @@ , xd->tmp_sign #endif -#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING +#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC , plane -#endif // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING +#endif // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC , coef_info, enable_parity_hiding); } #else #if CONFIG_CONTEXT_DERIVATION -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC #define UPDATE_COEFF_EOB_CASE(tx_class_literal) \ case tx_class_literal: \ for (; si >= 0 && nz_num <= max_nz_num; --si) { \ @@ -3167,7 +3522,7 @@ levels, sharpness, iqmatrix, xd->tmp_sign, plane); \ } \ break; -#endif 
// CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC #else #define UPDATE_COEFF_EOB_CASE(tx_class_literal) \ case tx_class_literal: \ @@ -3197,20 +3552,20 @@ #if CONFIG_PAR_HIDING for (; si >= 1; --si) { update_coeff_simple(&accu_rate, si, eob, -#if !CONFIG_ATC_COEFCODING +#if !CONFIG_ATC tx_size, -#endif // !CONFIG_ATC_COEFCODING +#endif // !CONFIG_ATC tx_class, bwl, rdmult, shift, dequant, scan, txb_costs, tcoeff, qcoeff, dqcoeff, levels, iqmatrix, coef_info, enable_parity_hiding -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC , plane -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC ); } #else -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC #define UPDATE_COEFF_SIMPLE_CASE(tx_class_literal) \ case tx_class_literal: \ for (; si >= 1; --si) { \ @@ -3242,7 +3597,7 @@ #undef UPDATE_COEFF_SIMPLE_CASE default: assert(false); } -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC #endif // CONFIG_PAR_HIDING // DC position @@ -3250,9 +3605,9 @@ // no need to update accu_dist because it's not used after this point int64_t dummy_dist = 0; update_coeff_general(&accu_rate, &dummy_dist, si, eob, -#if !CONFIG_ATC_COEFCODING +#if !CONFIG_ATC tx_size, -#endif // !CONFIG_ATC_COEFCODING +#endif // !CONFIG_ATC tx_class, bwl, height, rdmult, shift, txb_ctx->dc_sign_ctx, dequant, scan, txb_costs, tcoeff, qcoeff, dqcoeff, levels, iqmatrix @@ -3260,10 +3615,10 @@ , xd->tmp_sign #endif // CONFIG_CONTEXT_DERIVATION -#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING +#if CONFIG_CONTEXT_DERIVATION || CONFIG_ATC , plane -#endif // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC_COEFCODING +#endif // CONFIG_CONTEXT_DERIVATION || CONFIG_ATC #if CONFIG_PAR_HIDING , coef_info, enable_parity_hiding @@ -3276,16 +3631,30 @@ parity_hide_tb(eob, scan, levels, bwl, rdmult, shift, txb_costs, txb_costs_ph, dequant, iqmatrix, txb_ctx->dc_sign_ctx, tx_class, qcoeff, dqcoeff, tcoeff, coef_info, &accu_rate -#if !CONFIG_ATC_COEFCODING +#if !CONFIG_ATC , tx_size -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC ); 
} aom_free(coef_info); #endif // CONFIG_PAR_HIDING +#if CONFIG_ATC_DCTX_ALIGNED + set_bob(x, plane, block, tx_size, tx_type); +#endif // CONFIG_ATC_DCTX_ALIGNED + +#if CONFIG_ATC_DCTX_ALIGNED + if (eob == 0) { + accu_rate += skip_cost; + } else { + const int tx_type_cost = get_tx_type_cost(x, xd, plane, tx_size, tx_type, + cm->features.reduced_tx_set_used, + eob, bob_code, is_fsc); + accu_rate += non_skip_cost + tx_type_cost; + } +#else const int tx_type_cost = get_tx_type_cost( x, xd, plane, tx_size, tx_type, cm->features.reduced_tx_set_used, eob); @@ -3293,6 +3662,7 @@ accu_rate += skip_cost; else accu_rate += non_skip_cost + tx_type_cost; +#endif // CONFIG_ATC_DCTX_ALIGNED p->eobs[block] = eob; p->txb_entropy_ctx[block] = @@ -3360,7 +3730,12 @@ MACROBLOCKD *xd, int blk_row, int blk_col, int plane, TX_SIZE tx_size, FRAME_COUNTS *counts, uint8_t allow_update_cdf, - int eob) { + int eob +#if CONFIG_ATC_DCTX_ALIGNED + , + int bob_code, int is_fsc +#endif // CONFIG_ATC_DCTX_ALIGNED +) { MB_MODE_INFO *mbmi = xd->mi[0]; int is_inter = is_inter_block(mbmi, xd->tree_type); const int reduced_tx_set_used = cm->features.reduced_tx_set_used; @@ -3397,19 +3772,39 @@ const TxSetType tx_set_type = av1_get_ext_tx_set_type(tx_size, is_inter, reduced_tx_set_used); if (is_inter) { +#if CONFIG_ATC_DCTX_ALIGNED + const int esc_eob = is_fsc ? 
bob_code : eob; + const int eob_tx_ctx = + get_lp2tx_ctx(tx_size, get_txb_bwl(tx_size), esc_eob); + if (allow_update_cdf) { + update_cdf( + fc->inter_ext_tx_cdf[eset][eob_tx_ctx][txsize_sqr_map[tx_size]], + av1_ext_tx_ind[tx_set_type][tx_type], + av1_num_ext_tx_set[tx_set_type]); + } +#else if (allow_update_cdf) { update_cdf(fc->inter_ext_tx_cdf[eset][txsize_sqr_map[tx_size]], av1_ext_tx_ind[tx_set_type][tx_type], av1_num_ext_tx_set[tx_set_type]); } +#endif // CONFIG_ATC_DCTX_ALIGNED #if CONFIG_ENTROPY_STATS +#if CONFIG_ATC_DCTX_ALIGNED + ++counts->inter_ext_tx[eset][eob_tx_ctx][txsize_sqr_map[tx_size]] + [av1_ext_tx_ind[tx_set_type][tx_type]]; +#else ++counts->inter_ext_tx[eset][txsize_sqr_map[tx_size]] [av1_ext_tx_ind[tx_set_type][tx_type]]; +#endif // CONFIG_ATC_DCTX_ALIGNED #endif // CONFIG_ENTROPY_STATS } else { if (mbmi->fsc_mode[xd->tree_type == CHROMA_PART] && allow_update_cdf) { return; } +#if CONFIG_ATC_DCTX_ALIGNED + if (eob == 1 && allow_update_cdf) return; +#endif // CONFIG_ATC_DCTX_ALIGNED PREDICTION_MODE intra_dir; if (mbmi->filter_intra_mode_info.use_filter_intra) intra_dir = fimode_to_intradir[mbmi->filter_intra_mode_info @@ -3418,7 +3813,7 @@ intra_dir = mbmi->mode; #if CONFIG_ENTROPY_STATS const TX_TYPE primary_tx_type = get_primary_tx_type(tx_type); -#if CONFIG_ATC_NEWTXSETS +#if CONFIG_ATC ++counts->intra_ext_tx[eset][txsize_sqr_map[tx_size]][intra_dir] [av1_tx_type_to_idx(primary_tx_type, tx_set_type, intra_dir, @@ -3427,7 +3822,7 @@ ++counts->intra_ext_tx[eset][txsize_sqr_map[tx_size]][intra_dir] [av1_ext_tx_ind_intra[tx_set_type] [primary_tx_type]]; -#endif // CONFIG_ATC_NEWTXSETS +#endif // CONFIG_ATC #endif // CONFIG_ENTROPY_STATS if (allow_update_cdf) { update_cdf( @@ -3437,12 +3832,12 @@ #else fc->intra_ext_tx_cdf[eset][txsize_sqr_map[tx_size]][intra_dir], #endif // CONFIG_ATC_REDUCED_TXSET -#if CONFIG_ATC_NEWTXSETS +#if CONFIG_ATC av1_tx_type_to_idx(get_primary_tx_type(tx_type), tx_set_type, intra_dir, av1_size_class[tx_size]), #else 
av1_ext_tx_ind_intra[tx_set_type][get_primary_tx_type(tx_type)], -#endif // CONFIG_ATC_NEWTXSETS +#endif // CONFIG_ATC #if CONFIG_ATC_REDUCED_TXSET cm->features.reduced_tx_set_used ? av1_num_reduced_tx_set @@ -3465,6 +3860,9 @@ !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) && cm->seq_params.enable_ist && block_signals_sec_tx_type(xd, tx_size, tx_type, eob)) { +#if CONFIG_ATC_DCTX_ALIGNED + if (eob == 1 && allow_update_cdf) return; +#endif // CONFIG_ATC_DCTX_ALIGNED if (allow_update_cdf) update_cdf(fc->stx_cdf[txsize_sqr_map[tx_size]], (int8_t)get_secondary_tx_type(tx_type), STX_TYPES); @@ -3483,6 +3881,9 @@ struct macroblock_plane *p = &x->plane[plane]; struct macroblockd_plane *pd = &xd->plane[plane]; const int eob = p->eobs[block]; +#if CONFIG_ATC_DCTX_ALIGNED + const int bob_code = p->bobs[block]; +#endif // CONFIG_ATC_DCTX_ALIGNED const int block_offset = BLOCK_OFFSET(block); tran_low_t *qcoeff = p->qcoeff + block_offset; const PLANE_TYPE plane_type = pd->plane_type; @@ -3520,6 +3921,11 @@ uint8_t *const entropy_ctx = cb_coef_buff->entropy_ctx[plane] + txb_offset; entropy_ctx[block] = txb_ctx.txb_skip_ctx; eob_txb[block] = eob; +#if CONFIG_ATC_DCTX_ALIGNED + uint16_t *bob_txb = cb_coef_buff->bobs[plane] + txb_offset; + bob_txb[block] = bob_code; +#endif // CONFIG_ATC_DCTX_ALIGNED + if (eob == 0) { av1_set_entropy_contexts(xd, pd, plane, plane_bsize, tx_size, 0, blk_col, blk_row); @@ -3539,23 +3945,67 @@ int8_t *const signs = set_signs(signs_buf, width); av1_txb_init_levels_signs(tcoeff, width, height, levels_buf, signs_buf); update_tx_type_count(cpi, cm, xd, blk_row, blk_col, plane, tx_size, - td->counts, allow_update_cdf, eob); + td->counts, allow_update_cdf, eob +#if CONFIG_ATC_DCTX_ALIGNED + , + bob_code, 1 /* is_fsc */ +#endif // CONFIG_ATC_DCTX_ALIGNED + ); const int16_t *const scan = scan_order->scan; // record tx type usage td->rd_counts.tx_type_used[tx_size][get_primary_tx_type(tx_type)]++; +#if CONFIG_ATC_DCTX_ALIGNED + int bob = 
av1_get_max_eob(tx_size) - bob_code; +#if CONFIG_ENTROPY_STATS + av1_update_eob_context(cdf_idx, bob_code, tx_size, plane_type, ec_ctx, + td->counts, allow_update_cdf); +#else + av1_update_eob_context(bob_code, tx_size, plane_type, ec_ctx, + allow_update_cdf); +#endif +#endif // CONFIG_ATC_DCTX_ALIGNED DECLARE_ALIGNED(16, int8_t, coeff_contexts[MAX_TX_SQUARE]); +#if CONFIG_ATC_DCTX_ALIGNED + av1_get_nz_map_contexts_skip_c(levels, scan, bob, eob, tx_size, + coeff_contexts); +#else av1_get_nz_map_contexts_skip(levels, scan, eob, tx_size, coeff_contexts); +#endif // CONFIG_ATC_DCTX_ALIGNED +#if CONFIG_ATC_DCTX_ALIGNED + for (int c = bob; c < eob; ++c) { +#else for (int c = 0; c < eob; c++) { +#endif // CONFIG_ATC_DCTX_ALIGNED const int pos = scan[c]; const int coeff_ctx = coeff_contexts[pos]; const tran_low_t v = qcoeff[pos]; const tran_low_t level = abs(v); if (allow_update_cdf) { +#if CONFIG_ATC_DCTX_ALIGNED + if (c == bob) { + update_cdf(ec_ctx->coeff_base_bob_cdf[coeff_ctx], + AOMMIN(level, 3) - 1, 3); + } else { + update_cdf(ec_ctx->coeff_base_cdf_idtx[coeff_ctx], AOMMIN(level, 3), + 4); + } +#else update_cdf(ec_ctx->coeff_base_cdf_idtx[coeff_ctx], AOMMIN(level, 3), 4); +#endif // CONFIG_ATC_DCTX_ALIGNED } #if CONFIG_ENTROPY_STATS +#if CONFIG_ATC_DCTX_ALIGNED + if (c == bob) { + ++td->counts + ->coeff_base_bob_multi[cdf_idx][coeff_ctx][AOMMIN(level, 3) - 1]; + } else { + ++td->counts + ->coeff_base_multi_skip[cdf_idx][coeff_ctx][AOMMIN(level, 3)]; + } +#else ++td->counts->coeff_base_multi_skip[cdf_idx][coeff_ctx][AOMMIN(level, 3)]; -#endif // CONFIG_ENTROPY_STATS +#endif // CONFIG_ATC_DCTX_ALIGNED +#endif if (level > NUM_BASE_LEVELS) { const int base_range = level - 1 - NUM_BASE_LEVELS; const int br_ctx = get_br_ctx_skip(levels, pos, bwl); @@ -3650,9 +4100,19 @@ struct macroblock_plane *p = &x->plane[plane]; struct macroblockd_plane *pd = &xd->plane[plane]; const int eob = p->eobs[block]; +#if CONFIG_ATC_DCTX_ALIGNED + const int bob_code = p->bobs[block]; +#endif 
// CONFIG_ATC_DCTX_ALIGNED const int block_offset = BLOCK_OFFSET(block); tran_low_t *qcoeff = p->qcoeff + block_offset; const PLANE_TYPE plane_type = pd->plane_type; +#if CONFIG_ATC_DCTX_ALIGNED + const int is_inter = is_inter_block(xd->mi[0], xd->tree_type); + if (eob == 1 && plane_type == 0 && + !xd->mi[0]->fsc_mode[xd->tree_type == CHROMA_PART] && !is_inter) { + update_txk_array(xd, blk_row, blk_col, tx_size, DCT_DCT); + } +#endif // CONFIG_ATC_DCTX_ALIGNED const TX_TYPE tx_type = av1_get_tx_type(xd, plane_type, blk_row, blk_col, tx_size, cm->features.reduced_tx_set_used); @@ -3737,9 +4197,22 @@ uint8_t *const entropy_ctx = cb_coef_buff->entropy_ctx[plane] + txb_offset; entropy_ctx[block] = txb_ctx.txb_skip_ctx; eob_txb[block] = eob; +#if CONFIG_ATC_DCTX_ALIGNED + uint16_t *bob_txb = cb_coef_buff->bobs[plane] + txb_offset; + bob_txb[block] = bob_code; +#endif // CONFIG_ATC_DCTX_ALIGNED #if CONFIG_CROSS_CHROMA_TX - if (is_cctx_allowed(cm, xd) && plane == AOM_PLANE_U && eob > 0) +#if CONFIG_ATC_DCTX_ALIGNED + const int skip_cctx = is_inter ? 
0 : (eob == 1); +#endif // CONFIG_ATC_DCTX_ALIGNED + if (is_cctx_allowed(cm, xd) && plane == AOM_PLANE_U && +#if CONFIG_ATC_DCTX_ALIGNED + !skip_cctx && eob > 0 +#else + eob > 0 +#endif // CONFIG_ATC_DCTX_ALIGNED + ) update_cctx_type_count(cm, xd, blk_row, blk_col, tx_size, td->counts, allow_update_cdf); #endif // CONFIG_CROSS_CHROMA_TX @@ -3759,7 +4232,12 @@ uint8_t *const levels = set_levels(levels_buf, width); av1_txb_init_levels(tcoeff, width, height, levels); update_tx_type_count(cpi, cm, xd, blk_row, blk_col, plane, tx_size, - td->counts, allow_update_cdf, eob); + td->counts, allow_update_cdf, eob +#if CONFIG_ATC_DCTX_ALIGNED + , + bob_code, 0 /* is_fsc */ +#endif // CONFIG_ATC_DCTX_ALIGNED + ); const TX_CLASS tx_class = tx_type_to_class[get_primary_tx_type(tx_type)]; const int16_t *const scan = scan_order->scan; @@ -3768,19 +4246,25 @@ td->rd_counts.tx_type_used[tx_size][get_primary_tx_type(tx_type)]++; #if CONFIG_ENTROPY_STATS - av1_update_eob_context(cdf_idx, eob, tx_size, tx_class, plane_type, ec_ctx, - td->counts, allow_update_cdf); + av1_update_eob_context(cdf_idx, eob, tx_size, +#if !CONFIG_ATC_DCTX_ALIGNED + tx_class, +#endif // !CONFIG_ATC_DCTX_ALIGNED + plane_type, ec_ctx, td->counts, allow_update_cdf); #else - av1_update_eob_context(eob, tx_size, tx_class, plane_type, ec_ctx, - allow_update_cdf); + av1_update_eob_context(eob, tx_size, +#if !CONFIG_ATC_DCTX_ALIGNED + tx_class, +#endif // !CONFIG_ATC_DCTX_ALIGNED + plane_type, ec_ctx, allow_update_cdf); #endif DECLARE_ALIGNED(16, int8_t, coeff_contexts[MAX_TX_SQUARE]); av1_get_nz_map_contexts(levels, scan, eob, tx_size, tx_class, coeff_contexts -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC , plane -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC ); #if CONFIG_PAR_HIDING bool enable_parity_hiding = cm->features.allow_parity_hiding && @@ -3799,7 +4283,7 @@ if (allow_update_cdf) { if (c == eob - 1) { assert(coeff_ctx < 4); -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC const int row = pos >> bwl; const 
int col = pos - (row << bwl); int limits = get_lf_limits(row, col, tx_class, plane); @@ -3818,9 +4302,9 @@ update_cdf( ec_ctx->coeff_base_eob_cdf[txsize_ctx][plane_type][coeff_ctx], AOMMIN(level, 3) - 1, 3); -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC } else { -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC const int row = pos >> bwl; const int col = pos - (row << bwl); int limits = get_lf_limits(row, col, tx_class, plane); @@ -3836,14 +4320,14 @@ #else update_cdf(ec_ctx->coeff_base_cdf[txsize_ctx][plane_type][coeff_ctx], AOMMIN(level, 3), 4); -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC } } if (c == eob - 1) { assert(coeff_ctx < 4); assert(level > 0); #if CONFIG_ENTROPY_STATS -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC const int row = pos >> bwl; const int col = pos - (row << bwl); int limits = get_lf_limits(row, col, tx_class, plane); @@ -3858,9 +4342,9 @@ #else ++td->counts->coeff_base_eob_multi[cdf_idx][txsize_ctx][plane_type] [coeff_ctx][AOMMIN(level, 3) - 1]; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC } else { -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC const int row = pos >> bwl; const int col = pos - (row << bwl); int limits = get_lf_limits(row, col, tx_class, plane); @@ -3875,10 +4359,10 @@ #else ++td->counts->coeff_base_multi[cdf_idx][txsize_ctx][plane_type] [coeff_ctx][AOMMIN(level, 3)]; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC #endif } -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC const int row = pos >> bwl; const int col = pos - (row << bwl); int limits = get_lf_limits(row, col, tx_class, plane); @@ -3953,7 +4437,7 @@ if (k < BR_CDF_SIZE - 1) break; } } -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC } #if CONFIG_PAR_HIDING @@ -3986,7 +4470,7 @@ if (allow_update_cdf) { if (c == eob - 1) { assert(coeff_ctx < 4); -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC const int row = pos >> bwl; const int col = pos - (row << bwl); int limits = get_lf_limits(row, col, tx_class, plane); @@ -4004,9 +4488,9 @@ update_cdf( 
ec_ctx->coeff_base_eob_cdf[txsize_ctx][plane_type][coeff_ctx], AOMMIN(level, 3) - 1, 3); -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC } else { -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC const int row = pos >> bwl; const int col = pos - (row << bwl); int limits = get_lf_limits(row, col, tx_class, plane); @@ -4022,13 +4506,13 @@ #else update_cdf(ec_ctx->coeff_base_cdf[txsize_ctx][plane_type][coeff_ctx], AOMMIN(level, 3), 4); -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC } } if (c == eob - 1) { assert(coeff_ctx < 4); #if CONFIG_ENTROPY_STATS -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC const int row = pos >> bwl; const int col = pos - (row << bwl); int limits = get_lf_limits(row, col, tx_class, plane); @@ -4043,9 +4527,9 @@ #else ++td->counts->coeff_base_eob_multi[cdf_idx][txsize_ctx][plane_type] [coeff_ctx][AOMMIN(level, 3) - 1]; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC } else { -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC const int row = pos >> bwl; const int col = pos - (row << bwl); int limits = get_lf_limits(row, col, tx_class, plane); @@ -4060,10 +4544,10 @@ #else ++td->counts->coeff_base_multi[cdf_idx][txsize_ctx][plane_type] [coeff_ctx][AOMMIN(level, 3)]; -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC #endif } -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC const int row = pos >> bwl; const int col = pos - (row << bwl); int limits = get_lf_limits(row, col, tx_class, plane); @@ -4138,7 +4622,7 @@ if (k < BR_CDF_SIZE - 1) break; } } -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC } #endif // CONFIG_PAR_HIDING
diff --git a/av1/encoder/ethread.c b/av1/encoder/ethread.c index 6c669ac..a1d0de8 100644 --- a/av1/encoder/ethread.c +++ b/av1/encoder/ethread.c
@@ -27,10 +27,6 @@ for (int i = 0; i < REFERENCE_MODES; i++) td->rd_counts.comp_pred_diff[i] += td_t->rd_counts.comp_pred_diff[i]; - for (int i = 0; i < INTER_REFS_PER_FRAME; i++) - td->rd_counts.global_motion_used[i] += - td_t->rd_counts.global_motion_used[i]; - td->rd_counts.compound_ref_used_flag |= td_t->rd_counts.compound_ref_used_flag; td->rd_counts.skip_mode_used_flag |= td_t->rd_counts.skip_mode_used_flag; @@ -470,7 +466,7 @@ &td->mb.txfm_search_info.mb_rd_record.crc_calculator); #if CONFIG_REF_MV_BANK av1_zero(td->mb.e_mbd.ref_mv_bank); -#if !CONFIG_C043_MVP_IMPROVEMENTS +#if !CONFIG_MVP_IMPROVEMENT td->mb.e_mbd.ref_mv_bank_pt = &td->mb.e_mbd.ref_mv_bank; #endif #endif // CONFIG_REF_MV_BANK} @@ -679,7 +675,7 @@ if (i > 0) { // Set up firstpass PICK_MODE_CONTEXT. thread_data->td->firstpass_ctx = av1_alloc_pmc( - cm, 0, 0, BLOCK_16X16, NULL, PARTITION_NONE, 0, + cm, SHARED_PART, 0, 0, BLOCK_16X16, NULL, PARTITION_NONE, 0, cm->seq_params.subsampling_x, cm->seq_params.subsampling_y, &thread_data->td->shared_coeff_buf); @@ -814,7 +810,7 @@ } #if CONFIG_REF_MV_BANK av1_zero(thread_data->td->mb.e_mbd.ref_mv_bank); -#if !CONFIG_C043_MVP_IMPROVEMENTS +#if !CONFIG_MVP_IMPROVEMENT thread_data->td->mb.e_mbd.ref_mv_bank_pt = &thread_data->td->mb.e_mbd.ref_mv_bank; @@ -1431,7 +1427,7 @@ // source_alt_ref_frame w.r.t. ARF frames. if (cpi->sf.gm_sf.prune_ref_frame_for_gm_search && gm_info->reference_frames[cur_dir][ref_frame_idx].distance != 0 && - cpi->common.global_motion[ref_buf_idx].wmtype != ROTZOOM) + cpi->common.global_motion[ref_buf_idx].wmtype <= TRANSLATION) job_info->early_exit[cur_dir] = 1; #if CONFIG_MULTITHREAD
diff --git a/av1/encoder/firstpass.c b/av1/encoder/firstpass.c index cf7332c..9d3f5dc 100644 --- a/av1/encoder/firstpass.c +++ b/av1/encoder/firstpass.c
@@ -221,7 +221,7 @@ } #if CONFIG_FLEX_MVRES const MvSubpelPrecision pb_mv_precision = cm->features.fr_mv_precision; -#if CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT const int is_ibc_cost = 0; #endif #endif @@ -230,7 +230,7 @@ #if CONFIG_FLEX_MVRES av1_make_default_fullpel_ms_params( &ms_params, cpi, x, bsize, ref_mv, pb_mv_precision, -#if CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT is_ibc_cost, #endif first_pass_search_sites, fine_search_interval); @@ -688,6 +688,9 @@ xd->mi[0]->tx_size = TX_4X4; xd->mi[0]->ref_frame[0] = get_closest_pastcur_ref_index(cm); xd->mi[0]->ref_frame[1] = NONE_FRAME; +#if CONFIG_CWP + xd->mi[0]->cwp_idx = CWP_EQUAL; +#endif // CONFIG_CWP av1_enc_build_inter_predictor(cm, xd, mb_row * mb_scale, mb_col * mb_scale, NULL, bsize, AOM_PLANE_Y, AOM_PLANE_Y); av1_encode_sby_pass1(cpi, x, bsize); @@ -984,6 +987,9 @@ x->plane[i].coeff = ctx->coeff[i]; x->plane[i].qcoeff = ctx->qcoeff[i]; x->plane[i].eobs = ctx->eobs[i]; +#if CONFIG_ATC_DCTX_ALIGNED + x->plane[i].bobs = ctx->bobs[i]; +#endif // CONFIG_ATC_DCTX_ALIGNED x->plane[i].txb_entropy_ctx = ctx->txb_entropy_ctx[i]; x->plane[i].dqcoeff = ctx->dqcoeff[i]; } @@ -1087,14 +1093,8 @@ cpi->is_screen_content_type = features->allow_screen_content_tools; } #if CONFIG_ADAPTIVE_DS_FILTER -#if DS_FRAME_LEVEL - if (cm->current_frame.frame_type == KEY_FRAME) { - FeatureFlags *const features = &cm->features; - av1_set_downsample_filter_options(cpi, features); -#else - if (cpi->common.current_frame.absolute_poc == 0) { + if (cpi->common.current_frame.frame_type == KEY_FRAME) { av1_set_downsample_filter_options(cpi); -#endif // DS_FRAME_LEVEL } #endif // CONFIG_ADAPTIVE_DS_FILTER // First pass coding proceeds in raster scan order with unit size of 16x16.
diff --git a/av1/encoder/global_motion.c b/av1/encoder/global_motion.c index 1f07c20..c3495e1 100644 --- a/av1/encoder/global_motion.c +++ b/av1/encoder/global_motion.c
@@ -72,7 +72,9 @@ static void convert_to_params(const double *params, int32_t *model) { int i; +#if !CONFIG_IMPROVED_GLOBAL_MOTION int alpha_present = 0; +#endif // !CONFIG_IMPROVED_GLOBAL_MOTION model[0] = (int32_t)floor(params[0] * (1 << GM_TRANS_PREC_BITS) + 0.5); model[1] = (int32_t)floor(params[1] * (1 << GM_TRANS_PREC_BITS) + 0.5); model[0] = (int32_t)clamp(model[0], GM_TRANS_MIN, GM_TRANS_MAX) * @@ -85,22 +87,28 @@ model[i] = (int32_t)floor(params[i] * (1 << GM_ALPHA_PREC_BITS) + 0.5); model[i] = (int32_t)clamp(model[i] - diag_value, GM_ALPHA_MIN, GM_ALPHA_MAX); +#if !CONFIG_IMPROVED_GLOBAL_MOTION alpha_present |= (model[i] != 0); +#endif // !CONFIG_IMPROVED_GLOBAL_MOTION model[i] = (model[i] + diag_value) * GM_ALPHA_DECODE_FACTOR; } for (; i < 8; ++i) { model[i] = (int32_t)floor(params[i] * (1 << GM_ROW3HOMO_PREC_BITS) + 0.5); model[i] = (int32_t)clamp(model[i], GM_ROW3HOMO_MIN, GM_ROW3HOMO_MAX) * GM_ROW3HOMO_DECODE_FACTOR; +#if !CONFIG_IMPROVED_GLOBAL_MOTION alpha_present |= (model[i] != 0); +#endif // !CONFIG_IMPROVED_GLOBAL_MOTION } +#if !CONFIG_IMPROVED_GLOBAL_MOTION if (!alpha_present) { if (abs(model[0]) < MIN_TRANS_THRESH && abs(model[1]) < MIN_TRANS_THRESH) { model[0] = 0; model[1] = 0; } } +#endif // !CONFIG_IMPROVED_GLOBAL_MOTION } void av1_convert_model_to_params(const double *params,
diff --git a/av1/encoder/global_motion_facade.c b/av1/encoder/global_motion_facade.c index 7e28cab..8180bf4 100644 --- a/av1/encoder/global_motion_facade.c +++ b/av1/encoder/global_motion_facade.c
@@ -30,11 +30,38 @@ const WarpedMotionParams *ref_gm, MvSubpelPrecision precision) { const int precision_loss = get_gm_precision_loss(precision); +#if CONFIG_IMPROVED_GLOBAL_MOTION + (void)precision_loss; +#endif // CONFIG_IMPROVED_GLOBAL_MOTION #else const WarpedMotionParams *ref_gm, int allow_hp) { -#endif +#if CONFIG_IMPROVED_GLOBAL_MOTION + (void)allow_hp; +#endif // CONFIG_IMPROVED_GLOBAL_MOTION +#endif // CONFIG_FLEX_MVRES int params_cost = 0; - int trans_bits, trans_prec_diff; +#if CONFIG_IMPROVED_GLOBAL_MOTION + const int trans_bits = GM_ABS_TRANS_BITS; + const int trans_prec_diff = GM_TRANS_PREC_DIFF; + const int trans_max = (1 << trans_bits) - 1; +#else + const int trans_bits = (gm->wmtype == TRANSLATION) +#if CONFIG_FLEX_MVRES + ? GM_ABS_TRANS_ONLY_BITS - precision_loss +#else + ? GM_ABS_TRANS_ONLY_BITS - !allow_hp +#endif + : GM_ABS_TRANS_BITS; + const int trans_prec_diff = (gm->wmtype == TRANSLATION) +#if CONFIG_FLEX_MVRES + ? GM_TRANS_ONLY_PREC_DIFF + precision_loss +#else + ? GM_TRANS_ONLY_PREC_DIFF + !allow_hp +#endif + : GM_TRANS_PREC_DIFF; + const int trans_max = (1 << trans_bits); +#endif // CONFIG_IMPROVED_GLOBAL_MOTION + switch (gm->wmtype) { case AFFINE: case ROTZOOM: @@ -57,29 +84,11 @@ (1 << GM_ALPHA_PREC_BITS), (gm->wmmat[5] >> GM_ALPHA_PREC_DIFF) - (1 << GM_ALPHA_PREC_BITS)); } - AOM_FALLTHROUGH_INTENDED; - case TRANSLATION: - trans_bits = (gm->wmtype == TRANSLATION) -#if CONFIG_FLEX_MVRES - ? GM_ABS_TRANS_ONLY_BITS - precision_loss -#else - ? GM_ABS_TRANS_ONLY_BITS - !allow_hp -#endif - : GM_ABS_TRANS_BITS; - trans_prec_diff = (gm->wmtype == TRANSLATION) -#if CONFIG_FLEX_MVRES - ? GM_TRANS_ONLY_PREC_DIFF + precision_loss -#else - ? 
GM_TRANS_ONLY_PREC_DIFF + !allow_hp -#endif - : GM_TRANS_PREC_DIFF; params_cost += aom_count_signed_primitive_refsubexpfin( - (1 << trans_bits) + 1, SUBEXPFIN_K, - (ref_gm->wmmat[0] >> trans_prec_diff), + trans_max + 1, SUBEXPFIN_K, (ref_gm->wmmat[0] >> trans_prec_diff), (gm->wmmat[0] >> trans_prec_diff)); params_cost += aom_count_signed_primitive_refsubexpfin( - (1 << trans_bits) + 1, SUBEXPFIN_K, - (ref_gm->wmmat[1] >> trans_prec_diff), + trans_max + 1, SUBEXPFIN_K, (ref_gm->wmmat[1] >> trans_prec_diff), (gm->wmmat[1] >> trans_prec_diff)); AOM_FALLTHROUGH_INTENDED; case IDENTITY: break; @@ -95,16 +104,28 @@ // For the given reference frame, computes the global motion parameters for // different motion models and finds the best. +#if CONFIG_IMPROVED_GLOBAL_MOTION +static AOM_INLINE void compute_global_motion_for_ref_frame( + AV1_COMP *cpi, YV12_BUFFER_CONFIG *ref_buf[INTER_REFS_PER_FRAME], int frame, + int num_src_corners, int *src_corners, unsigned char *src_buffer, + MotionModel *params_by_motion, uint8_t *segment_map, + const int segment_map_w, const int segment_map_h) { +#else static AOM_INLINE void compute_global_motion_for_ref_frame( AV1_COMP *cpi, YV12_BUFFER_CONFIG *ref_buf[INTER_REFS_PER_FRAME], int frame, int num_src_corners, int *src_corners, unsigned char *src_buffer, MotionModel *params_by_motion, uint8_t *segment_map, const int segment_map_w, const int segment_map_h, const WarpedMotionParams *ref_params) { +#endif // CONFIG_IMPROVED_GLOBAL_MOTION ThreadData *const td = &cpi->td; MACROBLOCK *const x = &td->mb; AV1_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; +#if CONFIG_IMPROVED_GLOBAL_MOTION + GlobalMotionInfo *const gm_info = &cpi->gm_info; +#endif // CONFIG_IMPROVED_GLOBAL_MOTION + int i; int src_width = cpi->source->y_width; int src_height = cpi->source->y_height; @@ -125,16 +146,21 @@ // TODO(sarahparker, debargha): Explore do_adaptive_gm_estimation = 1 const int do_adaptive_gm_estimation = 0; +#if 
CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + const int ref_frame_dist = get_relative_dist( + &cm->seq_params.order_hint_info, cm->current_frame.display_order_hint, + cm->cur_frame->ref_display_order_hint[frame]); +#else const int ref_frame_dist = get_relative_dist( &cm->seq_params.order_hint_info, cm->current_frame.order_hint, cm->cur_frame->ref_order_hints[frame]); +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC const GlobalMotionEstimationType gm_estimation_type = cm->seq_params.order_hint_info.enable_order_hint && abs(ref_frame_dist) <= 2 && do_adaptive_gm_estimation ? GLOBAL_MOTION_DISFLOW_BASED : GLOBAL_MOTION_FEATURE_BASED; for (model = ROTZOOM; model < GLOBAL_TRANS_TYPES_ENC; ++model) { - int64_t best_warp_error = INT64_MAX; // Initially set all params to identity. for (i = 0; i < RANSAC_NUM_MOTIONS; ++i) { memcpy(params_by_motion[i].params, kIdentityParams, @@ -147,48 +173,82 @@ ref_buf[frame], cpi->common.seq_params.bit_depth, gm_estimation_type, inliers_by_motion, params_by_motion, RANSAC_NUM_MOTIONS); - int64_t ref_frame_error = 0; + + int64_t best_ref_frame_error = 0; + int64_t best_warp_error = INT64_MAX; for (i = 0; i < RANSAC_NUM_MOTIONS; ++i) { if (inliers_by_motion[i] == 0) continue; params_this_motion = params_by_motion[i].params; av1_convert_model_to_params(params_this_motion, &tmp_wm_params); - if (tmp_wm_params.wmtype != IDENTITY) { - av1_compute_feature_segmentation_map( - segment_map, segment_map_w, segment_map_h, - params_by_motion[i].inliers, params_by_motion[i].num_inliers); +#if CONFIG_IMPROVED_GLOBAL_MOTION + // If the found model can be represented as a simple translation, + // then reject it. 
This is because translational motion is cheaper + // to signal through the standard MV coding tools, rather than through + // global motion + if (tmp_wm_params.wmtype <= TRANSLATION) continue; +#else + // For IDENTITY type models, we don't need to evaluate anything because + // all the following logic is effectively comparing the estimated model + // to an identity model. + if (tmp_wm_params.wmtype == IDENTITY) continue; +#endif // CONFIG_IMPROVED_GLOBAL_MOTION - ref_frame_error = av1_segmented_frame_error( - xd->bd, ref_buf[frame]->y_buffer, ref_buf[frame]->y_stride, - cpi->source->y_buffer, src_width, src_height, src_stride, - segment_map, segment_map_w); + av1_compute_feature_segmentation_map( + segment_map, segment_map_w, segment_map_h, + params_by_motion[i].inliers, params_by_motion[i].num_inliers); - const int64_t erroradv_threshold = - calc_erroradv_threshold(ref_frame_error); + const int64_t ref_frame_error = av1_segmented_frame_error( + xd->bd, ref_buf[frame]->y_buffer, ref_buf[frame]->y_stride, + cpi->source->y_buffer, src_width, src_height, src_stride, segment_map, + segment_map_w); - const int64_t warp_error = av1_refine_integerized_param( - &tmp_wm_params, tmp_wm_params.wmtype, xd->bd, - ref_buf[frame]->y_buffer, ref_buf[frame]->y_width, - ref_buf[frame]->y_height, ref_buf[frame]->y_stride, - cpi->source->y_buffer, src_width, src_height, src_stride, - GM_REFINEMENT_COUNT, best_warp_error, segment_map, segment_map_w, - erroradv_threshold); + if (ref_frame_error == 0) continue; - if (warp_error < best_warp_error) { - best_warp_error = warp_error; - // Save the wm_params modified by - // av1_refine_integerized_param() rather than motion index to - // avoid rerunning refine() below. 
- memcpy(&(cm->global_motion[frame]), &tmp_wm_params, - sizeof(WarpedMotionParams)); - } + const int64_t erroradv_threshold = + calc_erroradv_threshold(ref_frame_error); + + const int64_t warp_error = av1_refine_integerized_param( + &tmp_wm_params, tmp_wm_params.wmtype, xd->bd, + ref_buf[frame]->y_buffer, ref_buf[frame]->y_width, + ref_buf[frame]->y_height, ref_buf[frame]->y_stride, + cpi->source->y_buffer, src_width, src_height, src_stride, + GM_REFINEMENT_COUNT, best_warp_error, segment_map, segment_map_w, + erroradv_threshold); + +#if CONFIG_IMPROVED_GLOBAL_MOTION + // av1_refine_integerized_param() can change the wmtype to a simpler + // model type than its input. So we need to check again to see if + // we have a translational model + if (tmp_wm_params.wmtype <= TRANSLATION) continue; +#else + // av1_refine_integerized_param() can return a simpler model type than + // its input, so re-check model type here + if (tmp_wm_params.wmtype == IDENTITY) continue; +#endif // CONFIG_IMPROVED_GLOBAL_MOTION + +#if CONFIG_IMPROVED_GLOBAL_MOTION + // Apply initial quality filter, which depends only on the error metrics + // and not the model cost + if (warp_error >= ref_frame_error * erroradv_tr) continue; +#endif // CONFIG_IMPROVED_GLOBAL_MOTION + + if (warp_error < best_warp_error) { + best_ref_frame_error = ref_frame_error; + best_warp_error = warp_error; + // Save the wm_params modified by + // av1_refine_integerized_param() rather than motion index to + // avoid rerunning refine() below. 
+ memcpy(&(cm->global_motion[frame]), &tmp_wm_params, + sizeof(WarpedMotionParams)); } } if (cm->global_motion[frame].wmtype <= AFFINE) if (!av1_get_shear_params(&cm->global_motion[frame])) cm->global_motion[frame] = default_warp_params; +#if !CONFIG_IMPROVED_GLOBAL_MOTION if (cm->global_motion[frame].wmtype == TRANSLATION) { cm->global_motion[frame].wmmat[0] = #if CONFIG_FLEX_MVRES @@ -207,15 +267,25 @@ cm->global_motion[frame].wmmat[1]) * GM_TRANS_ONLY_DECODE_FACTOR; } +#endif // !CONFIG_IMPROVED_GLOBAL_MOTION if (cm->global_motion[frame].wmtype == IDENTITY) continue; - if (ref_frame_error == 0) continue; + // Once we get here, best_ref_frame_error must be > 0. This is because + // of the logic above, which skips over any models which have + // ref_frame_error == 0 + assert(best_ref_frame_error > 0); +#if CONFIG_IMPROVED_GLOBAL_MOTION + gm_info->erroradvantage[frame] = + (double)best_warp_error / best_ref_frame_error; + + break; +#else // If the best error advantage found doesn't meet the threshold for // this motion type, revert to IDENTITY. if (!av1_is_enough_erroradvantage( - (double)best_warp_error / ref_frame_error, + (double)best_warp_error / best_ref_frame_error, gm_get_params_cost(&cm->global_motion[frame], ref_params, #if CONFIG_FLEX_MVRES cm->features.fr_mv_precision))) { @@ -226,6 +296,7 @@ } if (cm->global_motion[frame].wmtype != IDENTITY) break; +#endif // CONFIG_IMPROVED_GLOBAL_MOTION } aom_clear_system_state(); @@ -237,8 +308,12 @@ int num_src_corners, int *src_corners, unsigned char *src_buffer, MotionModel *params_by_motion, uint8_t *segment_map, int segment_map_w, int segment_map_h) { +#if CONFIG_IMPROVED_GLOBAL_MOTION + compute_global_motion_for_ref_frame( + cpi, ref_buf, frame, num_src_corners, src_corners, src_buffer, + params_by_motion, segment_map, segment_map_w, segment_map_h); +#else AV1_COMMON *const cm = &cpi->common; - GlobalMotionInfo *const gm_info = &cpi->gm_info; const WarpedMotionParams *ref_params = cm->prev_frame ? 
&cm->prev_frame->global_motion[frame] : &default_warp_params; @@ -246,16 +321,7 @@ compute_global_motion_for_ref_frame( cpi, ref_buf, frame, num_src_corners, src_corners, src_buffer, params_by_motion, segment_map, segment_map_w, segment_map_h, ref_params); - - gm_info->params_cost[frame] = - gm_get_params_cost(&cm->global_motion[frame], ref_params, -#if !CONFIG_FLEX_MVRES - cm->features.allow_high_precision_mv) + -#else - cm->features.fr_mv_precision) + -#endif - gm_info->type_cost[cm->global_motion[frame].wmtype] - - gm_info->type_cost[IDENTITY]; +#endif // CONFIG_IMPROVED_GLOBAL_MOTION } // Loops over valid reference frames and computes global motion estimation. @@ -282,7 +348,7 @@ // source_alt_ref_frame w.r.t. ARF frames. if (cpi->sf.gm_sf.prune_ref_frame_for_gm_search && reference_frame[frame].distance != 0 && - cm->global_motion[ref_frame].wmtype != ROTZOOM) + cm->global_motion[ref_frame].wmtype <= TRANSLATION) break; } } @@ -299,37 +365,26 @@ return 0; } -// Function to decide if we can skip the global motion parameter computation -// for a particular ref frame. -static AOM_INLINE int skip_gm_frame(AV1_COMMON *const cm, int refrank) { - const RefCntBuffer *const refbuf = get_ref_frame_buf(cm, refrank); - if (refbuf == NULL) return 1; - const int d0 = get_dir_rank(cm, refrank, NULL); - for (int i = 0; i < refrank; ++i) { - const int di = get_dir_rank(cm, i, NULL); - if (di == d0 && cm->global_motion[i].wmtype != IDENTITY) { - // Same direction higher ranked ref has a non-identity gm. - // Allow search if distance is smaller in this case. - return (abs(cm->ref_frames_info.ref_frame_distance[i]) > - abs(cm->ref_frames_info.ref_frame_distance[refrank])); - } - } - return 0; -} +static int disable_gm_search_based_on_stats(const AV1_COMP *const cpi) { + const GF_GROUP *gf_group = &cpi->gf_group; + int is_gm_present = 1; -// Prunes reference frames for global motion estimation based on the speed -// feature 'gm_search_type'. 
-static int do_gm_search_logic(SPEED_FEATURES *const sf, int refrank) { - switch (sf->gm_sf.gm_search_type) { - case GM_FULL_SEARCH: return 1; - case GM_REDUCED_REF_SEARCH_SKIP_LEV2: - return refrank < INTER_REFS_PER_FRAME - 2; - case GM_REDUCED_REF_SEARCH_SKIP_LEV3: - return refrank < INTER_REFS_PER_FRAME - 4; - case GM_DISABLE_SEARCH: return 0; - default: assert(0); + // Check number of GM models only in GF groups with ARF frames. GM param + // estimation is always done in the case of GF groups with no ARF frames (flat + // gops) + if (gf_group->arf_index > -1) { + // valid_gm_model_found is initialized to INT32_MAX in the beginning of + // every GF group. + // Therefore, GM param estimation is always done for all frames until + // at least 1 frame each of ARF_UPDATE, INTNL_ARF_UPDATE and LF_UPDATE are + // encoded in a GF group For subsequent frames, GM param estimation is + // disabled, if no valid models have been found in all the three update + // types. + is_gm_present = (cpi->valid_gm_model_found[ARF_UPDATE] != 0) || + (cpi->valid_gm_model_found[INTNL_ARF_UPDATE] != 0) || + (cpi->valid_gm_model_found[LF_UPDATE] != 0); } - return 1; + return !is_gm_present; } // Populates valid reference frames in past/future directions in @@ -344,6 +399,12 @@ const GF_GROUP *gf_group = &cpi->gf_group; int ref_pruning_enabled = is_frame_eligible_for_ref_pruning( gf_group, cpi->sf.inter_sf.selective_ref_frame, 1, gf_group->index); + int cur_frame_gm_disabled = 0; + int pyr_lvl = cm->cur_frame->pyramid_level; + + if (cpi->sf.gm_sf.disable_gm_search_based_on_stats) { + cur_frame_gm_disabled = disable_gm_search_based_on_stats(cpi); + } for (int frame = cm->ref_frames_info.num_total_refs - 1; frame >= 0; --frame) { @@ -358,7 +419,6 @@ // Skip global motion estimation for invalid ref frames if (buf == NULL || (ref_disabled && cpi->sf.hl_sf.recode_loop != DISALLOW_RECODE)) { - cpi->gm_info.params_cost[frame] = 0; continue; } else { ref_buf[frame] = &buf->buf; @@ -367,12 +427,12 @@ 
int prune_ref_frames = ref_pruning_enabled && prune_ref_by_selective_ref_frame(cpi, NULL, ref_frame); + int ref_pyr_lvl = buf->pyramid_level; if (ref_buf[frame]->y_crop_width == cpi->source->y_crop_width && ref_buf[frame]->y_crop_height == cpi->source->y_crop_height && - do_gm_search_logic(&cpi->sf, ref_frame[0]) && - !(cpi->sf.gm_sf.selective_ref_gm && skip_gm_frame(cm, ref_frame[0])) && - !prune_ref_frames) { + frame < cpi->sf.gm_sf.max_ref_frames && !prune_ref_frames && + ref_pyr_lvl <= pyr_lvl && !cur_frame_gm_disabled) { assert(ref_buf[frame] != NULL); const int relative_frame_dist = av1_encoder_get_relative_dist( buf->display_order_hint, cm->cur_frame->display_order_hint); @@ -420,6 +480,173 @@ } } +#if CONFIG_IMPROVED_GLOBAL_MOTION +// Select which global motion model to use as a base +static AOM_INLINE void pick_base_gm_params(AV1_COMP *cpi) { + AV1_COMMON *const cm = &cpi->common; + const SequenceHeader *const seq_params = &cm->seq_params; + GlobalMotionInfo *const gm_info = &cpi->gm_info; + int num_total_refs = cm->ref_frames_info.num_total_refs; + + int best_our_ref; + int best_their_ref; + const WarpedMotionParams *best_base_model; + int best_temporal_distance; + int best_num_models; + int best_cost; + + // Bitmask of which models we will actually use if we accept the current + // best base model + uint8_t best_enable_models; + + // First, evaluate the identity model as a base + { + int this_num_models = 0; + int this_cost = + aom_count_primitive_quniform(num_total_refs + 1, num_total_refs) + << AV1_PROB_COST_SHIFT; + uint8_t this_enable_models = 0; + + for (int frame = 0; frame < num_total_refs; frame++) { + const WarpedMotionParams *model = &cm->global_motion[frame]; + if (model->wmtype == IDENTITY) continue; + +#if CONFIG_FLEX_MVRES + int model_cost = gm_get_params_cost(model, &default_warp_params, + cm->features.fr_mv_precision); +#else + int model_cost = gm_get_params_cost(model, &default_warp_params, + cm->features.allow_high_precision_mv); 
+#endif // CONFIG_FLEX_MVRES + bool use_model = av1_is_enough_erroradvantage( + gm_info->erroradvantage[frame], model_cost); + + if (use_model) { + this_num_models += 1; + this_cost += model_cost; + this_enable_models |= (1 << frame); + } + } + + // Set initial values + best_our_ref = cm->ref_frames_info.num_total_refs; + best_their_ref = -1; + best_base_model = &default_warp_params; + best_temporal_distance = 1; + best_num_models = this_num_models; + best_cost = this_cost; + best_enable_models = this_enable_models; + } + + // Then try each available reference model in turn + for (int our_ref = 0; our_ref < num_total_refs; ++our_ref) { + const int ref_disabled = !(cm->ref_frame_flags & (1 << our_ref)); + RefCntBuffer *buf = get_ref_frame_buf(cm, our_ref); + // Skip looking at invalid ref frames + if (buf == NULL || + (ref_disabled && cpi->sf.hl_sf.recode_loop != DISALLOW_RECODE)) { + continue; + } + + int their_num_refs = buf->num_ref_frames; + for (int their_ref = 0; their_ref < their_num_refs; ++their_ref) { + const WarpedMotionParams *base_model = &buf->global_motion[their_ref]; + if (base_model->wmtype == IDENTITY) { + continue; + } + +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + const int our_ref_order_hint = buf->display_order_hint; + const int their_ref_order_hint = buf->ref_display_order_hint[their_ref]; +#else + const int our_ref_order_hint = buf->order_hint; + const int their_ref_order_hint = buf->ref_order_hints[their_ref]; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + int base_temporal_distance = + get_relative_dist(&seq_params->order_hint_info, our_ref_order_hint, + their_ref_order_hint); + + int this_num_models = 0; + int this_cost = + (aom_count_primitive_quniform(num_total_refs + 1, our_ref) + + aom_count_primitive_quniform(their_num_refs, their_ref)) + << AV1_PROB_COST_SHIFT; + uint8_t this_enable_models = 0; + + for (int frame = 0; frame < num_total_refs; frame++) { + const WarpedMotionParams *model = &cm->global_motion[frame]; + if (model->wmtype 
== IDENTITY) continue; + + int temporal_distance; + if (seq_params->order_hint_info.enable_order_hint) { + const RefCntBuffer *const ref_buf = get_ref_frame_buf(cm, frame); +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + const int ref_order_hint = ref_buf->display_order_hint; + const int cur_order_hint = cm->cur_frame->display_order_hint; +#else + const int ref_order_hint = ref_buf->order_hint; + const int cur_order_hint = cm->cur_frame->order_hint; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + temporal_distance = get_relative_dist(&seq_params->order_hint_info, + cur_order_hint, ref_order_hint); + } else { + temporal_distance = 1; + } + + if (temporal_distance == 0) { + // Don't code global motion for frames at the same temporal instant + assert(model->wmtype == IDENTITY); + continue; + } + + WarpedMotionParams ref_params; + av1_scale_warp_model(base_model, base_temporal_distance, &ref_params, + temporal_distance); + +#if CONFIG_FLEX_MVRES + int model_cost = gm_get_params_cost(model, &ref_params, + cm->features.fr_mv_precision); +#else + int model_cost = gm_get_params_cost( + model, &ref_params, cm->features.allow_high_precision_mv); +#endif // CONFIG_FLEX_MVRES + bool use_model = av1_is_enough_erroradvantage( + gm_info->erroradvantage[frame], model_cost); + + if (use_model) { + this_num_models += 1; + this_cost += model_cost; + this_enable_models |= (1 << frame); + } + } + + if (this_num_models > best_num_models || + (this_num_models == best_num_models && this_cost < best_cost)) { + best_our_ref = our_ref; + best_their_ref = their_ref; + best_base_model = base_model; + best_temporal_distance = base_temporal_distance; + best_num_models = this_num_models; + best_cost = this_cost; + best_enable_models = this_enable_models; + } + } + } + + gm_info->base_model_our_ref = best_our_ref; + gm_info->base_model_their_ref = best_their_ref; + cm->base_global_motion_model = *best_base_model; + cm->base_global_motion_distance = best_temporal_distance; + + for (int frame = 0; frame < 
num_total_refs; frame++) { + if ((best_enable_models & (1 << frame)) == 0) { + // Disable this model + cm->global_motion[frame] = default_warp_params; + } + } +} +#endif // CONFIG_IMPROVED_GLOBAL_MOTION + // Initializes parameters used for computing global motion. static AOM_INLINE void setup_global_motion_info_params(AV1_COMP *cpi) { GlobalMotionInfo *const gm_info = &cpi->gm_info; @@ -486,15 +713,45 @@ dealloc_global_motion_data(params_by_motion, segment_map); } +static AOM_INLINE void reset_gm_stats(AV1_COMP *cpi) { + for (int i = 0; i < FRAME_UPDATE_TYPES; i++) { + cpi->valid_gm_model_found[i] = INT32_MAX; + } +} + +// Updates frame level stats related to global motion +static AOM_INLINE void update_gm_stats(AV1_COMP *cpi) { + const GF_GROUP *gf_group = &cpi->gf_group; + FRAME_UPDATE_TYPE update_type = gf_group->update_type[gf_group->index]; + + int is_gm_present = 0; + for (int frame = 0; frame < INTER_REFS_PER_FRAME; frame++) { + if (cpi->common.global_motion[frame].wmtype != IDENTITY) { + is_gm_present = 1; + break; + } + } + + if (cpi->valid_gm_model_found[update_type] == INT32_MAX) { + cpi->valid_gm_model_found[update_type] = is_gm_present; + } else { + cpi->valid_gm_model_found[update_type] |= is_gm_present; + } +} + // Global motion estimation for the current frame is computed. This computation // happens once per frame and the winner motion model parameters are stored in // cm->cur_frame->global_motion. 
void av1_compute_global_motion_facade(AV1_COMP *cpi) { AV1_COMMON *const cm = &cpi->common; + const GF_GROUP *gf_group = &cpi->gf_group; GlobalMotionInfo *const gm_info = &cpi->gm_info; - av1_zero(cpi->td.rd_counts.global_motion_used); - av1_zero(gm_info->params_cost); + // Reset `valid_gm_model_found` at the start of each GOP + if (cpi->oxcf.tool_cfg.enable_global_motion && + cpi->sf.gm_sf.disable_gm_search_based_on_stats && gf_group->index == 0) { + reset_gm_stats(cpi); + } if (cpi->common.current_frame.frame_type == INTER_FRAME && cpi->source && cpi->oxcf.tool_cfg.enable_global_motion && !gm_info->search_done) { @@ -503,8 +760,22 @@ av1_global_motion_estimation_mt(cpi); else global_motion_estimation(cpi); + +#if CONFIG_IMPROVED_GLOBAL_MOTION + // Once we have determined the best motion model for each ref frame, + // choose the base parameters to minimize the total encoding cost + pick_base_gm_params(cpi); +#endif // CONFIG_IMPROVED_GLOBAL_MOTION + + // Check if the current frame has any valid global motion model across its + // reference frames + if (cpi->sf.gm_sf.disable_gm_search_based_on_stats) { + update_gm_stats(cpi); + } + gm_info->search_done = 1; } + memcpy(cm->cur_frame->global_motion, cm->global_motion, sizeof(cm->cur_frame->global_motion)); }
diff --git a/av1/encoder/interp_search.c b/av1/encoder/interp_search.c index d3108aa..c355ffc 100644 --- a/av1/encoder/interp_search.c +++ b/av1/encoder/interp_search.c
@@ -125,6 +125,9 @@ // to MULTITAP_SHARP, and thus is not switchable. assert(x->e_mbd.mi[0]->mode < NEAR_NEARMV_OPTFLOW); #endif // CONFIG_OPTFLOW_REFINEMENT +#if CONFIG_REFINEMV + assert(!x->e_mbd.mi[0]->refinemv_flag); +#endif // CONFIG_REFINEMV const int inter_filter_cost = x->mode_costs.switchable_interp_costs[ctx[0]][interp_fltr]; return SWITCHABLE_INTERP_RATE_FACTOR * inter_filter_cost; @@ -188,7 +191,11 @@ mbmi->interp_fltr = filter_idx; #if CONFIG_OPTFLOW_REFINEMENT const int tmp_rs = - (mbmi->mode >= NEAR_NEARMV_OPTFLOW || use_opfl_refine_all(cm, mbmi)) + (mbmi->mode >= NEAR_NEARMV_OPTFLOW || use_opfl_refine_all(cm, mbmi) +#if CONFIG_REFINEMV + || mbmi->refinemv_flag +#endif // CONFIG_REFINEMV + ) ? 0 : get_switchable_rate(x, mbmi->interp_fltr, switchable_ctx); #else @@ -439,7 +446,11 @@ switchable_ctx[1] = av1_get_pred_context_switchable_interp(xd, 1); #if CONFIG_OPTFLOW_REFINEMENT *switchable_rate = - (mbmi->mode >= NEAR_NEARMV_OPTFLOW || use_opfl_refine_all(cm, mbmi)) + (mbmi->mode >= NEAR_NEARMV_OPTFLOW || use_opfl_refine_all(cm, mbmi) +#if CONFIG_REFINEMV + || mbmi->refinemv_flag +#endif // CONFIG_REFINEMV + ) ? 0 : get_switchable_rate(x, mbmi->interp_fltr, switchable_ctx); #else @@ -475,28 +486,54 @@ } if (!need_search) { #if CONFIG_OPTFLOW_REFINEMENT +#if CONFIG_REFINEMV + assert(mbmi->interp_fltr == + ((mbmi->mode >= NEAR_NEARMV_OPTFLOW || + use_opfl_refine_all(cm, mbmi) || mbmi->refinemv_flag) + ? MULTITAP_SHARP + : EIGHTTAP_REGULAR)); +#else assert(mbmi->interp_fltr == ((mbmi->mode >= NEAR_NEARMV_OPTFLOW || use_opfl_refine_all(cm, mbmi)) ? 
MULTITAP_SHARP : EIGHTTAP_REGULAR)); +#endif // CONFIG_REFINEMV #else assert(mbmi->interp_fltr == EIGHTTAP_REGULAR); #endif // CONFIG_OPTFLOW_REFINEMENT return 0; } if (args->modelled_rd != NULL) { +#if CONFIG_REFINEMV + int use_default_filter = mbmi->refinemv_flag +#if CONFIG_OPTFLOW_REFINEMENT + || mbmi->mode >= NEAR_NEARMV_OPTFLOW || + use_opfl_refine_all(cm, mbmi) +#endif + ; + if (has_second_ref(mbmi) && !use_default_filter) { +#else #if CONFIG_OPTFLOW_REFINEMENT if (has_second_ref(mbmi) && mbmi->mode < NEAR_NEARMV_OPTFLOW && !use_opfl_refine_all(cm, mbmi)) { #else if (has_second_ref(mbmi)) { #endif // CONFIG_OPTFLOW_REFINEMENT +#endif // CONFIG_REFINEMV +#if !CONFIG_SEP_COMP_DRL const int ref_mv_idx = mbmi->ref_mv_idx; +#endif // !CONFIG_SEP_COMP_DRL MV_REFERENCE_FRAME *refs = mbmi->ref_frame; const int mode0 = compound_ref0_mode(mbmi->mode); const int mode1 = compound_ref1_mode(mbmi->mode); +#if CONFIG_SEP_COMP_DRL + const int64_t mrd = + AOMMIN(args->modelled_rd[mode0][get_ref_mv_idx(mbmi, 0)][refs[0]], + args->modelled_rd[mode1][get_ref_mv_idx(mbmi, 1)][refs[1]]); +#else const int64_t mrd = AOMMIN(args->modelled_rd[mode0][ref_mv_idx][refs[0]], args->modelled_rd[mode1][ref_mv_idx][refs[1]]); +#endif // CONFIG_SEP_COMP_DRL if ((*rd >> 1) > mrd && ref_best_rd < INT64_MAX) { return INT64_MAX;
diff --git a/av1/encoder/interp_search.h b/av1/encoder/interp_search.h index 8dfbe2b..d5c0a77 100644 --- a/av1/encoder/interp_search.h +++ b/av1/encoder/interp_search.h
@@ -153,6 +153,17 @@ * Index of the last set of saved stats in the interp_filter_stats array. */ int interp_filter_stats_idx; +#if CONFIG_SKIP_ME_FOR_OPFL_MODES + /*! + * Saved MV information for opfl off case. + */ + int_mv (*comp_newmv)[4][NUM_MV_PRECISIONS][2]; + /*! + * Valid status of saved MV information for opfl off case. + */ + int (*comp_newmv_valid)[4][NUM_MV_PRECISIONS]; +#endif // CONFIG_SKIP_ME_FOR_OPFL_MODES + } HandleInterModeArgs; /*!\cond */
diff --git a/av1/encoder/intra_mode_search.c b/av1/encoder/intra_mode_search.c index b2d10b5..d31870a 100644 --- a/av1/encoder/intra_mode_search.c +++ b/av1/encoder/intra_mode_search.c
@@ -10,6 +10,7 @@ * aomedia.org/license/patent-license/. */ +#include "av1/common/av1_common_int.h" #include "av1/common/reconintra.h" #include "av1/encoder/intra_mode_search.h" @@ -790,12 +791,19 @@ set_mv_precision(mbmi, mbmi->max_mv_precision); #endif +#if CONFIG_REFINEMV + mbmi->refinemv_flag = 0; +#endif // CONFIG_REFINEMV + #if CONFIG_EXTENDED_WARP_PREDICTION mbmi->motion_mode = SIMPLE_TRANSLATION; #endif #if CONFIG_WARP_REF_LIST mbmi->warp_ref_idx = 0; mbmi->max_num_warp_candidates = 0; +#if CONFIG_CWG_D067_IMPROVED_WARP + mbmi->warpmv_with_mvd_flag = 0; +#endif // CONFIG_CWG_D067_IMPROVED_WARP #endif // CONFIG_WARP_REF_LIST RD_STATS rd_stats_y; av1_invalid_rd_stats(&rd_stats_y); @@ -856,10 +864,15 @@ if (skippable) { rate2 -= rd_stats_y.rate; if (num_planes > 1) rate2 -= intra_search_state->rate_uv_tokenonly; +#if !CONFIG_SKIP_TXFM_OPT rate2 += mode_costs->skip_txfm_cost[av1_get_skip_txfm_context(xd)][1]; - } else { +#endif // !CONFIG_SKIP_TXFM_OPT + } +#if !CONFIG_SKIP_TXFM_OPT + else { rate2 += mode_costs->skip_txfm_cost[av1_get_skip_txfm_context(xd)][0]; } +#endif // !CONFIG_SKIP_TXFM_OPT this_rd = RDCOST(x->rdmult, rate2, distortion2); this_rd_cost->rate = rate2; this_rd_cost->dist = distortion2; @@ -948,6 +961,9 @@ #if CONFIG_FLEX_MVRES set_mv_precision(mbmi, mbmi->max_mv_precision); #endif +#if CONFIG_REFINEMV + mbmi->refinemv_flag = 0; +#endif // CONFIG_REFINEMV mbmi->motion_mode = SIMPLE_TRANSLATION; RD_STATS rd_stats_y_fi; @@ -1026,10 +1042,20 @@ assert(mbmi->ref_frame[0] == INTRA_FRAME); const PREDICTION_MODE mode = mbmi->mode; const ModeCosts *mode_costs = &x->mode_costs; + +#if CONFIG_EXT_DIR + int mrl_ctx = get_mrl_index_ctx(xd->neighbors[0], xd->neighbors[1]); + int mrl_idx_cost = + (av1_is_directional_mode(mbmi->mode) && + cpi->common.seq_params.enable_mrls) + ? x->mode_costs.mrl_index_cost[mrl_ctx][mbmi->mrl_index] + : 0; +#else int mrl_idx_cost = (av1_is_directional_mode(mbmi->mode) && cpi->common.seq_params.enable_mrls) ? 
x->mode_costs.mrl_index_cost[mbmi->mrl_index] : 0; +#endif // CONFIG_EXT_DIR #if CONFIG_AIMC int mode_cost = 0; const int context = get_y_mode_idx_ctx(xd); @@ -1055,12 +1081,16 @@ const int intra_cost_penalty = av1_get_intra_cost_penalty( cm->quant_params.base_qindex, cm->quant_params.y_dc_delta_q, cm->seq_params.base_y_dc_delta_q, cm->seq_params.bit_depth); +#if !CONFIG_SKIP_TXFM_OPT const int skip_ctx = av1_get_skip_txfm_context(xd); +#endif // !CONFIG_SKIP_TXFM_OPT int known_rate = mode_cost; if (mode != DC_PRED && mode != PAETH_PRED) known_rate += intra_cost_penalty; +#if !CONFIG_SKIP_TXFM_OPT known_rate += AOMMIN(mode_costs->skip_txfm_cost[skip_ctx][0], mode_costs->skip_txfm_cost[skip_ctx][1]); +#endif // !CONFIG_SKIP_TXFM_OPT const int64_t known_rd = RDCOST(x->rdmult, known_rate, 0); if (known_rd > best_rd) { intra_search_state->skip_intra_modes = 1; @@ -1140,10 +1170,14 @@ #if !CONFIG_AIMC if (intra_search_state->rate_uv_intra == INT_MAX) { #endif // !CONFIG_AIMC - // If no good uv-predictor had been found, search for it. - const int rate_y = rd_stats_y->skip_txfm - ? mode_costs->skip_txfm_cost[skip_ctx][1] - : rd_stats_y->rate; + // If no good uv-predictor had been found, search for it. +#if CONFIG_SKIP_TXFM_OPT + const int rate_y = rd_stats_y->rate; +#else + const int rate_y = rd_stats_y->skip_txfm + ? mode_costs->skip_txfm_cost[skip_ctx][1] + : rd_stats_y->rate; +#endif // CONFIG_SKIP_TXFM_OPT const int64_t rdy = RDCOST(x->rdmult, rate_y + mode_cost_y, rd_stats_y->dist); if (best_rd < (INT64_MAX / 2) && rdy > (best_rd + (best_rd >> 2))) { @@ -1224,8 +1258,10 @@ // Intra block is always coded as non-skip rd_stats->skip_txfm = 0; rd_stats->dist = rd_stats_y->dist + rd_stats_uv->dist; +#if !CONFIG_SKIP_TXFM_OPT // Add in the cost of the no skip flag. rd_stats->rate += mode_costs->skip_txfm_cost[skip_ctx][0]; +#endif // !CONFIG_SKIP_TXFM_OPT // Calculate the final RD estimate for this mode. 
const int64_t this_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist); // Keep record of best intra rd @@ -1349,13 +1385,23 @@ continue; if (!is_directional_mode && mrl_idx) continue; +#if !CONFIG_EXT_DIR if (best_mbmi->mrl_index == 0 && mbmi->mrl_index > 1 && av1_is_directional_mode(best_mbmi->mode) == 0) { continue; } +#endif // !CONFIG_EXT_DIR +#if CONFIG_EXT_DIR + int mrl_ctx = get_mrl_index_ctx(xd->neighbors[0], xd->neighbors[1]); + int mrl_idx_cost = + (is_directional_mode && enable_mrls_flag) + ? x->mode_costs.mrl_index_cost[mrl_ctx][mbmi->mrl_index] + : 0; +#else int mrl_idx_cost = (is_directional_mode && enable_mrls_flag) ? x->mode_costs.mrl_index_cost[mbmi->mrl_index] : 0; +#endif // CONFIG_EXT_DIR #if CONFIG_AIMC mode_costs += mrl_idx_cost; #endif // CONFIG_AIMC @@ -1548,13 +1594,23 @@ continue; if (!is_directional_mode && mrl_idx) continue; +#if !CONFIG_EXT_DIR if (best_mbmi.mrl_index == 0 && mbmi->mrl_index > 1 && av1_is_directional_mode(best_mbmi.mode) == 0) { continue; } +#endif // !CONFIG_EXT_DIR +#if CONFIG_EXT_DIR + int mrl_ctx = get_mrl_index_ctx(xd->neighbors[0], xd->neighbors[1]); + int mrl_idx_cost = + (is_directional_mode && enable_mrls_flag) + ? x->mode_costs.mrl_index_cost[mrl_ctx][mbmi->mrl_index] + : 0; +#else int mrl_idx_cost = (is_directional_mode && enable_mrls_flag) ? x->mode_costs.mrl_index_cost[mbmi->mrl_index] : 0; +#endif // CONFIG_EXT_DIR #if CONFIG_AIMC mode_costs += mrl_idx_cost; #endif // CONFIG_AIMC
diff --git a/av1/encoder/mcomp.c b/av1/encoder/mcomp.c index 572864a..5280a23 100644 --- a/av1/encoder/mcomp.c +++ b/av1/encoder/mcomp.c
@@ -42,7 +42,7 @@ #if CONFIG_FLEX_MVRES , MvSubpelPrecision pb_mv_precision -#if CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT , const int is_ibc_cost #endif @@ -60,7 +60,7 @@ mv_cost_params->is_adaptive_mvd = is_adaptive_mvd; #endif // CONFIG_ADAPTIVE_MVD -#if CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT mv_cost_params->is_ibc_cost = is_ibc_cost; #endif @@ -119,7 +119,7 @@ const MACROBLOCK *x, BLOCK_SIZE bsize, const MV *ref_mv, #if CONFIG_FLEX_MVRES const MvSubpelPrecision pb_mv_precision, -#if CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT const int is_ibc_cost, #endif #endif @@ -136,6 +136,10 @@ enable_adaptive_mvd_resolution(&cpi->common, mbmi); #endif // CONFIG_ADAPTIVE_MVD +#if CONFIG_CWP + ms_params->xd = xd; +#endif // CONFIG_CWP + // High level params ms_params->bsize = bsize; ms_params->vfp = &cpi->fn_ptr[bsize]; @@ -143,12 +147,16 @@ init_ms_buffers(&ms_params->ms_buffers, x); SEARCH_METHODS search_method = mv_sf->search_method; + const int min_dim = AOMMIN(block_size_wide[bsize], block_size_high[bsize]); + const int max_dim = AOMMAX(block_size_wide[bsize], block_size_high[bsize]); if (mv_sf->use_bsize_dependent_search_method) { - const int min_dim = AOMMIN(block_size_wide[bsize], block_size_high[bsize]); if (min_dim >= 32) { search_method = get_faster_search_method(search_method); } } + if (max_dim >= 256) { + search_method = get_faster_search_method(search_method); + } #if CONFIG_FLEX_MVRES // MV search of flex MV precision is supported only for NSTEP or DIAMOND // search @@ -209,7 +217,7 @@ #endif // CONFIG_ADAPTIVE_MVD #if CONFIG_FLEX_MVRES ref_mv, pb_mv_precision -#if CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT , is_ibc_cost #endif @@ -237,7 +245,7 @@ ms_params->allow_hp = cm->features.allow_high_precision_mv; #endif -#if CONFIG_BVCOST_UPDATE && CONFIG_FLEX_MVRES +#if CONFIG_IBC_BV_IMPROVEMENT && CONFIG_FLEX_MVRES const int is_ibc_cost = 0; #endif @@ -287,7 +295,7 @@ #if CONFIG_FLEX_MVRES ref_mv, pb_mv_precision -#if 
CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT , is_ibc_cost #endif @@ -303,6 +311,10 @@ ms_params->var_params.subpel_search_type = cpi->sf.mv_sf.use_accurate_subpel_search; #endif + if (AOMMAX(block_size_wide[bsize], block_size_high[bsize]) >= 256) { + ms_params->var_params.subpel_search_type = + AOMMIN(ms_params->var_params.subpel_search_type, USE_2_TAPS); + } ms_params->var_params.w = block_size_wide[bsize]; ms_params->var_params.h = block_size_high[bsize]; @@ -471,7 +483,7 @@ #if CONFIG_ADAPTIVE_MVD const int is_adaptive_mvd, #endif -#if CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT const int is_ibc_cost, #endif const MvCosts *mv_costs, int weight, int round_bits) { @@ -480,7 +492,7 @@ const int *mvjcost = is_adaptive_mvd ? mv_costs->amvd_nmv_joint_cost -#if CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT : (is_ibc_cost ? mv_costs->dv_joint_cost : mv_costs->nmv_joint_cost); #else : mv_costs->nmv_joint_cost; @@ -488,7 +500,7 @@ const int *const *mvcost = is_adaptive_mvd ? CONVERT_TO_CONST_MVCOST(mv_costs->amvd_nmv_cost) -#if CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT : (is_ibc_cost ? CONVERT_TO_CONST_MVCOST(mv_costs->dv_nmv_cost) : CONVERT_TO_CONST_MVCOST( mv_costs->nmv_costs[pb_mv_precision])); @@ -497,7 +509,7 @@ #endif #else -#if CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT const int *mvjcost = (is_ibc_cost ? 
mv_costs->dv_joint_cost : mv_costs->nmv_joint_cost); const int *const *mvcost = @@ -556,7 +568,7 @@ const int is_adaptive_mvd #endif ) { -#if CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT // For ibc block this function should not be called const int is_ibc_cost = 0; #endif @@ -565,7 +577,7 @@ #if CONFIG_ADAPTIVE_MVD is_adaptive_mvd, #endif -#if CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT is_ibc_cost, #endif mv_costs, weight, 7); @@ -618,7 +630,7 @@ #if CONFIG_ADAPTIVE_MVD mv_cost_params->is_adaptive_mvd, #endif -#if CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT mv_cost_params->is_ibc_cost, #endif mv_costs, mv_costs->errorperbit, @@ -677,7 +689,7 @@ const MvCosts *mv_costs = mv_cost_params->mv_costs; -#if CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT const int *mvjcost = mv_cost_params->is_ibc_cost ? mv_costs->dv_joint_cost @@ -1062,7 +1074,7 @@ ((col + range) <= mv_limits->col_max); } -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT int av1_get_mv_err_cost(const MV *mv, const MV_COST_PARAMS *mv_cost_params) { #if CONFIG_FLEX_MVRES return mv_err_cost(*mv, mv_cost_params); @@ -1073,7 +1085,7 @@ mv_cost_params->mv_cost_type); #endif } -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT static INLINE int get_mvpred_var_cost( const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, const FULLPEL_MV *this_mv) { @@ -1170,6 +1182,17 @@ return bestsme; } +#if CONFIG_CWP +// Set weighting factor for two reference frames +static INLINE void set_cmp_weight(const MB_MODE_INFO *mi, int invert_mask, + DIST_WTD_COMP_PARAMS *jcp_param) { + int weight = get_cwp_idx(mi); + weight = invert_mask ? 
(1 << CWP_WEIGHT_BITS) - weight : weight; + jcp_param->fwd_offset = weight; + jcp_param->bck_offset = (1 << CWP_WEIGHT_BITS) - weight; +} +#endif // CONFIG_CWP + static INLINE int get_mvpred_compound_sad( const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, const struct buf_2d *const src, const uint16_t *const ref_address, @@ -1187,6 +1210,16 @@ return vfp->msdf(src_buf, src_stride, ref_address, ref_stride, second_pred, mask, mask_stride, invert_mask); } else if (second_pred) { +#if CONFIG_CWP + const MB_MODE_INFO *mi = ms_params->xd->mi[0]; + if (get_cwp_idx(mi) != CWP_EQUAL) { + DIST_WTD_COMP_PARAMS jcp_param; + set_cmp_weight(mi, invert_mask, &jcp_param); + + return vfp->jsdaf(src_buf, src_stride, ref_address, ref_stride, + second_pred, &jcp_param); + } +#endif // CONFIG_CWP return vfp->sdaf(src_buf, src_stride, ref_address, ref_stride, second_pred); } else { return ms_params->sdf(src_buf, src_stride, ref_address, ref_stride); @@ -2804,7 +2837,28 @@ return var; } -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_CWP +// Get the cost for compound weighted prediction +int av1_get_cwp_idx_cost(int8_t cwp_idx, const AV1_COMMON *const cm, + const MACROBLOCK *x) { + assert(cwp_idx >= CWP_MIN && cwp_idx <= CWP_MAX); + const MACROBLOCKD *xd = &x->e_mbd; + MB_MODE_INFO *mi = xd->mi[0]; + int cost = 0; + int bit_cnt = 0; + const int ctx = 0; + + const int8_t final_idx = get_cwp_coding_idx(cwp_idx, 1, cm, mi); + for (int idx = 0; idx < MAX_CWP_NUM - 1; ++idx) { + cost += x->mode_costs.cwp_idx_cost[ctx][bit_cnt][final_idx != idx]; + if (final_idx == idx) return cost; + ++bit_cnt; + } + return cost; +} +#endif // CONFIG_CWP + +#if CONFIG_IBC_BV_IMPROVEMENT int av1_get_ref_mvpred_var_cost(const AV1_COMP *cpi, const MACROBLOCKD *xd, const FULLPEL_MOTION_SEARCH_PARAMS *ms_params) { const BLOCK_SIZE bsize = ms_params->bsize; @@ -2946,7 +3000,7 @@ } return INT_MAX; } -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT int av1_intrabc_hash_search(const AV1_COMP *cpi, const 
MACROBLOCKD *xd, const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, @@ -2973,13 +3027,13 @@ uint32_t hash_value1, hash_value2; int best_hash_cost = INT_MAX; -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT int best_intrabc_mode = 0; int best_intrabc_drl_idx = 0; int_mv best_ref_bv; best_ref_bv.as_mv = *ms_params->mv_cost_params.ref_mv; MB_MODE_INFO *mbmi = xd->mi[0]; -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT // for the hashMap hash_table *ref_frame_hash = &intrabc_hash_info->intrabc_hash_table; @@ -3014,7 +3068,7 @@ #endif )) continue; -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT int refCost = get_mvpred_var_cost(ms_params, &hash_mv); int cur_intrabc_mode = 0; int cur_intrabc_drl_idx = 0; @@ -3035,24 +3089,24 @@ } #else const int refCost = get_mvpred_var_cost(ms_params, &hash_mv); -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT if (refCost < best_hash_cost) { best_hash_cost = refCost; *best_mv = hash_mv; -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT best_intrabc_mode = cur_intrabc_mode; best_intrabc_drl_idx = cur_intrabc_drl_idx; best_ref_bv = cur_ref_bv; -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT } } } -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT mbmi->ref_bv = best_ref_bv; mbmi->intrabc_drl_idx = best_intrabc_drl_idx; mbmi->intrabc_mode = best_intrabc_mode; -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT return best_hash_cost; } @@ -3429,9 +3483,22 @@ subpel_y_q3, ref, ref_stride, mask, mask_stride, invert_mask, xd->bd, subpel_search_type); } else { - aom_highbd_comp_avg_upsampled_pred( - xd, cm, mi_row, mi_col, this_mv, pred, second_pred, w, h, subpel_x_q3, - subpel_y_q3, ref, ref_stride, xd->bd, subpel_search_type); +#if CONFIG_CWP + if (get_cwp_idx(xd->mi[0]) != CWP_EQUAL) { + DIST_WTD_COMP_PARAMS jcp_param; + set_cmp_weight(xd->mi[0], invert_mask, &jcp_param); + + aom_highbd_dist_wtd_comp_avg_upsampled_pred( + 
xd, cm, mi_row, mi_col, this_mv, pred, second_pred, w, h, + subpel_x_q3, subpel_y_q3, ref, ref_stride, xd->bd, &jcp_param, + subpel_search_type); + } else +#endif // CONFIG_CWP + + aom_highbd_comp_avg_upsampled_pred(xd, cm, mi_row, mi_col, this_mv, + pred, second_pred, w, h, subpel_x_q3, + subpel_y_q3, ref, ref_stride, xd->bd, + subpel_search_type); } } else { aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, this_mv, pred, w, h, @@ -3914,18 +3981,11 @@ #endif // CONFIG_C071_SUBBLK_WARPMV lower_mv_precision(&ref_mv, mbmi->pb_mv_precision); // We are not signaling other_mv. So frame level precision should be okay. -#if !CONFIG_C071_SUBBLK_WARPMV - lower_mv_precision(other_mv, cm->features.fr_mv_precision); -#endif // !CONFIG_C071_SUBBLK_WARPMV #else -#if !CONFIG_C071_SUBBLK_WARPMV - lower_mv_precision(other_mv, allow_hp, - cm->features.cur_frame_force_integer_mv); -#endif // !CONFIG_C071_SUBBLK_WARPMV #endif - // How many steps to take. A round of 0 means fullpel search only, 1 means - // half-pel, and so on. + // How many steps to take. A round of 0 means fullpel search only, 1 means + // half-pel, and so on. #if CONFIG_FLEX_MVRES const int round = (mbmi->pb_mv_precision >= MV_PRECISION_ONE_PEL) ? AOMMIN(FULL_PEL - forced_stop, @@ -4163,10 +4223,7 @@ if (mbmi->pb_mv_precision < MV_PRECISION_HALF_PEL) #endif lower_mv_precision(&ref_mv, mbmi->pb_mv_precision); - // We are not signaling other_mv. So frame level precision should be okay. -#if !CONFIG_C071_SUBBLK_WARPMV - lower_mv_precision(other_mv, cm->features.fr_mv_precision); -#endif // CONFIG_C071_SUBBLK_WARPMV + // We are not signaling other_mv. So frame level precision should be okay. 
unsigned int besterr = INT_MAX; @@ -4392,23 +4449,10 @@ // perform prediction for second MV const BLOCK_SIZE bsize = mbmi->sb_type[PLANE_TYPE_Y]; -#if CONFIG_FLEX_MVRES #if BUGFIX_AMVD_AMVR set_amvd_mv_precision(mbmi, mbmi->max_mv_precision); -#if !CONFIG_C071_SUBBLK_WARPMV - lower_mv_precision(other_mv, cm->features.fr_mv_precision); -#endif // !CONFIG_C071_SUBBLK_WARPMV #else assert(mbmi->pb_mv_precision == mbmi->max_mv_precision); -#if !CONFIG_C071_SUBBLK_WARPMV - lower_mv_precision(other_mv, mbmi->pb_mv_precision); -#endif // !CONFIG_C071_SUBBLK_WARPMV -#endif -#else -#if !CONFIG_C071_SUBBLK_WARPMV - lower_mv_precision(other_mv, allow_hp, - cm->features.cur_frame_force_integer_mv); -#endif // !CONFIG_C071_SUBBLK_WARPMV #endif // How many steps to take. A round of 0 means fullpel search only, 1 means @@ -5581,7 +5625,15 @@ int mi_row = xd->mi_row; int mi_col = xd->mi_col; - bool can_refine_mv = (mbmi->mode == NEWMV); +#if CONFIG_CWG_D067_IMPROVED_WARP + assert(IMPLIES(mbmi->warpmv_with_mvd_flag, mbmi->mode == WARPMV)); +#endif // CONFIG_CWG_D067_IMPROVED_WARP + + bool can_refine_mv = (mbmi->mode == NEWMV +#if CONFIG_CWG_D067_IMPROVED_WARP + || (mbmi->mode == WARPMV && mbmi->warpmv_with_mvd_flag) +#endif // CONFIG_CWG_D067_IMPROVED_WARP + ); const SubpelMvLimits *mv_limits = &ms_params->mv_limits; // get the base parameters
diff --git a/av1/encoder/mcomp.h b/av1/encoder/mcomp.h index e6571b2..2e57f5e 100644 --- a/av1/encoder/mcomp.h +++ b/av1/encoder/mcomp.h
@@ -92,7 +92,7 @@ #if CONFIG_ADAPTIVE_MVD int is_adaptive_mvd; #endif // CONFIG_ADAPTIVE_MVD -#if CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT int is_ibc_cost; #endif #endif @@ -217,10 +217,10 @@ int mi_row; int mi_col; #endif // CONFIG_IBC_SR_EXT -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT MACROBLOCK *x; int ref_bv_cnt; -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT MSBuffers ms_buffers; @@ -261,7 +261,7 @@ const MACROBLOCK *x, BLOCK_SIZE bsize, const MV *ref_mv, #if CONFIG_FLEX_MVRES const MvSubpelPrecision pb_mv_precision, -#if CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT const int is_ibc_cost, #endif #endif @@ -577,9 +577,6 @@ int row_max = av1_lower_mv_limit(GET_MV_SUBPEL(mv_limits->row_max), sub_pel_prec_shift); - const int mv_low = av1_lower_mv_limit(MV_LOW + 1, sub_pel_prec_shift); - const int mv_upp = av1_lower_mv_limit(MV_UPP - 1, sub_pel_prec_shift); - int minc = AOMMAX(col_min, low_prec_ref_mv.col - max_mv); int maxc = AOMMIN(col_max, low_prec_ref_mv.col + max_mv); int minr = AOMMAX(row_min, low_prec_ref_mv.row - max_mv); @@ -588,10 +585,10 @@ maxc = AOMMAX(minc, maxc); maxr = AOMMAX(minr, maxr); - subpel_limits->col_min = AOMMAX(mv_low + (1 << sub_pel_prec_shift), minc); - subpel_limits->col_max = AOMMIN(mv_upp - (1 << sub_pel_prec_shift), maxc); - subpel_limits->row_min = AOMMAX(mv_low + (1 << sub_pel_prec_shift), minr); - subpel_limits->row_max = AOMMIN(mv_upp - (1 << sub_pel_prec_shift), maxr); + subpel_limits->col_min = AOMMAX(MV_LOW + (1 << sub_pel_prec_shift), minc); + subpel_limits->col_max = AOMMIN(MV_UPP - (1 << sub_pel_prec_shift), maxc); + subpel_limits->row_min = AOMMAX(MV_LOW + (1 << sub_pel_prec_shift), minr); + subpel_limits->row_max = AOMMIN(MV_UPP - (1 << sub_pel_prec_shift), maxr); #else const int max_mv = GET_MV_SUBPEL(MAX_FULL_PEL_VAL); @@ -628,7 +625,13 @@ (mv.row >= mv_limits->row_min) && (mv.row <= mv_limits->row_max); } -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_CWP +// Returns 
the cost for signaling the index of compound weighted prediction +int av1_get_cwp_idx_cost(int8_t cwp_idx, const AV1_COMMON *const cm, + const MACROBLOCK *x); +#endif // CONFIG_CWP + +#if CONFIG_IBC_BV_IMPROVEMENT // Returns the cost of using the current mv during the motion search int av1_get_mv_err_cost(const MV *mv, const MV_COST_PARAMS *mv_cost_params); @@ -654,7 +657,7 @@ int av1_get_ref_mvpred_var_cost(const struct AV1_COMP *cpi, const MACROBLOCKD *xd, const FULLPEL_MOTION_SEARCH_PARAMS *ms_params); -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT #ifdef __cplusplus } // extern "C"
diff --git a/av1/encoder/motion_search_facade.c b/av1/encoder/motion_search_facade.c index 9b72201..b6481a4 100644 --- a/av1/encoder/motion_search_facade.c +++ b/av1/encoder/motion_search_facade.c
@@ -50,12 +50,23 @@ void av1_single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int ref_idx, int *rate_mv, int search_range, inter_mode_info *mode_info, - int_mv *best_mv) { + int_mv *best_mv +#if CONFIG_WARPMV && CONFIG_CWG_D067_IMPROVED_WARP + , + const int_mv *warp_ref_mv +#endif // CONFIG_CWG_D067_IMPROVED_WARP +) { MACROBLOCKD *xd = &x->e_mbd; const AV1_COMMON *cm = &cpi->common; const MotionVectorSearchParams *mv_search_params = &cpi->mv_search_params; const int num_planes = av1_num_planes(cm); MB_MODE_INFO *mbmi = xd->mi[0]; +#if CONFIG_WARPMV && CONFIG_CWG_D067_IMPROVED_WARP + MOTION_MODE backup_motion_mode = mbmi->motion_mode; + // Make the motion mode transalational, so that transalation MS can be used. + if (mbmi->mode == WARPMV) mbmi->motion_mode = SIMPLE_TRANSLATION; +#endif // CONFIG_WARPMV && CONFIG_CWG_D067_IMPROVED_WARP + struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0, 0, 0, 0 } }; int bestsme = INT_MAX; const int ref = mbmi->ref_frame[ref_idx]; @@ -97,7 +108,11 @@ } #if CONFIG_FLEX_MVRES - MV ref_mv_low_prec = av1_get_ref_mv(x, ref_idx).as_mv; + MV ref_mv_low_prec = +#if CONFIG_WARPMV && CONFIG_CWG_D067_IMPROVED_WARP + (mbmi->mode == WARPMV) ? 
warp_ref_mv->as_mv : +#endif // CONFIG_WARPMV && CONFIG_CWG_D067_IMPROVED_WARP + av1_get_ref_mv(x, ref_idx).as_mv; #if CONFIG_C071_SUBBLK_WARPMV MV sub_mv_offset = { 0, 0 }; get_phase_from_mv(ref_mv_low_prec, &sub_mv_offset, mbmi->pb_mv_precision); @@ -229,7 +244,7 @@ mv_search_params->search_site_cfg[SS_CFG_SRC]; #if CONFIG_FLEX_MVRES const MvSubpelPrecision pb_mv_precision = mbmi->pb_mv_precision; -#if CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT const int is_ibc_cost = 0; #endif #endif @@ -238,7 +253,7 @@ #if CONFIG_FLEX_MVRES av1_make_default_fullpel_ms_params(&full_ms_params, cpi, x, bsize, &ref_mv, pb_mv_precision, -#if CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT is_ibc_cost, #endif src_search_sites, fine_search_interval); @@ -305,6 +320,9 @@ // for the other ref_mv. if (cpi->sf.inter_sf.skip_repeated_full_newmv && mbmi->motion_mode == SIMPLE_TRANSLATION && +#if CONFIG_CWG_D067_IMPROVED_WARP + mbmi->mode != WARPMV && +#endif // CONFIG_CWG_D067_IMPROVED_WARP best_mv->as_int != INVALID_MV) { int_mv this_mv; this_mv.as_mv = get_mv_from_fullmv(&best_mv->as_fullmv); @@ -320,7 +338,11 @@ this_mv.as_mv.col += sub_mv_offset.col; } #endif // CONFIG_C071_SUBBLK_WARPMV +#if CONFIG_SEP_COMP_DRL + const int ref_mv_idx = av1_ref_mv_idx_type(mbmi, mbmi->ref_mv_idx); +#else const int ref_mv_idx = mbmi->ref_mv_idx; +#endif // CONFIG_SEP_COMP_DRL #if CONFIG_FLEX_MVRES const int this_mv_rate = av1_mv_bit_cost( &this_mv.as_mv, &ref_mv, pb_mv_precision, mv_costs, MV_COST_WEIGHT @@ -477,6 +499,10 @@ assert(is_this_mv_precision_compliant(best_mv->as_mv, mbmi->pb_mv_precision)); #endif // !CONFIG_C071_SUBBLK_WARPMV #endif +#if CONFIG_WARPMV && CONFIG_CWG_D067_IMPROVED_WARP + // Restore the motion mode + if (mbmi->mode == WARPMV) mbmi->motion_mode = backup_motion_mode; +#endif // CONFIG_WARPMV && CONFIG_CWG_D067_IMPROVED_WARP } #if CONFIG_FLEX_MVRES @@ -531,13 +557,13 @@ lower_mv_precision(&ref_mv_low_prec, mbmi->pb_mv_precision); const MV ref_mv = ref_mv_low_prec; 
-#if CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT const int is_ibc_cost = 0; #endif av1_make_default_fullpel_ms_params(&full_ms_params, cpi, x, bsize, &ref_mv, pb_mv_precision, -#if CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT is_ibc_cost, #endif @@ -579,7 +605,11 @@ this_mv.as_mv.col += sub_mv_offset.col; } #endif // CONFIG_C071_SUBBLK_WARPMV +#if CONFIG_SEP_COMP_DRL + const int ref_mv_idx = av1_ref_mv_idx_type(mbmi, mbmi->ref_mv_idx); +#else const int ref_mv_idx = mbmi->ref_mv_idx; +#endif // CONFIG_SEP_COMP_DRL const int this_mv_rate = av1_mv_bit_cost( &this_mv.as_mv, &ref_mv, pb_mv_precision, mv_costs, MV_COST_WEIGHT #if CONFIG_ADAPTIVE_MVD @@ -763,7 +793,7 @@ // Do full-pixel compound motion search on the current reference frame. if (id) xd->plane[plane].pre[0] = ref_yv12[id]; -#if CONFIG_FLEX_MVRES && CONFIG_BVCOST_UPDATE +#if CONFIG_FLEX_MVRES && CONFIG_IBC_BV_IMPROVEMENT const int is_ibc_cost = 0; #endif @@ -773,7 +803,7 @@ &ref_mv[id].as_mv, #if CONFIG_FLEX_MVRES pb_mv_precision, -#if CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT is_ibc_cost, #endif #endif @@ -1020,7 +1050,7 @@ const MvCosts *mv_costs = &x->mv_costs; #if CONFIG_FLEX_MVRES MvSubpelPrecision pb_mv_precision = mbmi->pb_mv_precision; -#if CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT const int is_ibc_cost = 0; #endif #endif @@ -1178,7 +1208,7 @@ &ref_mv.as_mv, #if CONFIG_FLEX_MVRES pb_mv_precision, -#if CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT is_ibc_cost, #endif #endif @@ -1485,6 +1515,9 @@ mbmi->use_intrabc[1] = 0; #endif // CONFIG_IBC_SR_EXT +#if CONFIG_CWP + mbmi->cwp_idx = CWP_EQUAL; +#endif // CONFIG_CWP #if CONFIG_FLEX_MVRES set_default_max_mv_precision(mbmi, xd->sbi->sb_mv_precision); set_mv_precision(mbmi, mbmi->max_mv_precision); @@ -1500,6 +1533,10 @@ mbmi->bawp_flag = 0; #endif +#if CONFIG_REFINEMV + mbmi->refinemv_flag = 0; +#endif // CONFIG_REFINEMV + const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, ref); const YV12_BUFFER_CONFIG 
*scaled_ref_frame = av1_get_scaled_ref_frame(cpi, ref); @@ -1530,7 +1567,7 @@ const int fine_search_interval = use_fine_search_interval(cpi); #if CONFIG_FLEX_MVRES const MvSubpelPrecision pb_mv_precision = mbmi->pb_mv_precision; -#if CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT const int is_ibc_cost = 0; #endif #endif @@ -1539,7 +1576,7 @@ av1_make_default_fullpel_ms_params(&full_ms_params, cpi, x, bsize, &ref_mv, #if CONFIG_FLEX_MVRES pb_mv_precision, -#if CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT is_ibc_cost, #endif #endif @@ -1677,7 +1714,7 @@ #if CONFIG_FLEX_MVRES const MvSubpelPrecision pb_mv_precision = mbmi->pb_mv_precision; -#if CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT const int is_ibc_cost = 0; #endif #endif @@ -1686,7 +1723,7 @@ av1_make_default_fullpel_ms_params(&full_ms_params, cpi, x, bsize, &ref_mv, #if CONFIG_FLEX_MVRES pb_mv_precision, -#if CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT is_ibc_cost, #endif #endif
diff --git a/av1/encoder/motion_search_facade.h b/av1/encoder/motion_search_facade.h index e116997..066c438 100644 --- a/av1/encoder/motion_search_facade.h +++ b/av1/encoder/motion_search_facade.h
@@ -35,7 +35,13 @@ void av1_single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int ref_idx, int *rate_mv, int search_range, inter_mode_info *mode_info, - int_mv *best_mv); + int_mv *best_mv +#if CONFIG_WARPMV && CONFIG_CWG_D067_IMPROVED_WARP + , + const int_mv *warp_ref_mv +#endif // CONFIG_WARPMV && CONFIG_CWG_D067_IMPROVED_WARP +); + #if CONFIG_FLEX_MVRES void av1_single_motion_search_high_precision(const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
diff --git a/av1/encoder/mv_prec.c b/av1/encoder/mv_prec.c index 2ed092d..2b0f175 100644 --- a/av1/encoder/mv_prec.c +++ b/av1/encoder/mv_prec.c
@@ -28,23 +28,42 @@ static AOM_INLINE int_mv get_ref_mv_for_mv_stats( const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT_FRAME *mbmi_ext_frame, int ref_idx) { +#if CONFIG_SEP_COMP_DRL + const int ref_mv_idx = get_ref_mv_idx(mbmi, ref_idx); +#else const int ref_mv_idx = mbmi->ref_mv_idx; +#endif // CONFIG_SEP_COMP_DRL assert(IMPLIES(have_nearmv_newmv_in_inter_mode(mbmi->mode), has_second_ref(mbmi))); const MV_REFERENCE_FRAME *ref_frames = mbmi->ref_frame; const int8_t ref_frame_type = av1_ref_frame_type(ref_frames); +#if CONFIG_SEP_COMP_DRL + const CANDIDATE_MV *curr_ref_mv_stack = + has_second_drl(mbmi) ? mbmi_ext_frame->ref_mv_stack[ref_idx] + : mbmi_ext_frame->ref_mv_stack[0]; +#else const CANDIDATE_MV *curr_ref_mv_stack = mbmi_ext_frame->ref_mv_stack; +#endif // CONFIG_SEP_COMP_DRL if (is_inter_ref_frame(ref_frames[1])) { assert(ref_idx == 0 || ref_idx == 1); +#if CONFIG_SEP_COMP_DRL + return ref_idx && !has_second_drl(mbmi) + ? curr_ref_mv_stack[ref_mv_idx].comp_mv +#else return ref_idx ? 
curr_ref_mv_stack[ref_mv_idx].comp_mv - : curr_ref_mv_stack[ref_mv_idx].this_mv; +#endif // CONFIG_SEP_COMP_DRL + : curr_ref_mv_stack[ref_mv_idx].this_mv; } assert(ref_idx == 0); #if CONFIG_TIP +#if CONFIG_SEP_COMP_DRL + if (ref_mv_idx < mbmi_ext_frame->ref_mv_count[0]) { +#else if (ref_mv_idx < mbmi_ext_frame->ref_mv_count) { +#endif // CONFIG_SEP_COMP_DRL return curr_ref_mv_stack[ref_mv_idx].this_mv; } else if (is_tip_ref_frame(ref_frame_type)) { int_mv zero_mv; @@ -629,10 +648,14 @@ const int hbs_w = mi_size_wide[bsize] / 2; const int hbs_h = mi_size_high[bsize] / 2; -#if !CONFIG_H_PARTITION +#if CONFIG_UNEVEN_4WAY + const int ebs_w = mi_size_wide[bsize] / 8; + const int ebs_h = mi_size_high[bsize] / 8; +#endif // CONFIG_UNEVEN_4WAY +#if !CONFIG_EXT_RECUR_PARTITIONS const int qbs_w = mi_size_wide[bsize] / 4; const int qbs_h = mi_size_high[bsize] / 4; -#endif // !CONFIG_H_PARTITION +#endif // !CONFIG_EXT_RECUR_PARTITIONS switch (partition) { case PARTITION_NONE: collect_mv_stats_b(mv_stats, cpi, mi_row, mi_col); @@ -670,7 +693,68 @@ subsize, ptree->sub_tree[3]); break; #if CONFIG_EXT_RECUR_PARTITIONS -#if CONFIG_H_PARTITION +#if CONFIG_UNEVEN_4WAY + case PARTITION_HORZ_4A: { + const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_HORZ); + const BLOCK_SIZE bsize_med = + get_partition_subsize(bsize_big, PARTITION_HORZ); + assert(subsize == get_partition_subsize(bsize_med, PARTITION_HORZ)); + collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col, subsize, + ptree->sub_tree[0]); + collect_mv_stats_sb(mv_stats, cpi, mi_row + ebs_h, mi_col, bsize_med, + ptree->sub_tree[1]); + collect_mv_stats_sb(mv_stats, cpi, mi_row + 3 * ebs_h, mi_col, bsize_big, + ptree->sub_tree[2]); + collect_mv_stats_sb(mv_stats, cpi, mi_row + 7 * ebs_h, mi_col, subsize, + ptree->sub_tree[3]); + break; + } + case PARTITION_HORZ_4B: { + const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_HORZ); + const BLOCK_SIZE bsize_med = + get_partition_subsize(bsize_big, 
PARTITION_HORZ); + assert(subsize == get_partition_subsize(bsize_med, PARTITION_HORZ)); + collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col, subsize, + ptree->sub_tree[0]); + collect_mv_stats_sb(mv_stats, cpi, mi_row + ebs_h, mi_col, bsize_big, + ptree->sub_tree[1]); + collect_mv_stats_sb(mv_stats, cpi, mi_row + 5 * ebs_h, mi_col, bsize_med, + ptree->sub_tree[2]); + collect_mv_stats_sb(mv_stats, cpi, mi_row + 7 * ebs_h, mi_col, subsize, + ptree->sub_tree[3]); + break; + } + case PARTITION_VERT_4A: { + const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_VERT); + const BLOCK_SIZE bsize_med = + get_partition_subsize(bsize_big, PARTITION_VERT); + assert(subsize == get_partition_subsize(bsize_med, PARTITION_VERT)); + collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col, subsize, + ptree->sub_tree[0]); + collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col + ebs_w, bsize_med, + ptree->sub_tree[1]); + collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col + 3 * ebs_w, bsize_big, + ptree->sub_tree[2]); + collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col + 7 * ebs_w, subsize, + ptree->sub_tree[3]); + break; + } + case PARTITION_VERT_4B: { + const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_VERT); + const BLOCK_SIZE bsize_med = + get_partition_subsize(bsize_big, PARTITION_VERT); + assert(subsize == get_partition_subsize(bsize_med, PARTITION_VERT)); + collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col, subsize, + ptree->sub_tree[0]); + collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col + ebs_w, bsize_big, + ptree->sub_tree[1]); + collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col + 5 * ebs_w, bsize_med, + ptree->sub_tree[2]); + collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col + 7 * ebs_w, subsize, + ptree->sub_tree[3]); + break; + } +#endif // CONFIG_UNEVEN_4WAY case PARTITION_HORZ_3: case PARTITION_VERT_3: { for (int i = 0; i < 4; ++i) { @@ -686,28 +770,6 @@ } break; } -#else - case PARTITION_HORZ_3: { - collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col, 
subsize, - ptree->sub_tree[0]); - collect_mv_stats_sb(mv_stats, cpi, mi_row + qbs_h, mi_col, - get_partition_subsize(bsize, PARTITION_HORZ), - ptree->sub_tree[1]); - collect_mv_stats_sb(mv_stats, cpi, mi_row + 3 * qbs_h, mi_col, subsize, - ptree->sub_tree[2]); - break; - } - case PARTITION_VERT_3: { - collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col, subsize, - ptree->sub_tree[0]); - collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col + qbs_w, - get_partition_subsize(bsize, PARTITION_VERT), - ptree->sub_tree[1]); - collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col + 3 * qbs_w, subsize, - ptree->sub_tree[2]); - break; - } -#endif // CONFIG_H_PARTITION #else // CONFIG_EXT_RECUR_PARTITIONS case PARTITION_HORZ_A: collect_mv_stats_b(mv_stats, cpi, mi_row, mi_col); @@ -788,14 +850,22 @@ } mv_stats->q = current_q; +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + mv_stats->order = cpi->common.current_frame.display_order_hint; +#else mv_stats->order = cpi->common.current_frame.order_hint; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC mv_stats->valid = 1; } static AOM_INLINE int get_smart_mv_prec(AV1_COMP *cpi, const MV_STATS *mv_stats, int current_q) { const AV1_COMMON *cm = &cpi->common; +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + const int order_hint = cpi->common.current_frame.display_order_hint; +#else const int order_hint = cpi->common.current_frame.order_hint; +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC const int order_diff = order_hint - mv_stats->order; aom_clear_system_state(); const float area = (float)(cm->width * cm->height);
diff --git a/av1/encoder/palette.c b/av1/encoder/palette.c index efeda05..0d963ab 100644 --- a/av1/encoder/palette.c +++ b/av1/encoder/palette.c
@@ -87,7 +87,7 @@ int n_in_cache = 0; int in_cache_flags[PALETTE_MAX_SIZE]; memset(in_cache_flags, 0, sizeof(in_cache_flags)); -#if CONFIG_INDEP_PALETTE_PARSING +#if CONFIG_PALETTE_IMPROVEMENTS for (int i = 0; i < n_cache; ++i) { int duplicate = 0; for (int j = 0; j < i; j++) { @@ -96,7 +96,7 @@ if (duplicate) continue; #else for (int i = 0; i < n_cache && n_in_cache < n_colors; ++i) { -#endif // CONFIG_INDEP_PALETTE_PARSING +#endif // CONFIG_PALETTE_IMPROVEMENTS for (int j = 0; j < n_colors; ++j) { if (colors[j] == color_cache[i]) { in_cache_flags[j] = 1;
diff --git a/av1/encoder/partition_search.c b/av1/encoder/partition_search.c index f62672c..ca6a45a 100644 --- a/av1/encoder/partition_search.c +++ b/av1/encoder/partition_search.c
@@ -417,8 +417,13 @@ pd->subsampling_x, pd->subsampling_y); } mismatch_record_block_pre(pd->dst.buf, pd->dst.stride, - cm->current_frame.order_hint, plane, pixel_c, - pixel_r, pd->width, pd->height); +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + cm->current_frame.display_order_hint, +#else + cm->current_frame.order_hint, +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + plane, pixel_c, pixel_r, pd->width, + pd->height); } } #else @@ -491,15 +496,15 @@ if (intra_tx_size != max_txsize_rect_lookup[bsize]) ++x->txfm_search_info.txb_split_count; } -#if CONFIG_REF_MV_BANK && !CONFIG_C043_MVP_IMPROVEMENTS -#if CONFIG_IBC_SR_EXT && !CONFIG_BVP_IMPROVEMENT +#if CONFIG_REF_MV_BANK && !CONFIG_MVP_IMPROVEMENT +#if CONFIG_IBC_SR_EXT && !CONFIG_IBC_BV_IMPROVEMENT if (cm->seq_params.enable_refmvbank && is_inter && !is_intrabc_block(mbmi, xd->tree_type)) #else if (cm->seq_params.enable_refmvbank && is_inter) -#endif // CONFIG_IBC_SR_EXT && !CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_SR_EXT && !CONFIG_IBC_BV_IMPROVEMENT av1_update_ref_mv_bank(cm, xd, mbmi); -#endif // CONFIG_REF_MV_BANK && !CONFIG_C043_MVP_IMPROVEMENTS +#endif // CONFIG_REF_MV_BANK && !CONFIG_MVP_IMPROVEMENT #if CONFIG_WARP_REF_LIST && !WARP_CU_BANK if (is_inter) av1_update_warp_param_bank(cm, xd, mbmi); @@ -534,12 +539,7 @@ is_cfl_allowed(xd)) { #if CONFIG_ADAPTIVE_DS_FILTER cfl_store_block(xd, mbmi->sb_type[xd->tree_type == CHROMA_PART], - mbmi->tx_size, -#if DS_FRAME_LEVEL - cm->features.ds_filter_type); -#else - cm->seq_params.enable_cfl_ds_filter); -#endif // DS_FRAME_LEVEL + mbmi->tx_size, cm->seq_params.enable_cfl_ds_filter); #else cfl_store_block(xd, mbmi->sb_type[xd->tree_type == CHROMA_PART], mbmi->tx_size); @@ -742,16 +742,16 @@ rd_cost->rate = ctx->rd_stats.rate; rd_cost->dist = ctx->rd_stats.dist; rd_cost->rdcost = ctx->rd_stats.rdcost; -#if CONFIG_C043_MVP_IMPROVEMENTS +#if CONFIG_MVP_IMPROVEMENT const int is_inter = is_inter_block(&ctx->mic, xd->tree_type); -#if CONFIG_IBC_SR_EXT && 
!CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_SR_EXT && !CONFIG_IBC_BV_IMPROVEMENT if (cm->seq_params.enable_refmvbank && is_inter && !is_intrabc_block(&ctx->mic, xd->tree_type)) #else if (cm->seq_params.enable_refmvbank && is_inter) -#endif // CONFIG_IBC_SR_EXT && !CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_SR_EXT && !CONFIG_IBC_BV_IMPROVEMENT av1_update_ref_mv_bank(cm, xd, &ctx->mic); -#endif // CONFIG_C043_MVP_IMPROVEMENTS +#endif // CONFIG_MVP_IMPROVEMENT #if WARP_CU_BANK if (is_inter) av1_update_warp_param_bank(cm, xd, &ctx->mic); #endif // WARP_CU_BANK @@ -796,6 +796,9 @@ p[i].qcoeff = ctx->qcoeff[i]; p[i].dqcoeff = ctx->dqcoeff[i]; p[i].eobs = ctx->eobs[i]; +#if CONFIG_ATC_DCTX_ALIGNED + p[i].bobs = ctx->bobs[i]; +#endif // CONFIG_ATC_DCTX_ALIGNED p[i].txb_entropy_ctx = ctx->txb_entropy_ctx[i]; } @@ -827,7 +830,6 @@ start_timing(cpi, av1_rd_pick_intra_mode_sb_time); #endif av1_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, best_rd.rdcost); - #if CONFIG_COLLECT_COMPONENT_TIMING end_timing(cpi, av1_rd_pick_intra_mode_sb_time); #endif @@ -847,16 +849,16 @@ #endif } -#if CONFIG_C043_MVP_IMPROVEMENTS +#if CONFIG_MVP_IMPROVEMENT const int is_inter = is_inter_block(mbmi, xd->tree_type); -#if CONFIG_IBC_SR_EXT && !CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_SR_EXT && !CONFIG_IBC_BV_IMPROVEMENT if (cm->seq_params.enable_refmvbank && is_inter && !is_intrabc_block(mbmi, xd->tree_type)) #else if (cm->seq_params.enable_refmvbank && is_inter) -#endif // CONFIG_IBC_SR_EXT && !CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_SR_EXT && !CONFIG_IBC_BV_IMPROVEMENT av1_update_ref_mv_bank(cm, xd, mbmi); -#endif // CONFIG_C043_MVP_IMPROVEMENTS +#endif // CONFIG_MVP_IMPROVEMENT #if WARP_CU_BANK if (is_inter) av1_update_warp_param_bank(cm, xd, mbmi); @@ -898,6 +900,34 @@ if (mbmi->mode == AMVDNEWMV) max_drl_bits = AOMMIN(max_drl_bits, 1); #endif // IMPROVED_AMVD uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame); +#if CONFIG_SEP_COMP_DRL + assert(mbmi->ref_mv_idx[0] < max_drl_bits 
+ 1); + assert(mbmi->ref_mv_idx[1] < max_drl_bits + 1); + for (int ref = 0; ref < 1 + has_second_drl(mbmi); ++ref) { + for (int idx = 0; idx < max_drl_bits; ++idx) { + const uint16_t *weight = has_second_drl(mbmi) + ? mbmi_ext->weight[mbmi->ref_frame[ref]] + : mbmi_ext->weight[ref_frame_type]; + aom_cdf_prob *drl_cdf = av1_get_drl_cdf(fc, weight, mode_ctx, idx); +#if CONFIG_ENTROPY_STATS + int drl_ctx = av1_drl_ctx(mode_ctx); + switch (idx) { + case 0: + counts->drl_mode[0][drl_ctx][mbmi->ref_mv_idx[ref] != idx]++; + break; + case 1: + counts->drl_mode[1][drl_ctx][mbmi->ref_mv_idx[ref] != idx]++; + break; + default: + counts->drl_mode[2][drl_ctx][mbmi->ref_mv_idx[ref] != idx]++; + break; + } +#endif // CONFIG_ENTROPY_STATS + update_cdf(drl_cdf, mbmi->ref_mv_idx[ref] != idx, 2); + if (mbmi->ref_mv_idx[ref] == idx) break; + } + } +#else assert(mbmi->ref_mv_idx < max_drl_bits + 1); for (int idx = 0; idx < max_drl_bits; ++idx) { aom_cdf_prob *drl_cdf = @@ -913,9 +943,10 @@ update_cdf(drl_cdf, mbmi->ref_mv_idx != idx, 2); if (mbmi->ref_mv_idx == idx) break; } +#endif // CONFIG_SEP_COMP_DRL } -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT static void update_intrabc_drl_idx_stats(int max_ref_bv_num, FRAME_CONTEXT *fc, FRAME_COUNTS *counts, const MB_MODE_INFO *mbmi) { @@ -934,7 +965,32 @@ ++bit_cnt; } } -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT + +#if CONFIG_CWP +// Update the stats for compound weighted prediction +static void update_cwp_idx_stats(FRAME_CONTEXT *fc, FRAME_COUNTS *counts, + const AV1_COMMON *const cm, MACROBLOCKD *xd) { +#if !CONFIG_ENTROPY_STATS + (void)counts; +#endif // !CONFIG_ENTROPY_STATS + const MB_MODE_INFO *mbmi = xd->mi[0]; + + assert(mbmi->cwp_idx >= CWP_MIN && mbmi->cwp_idx <= CWP_MAX); + int bit_cnt = 0; + const int ctx = 0; + + int8_t final_idx = get_cwp_coding_idx(mbmi->cwp_idx, 1, cm, mbmi); + for (int idx = 0; idx < MAX_CWP_NUM - 1; ++idx) { +#if CONFIG_ENTROPY_STATS + 
counts->cwp_idx[bit_cnt][final_idx != idx]++; +#endif // CONFIG_ENTROPY_STATS + update_cdf(fc->cwp_idx_cdf[ctx][bit_cnt], final_idx != idx, 2); + if (final_idx == idx) break; + ++bit_cnt; + } +} +#endif // CONFIG_CWP #if CONFIG_EXTENDED_WARP_PREDICTION static void update_warp_delta_param_stats(int index, int value, @@ -975,7 +1031,11 @@ if (mbmi->warp_ref_idx == bit_idx) break; } } - if (allow_warp_parameter_signaling(mbmi)) { + if (allow_warp_parameter_signaling( +#if CONFIG_CWG_D067_IMPROVED_WARP + cm, +#endif // CONFIG_CWG_D067_IMPROVED_WARP + mbmi)) { #endif // CONFIG_WARP_REF_LIST const WarpedMotionParams *params = &mbmi->wm_params[0]; WarpedMotionParams base_params; @@ -1017,7 +1077,7 @@ #endif // CONFIG_WARP_REF_LIST } #endif // CONFIG_EXTENDED_WARP_PREDICTION -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT static void update_skip_drl_index_stats(int max_drl_bits, FRAME_CONTEXT *fc, FRAME_COUNTS *counts, const MB_MODE_INFO *mbmi) { @@ -1025,9 +1085,26 @@ (void)counts; #endif // !CONFIG_ENTROPY_STATS assert(have_drl_index(mbmi->mode)); +#if CONFIG_SEP_COMP_DRL + assert(get_ref_mv_idx(mbmi, 0) < max_drl_bits + 1); + assert(get_ref_mv_idx(mbmi, 1) < max_drl_bits + 1); +#else assert(mbmi->ref_mv_idx < max_drl_bits + 1); +#endif // CONFIG_SEP_COMP_DRL for (int idx = 0; idx < max_drl_bits; ++idx) { aom_cdf_prob *drl_cdf = fc->skip_drl_cdf[AOMMIN(idx, 2)]; +#if CONFIG_SEP_COMP_DRL + update_cdf(drl_cdf, mbmi->ref_mv_idx[0] != idx, 2); +#if CONFIG_ENTROPY_STATS + switch (idx) { + case 0: counts->skip_drl_mode[idx][mbmi->ref_mv_idx[0] != idx]++; break; + case 1: counts->skip_drl_mode[idx][mbmi->ref_mv_idx[0] != idx]++; break; + default: counts->skip_drl_mode[2][mbmi->ref_mv_idx[0] != idx]++; break; + } +#endif // CONFIG_ENTROPY_STATS + if (mbmi->ref_mv_idx[0] == idx) break; +#else + update_cdf(drl_cdf, mbmi->ref_mv_idx != idx, 2); #if CONFIG_ENTROPY_STATS switch (idx) { case 0: counts->skip_drl_mode[idx][mbmi->ref_mv_idx != idx]++; break; @@ 
-1035,11 +1112,11 @@ default: counts->skip_drl_mode[2][mbmi->ref_mv_idx != idx]++; break; } #endif // CONFIG_ENTROPY_STATS - update_cdf(drl_cdf, mbmi->ref_mv_idx != idx, 2); if (mbmi->ref_mv_idx == idx) break; +#endif // CONFIG_SEP_COMP_DRL } } -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT static void update_stats(const AV1_COMMON *const cm, ThreadData *td) { MACROBLOCK *x = &td->mb; @@ -1049,6 +1126,7 @@ const CurrentFrame *const current_frame = &cm->current_frame; const BLOCK_SIZE bsize = mbmi->sb_type[xd->tree_type == CHROMA_PART]; FRAME_CONTEXT *fc = xd->tile_ctx; + const int inter_block = mbmi->ref_frame[0] != INTRA_FRAME; const int seg_ref_active = 0; if (current_frame->skip_mode_info.skip_mode_flag && !seg_ref_active && @@ -1059,6 +1137,50 @@ #endif update_cdf(fc->skip_mode_cdfs[skip_mode_ctx], mbmi->skip_mode, 2); } + +#if CONFIG_SKIP_TXFM_OPT + const int use_intrabc = is_intrabc_block(mbmi, xd->tree_type); + if (!seg_ref_active) { + if (!mbmi->skip_mode && !frame_is_intra_only(cm)) { + const int intra_inter_ctx = av1_get_intra_inter_context(xd); +#if CONFIG_ENTROPY_STATS + td->counts->intra_inter[intra_inter_ctx][inter_block]++; +#endif // CONFIG_ENTROPY_STATS + update_cdf(fc->intra_inter_cdf[intra_inter_ctx], inter_block, 2); + } + + if (!inter_block && av1_allow_intrabc(cm) && xd->tree_type != CHROMA_PART) { +#if CONFIG_NEW_CONTEXT_MODELING + const int intrabc_ctx = get_intrabc_ctx(xd); + update_cdf(fc->intrabc_cdf[intrabc_ctx], use_intrabc, 2); +#if CONFIG_ENTROPY_STATS + ++td->counts->intrabc[intrabc_ctx][use_intrabc]; +#endif // CONFIG_ENTROPY_STATS +#else + update_cdf(fc->intrabc_cdf, use_intrabc, 2); +#if CONFIG_ENTROPY_STATS + ++td->counts->intrabc[use_intrabc]; +#endif // CONFIG_ENTROPY_STATS +#endif // CONFIG_NEW_CONTEXT_MODELING + } + + if (inter_block || (!inter_block && use_intrabc)) { +#if !CONFIG_SKIP_MODE_ENHANCEMENT + if (!mbmi->skip_mode) { +#endif // !CONFIG_SKIP_MODE_ENHANCEMENT + const int 
skip_ctx = av1_get_skip_txfm_context(xd); +#if CONFIG_ENTROPY_STATS + td->counts->skip_txfm[skip_ctx] + [mbmi->skip_txfm[xd->tree_type == CHROMA_PART]]++; +#endif + update_cdf(fc->skip_txfm_cdfs[skip_ctx], + mbmi->skip_txfm[xd->tree_type == CHROMA_PART], 2); +#if !CONFIG_SKIP_MODE_ENHANCEMENT + } +#endif // !CONFIG_SKIP_MODE_ENHANCEMENT + } + } +#else #if CONFIG_SKIP_MODE_ENHANCEMENT if (!seg_ref_active) { #else @@ -1072,6 +1194,7 @@ update_cdf(fc->skip_txfm_cdfs[skip_ctx], mbmi->skip_txfm[xd->tree_type == CHROMA_PART], 2); } +#endif // CONFIG_SKIP_TXFM_OPT #if CONFIG_ENTROPY_STATS // delta quant applies to both intra and inter @@ -1121,6 +1244,7 @@ av1_sum_intra_stats(cm, td->counts, xd, mbmi); } if (av1_allow_intrabc(cm) && xd->tree_type != CHROMA_PART) { +#if !CONFIG_SKIP_TXFM_OPT const int use_intrabc = is_intrabc_block(mbmi, xd->tree_type); #if CONFIG_NEW_CONTEXT_MODELING const int intrabc_ctx = get_intrabc_ctx(xd); @@ -1134,7 +1258,8 @@ ++td->counts->intrabc[use_intrabc]; #endif // CONFIG_ENTROPY_STATS #endif // CONFIG_NEW_CONTEXT_MODELING -#if CONFIG_BVCOST_UPDATE +#endif // !CONFIG_SKIP_TXFM_OPT +#if CONFIG_IBC_BV_IMPROVEMENT if (use_intrabc) { const int_mv ref_mv = mbmi_ext->ref_mv_stack[INTRA_FRAME][0].this_mv; #if CONFIG_FLEX_MVRES @@ -1153,8 +1278,8 @@ } #endif -#endif // CONFIG_BVCOST_UPDATE -#if CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT if (use_intrabc) { update_cdf(fc->intrabc_mode_cdf, mbmi->intrabc_mode, 2); #if CONFIG_ENTROPY_STATS @@ -1162,29 +1287,37 @@ #endif // CONFIG_ENTROPY_STATS update_intrabc_drl_idx_stats(MAX_REF_BV_STACK_SIZE, fc, td->counts, mbmi); } -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT } #if CONFIG_SKIP_MODE_ENHANCEMENT if (mbmi->skip_mode && have_drl_index(mbmi->mode)) { FRAME_COUNTS *const counts = td->counts; -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT update_skip_drl_index_stats(cm->features.max_drl_bits, fc, counts, 
mbmi); #else const int16_t mode_ctx_pristine = av1_mode_context_pristine(mbmi_ext->mode_context, mbmi->ref_frame); update_drl_index_stats(cm->features.max_drl_bits, mode_ctx_pristine, fc, counts, mbmi, mbmi_ext); -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT } #endif // CONFIG_SKIP_MODE_ENHANCEMENT +#if CONFIG_REFINEMV + if (mbmi->skip_mode && switchable_refinemv_flag(cm, mbmi)) { + const int refinemv_ctx = av1_get_refinemv_context(cm, xd, bsize); + update_cdf(fc->refinemv_flag_cdf[refinemv_ctx], mbmi->refinemv_flag, + REFINEMV_NUM_MODES); + } +#endif // CONFIG_REFINEMV + if (frame_is_intra_only(cm) || mbmi->skip_mode) return; FRAME_COUNTS *const counts = td->counts; - const int inter_block = mbmi->ref_frame[0] != INTRA_FRAME; if (!seg_ref_active) { +#if !CONFIG_SKIP_TXFM_OPT #if CONFIG_ENTROPY_STATS && !CONFIG_CONTEXT_DERIVATION counts->intra_inter[av1_get_intra_inter_context(xd)][inter_block]++; #endif // CONFIG_ENTROPY_STATS && !CONFIG_CONTEXT_DERIVATION @@ -1200,6 +1333,7 @@ update_cdf(fc->intra_inter_cdf[av1_get_intra_inter_context(xd)], inter_block, 2); #endif // CONFIG_CONTEXT_DERIVATION +#endif // !CONFIG_SKIP_TXFM_OPT // If the segment reference feature is enabled we have only a single // reference frame allowed for the segment so exclude it from // the reference frame counts used to work out probabilities. 
@@ -1492,10 +1626,33 @@ } #endif // CONFIG_EXTENDED_WARP_PREDICTION +#if CONFIG_CWG_D067_IMPROVED_WARP + if (allow_warpmv_with_mvd_coding(cm, mbmi)) { + update_cdf(fc->warpmv_with_mvd_flag_cdf[mbmi->sb_type[PLANE_TYPE_Y]], + mbmi->warpmv_with_mvd_flag, 2); + } else { + assert(mbmi->warpmv_with_mvd_flag == 0); + } +#endif // CONFIG_CWG_D067_IMPROVED_WARP + +#if CONFIG_REFINEMV + int is_refinemv_signaled = switchable_refinemv_flag(cm, mbmi); + if (!mbmi->skip_mode && is_refinemv_signaled) { + const int refinemv_ctx = av1_get_refinemv_context(cm, xd, bsize); + update_cdf(fc->refinemv_flag_cdf[refinemv_ctx], mbmi->refinemv_flag, + REFINEMV_NUM_MODES); + } + assert(IMPLIES(mbmi->refinemv_flag && is_refinemv_signaled, + mbmi->comp_group_idx == 0 && + mbmi->interinter_comp.type == COMPOUND_AVERAGE)); +#endif // CONFIG_REFINEMV if (has_second_ref(mbmi) #if CONFIG_OPTFLOW_REFINEMENT && mbmi->mode < NEAR_NEARMV_OPTFLOW #endif // CONFIG_OPTFLOW_REFINEMENT +#if CONFIG_REFINEMV + && (!mbmi->refinemv_flag || !is_refinemv_signaled) +#endif // CONFIG_REFINEMV #if IMPROVED_AMVD && CONFIG_JOINT_MVD && !is_joint_amvd_coding_mode(mbmi->mode) #endif // IMPROVED_AMVD && CONFIG_JOINT_MVD @@ -1546,12 +1703,21 @@ #endif // CONFIG_WEDGE_MOD_EXT } } + +#if CONFIG_CWP + if (cm->features.enable_cwp && is_cwp_allowed(mbmi) && !mbmi->skip_mode) { + update_cwp_idx_stats(fc, td->counts, cm, xd); + } +#endif // CONFIG_CWP } } if (inter_block && cm->features.interp_filter == SWITCHABLE && - !is_warp_mode(mbmi->motion_mode) && - !is_nontrans_global_motion(xd, mbmi)) { + !is_warp_mode(mbmi->motion_mode) && !is_nontrans_global_motion(xd, mbmi) +#if CONFIG_REFINEMV + && !(mbmi->refinemv_flag || mbmi->mode >= NEAR_NEARMV_OPTFLOW) +#endif // CONFIG_REFINEMV + ) { update_filter_type_cdf(xd, mbmi); } if (inter_block && @@ -1625,11 +1791,44 @@ update_drl_index_stats(cm->features.max_drl_bits, mode_ctx_pristine, fc, counts, mbmi, mbmi_ext); } - if (have_newmv_in_inter_mode(mbmi->mode) && xd->tree_type != 
CHROMA_PART) { + +#if CONFIG_CWG_D067_IMPROVED_WARP + if (xd->tree_type != CHROMA_PART && mbmi->mode == WARPMV) { + if (mbmi->warpmv_with_mvd_flag) { + WarpedMotionParams ref_warp_model = + mbmi_ext + ->warp_param_stack[av1_ref_frame_type(mbmi->ref_frame)] + [mbmi->warp_ref_idx] + .wm_params; + const int_mv ref_mv = + get_mv_from_wrl(xd, &ref_warp_model, mbmi->pb_mv_precision, bsize, + xd->mi_col, xd->mi_row); + assert(is_adaptive_mvd == 0); + #if CONFIG_FLEX_MVRES - const int pb_mv_precision = mbmi->pb_mv_precision; - assert(IMPLIES(cm->features.cur_frame_force_integer_mv, - pb_mv_precision == MV_PRECISION_ONE_PEL)); + av1_update_mv_stats(mbmi->mv[0].as_mv, ref_mv.as_mv, &fc->nmvc, +#if CONFIG_ADAPTIVE_MVD + is_adaptive_mvd, +#endif // CONFIG_ADAPTIVE_MVD + mbmi->pb_mv_precision); +#else + av1_update_mv_stats(&mbmi->mv[0].as_mv, &ref_mv.as_mv, &fc->nmvc, +#if CONFIG_ADAPTIVE_MVD + is_adaptive_mvd, +#endif // CONFIG_ADAPTIVE_MVD + allow_hp); +#endif + } + + } else { +#endif // CONFIG_CWG_D067_IMPROVED_WARP + + if (have_newmv_in_inter_mode(mbmi->mode) && + xd->tree_type != CHROMA_PART) { +#if CONFIG_FLEX_MVRES + const int pb_mv_precision = mbmi->pb_mv_precision; + assert(IMPLIES(cm->features.cur_frame_force_integer_mv, + pb_mv_precision == MV_PRECISION_ONE_PEL)); #else const int allow_hp = cm->features.cur_frame_force_integer_mv ? 
MV_SUBPEL_NONE @@ -1637,41 +1836,42 @@ #endif #if CONFIG_FLEX_MVRES - if (is_pb_mv_precision_active(cm, mbmi, bsize)) { + if (is_pb_mv_precision_active(cm, mbmi, bsize)) { #if CONFIG_ADAPTIVE_MVD - assert(!is_adaptive_mvd); + assert(!is_adaptive_mvd); #endif - assert(mbmi->most_probable_pb_mv_precision <= mbmi->max_mv_precision); - const int mpp_flag_context = av1_get_mpp_flag_context(cm, xd); - const int mpp_flag = - (mbmi->pb_mv_precision == mbmi->most_probable_pb_mv_precision); - update_cdf(fc->pb_mv_mpp_flag_cdf[mpp_flag_context], mpp_flag, 2); + assert(mbmi->most_probable_pb_mv_precision <= mbmi->max_mv_precision); + const int mpp_flag_context = av1_get_mpp_flag_context(cm, xd); + const int mpp_flag = + (mbmi->pb_mv_precision == mbmi->most_probable_pb_mv_precision); + update_cdf(fc->pb_mv_mpp_flag_cdf[mpp_flag_context], mpp_flag, 2); - if (!mpp_flag) { - const PRECISION_SET *precision_def = - &av1_mv_precision_sets[mbmi->mb_precision_set]; - int down = av1_get_pb_mv_precision_index(mbmi); - int nsymbs = precision_def->num_precisions - 1; + if (!mpp_flag) { + const PRECISION_SET *precision_def = + &av1_mv_precision_sets[mbmi->mb_precision_set]; + int down = av1_get_pb_mv_precision_index(mbmi); + int nsymbs = precision_def->num_precisions - 1; - const int down_ctx = av1_get_pb_mv_precision_down_context(cm, xd); + const int down_ctx = av1_get_pb_mv_precision_down_context(cm, xd); - update_cdf(fc->pb_mv_precision_cdf[down_ctx][mbmi->max_mv_precision - - MV_PRECISION_HALF_PEL], - down, nsymbs); + update_cdf( + fc->pb_mv_precision_cdf[down_ctx][mbmi->max_mv_precision - + MV_PRECISION_HALF_PEL], + down, nsymbs); + } } - } #endif // CONFIG_FLEX_MVRES - if (new_mv) { - for (int ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) { - const int_mv ref_mv = av1_get_ref_mv(x, ref); + if (new_mv) { + for (int ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) { + const int_mv ref_mv = av1_get_ref_mv(x, ref); #if CONFIG_FLEX_MVRES - av1_update_mv_stats(mbmi->mv[ref].as_mv, 
ref_mv.as_mv, &fc->nmvc, + av1_update_mv_stats(mbmi->mv[ref].as_mv, ref_mv.as_mv, &fc->nmvc, #if CONFIG_ADAPTIVE_MVD - is_adaptive_mvd, + is_adaptive_mvd, #endif // CONFIG_ADAPTIVE_MVD - pb_mv_precision); + pb_mv_precision); #else av1_update_mv_stats(&mbmi->mv[ref].as_mv, &ref_mv.as_mv, &fc->nmvc, #if CONFIG_ADAPTIVE_MVD @@ -1679,23 +1879,23 @@ #endif // CONFIG_ADAPTIVE_MVD allow_hp); #endif - } - } else if (have_nearmv_newmv_in_inter_mode(mbmi->mode)) { - const int ref = + } + } else if (have_nearmv_newmv_in_inter_mode(mbmi->mode)) { + const int ref = #if CONFIG_OPTFLOW_REFINEMENT - mbmi->mode == NEAR_NEWMV_OPTFLOW || + mbmi->mode == NEAR_NEWMV_OPTFLOW || #endif // CONFIG_OPTFLOW_REFINEMENT #if CONFIG_JOINT_MVD - jmvd_base_ref_list || + jmvd_base_ref_list || #endif // CONFIG_JOINT_MVD - mbmi->mode == NEAR_NEWMV; - const int_mv ref_mv = av1_get_ref_mv(x, ref); + mbmi->mode == NEAR_NEWMV; + const int_mv ref_mv = av1_get_ref_mv(x, ref); #if CONFIG_FLEX_MVRES - av1_update_mv_stats(mbmi->mv[ref].as_mv, ref_mv.as_mv, &fc->nmvc, + av1_update_mv_stats(mbmi->mv[ref].as_mv, ref_mv.as_mv, &fc->nmvc, #if CONFIG_ADAPTIVE_MVD - is_adaptive_mvd, + is_adaptive_mvd, #endif // CONFIG_ADAPTIVE_MVD - pb_mv_precision); + pb_mv_precision); #else av1_update_mv_stats(&mbmi->mv[ref].as_mv, &ref_mv.as_mv, &fc->nmvc, #if CONFIG_ADAPTIVE_MVD @@ -1703,8 +1903,12 @@ #endif // CONFIG_ADAPTIVE_MVD allow_hp); #endif + } } + +#if CONFIG_CWG_D067_IMPROVED_WARP } +#endif // CONFIG_CWG_D067_IMPROVED_WARP } } @@ -1828,9 +2032,9 @@ assert(!frame_is_intra_only(cm)); rdc->skip_mode_used_flag = 1; if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT) { -#if !CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if !CONFIG_SKIP_MODE_ENHANCEMENT assert(has_second_ref(mbmi)); -#endif // !CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // !CONFIG_SKIP_MODE_ENHANCEMENT rdc->compound_ref_used_flag = 1; } set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); @@ -1859,7 +2063,11 @@ if ((!cpi->sf.inter_sf.disable_obmc && 
cpi->sf.inter_sf.prune_obmc_prob_thresh > 0) || #if CONFIG_EXTENDED_WARP_PREDICTION - cpi->sf.inter_sf.prune_warped_prob_thresh > 0) { + cpi->sf.inter_sf.prune_warped_prob_thresh > 0 +#if CONFIG_CWG_D067_IMPROVED_WARP + || cpi->sf.inter_sf.prune_warpmv_prob_thresh > 0 +#endif // CONFIG_CWG_D067_IMPROVED_WARP + ) { #else (cm->features.allow_warped_motion && cpi->sf.inter_sf.prune_warped_prob_thresh > 0)) { @@ -1874,11 +2082,20 @@ if (allowed_motion_modes & (1 << OBMC_CAUSAL)) { td->rd_counts.obmc_used[bsize][mbmi->motion_mode == OBMC_CAUSAL]++; } +#if CONFIG_CWG_D067_IMPROVED_WARP + int is_warp_allowed = (allowed_motion_modes & (1 << WARPED_CAUSAL)) || + (allowed_motion_modes & (1 << WARP_DELTA)) || + (allowed_motion_modes & (1 << WARP_EXTEND)); + if (is_warp_allowed) { + td->rd_counts.warped_used[mbmi->motion_mode >= WARPED_CAUSAL]++; + } +#else if (allowed_motion_modes & (1 << WARPED_CAUSAL)) { td->rd_counts.warped_used[mbmi->motion_mode == WARPED_CAUSAL]++; } - // TODO(rachelbarker): Add counts and pruning for WARP_DELTA and - // WARP_EXTEND +#endif // CONFIG_CWG_D067_IMPROVED_WARP + // TODO(rachelbarker): Add counts and pruning for WARP_DELTA and + // WARP_EXTEND } #else const MOTION_MODE motion_allowed = motion_mode_allowed(cm, xd, mbmi); @@ -1901,7 +2118,7 @@ // frame level buffer (cpi->mbmi_ext_info.frame_base) will be used during // bitstream preparation. if (xd->tree_type != CHROMA_PART) -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT { if (mbmi->skip_mode) { const SkipModeInfo *const skip_mode_info = @@ -1928,16 +2145,20 @@ // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs. 
av1_copy_usable_ref_mv_stack_and_weight(xd, x->mbmi_ext, ref_frame_type); } -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT + av1_copy_mbmi_ext_to_mbmi_ext_frame( x->mbmi_ext_frame, x->mbmi_ext, -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SEP_COMP_DRL + mbmi, +#endif // CONFIG_SEP_COMP_DRL +#if CONFIG_SKIP_MODE_ENHANCEMENT mbmi->skip_mode, -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT av1_ref_frame_type(xd->mi[0]->ref_frame)); -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT } -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT x->rdmult = origin_mult; } @@ -1949,44 +2170,27 @@ const CommonModeInfoParams *const mi_params, #if CONFIG_EXT_RECUR_PARTITIONS int disable_ext_part, -#if !CONFIG_H_PARTITION - PARTITION_TREE const *ptree, -#endif // !CONFIG_H_PARTITION PARTITION_TREE const *ptree_luma, const CHROMA_REF_INFO *chroma_ref_info, #endif // CONFIG_EXT_RECUR_PARTITIONS PARTITION_TYPE partition, const int mi_row, const int mi_col, BLOCK_SIZE bsize, const int ctx, BLOCK_SIZE sb_size) { - const int plane_index = xd->tree_type == CHROMA_PART; + const TREE_TYPE tree_type = xd->tree_type; + const int plane_index = tree_type == CHROMA_PART; FRAME_CONTEXT *fc = xd->tile_ctx; #if CONFIG_EXT_RECUR_PARTITIONS - if (!is_partition_point(bsize)) { - return; - } - if (xd->tree_type == CHROMA_PART && bsize == BLOCK_8X8) { - return; - } + const bool ss_x = xd->plane[1].subsampling_x; + const bool ss_y = xd->plane[1].subsampling_y; - const int ss_x = xd->plane[1].subsampling_x; - const int ss_y = xd->plane[1].subsampling_y; - if (is_luma_chroma_share_same_partition(xd->tree_type, ptree_luma, bsize)) { - PARTITION_TYPE derived_partition_mode = - sdp_chroma_part_from_luma(bsize, ptree_luma->partition, ss_x, ss_y); - assert(partition == derived_partition_mode && - "Chroma partition does not match the derived mode."); - 
(void)derived_partition_mode; - return; - } - - PARTITION_TYPE implied_partition; - const bool is_part_implied = is_partition_implied_at_boundary( - mi_params, xd->tree_type, ss_x, ss_y, mi_row, mi_col, bsize, - chroma_ref_info, &implied_partition); - if (is_part_implied) { - assert(partition == implied_partition && - "Partition doesn't match the implied partition at boundary."); + const PARTITION_TYPE derived_partition = + av1_get_normative_forced_partition_type(mi_params, tree_type, ss_x, ss_y, + mi_row, mi_col, bsize, ptree_luma, + chroma_ref_info); + if (derived_partition != PARTITION_INVALID) { + assert(partition == derived_partition && + "Partition does not match normatively derived partition."); return; } @@ -2016,7 +2220,7 @@ } RECT_PART_TYPE rect_type = get_rect_part_type(partition); - if (rect_type_implied_by_bsize(bsize, xd->tree_type) == RECT_INVALID) { + if (rect_type_implied_by_bsize(bsize, tree_type) == RECT_INVALID) { #if CONFIG_ENTROPY_STATS counts->rect_type[plane_index][ctx][rect_type]++; #endif // CONFIG_ENTROPY_STATS @@ -2025,7 +2229,7 @@ const bool ext_partition_allowed = !disable_ext_part && - is_ext_partition_allowed(bsize, rect_type, xd->tree_type); + is_ext_partition_allowed(bsize, rect_type, tree_type); if (ext_partition_allowed) { const bool do_ext_partition = (partition >= PARTITION_HORZ_3); #if CONFIG_ENTROPY_STATS @@ -2033,6 +2237,35 @@ #endif // CONFIG_ENTROPY_STATS update_cdf(fc->do_ext_partition_cdf[plane_index][rect_type][ctx], do_ext_partition, 2); +#if CONFIG_UNEVEN_4WAY + if (do_ext_partition) { + const bool uneven_4way_partition_allowed = + is_uneven_4way_partition_allowed(bsize, rect_type, tree_type); + if (uneven_4way_partition_allowed) { + const bool do_uneven_4way_partition = (partition >= PARTITION_HORZ_4A); +#if CONFIG_ENTROPY_STATS + counts->do_uneven_4way_partition[plane_index][rect_type][ctx] + [do_uneven_4way_partition]++; +#endif // CONFIG_ENTROPY_STATS + update_cdf( + 
fc->do_uneven_4way_partition_cdf[plane_index][rect_type][ctx], + do_uneven_4way_partition, 2); + if (do_uneven_4way_partition) { + const UNEVEN_4WAY_PART_TYPE uneven_4way_type = + (partition == PARTITION_HORZ_4A || partition == PARTITION_VERT_4A) + ? UNEVEN_4A + : UNEVEN_4B; +#if CONFIG_ENTROPY_STATS + counts->uneven_4way_partition_type[plane_index][rect_type][ctx] + [uneven_4way_type]++; +#endif // CONFIG_ENTROPY_STATS + update_cdf( + fc->uneven_4way_partition_type_cdf[plane_index][rect_type][ctx], + uneven_4way_type, NUM_UNEVEN_4WAY_PARTS); + } + } + } +#endif // CONFIG_UNEVEN_4WAY } #else // CONFIG_EXT_RECUR_PARTITIONS const int hbs_w = mi_size_wide[bsize] / 2; @@ -2042,8 +2275,7 @@ if (has_rows && has_cols) { int luma_split_flag = 0; int parent_block_width = block_size_wide[bsize]; - if (xd->tree_type == CHROMA_PART && - parent_block_width >= SHARED_PART_SIZE) { + if (tree_type == CHROMA_PART && parent_block_width >= SHARED_PART_SIZE) { luma_split_flag = get_luma_split_flag(bsize, mi_params, mi_row, mi_col); } if (luma_split_flag <= 3) { @@ -2147,10 +2379,14 @@ assert(bsize < BLOCK_SIZES_ALL); const int hbs_w = mi_size_wide[bsize] / 2; const int hbs_h = mi_size_high[bsize] / 2; -#if !CONFIG_H_PARTITION +#if CONFIG_UNEVEN_4WAY + const int ebs_w = mi_size_wide[bsize] / 8; + const int ebs_h = mi_size_high[bsize] / 8; +#endif // CONFIG_UNEVEN_4WAY +#if !CONFIG_EXT_RECUR_PARTITIONS const int qbs_w = mi_size_wide[bsize] / 4; const int qbs_h = mi_size_high[bsize] / 4; -#endif // !CONFIG_H_PARTITION +#endif // !CONFIG_EXT_RECUR_PARTITIONS const int is_partition_root = is_partition_point(bsize); const int ctx = is_partition_root ? 
partition_plane_context(xd, mi_row, mi_col, bsize) @@ -2159,9 +2395,10 @@ const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition); #if CONFIG_EXT_RECUR_PARTITIONS const bool disable_ext_part = !cm->seq_params.enable_ext_partitions; -#else - const BLOCK_SIZE bsize2 = get_partition_subsize(bsize, PARTITION_SPLIT); #endif // CONFIG_EXT_RECUR_PARTITIONS +#if !CONFIG_EXT_RECUR_PARTITIONS + const BLOCK_SIZE bsize2 = get_partition_subsize(bsize, PARTITION_SPLIT); +#endif // !CONFIG_EXT_RECUR_PARTITIONS if (subsize == BLOCK_INVALID) return; @@ -2172,11 +2409,8 @@ #endif // CONFIG_ENTROPY_STATS tile_data->allow_update_cdf, mi_params, #if CONFIG_EXT_RECUR_PARTITIONS - disable_ext_part, -#if !CONFIG_H_PARTITION - ptree, -#endif // !CONFIG_H_PARTITION - ptree_luma, &pc_tree->chroma_ref_info, + disable_ext_part, ptree_luma, + &pc_tree->chroma_ref_info, #endif // CONFIG_EXT_RECUR_PARTITIONS partition, mi_row, mi_col, bsize, ctx, cm->sb_size); @@ -2198,12 +2432,18 @@ const int ss_x = xd->plane[1].subsampling_x; const int ss_y = xd->plane[1].subsampling_y; set_chroma_ref_info( - mi_row, mi_col, ptree->index, bsize, &ptree->chroma_ref_info, - parent ? &parent->chroma_ref_info : NULL, + xd->tree_type, mi_row, mi_col, ptree->index, bsize, + &ptree->chroma_ref_info, parent ? &parent->chroma_ref_info : NULL, parent ? parent->bsize : BLOCK_INVALID, parent ? 
parent->partition : PARTITION_NONE, ss_x, ss_y); switch (partition) { +#if CONFIG_UNEVEN_4WAY + case PARTITION_HORZ_4A: + case PARTITION_HORZ_4B: + case PARTITION_VERT_4A: + case PARTITION_VERT_4B: +#endif // CONFIG_UNEVEN_4WAY case PARTITION_SPLIT: ptree->sub_tree[0] = av1_alloc_ptree_node(ptree, 0); ptree->sub_tree[1] = av1_alloc_ptree_node(ptree, 1); @@ -2221,9 +2461,7 @@ ptree->sub_tree[0] = av1_alloc_ptree_node(ptree, 0); ptree->sub_tree[1] = av1_alloc_ptree_node(ptree, 1); ptree->sub_tree[2] = av1_alloc_ptree_node(ptree, 2); -#if CONFIG_H_PARTITION ptree->sub_tree[3] = av1_alloc_ptree_node(ptree, 3); -#endif // CONFIG_H_PARTITION break; #endif // CONFIG_EXT_RECUR_PARTITIONS default: break; @@ -2290,7 +2528,96 @@ #endif // CONFIG_EXT_RECUR_PARTITIONS break; #if CONFIG_EXT_RECUR_PARTITIONS -#if CONFIG_H_PARTITION +#if CONFIG_UNEVEN_4WAY + case PARTITION_HORZ_4A: { + const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_HORZ); + const BLOCK_SIZE bsize_med = + get_partition_subsize(bsize_big, PARTITION_HORZ); + assert(subsize == get_partition_subsize(bsize_med, PARTITION_HORZ)); + encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, dry_run, subsize, + pc_tree->horizontal4a[0], sub_tree[0], + track_ptree_luma ? ptree_luma->sub_tree[0] : NULL, rate); + if (mi_row + ebs_h >= cm->mi_params.mi_rows) break; + encode_sb(cpi, td, tile_data, tp, mi_row + ebs_h, mi_col, dry_run, + bsize_med, pc_tree->horizontal4a[1], sub_tree[1], + track_ptree_luma ? ptree_luma->sub_tree[1] : NULL, rate); + if (mi_row + 3 * ebs_h >= cm->mi_params.mi_rows) break; + encode_sb(cpi, td, tile_data, tp, mi_row + 3 * ebs_h, mi_col, dry_run, + bsize_big, pc_tree->horizontal4a[2], sub_tree[2], + track_ptree_luma ? ptree_luma->sub_tree[2] : NULL, rate); + if (mi_row + 7 * ebs_h >= cm->mi_params.mi_rows) break; + encode_sb(cpi, td, tile_data, tp, mi_row + 7 * ebs_h, mi_col, dry_run, + subsize, pc_tree->horizontal4a[3], sub_tree[3], + track_ptree_luma ? 
ptree_luma->sub_tree[3] : NULL, rate); + break; + } + case PARTITION_HORZ_4B: { + const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_HORZ); + const BLOCK_SIZE bsize_med = + get_partition_subsize(bsize_big, PARTITION_HORZ); + assert(subsize == get_partition_subsize(bsize_med, PARTITION_HORZ)); + encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, dry_run, subsize, + pc_tree->horizontal4b[0], sub_tree[0], + track_ptree_luma ? ptree_luma->sub_tree[0] : NULL, rate); + if (mi_row + ebs_h >= cm->mi_params.mi_rows) break; + encode_sb(cpi, td, tile_data, tp, mi_row + ebs_h, mi_col, dry_run, + bsize_big, pc_tree->horizontal4b[1], sub_tree[1], + track_ptree_luma ? ptree_luma->sub_tree[1] : NULL, rate); + if (mi_row + 5 * ebs_h >= cm->mi_params.mi_rows) break; + encode_sb(cpi, td, tile_data, tp, mi_row + 5 * ebs_h, mi_col, dry_run, + bsize_med, pc_tree->horizontal4b[2], sub_tree[2], + track_ptree_luma ? ptree_luma->sub_tree[2] : NULL, rate); + if (mi_row + 7 * ebs_h >= cm->mi_params.mi_rows) break; + encode_sb(cpi, td, tile_data, tp, mi_row + 7 * ebs_h, mi_col, dry_run, + subsize, pc_tree->horizontal4b[3], sub_tree[3], + track_ptree_luma ? ptree_luma->sub_tree[3] : NULL, rate); + break; + } + case PARTITION_VERT_4A: { + const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_VERT); + const BLOCK_SIZE bsize_med = + get_partition_subsize(bsize_big, PARTITION_VERT); + assert(subsize == get_partition_subsize(bsize_med, PARTITION_VERT)); + encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, dry_run, subsize, + pc_tree->vertical4a[0], sub_tree[0], + track_ptree_luma ? ptree_luma->sub_tree[0] : NULL, rate); + if (mi_col + ebs_w >= cm->mi_params.mi_cols) break; + encode_sb(cpi, td, tile_data, tp, mi_row, mi_col + ebs_w, dry_run, + bsize_med, pc_tree->vertical4a[1], sub_tree[1], + track_ptree_luma ? 
ptree_luma->sub_tree[1] : NULL, rate); + if (mi_col + 3 * ebs_w >= cm->mi_params.mi_cols) break; + encode_sb(cpi, td, tile_data, tp, mi_row, mi_col + 3 * ebs_w, dry_run, + bsize_big, pc_tree->vertical4a[2], sub_tree[2], + track_ptree_luma ? ptree_luma->sub_tree[2] : NULL, rate); + if (mi_col + 7 * ebs_w >= cm->mi_params.mi_cols) break; + encode_sb(cpi, td, tile_data, tp, mi_row, mi_col + 7 * ebs_w, dry_run, + subsize, pc_tree->vertical4a[3], sub_tree[3], + track_ptree_luma ? ptree_luma->sub_tree[3] : NULL, rate); + break; + } + case PARTITION_VERT_4B: { + const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_VERT); + const BLOCK_SIZE bsize_med = + get_partition_subsize(bsize_big, PARTITION_VERT); + assert(subsize == get_partition_subsize(bsize_med, PARTITION_VERT)); + encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, dry_run, subsize, + pc_tree->vertical4b[0], sub_tree[0], + track_ptree_luma ? ptree_luma->sub_tree[0] : NULL, rate); + if (mi_col + ebs_w >= cm->mi_params.mi_cols) break; + encode_sb(cpi, td, tile_data, tp, mi_row, mi_col + ebs_w, dry_run, + bsize_big, pc_tree->vertical4b[1], sub_tree[1], + track_ptree_luma ? ptree_luma->sub_tree[1] : NULL, rate); + if (mi_col + 5 * ebs_w >= cm->mi_params.mi_cols) break; + encode_sb(cpi, td, tile_data, tp, mi_row, mi_col + 5 * ebs_w, dry_run, + bsize_med, pc_tree->vertical4b[2], sub_tree[2], + track_ptree_luma ? ptree_luma->sub_tree[2] : NULL, rate); + if (mi_col + 7 * ebs_w >= cm->mi_params.mi_cols) break; + encode_sb(cpi, td, tile_data, tp, mi_row, mi_col + 7 * ebs_w, dry_run, + subsize, pc_tree->vertical4b[3], sub_tree[3], + track_ptree_luma ? 
ptree_luma->sub_tree[3] : NULL, rate); + break; + } +#endif // CONFIG_UNEVEN_4WAY case PARTITION_HORZ_3: case PARTITION_VERT_3: { for (int i = 0; i < 4; ++i) { @@ -2315,38 +2642,6 @@ } break; } -#else - case PARTITION_HORZ_3: { - const BLOCK_SIZE bsize3 = get_partition_subsize(bsize, PARTITION_HORZ); - encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, dry_run, subsize, - pc_tree->horizontal3[0], sub_tree[0], - track_ptree_luma ? ptree_luma->sub_tree[0] : NULL, rate); - if (mi_row + qbs_h >= cm->mi_params.mi_rows) break; - encode_sb(cpi, td, tile_data, tp, mi_row + qbs_h, mi_col, dry_run, bsize3, - pc_tree->horizontal3[1], sub_tree[1], - track_ptree_luma ? ptree_luma->sub_tree[1] : NULL, rate); - if (mi_row + 3 * qbs_h >= cm->mi_params.mi_rows) break; - encode_sb(cpi, td, tile_data, tp, mi_row + 3 * qbs_h, mi_col, dry_run, - subsize, pc_tree->horizontal3[2], sub_tree[2], - track_ptree_luma ? ptree_luma->sub_tree[2] : NULL, rate); - break; - } - case PARTITION_VERT_3: { - const BLOCK_SIZE bsize3 = get_partition_subsize(bsize, PARTITION_VERT); - encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, dry_run, subsize, - pc_tree->vertical3[0], sub_tree[0], - track_ptree_luma ? ptree_luma->sub_tree[0] : NULL, rate); - if (mi_col + qbs_w >= cm->mi_params.mi_cols) break; - encode_sb(cpi, td, tile_data, tp, mi_row, mi_col + qbs_w, dry_run, bsize3, - pc_tree->vertical3[1], sub_tree[1], - track_ptree_luma ? ptree_luma->sub_tree[1] : NULL, rate); - if (mi_col + 3 * qbs_w >= cm->mi_params.mi_cols) break; - encode_sb(cpi, td, tile_data, tp, mi_row, mi_col + 3 * qbs_w, dry_run, - subsize, pc_tree->vertical3[2], sub_tree[2], - track_ptree_luma ? 
ptree_luma->sub_tree[2] : NULL, rate); - break; - } -#endif // CONFIG_H_PARTITION case PARTITION_SPLIT: encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, dry_run, subsize, pc_tree->split[0], sub_tree[0], @@ -2443,7 +2738,7 @@ const int ss_y = cm->seq_params.subsampling_y; PARTITION_TREE *parent = ptree->parent; - set_chroma_ref_info(mi_row, mi_col, ptree->index, bsize, + set_chroma_ref_info(tree_type, mi_row, mi_col, ptree->index, bsize, &ptree->chroma_ref_info, parent ? &parent->chroma_ref_info : NULL, parent ? parent->bsize : BLOCK_INVALID, @@ -2592,13 +2887,14 @@ if (ptree) { #ifndef NDEBUG #if CONFIG_EXT_RECUR_PARTITIONS - const bool ssx = cm->cur_frame->buf.subsampling_x; - const bool ssy = cm->cur_frame->buf.subsampling_y; - PARTITION_TYPE implied_partition; - const bool is_part_implied = is_partition_implied_at_boundary( - &cm->mi_params, tree_type, ssx, ssy, mi_row, mi_col, bsize, - &ptree->chroma_ref_info, &implied_partition); - assert(IMPLIES(is_part_implied, ptree->partition == implied_partition)); + const bool ss_x = cm->cur_frame->buf.subsampling_x; + const bool ss_y = cm->cur_frame->buf.subsampling_y; + const PARTITION_TYPE derived_partition = + av1_get_normative_forced_partition_type( + &cm->mi_params, tree_type, ss_x, ss_y, mi_row, mi_col, bsize, + /* ptree_luma= */ NULL, &ptree->chroma_ref_info); + assert(IMPLIES(derived_partition != PARTITION_INVALID, + ptree->partition == derived_partition)); #endif // CONFIG_EXT_RECUR_PARTITIONS #endif // NDEBUG return ptree->partition; @@ -2681,8 +2977,8 @@ if (pc_tree->none == NULL) { pc_tree->none = - av1_alloc_pmc(cm, mi_row, mi_col, bsize, pc_tree, PARTITION_NONE, 0, - ss_x, ss_y, &td->shared_coeff_buf); + av1_alloc_pmc(cm, xd->tree_type, mi_row, mi_col, bsize, pc_tree, + PARTITION_NONE, 0, ss_x, ss_y, &td->shared_coeff_buf); } PICK_MODE_CONTEXT *ctx_none = pc_tree->none; @@ -2719,9 +3015,9 @@ for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) { int x_idx = (i & 1) * hbs; int y_idx = (i >> 1) * hbs; - 
pc_tree->split[i] = - av1_alloc_pc_tree_node(mi_row + y_idx, mi_col + x_idx, split_subsize, - pc_tree, PARTITION_SPLIT, i, i == 3, ss_x, ss_y); + pc_tree->split[i] = av1_alloc_pc_tree_node( + xd->tree_type, mi_row + y_idx, mi_col + x_idx, split_subsize, pc_tree, + PARTITION_SPLIT, i, i == 3, ss_x, ss_y); } #endif // !CONFIG_EXT_RECUR_PARTITIONS switch (partition) { @@ -2731,11 +3027,12 @@ break; case PARTITION_HORZ: #if CONFIG_EXT_RECUR_PARTITIONS - pc_tree->horizontal[0] = av1_alloc_pc_tree_node( - mi_row, mi_col, subsize, pc_tree, PARTITION_HORZ, 0, 0, ss_x, ss_y); + pc_tree->horizontal[0] = + av1_alloc_pc_tree_node(xd->tree_type, mi_row, mi_col, subsize, + pc_tree, PARTITION_HORZ, 0, 0, ss_x, ss_y); pc_tree->horizontal[1] = - av1_alloc_pc_tree_node(mi_row + hbh, mi_col, subsize, pc_tree, - PARTITION_HORZ, 1, 1, ss_x, ss_y); + av1_alloc_pc_tree_node(xd->tree_type, mi_row + hbh, mi_col, subsize, + pc_tree, PARTITION_HORZ, 1, 1, ss_x, ss_y); av1_rd_use_partition(cpi, td, tile_data, mib, tp, mi_row, mi_col, subsize, &last_part_rdc.rate, &last_part_rdc.dist, 1, ptree ? 
ptree->sub_tree[0] : NULL, @@ -2744,8 +3041,8 @@ for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) { if (pc_tree->horizontal[i] == NULL) { pc_tree->horizontal[i] = av1_alloc_pmc( - cm, mi_row + hbs * i, mi_col, subsize, pc_tree, PARTITION_HORZ, i, - ss_x, ss_y, &td->shared_coeff_buf); + cm, xd->tree_type, mi_row + hbs * i, mi_col, subsize, pc_tree, + PARTITION_HORZ, i, ss_x, ss_y, &td->shared_coeff_buf); } } pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, @@ -2781,11 +3078,12 @@ break; case PARTITION_VERT: #if CONFIG_EXT_RECUR_PARTITIONS - pc_tree->vertical[0] = av1_alloc_pc_tree_node( - mi_row, mi_col, subsize, pc_tree, PARTITION_VERT, 0, 0, ss_x, ss_y); + pc_tree->vertical[0] = + av1_alloc_pc_tree_node(xd->tree_type, mi_row, mi_col, subsize, + pc_tree, PARTITION_VERT, 0, 0, ss_x, ss_y); pc_tree->vertical[1] = - av1_alloc_pc_tree_node(mi_row, mi_col + hbw, subsize, pc_tree, - PARTITION_VERT, 1, 1, ss_x, ss_y); + av1_alloc_pc_tree_node(xd->tree_type, mi_row, mi_col + hbw, subsize, + pc_tree, PARTITION_VERT, 1, 1, ss_x, ss_y); av1_rd_use_partition(cpi, td, tile_data, mib, tp, mi_row, mi_col, subsize, &last_part_rdc.rate, &last_part_rdc.dist, 1, ptree ? 
ptree->sub_tree[0] : NULL, @@ -2794,8 +3092,8 @@ for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) { if (pc_tree->vertical[i] == NULL) { pc_tree->vertical[i] = av1_alloc_pmc( - cm, mi_row, mi_col + hbs * i, subsize, pc_tree, PARTITION_VERT, i, - ss_x, ss_y, &td->shared_coeff_buf); + cm, xd->tree_type, mi_row, mi_col + hbs * i, subsize, pc_tree, + PARTITION_VERT, i, ss_x, ss_y, &td->shared_coeff_buf); } } pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, @@ -2842,8 +3140,8 @@ (mi_col + x_idx >= mi_params->mi_cols)) continue; pc_tree->split[i] = av1_alloc_pc_tree_node( - mi_row + y_idx, mi_col + x_idx, subsize, pc_tree, PARTITION_SPLIT, - i, i == 3, ss_x, ss_y); + xd->tree_type, mi_row + y_idx, mi_col + x_idx, subsize, pc_tree, + PARTITION_SPLIT, i, i == 3, ss_x, ss_y); av1_init_rd_stats(&tmp_rdc); av1_rd_use_partition( @@ -2860,6 +3158,12 @@ last_part_rdc.dist += tmp_rdc.dist; } break; +#if CONFIG_UNEVEN_4WAY + case PARTITION_HORZ_4A: + case PARTITION_HORZ_4B: + case PARTITION_VERT_4A: + case PARTITION_VERT_4B: +#endif // CONFIG_UNEVEN_4WAY case PARTITION_HORZ_3: case PARTITION_VERT_3: #else // CONFIG_EXT_RECUR_PARTITIONS @@ -2950,6 +3254,44 @@ *dist = last_part_rdc.dist; x->rdmult = orig_rdmult; } +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK +/*! \brief Contains level banks used for rdopt.*/ +typedef struct LevelBanksRDO { +#if CONFIG_MVP_IMPROVEMENT + //! The current level bank, used to restore the level bank in MACROBLOCKD. + REF_MV_BANK curr_level_bank; + //! The best level bank from the rdopt process. + REF_MV_BANK best_level_bank; +#endif // CONFIG_MVP_IMPROVEMENT +#if WARP_CU_BANK + //! The current warp, level bank, used to restore the warp level bank in + //! MACROBLOCKD. + WARP_PARAM_BANK curr_level_warp_bank; + //! The best warp level bank from the rdopt process. 
+ WARP_PARAM_BANK best_level_warp_bank; +#endif // WARP_CU_BANK +} LevelBanksRDO; + +static AOM_INLINE void update_best_level_banks(LevelBanksRDO *level_banks, + const MACROBLOCKD *xd) { +#if CONFIG_MVP_IMPROVEMENT + level_banks->best_level_bank = xd->ref_mv_bank; +#endif // CONFIG_MVP_IMPROVEMENT +#if WARP_CU_BANK + level_banks->best_level_warp_bank = xd->warp_param_bank; +#endif // WARP_CU_BANK +} + +static AOM_INLINE void restore_level_banks(MACROBLOCKD *xd, + const LevelBanksRDO *level_banks) { +#if CONFIG_MVP_IMPROVEMENT + xd->ref_mv_bank = level_banks->curr_level_bank; +#endif // CONFIG_MVP_IMPROVEMENT +#if WARP_CU_BANK + xd->warp_param_bank = level_banks->curr_level_warp_bank; +#endif // WARP_CU_BANK +} +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK #if !CONFIG_EXT_RECUR_PARTITIONS // Try searching for an encoding for the given subblock. Returns zero if the @@ -3012,15 +3354,10 @@ PARTITION_TYPE partition, const BLOCK_SIZE ab_subsize[SUB_PARTITIONS_AB], const int ab_mi_pos[SUB_PARTITIONS_AB][2] -#if CONFIG_C043_MVP_IMPROVEMENTS +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK , - REF_MV_BANK *best_level_bank -#endif // CONFIG_C043_MVP_IMPROVEMENTS -#if WARP_CU_BANK - , - WARP_PARAM_BANK *best_level_warp_bank -#endif // WARP_CU_BANK - + LevelBanksRDO *level_banks +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK ) { const MACROBLOCK *const x = &td->mb; const MACROBLOCKD *const xd = &x->e_mbd; @@ -3045,22 +3382,46 @@ if (sum_rdc.rdcost >= best_rdc->rdcost) return false; *best_rdc = sum_rdc; -#if CONFIG_C043_MVP_IMPROVEMENTS - *best_level_bank = x->e_mbd.ref_mv_bank; -#endif // CONFIG_C043_MVP_IMPROVEMENTS -#if WARP_CU_BANK - *best_level_warp_bank = x->e_mbd.warp_param_bank; -#endif // WARP_CU_BANK +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + update_best_level_banks(level_banks, &x->e_mbd); +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK pc_tree->partitioning = partition; return true; } #endif // !CONFIG_EXT_RECUR_PARTITIONS #if CONFIG_EXT_RECUR_PARTITIONS 
+static AOM_INLINE PARTITION_TYPE get_forced_partition_type( + const AV1_COMMON *const cm, MACROBLOCK *x, int mi_row, int mi_col, + BLOCK_SIZE bsize, const PARTITION_TREE *ptree_luma, + const PARTITION_TREE *template_tree, + const CHROMA_REF_INFO *chroma_ref_info) { + // Partition types forced by bitstream syntax. + const MACROBLOCKD *xd = &x->e_mbd; + const bool ss_x = cm->seq_params.subsampling_x; + const bool ss_y = cm->seq_params.subsampling_y; + const PARTITION_TYPE derived_partition = + av1_get_normative_forced_partition_type(&cm->mi_params, xd->tree_type, + ss_x, ss_y, mi_row, mi_col, bsize, + ptree_luma, chroma_ref_info); + if (derived_partition != PARTITION_INVALID) { + return derived_partition; + } + + // Partition types forced by speed_features. + if (template_tree) { + return template_tree->partition; + } + + if (should_reuse_mode(x, REUSE_PARTITION_MODE_FLAG)) { + return av1_get_prev_partition(x, mi_row, mi_col, bsize, cm->sb_size); + } + return PARTITION_INVALID; +} + static AOM_INLINE void init_allowed_partitions( PartitionSearchState *part_search_state, const PartitionCfg *part_cfg, - const CHROMA_REF_INFO *chroma_ref_info, - const CommonModeInfoParams *mi_params, TREE_TYPE tree_type) { + const CHROMA_REF_INFO *chroma_ref_info, TREE_TYPE tree_type) { const PartitionBlkParams *blk_params = &part_search_state->part_blk_params; const int mi_row = blk_params->mi_row; const int mi_col = blk_params->mi_col; @@ -3099,38 +3460,89 @@ is_bsize_geq(vert_subsize, blk_params->min_partition_size) && is_vert_size_valid; - // Boundary Handling - PARTITION_TYPE implied_partition; - const bool is_part_implied = is_partition_implied_at_boundary( - mi_params, tree_type, ss_x, ss_y, mi_row, mi_col, bsize, chroma_ref_info, - &implied_partition); - if (is_part_implied) { - part_search_state->partition_none_allowed = false; - if (implied_partition == PARTITION_HORZ) { - part_search_state->partition_rect_allowed[VERT] = false; - } else { - assert(implied_partition == 
PARTITION_VERT); - part_search_state->partition_rect_allowed[HORZ] = false; - } - } + const int ext_partition_allowed = part_search_state->ext_partition_allowed = + part_cfg->enable_ext_partitions && + is_ext_partition_allowed_at_bsize(bsize, tree_type); + + part_search_state->partition_3_allowed[HORZ] = + ext_partition_allowed && + get_partition_subsize(bsize, PARTITION_HORZ_3) != BLOCK_INVALID && + check_is_chroma_size_valid(tree_type, PARTITION_HORZ_3, bsize, mi_row, + mi_col, ss_x, ss_y, chroma_ref_info) && + is_bsize_geq(get_partition_subsize(bsize, PARTITION_HORZ_3), + blk_params->min_partition_size); + + part_search_state->partition_3_allowed[VERT] = + ext_partition_allowed && + get_partition_subsize(bsize, PARTITION_VERT_3) != BLOCK_INVALID && + check_is_chroma_size_valid(tree_type, PARTITION_VERT_3, bsize, mi_row, + mi_col, ss_x, ss_y, chroma_ref_info) && + is_bsize_geq(get_partition_subsize(bsize, PARTITION_VERT_3), + blk_params->min_partition_size); + +#if CONFIG_UNEVEN_4WAY + part_search_state->partition_4a_allowed[HORZ] = + ext_partition_allowed && + get_partition_subsize(bsize, PARTITION_HORZ_4A) != BLOCK_INVALID && + check_is_chroma_size_valid(tree_type, PARTITION_HORZ_4A, bsize, mi_row, + mi_col, ss_x, ss_y, chroma_ref_info) && + is_bsize_geq(get_partition_subsize(bsize, PARTITION_HORZ_4A), + blk_params->min_partition_size) && + IMPLIES(have_nz_chroma_ref_offset(bsize, PARTITION_HORZ_4A, ss_x, ss_y), + blk_params->has_7_8th_rows); + + part_search_state->partition_4b_allowed[HORZ] = + ext_partition_allowed && + get_partition_subsize(bsize, PARTITION_HORZ_4B) != BLOCK_INVALID && + check_is_chroma_size_valid(tree_type, PARTITION_HORZ_4B, bsize, mi_row, + mi_col, ss_x, ss_y, chroma_ref_info) && + is_bsize_geq(get_partition_subsize(bsize, PARTITION_HORZ_4B), + blk_params->min_partition_size) && + IMPLIES(have_nz_chroma_ref_offset(bsize, PARTITION_HORZ_4B, ss_x, ss_y), + blk_params->has_7_8th_rows); + + part_search_state->partition_4a_allowed[VERT] = + 
ext_partition_allowed && + get_partition_subsize(bsize, PARTITION_VERT_4A) != BLOCK_INVALID && + check_is_chroma_size_valid(tree_type, PARTITION_VERT_4A, bsize, mi_row, + mi_col, ss_x, ss_y, chroma_ref_info) && + is_bsize_geq(get_partition_subsize(bsize, PARTITION_VERT_4A), + blk_params->min_partition_size) && + IMPLIES(have_nz_chroma_ref_offset(bsize, PARTITION_VERT_4A, ss_x, ss_y), + blk_params->has_7_8th_cols); + + part_search_state->partition_4b_allowed[VERT] = + ext_partition_allowed && + get_partition_subsize(bsize, PARTITION_VERT_4B) != BLOCK_INVALID && + check_is_chroma_size_valid(tree_type, PARTITION_VERT_4B, bsize, mi_row, + mi_col, ss_x, ss_y, chroma_ref_info) && + is_bsize_geq(get_partition_subsize(bsize, PARTITION_VERT_4B), + blk_params->min_partition_size) && + IMPLIES(have_nz_chroma_ref_offset(bsize, PARTITION_VERT_4B, ss_x, ss_y), + blk_params->has_7_8th_cols); +#endif // CONFIG_UNEVEN_4WAY // Reset the flag indicating whether a partition leading to a rdcost lower // than the bound best_rdc has been found. part_search_state->found_best_partition = false; } + +static const int kZeroPartitionCosts[ALL_PARTITION_TYPES]; #endif // CONFIG_EXT_RECUR_PARTITIONS // Initialize state variables of partition search used in // av1_rd_pick_partition(). 
static void init_partition_search_state_params( MACROBLOCK *x, AV1_COMP *const cpi, PartitionSearchState *part_search_state, #if CONFIG_EXT_RECUR_PARTITIONS - PC_TREE *pc_tree, + PC_TREE *pc_tree, const PARTITION_TREE *ptree_luma, + const PARTITION_TREE *template_tree, int max_recursion_depth, #endif // CONFIG_EXT_RECUR_PARTITIONS int mi_row, int mi_col, BLOCK_SIZE bsize) { MACROBLOCKD *const xd = &x->e_mbd; const AV1_COMMON *const cm = &cpi->common; PartitionBlkParams *blk_params = &part_search_state->part_blk_params; const CommonModeInfoParams *const mi_params = &cpi->common.mi_params; + const TREE_TYPE tree_type = xd->tree_type; assert(bsize < BLOCK_SIZES_ALL); @@ -3163,10 +3575,19 @@ #endif // !CONFIG_EXT_RECUR_PARTITIONS blk_params->bsize = bsize; + // Chroma subsampling. + part_search_state->ss_x = x->e_mbd.plane[1].subsampling_x; + part_search_state->ss_y = x->e_mbd.plane[1].subsampling_y; + // Check if the partition corresponds to edge block. blk_params->has_rows = (blk_params->mi_row_edge < mi_params->mi_rows); blk_params->has_cols = (blk_params->mi_col_edge < mi_params->mi_cols); + const int ebw = mi_size_wide[bsize] / 8; + const int ebh = mi_size_high[bsize] / 8; + blk_params->has_7_8th_rows = (mi_row + 7 * ebh < mi_params->mi_rows); + blk_params->has_7_8th_cols = (mi_col + 7 * ebw < mi_params->mi_cols); + // Update intra partitioning related info. part_search_state->intra_part_info = &x->part_search_info; // Prepare for segmentation CNN-based partitioning for intra-frame. 
@@ -3188,8 +3609,16 @@ // Partition cost buffer update ModeCosts *mode_costs = &x->mode_costs; part_search_state->partition_cost = - mode_costs->partition_cost[xd->tree_type == CHROMA_PART] + mode_costs->partition_cost[tree_type == CHROMA_PART] [part_search_state->pl_ctx_idx]; +#if CONFIG_EXT_RECUR_PARTITIONS + if (av1_get_normative_forced_partition_type( + mi_params, tree_type, part_search_state->ss_x, + part_search_state->ss_y, mi_row, mi_col, bsize, ptree_luma, + &pc_tree->chroma_ref_info) != PARTITION_INVALID) { + part_search_state->partition_cost = kZeroPartitionCosts; + } +#endif // CONFIG_EXT_RECUR_PARTITIONS // Initialize HORZ and VERT win flags as true for all split partitions. for (int i = 0; i < SUB_PARTITIONS_SPLIT; i++) { @@ -3210,26 +3639,46 @@ // Initialize HORZ and VERT partitions to be not ready. av1_zero(part_search_state->is_rect_ctx_is_ready); - // Chroma subsampling. - part_search_state->ss_x = x->e_mbd.plane[1].subsampling_x; - part_search_state->ss_y = x->e_mbd.plane[1].subsampling_y; - // Initialize partition search flags to defaults. 
part_search_state->terminate_partition_search = 0; av1_zero(part_search_state->prune_rect_part); #if CONFIG_EXT_RECUR_PARTITIONS + part_search_state->partition_boundaries = NULL; + part_search_state->prune_partition_none = false; + av1_zero(part_search_state->prune_partition_3); +#if CONFIG_UNEVEN_4WAY + av1_zero(part_search_state->prune_partition_4a); + av1_zero(part_search_state->prune_partition_4b); +#endif // CONFIG_UNEVEN_4WAY + + part_search_state->forced_partition = + get_forced_partition_type(cm, x, mi_row, mi_col, bsize, ptree_luma, + template_tree, &pc_tree->chroma_ref_info); + init_allowed_partitions(part_search_state, &cpi->oxcf.part_cfg, - &pc_tree->chroma_ref_info, &cm->mi_params, - xd->tree_type); + &pc_tree->chroma_ref_info, tree_type); + + if (max_recursion_depth == 0) { + part_search_state->prune_rect_part[HORZ] = + part_search_state->prune_rect_part[VERT] = true; + part_search_state->prune_partition_3[HORZ] = + part_search_state->prune_partition_3[VERT] = true; +#if CONFIG_UNEVEN_4WAY + part_search_state->prune_partition_4a[HORZ] = + part_search_state->prune_partition_4a[VERT] = true; + part_search_state->prune_partition_4b[HORZ] = + part_search_state->prune_partition_4b[VERT] = true; +#endif // CONFIG_UNEVEN_4WAY + } #else part_search_state->do_square_split = blk_params->bsize_at_least_8x8 && - (xd->tree_type != CHROMA_PART || bsize > BLOCK_8X8); + (tree_type != CHROMA_PART || bsize > BLOCK_8X8); part_search_state->do_rectangular_split = cpi->oxcf.part_cfg.enable_rect_partitions && - (xd->tree_type != CHROMA_PART || bsize > BLOCK_8X8); + (tree_type != CHROMA_PART || bsize > BLOCK_8X8); const BLOCK_SIZE horz_subsize = get_partition_subsize(bsize, PARTITION_HORZ); const BLOCK_SIZE vert_subsize = get_partition_subsize(bsize, PARTITION_VERT); @@ -3242,7 +3691,7 @@ get_plane_block_size(vert_subsize, part_search_state->ss_x, part_search_state->ss_y) != BLOCK_INVALID; const bool no_sub_16_chroma_part = - xd->tree_type != CHROMA_PART || + tree_type != 
CHROMA_PART || (block_size_wide[bsize] > 8 && block_size_high[bsize] > 8); // Initialize allowed partition types for the partition block. @@ -3264,27 +3713,11 @@ #endif // CONFIG_EXT_RECUR_PARTITIONS } +#if !CONFIG_EXT_RECUR_PARTITIONS // Override partition cost buffer for the edge blocks. static void set_partition_cost_for_edge_blk( AV1_COMMON const *cm, MACROBLOCKD *const xd, -#if CONFIG_EXT_RECUR_PARTITIONS - const CHROMA_REF_INFO *chroma_ref_info, -#endif // CONFIG_EXT_RECUR_PARTITIONS PartitionSearchState *part_search_state) { -#if CONFIG_EXT_RECUR_PARTITIONS - const PartitionBlkParams *blk_params = &part_search_state->part_blk_params; - const bool is_part_implied = is_partition_implied_at_boundary( - &cm->mi_params, xd->tree_type, part_search_state->ss_x, - part_search_state->ss_y, blk_params->mi_row, blk_params->mi_col, - blk_params->bsize, chroma_ref_info, NULL); - if (is_part_implied) { - for (int i = 0; i < PARTITION_TYPES; ++i) { - part_search_state->tmp_partition_cost[i] = 0; - } - part_search_state->partition_cost = part_search_state->tmp_partition_cost; - } - (void)xd; -#else // CONFIG_EXT_RECUR_PARTITIONS PartitionBlkParams blk_params = part_search_state->part_blk_params; assert(blk_params.bsize_at_least_8x8 && part_search_state->pl_ctx_idx >= 0); const int plane = xd->tree_type == CHROMA_PART; @@ -3313,10 +3746,8 @@ } // Override the partition cost buffer. part_search_state->partition_cost = part_search_state->tmp_partition_cost; -#endif // CONFIG_EXT_RECUR_PARTITIONS } -#if !CONFIG_EXT_RECUR_PARTITIONS // Reset the partition search state flags when // must_find_valid_partition is equal to 1. 
static AOM_INLINE void reset_part_limitations( @@ -3448,13 +3879,16 @@ } } } + +static AOM_INLINE bool is_part_pruned_by_forced_partition( + const PartitionSearchState *part_state, PARTITION_TYPE partition) { + const PARTITION_TYPE forced_partition = part_state->forced_partition; + return forced_partition != PARTITION_INVALID && forced_partition != partition; +} #endif typedef int (*active_edge_info)(const AV1_COMP *cpi, int mi_col, int mi_step); -#define IS_FORCED_PARTITION_TYPE(cur_partition) \ - (forced_partition == PARTITION_INVALID || forced_partition == (cur_partition)) - // Checks if HORZ / VERT partition search is allowed. static AOM_INLINE int is_rect_part_allowed( const AV1_COMP *cpi, PartitionSearchState *part_search_state, @@ -3479,35 +3913,30 @@ } #if CONFIG_EXT_RECUR_PARTITIONS -static AOM_INLINE PARTITION_TYPE get_forced_partition_type( - const AV1_COMMON *const cm, MACROBLOCK *x, int mi_row, int mi_col, - BLOCK_SIZE bsize, const PARTITION_TREE *template_tree, - const PARTITION_TREE *ptree_luma, const CHROMA_REF_INFO *chroma_ref_info) { - if (template_tree) { - return template_tree->partition; +static AOM_INLINE void prune_rect_with_none_rd( + PartitionSearchState *part_search_state, BLOCK_SIZE bsize, int q_index, + int rdmult, int64_t part_none_rd, const int *is_not_edge_block) { + for (RECT_PART_TYPE rect = 0; rect < NUM_RECT_PARTS; rect++) { + // Disable pruning on the boundary + if (!is_not_edge_block[rect]) { + continue; + } + const PARTITION_TYPE partition_type = rect_partition_type[rect]; + float discount_factor = 1.1f; + const int q_thresh = 180; + if (q_index < q_thresh) { + discount_factor -= 0.025f; + } + if (AOMMAX(block_size_wide[bsize], block_size_high[bsize]) < 16) { + discount_factor -= 0.02f; + } + const int part_rate = part_search_state->partition_cost[partition_type]; + const int64_t est_rd = (int64_t)(part_none_rd / discount_factor) + + RDCOST(rdmult, part_rate, 0); + if (est_rd > part_none_rd) { + 
part_search_state->prune_rect_part[rect] = true; + } } - - const MACROBLOCKD *xd = &x->e_mbd; - const int ss_x = cm->seq_params.subsampling_x; - const int ss_y = cm->seq_params.subsampling_y; - if (is_luma_chroma_share_same_partition(xd->tree_type, ptree_luma, bsize)) { - const PARTITION_TYPE derived_partition_mode = - sdp_chroma_part_from_luma(bsize, ptree_luma->partition, ss_x, ss_y); - return derived_partition_mode; - } - - PARTITION_TYPE implied_partition; - const bool is_part_implied = is_partition_implied_at_boundary( - &cm->mi_params, xd->tree_type, ss_x, ss_y, mi_row, mi_col, bsize, - chroma_ref_info, &implied_partition); - if (is_part_implied) { - return implied_partition; - } - - if (should_reuse_mode(x, REUSE_PARTITION_MODE_FLAG)) { - return av1_get_prev_partition(x, mi_row, mi_col, bsize, cm->sb_size); - } - return PARTITION_INVALID; } #endif // CONFIG_EXT_RECUR_PARTITIONS @@ -3522,13 +3951,9 @@ const PARTITION_TREE *template_tree, int max_recursion_depth, #endif // CONFIG_EXT_RECUR_PARTITIONS RD_RECT_PART_WIN_INFO *rect_part_win_info, -#if CONFIG_C043_MVP_IMPROVEMENTS - REF_MV_BANK *best_level_bank, REF_MV_BANK *curr_level_bank, -#endif // CONFIG_C043_MVP_IMPROVEMENTS -#if WARP_CU_BANK - WARP_PARAM_BANK *best_level_warp_bank, - WARP_PARAM_BANK *curr_level_warp_bank, -#endif // WARP_CU_BANK +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + LevelBanksRDO *level_banks, +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK int64_t part_none_rd) { const AV1_COMMON *const cm = &cpi->common; PartitionBlkParams blk_params = part_search_state->part_blk_params; @@ -3543,9 +3968,6 @@ #if CONFIG_EXT_RECUR_PARTITIONS const int ss_x = xd->plane[1].subsampling_x; const int ss_y = xd->plane[1].subsampling_y; - PARTITION_TYPE forced_partition = get_forced_partition_type( - cm, x, blk_params.mi_row, blk_params.mi_col, blk_params.bsize, - template_tree, ptree_luma, &pc_tree->chroma_ref_info); #else // !CONFIG_EXT_RECUR_PARTITIONS (void)part_none_rd; #endif // 
CONFIG_EXT_RECUR_PARTITIONS @@ -3585,14 +4007,13 @@ (blk_params.mi_col + mi_size_wide[bsize] < mi_params->mi_cols); const bool try_prune_with_ml = cpi->sf.part_sf.prune_rect_with_ml && !frame_is_intra_only(cm) && - forced_partition == PARTITION_INVALID && is_whole_block_inside && - part_none_rd < INT64_MAX && + part_search_state->forced_partition == PARTITION_INVALID && + is_whole_block_inside && part_none_rd < INT64_MAX && (is_rect_part_allowed(cpi, part_search_state, active_edge_type, HORZ, mi_pos_rect[HORZ][0][HORZ]) || is_rect_part_allowed(cpi, part_search_state, active_edge_type, VERT, mi_pos_rect[VERT][0][VERT])); - bool prune_horz = false, prune_vert = false; if (try_prune_with_ml && bsize != BLOCK_4X8 && bsize != BLOCK_8X4 && is_partition_point(bsize)) { float ml_features[19]; @@ -3601,7 +4022,15 @@ mi_pos_rect); const bool is_hd = AOMMIN(cm->width, cm->height) >= 1080; - av1_erp_prune_rect(bsize, is_hd, ml_features, &prune_horz, &prune_vert); + av1_erp_prune_rect(bsize, is_hd, ml_features, + &part_search_state->prune_rect_part[HORZ], + &part_search_state->prune_rect_part[VERT]); + } + if (cpi->sf.part_sf.prune_rect_with_none_rd && + part_search_state->forced_partition == PARTITION_INVALID && + !frame_is_intra_only(cm) && part_none_rd < INT64_MAX) { + prune_rect_with_none_rd(part_search_state, bsize, x->qindex, x->rdmult, + part_none_rd, is_not_edge_block); } #endif // CONFIG_EXT_RECUR_PARTITIONS @@ -3615,16 +4044,6 @@ mi_pos_rect[i][0][i])) continue; -#if CONFIG_EXT_RECUR_PARTITIONS - if (pc_tree->parent) { - if ((pc_tree->parent->horizontal3[1] == pc_tree && i == HORZ) || - (pc_tree->parent->vertical3[1] == pc_tree && i == VERT)) { - continue; - } - } - -#endif // CONFIG_EXT_RECUR_PARTITIONS - // Sub-partition idx. 
const PARTITION_TYPE partition_type = rect_partition_type[i]; blk_params.subsize = @@ -3639,38 +4058,12 @@ #endif // !CONFIG_EXT_RECUR_PARTITIONS av1_init_rd_stats(sum_rdc); #if CONFIG_EXT_RECUR_PARTITIONS - if (!IS_FORCED_PARTITION_TYPE(partition_type)) { + if (is_part_pruned_by_forced_partition(part_search_state, partition_type)) { continue; } - if (partition_type == PARTITION_HORZ && prune_horz) { - continue; - } else if (partition_type == PARTITION_VERT && prune_vert) { - continue; - } - - if (cpi->sf.part_sf.prune_rect_with_none_rd && - forced_partition == PARTITION_INVALID && !frame_is_intra_only(cm) && - part_none_rd < INT64_MAX && sum_rdc->rate < INT_MAX && - is_not_edge_block[i]) { - float discount_factor = 1.1f; - const int q_thresh = 180; - const int q = x->qindex; - if (q < q_thresh) { - discount_factor -= 0.025f; - } - if (AOMMAX(block_size_wide[blk_params.bsize], - block_size_high[blk_params.bsize]) < 16) { - discount_factor -= 0.02f; - } - const int64_t est_rd = (int64_t)(part_none_rd / discount_factor) + - RDCOST(x->rdmult, part_hv_rate, 0); - if (est_rd > part_none_rd) { - continue; - } - } - PC_TREE **sub_tree = (i == HORZ) ? 
pc_tree->horizontal : pc_tree->vertical; + assert(sub_tree); const int num_planes = av1_num_planes(cm); for (int idx = 0; idx < SUB_PARTITIONS_RECT; idx++) { @@ -3680,11 +4073,11 @@ } } sub_tree[0] = av1_alloc_pc_tree_node( - mi_pos_rect[i][0][0], mi_pos_rect[i][0][1], blk_params.subsize, pc_tree, - partition_type, 0, 0, ss_x, ss_y); + xd->tree_type, mi_pos_rect[i][0][0], mi_pos_rect[i][0][1], + blk_params.subsize, pc_tree, partition_type, 0, 0, ss_x, ss_y); sub_tree[1] = av1_alloc_pc_tree_node( - mi_pos_rect[i][1][0], mi_pos_rect[i][1][1], blk_params.subsize, pc_tree, - partition_type, 1, 1, ss_x, ss_y); + xd->tree_type, mi_pos_rect[i][1][0], mi_pos_rect[i][1][1], + blk_params.subsize, pc_tree, partition_type, 1, 1, ss_x, ss_y); bool both_blocks_skippable = true; @@ -3703,10 +4096,11 @@ for (int j = 0; j < SUB_PARTITIONS_RECT; j++) { assert(cur_ctx[i][j] != NULL); if (cur_ctx[i][j][0] == NULL) { - cur_ctx[i][j][0] = av1_alloc_pmc( - cm, mi_pos_rect[i][j][0], mi_pos_rect[i][j][1], blk_params.subsize, - pc_tree, partition_type, j, part_search_state->ss_x, - part_search_state->ss_y, &td->shared_coeff_buf); + cur_ctx[i][j][0] = + av1_alloc_pmc(cm, xd->tree_type, mi_pos_rect[i][j][0], + mi_pos_rect[i][j][1], blk_params.subsize, pc_tree, + partition_type, j, part_search_state->ss_x, + part_search_state->ss_y, &td->shared_coeff_buf); } } sum_rdc->rate = part_search_state->partition_cost[partition_type]; @@ -3766,12 +4160,10 @@ pc_tree->skippable = both_blocks_skippable; #endif // CONFIG_EXT_RECUR_PARTITIONS *best_rdc = *sum_rdc; -#if CONFIG_C043_MVP_IMPROVEMENTS - *best_level_bank = x->e_mbd.ref_mv_bank; -#endif // CONFIG_C043_MVP_IMPROVEMENTS -#if WARP_CU_BANK - *best_level_warp_bank = x->e_mbd.warp_param_bank; -#endif // WARP_CU_BANK + +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + update_best_level_banks(level_banks, &x->e_mbd); +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK part_search_state->found_best_partition = true; pc_tree->partitioning = partition_type; } @@ 
-3780,12 +4172,9 @@ if (rect_part_win_info != NULL) rect_part_win_info->rect_part_win[i] = false; } -#if CONFIG_C043_MVP_IMPROVEMENTS - x->e_mbd.ref_mv_bank = *curr_level_bank; -#endif // CONFIG_C043_MVP_IMPROVEMENTS -#if WARP_CU_BANK - x->e_mbd.warp_param_bank = *curr_level_warp_bank; -#endif // WARP_CU_BANK +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + restore_level_banks(&x->e_mbd, level_banks); +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK av1_restore_context(cm, x, x_ctx, blk_params.mi_row, blk_params.mi_col, blk_params.bsize, av1_num_planes(cm)); #if CONFIG_EXT_RECUR_PARTITIONS @@ -3822,14 +4211,10 @@ PartitionSearchState *part_search_state, RD_STATS *best_rdc, const BLOCK_SIZE ab_subsize[SUB_PARTITIONS_AB], const int ab_mi_pos[SUB_PARTITIONS_AB][2], const PARTITION_TYPE part_type -#if CONFIG_C043_MVP_IMPROVEMENTS +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK , - REF_MV_BANK *best_level_bank -#endif // CONFIG_C043_MVP_IMPROVEMENTS -#if WARP_CU_BANK - , - WARP_PARAM_BANK *best_level_warp_bank -#endif // WARP_CU_BANK + LevelBanksRDO *level_banks +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK ) { const AV1_COMMON *const cm = &cpi->common; PartitionBlkParams blk_params = part_search_state->part_blk_params; @@ -3856,14 +4241,10 @@ part_search_state->found_best_partition |= rd_test_partition3(cpi, td, tile_data, tp, pc_tree, best_rdc, dst_ctxs, mi_row, mi_col, bsize, part_type, ab_subsize, ab_mi_pos -#if CONFIG_C043_MVP_IMPROVEMENTS +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK , - best_level_bank -#endif // CONFIG_C043_MVP_IMPROVEMENTS -#if WARP_CU_BANK - , - best_level_warp_bank -#endif // WARP_CU_BANK + level_banks +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK ); #if CONFIG_COLLECT_PARTITION_STATS @@ -3913,14 +4294,10 @@ PC_TREE *pc_tree, PartitionSearchState *part_search_state, RD_STATS *best_rdc, RD_RECT_PART_WIN_INFO *rect_part_win_info, int pb_source_variance, int ext_partition_allowed -#if CONFIG_C043_MVP_IMPROVEMENTS +#if CONFIG_MVP_IMPROVEMENT || 
WARP_CU_BANK , - REF_MV_BANK *best_level_bank, REF_MV_BANK *curr_level_bank -#endif // CONFIG_C043_MVP_IMPROVEMENTS -#if WARP_CU_BANK - , - WARP_PARAM_BANK *best_level_warp_bank, WARP_PARAM_BANK *curr_level_warp_bank -#endif // WARP_CU_BANK + LevelBanksRDO *level_banks +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK ) { const AV1_COMMON *const cm = &cpi->common; PartitionBlkParams blk_params = part_search_state->part_blk_params; @@ -4004,9 +4381,9 @@ // Set AB partition context. if (cur_part_ctxs[ab_part_type][i] == NULL) cur_part_ctxs[ab_part_type][i] = av1_alloc_pmc( - cm, ab_mi_pos[ab_part_type][i][0], ab_mi_pos[ab_part_type][i][1], - ab_subsize[ab_part_type][i], pc_tree, part_type, i, - part_search_state->ss_x, part_search_state->ss_y, + cm, x->e_mbd.tree_type, ab_mi_pos[ab_part_type][i][0], + ab_mi_pos[ab_part_type][i][1], ab_subsize[ab_part_type][i], pc_tree, + part_type, i, part_search_state->ss_x, part_search_state->ss_y, &td->shared_coeff_buf); // Set mode as not ready. cur_part_ctxs[ab_part_type][i]->rd_mode_is_ready = 0; @@ -4030,21 +4407,14 @@ rd_pick_ab_part(cpi, td, tile_data, tp, x, x_ctx, pc_tree, cur_part_ctxs[ab_part_type], part_search_state, best_rdc, ab_subsize[ab_part_type], ab_mi_pos[ab_part_type], part_type -#if CONFIG_C043_MVP_IMPROVEMENTS +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK , - best_level_bank -#endif // CONFIG_C043_MVP_IMPROVEMENTS -#if WARP_CU_BANK - , - best_level_warp_bank -#endif // WARP_CU_BANK + level_banks +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK ); -#if CONFIG_C043_MVP_IMPROVEMENTS - x->e_mbd.ref_mv_bank = *curr_level_bank; -#endif // CONFIG_C043_MVP_IMPROVEMENTS -#if WARP_CU_BANK - x->e_mbd.warp_param_bank = *curr_level_warp_bank; -#endif // WARP_CU_BANK +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + restore_level_banks(&x->e_mbd, level_banks); +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK } } @@ -4073,10 +4443,10 @@ RDCOST(x->rdmult, part_search_state->sum_rdc.rate, 0); for (PART4_TYPES i = 0; i < 
SUB_PARTITIONS_PART4; ++i) { if (cur_part_ctx[i] == NULL) - cur_part_ctx[i] = - av1_alloc_pmc(cm, mi_pos[i][0], mi_pos[i][1], subsize, pc_tree, - partition_type, i, part_search_state->ss_x, - part_search_state->ss_y, &td->shared_coeff_buf); + cur_part_ctx[i] = av1_alloc_pmc( + cm, x->e_mbd.tree_type, mi_pos[i][0], mi_pos[i][1], subsize, pc_tree, + partition_type, i, part_search_state->ss_x, part_search_state->ss_y, + &td->shared_coeff_buf); } } @@ -4087,14 +4457,10 @@ PC_TREE *pc_tree, PICK_MODE_CONTEXT *cur_part_ctx[SUB_PARTITIONS_PART4], PartitionSearchState *part_search_state, RD_STATS *best_rdc, const int inc_step[NUM_PART4_TYPES], PARTITION_TYPE partition_type -#if CONFIG_C043_MVP_IMPROVEMENTS +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK , - REF_MV_BANK *best_level_bank, REF_MV_BANK *curr_level_bank -#endif // CONFIG_C043_MVP_IMPROVEMENTS -#if WARP_CU_BANK - , - WARP_PARAM_BANK *best_level_warp_bank, WARP_PARAM_BANK *curr_level_warp_bank -#endif // WARP_CU_BANK + LevelBanksRDO *level_banks +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK ) { const AV1_COMMON *const cm = &cpi->common; PartitionBlkParams blk_params = part_search_state->part_blk_params; @@ -4136,12 +4502,9 @@ av1_rd_cost_update(x->rdmult, &part_search_state->sum_rdc); if (part_search_state->sum_rdc.rdcost < best_rdc->rdcost) { *best_rdc = part_search_state->sum_rdc; -#if CONFIG_C043_MVP_IMPROVEMENTS - *best_level_bank = x->e_mbd.ref_mv_bank; -#endif // CONFIG_C043_MVP_IMPROVEMENTS -#if WARP_CU_BANK - *best_level_warp_bank = x->e_mbd.warp_param_bank; -#endif // WARP_CU_BANK +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + update_best_level_banks(level_banks, &x->e_mbd); +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK part_search_state->found_best_partition = true; pc_tree->partitioning = partition_type; } @@ -4153,14 +4516,11 @@ partition_timer_on = 0; } #endif -#if CONFIG_C043_MVP_IMPROVEMENTS - x->e_mbd.ref_mv_bank = *curr_level_bank; -#endif // CONFIG_C043_MVP_IMPROVEMENTS -#if WARP_CU_BANK - 
x->e_mbd.warp_param_bank = *curr_level_warp_bank; -#endif // WARP_CU_BANK av1_restore_context(cm, x, x_ctx, blk_params.mi_row, blk_params.mi_col, blk_params.bsize, av1_num_planes(cm)); +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + restore_level_banks(&x->e_mbd, level_banks); +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK } // Prune 4-way partitions based on the number of horz/vert wins @@ -4276,12 +4636,13 @@ blk_params.has_rows && blk_params.has_cols) #endif // CONFIG_EXT_RECUR_PARTITIONS part_search_state->partition_none_allowed = 1; - assert(part_search_state->terminate_partition_search == 0); +#if !CONFIG_EXT_RECUR_PARTITIONS if (part_search_state->partition_none_allowed == BLOCK_INVALID) { part_search_state->partition_none_allowed = 0; return; } +#endif // CONFIG_EXT_RECUR_PARTITIONS // Set PARTITION_NONE for screen content. if (cpi->is_screen_content_type) @@ -4301,9 +4662,9 @@ // Set PARTITION_NONE context. if (pc_tree->none == NULL) pc_tree->none = av1_alloc_pmc( - cm, blk_params.mi_row, blk_params.mi_col, blk_params.bsize, pc_tree, - PARTITION_NONE, 0, part_search_state->ss_x, part_search_state->ss_y, - &td->shared_coeff_buf); + cm, x->e_mbd.tree_type, blk_params.mi_row, blk_params.mi_col, + blk_params.bsize, pc_tree, PARTITION_NONE, 0, part_search_state->ss_x, + part_search_state->ss_y, &td->shared_coeff_buf); // Set PARTITION_NONE type cost. 
if (part_search_state->partition_none_allowed) { @@ -4478,14 +4839,10 @@ RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx, PartitionSearchState *part_search_state, RD_STATS *best_rdc, unsigned int *pb_source_variance, int64_t *none_rd, int64_t *part_none_rd -#if CONFIG_C043_MVP_IMPROVEMENTS +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK , - REF_MV_BANK *best_level_bank -#endif // CONFIG_C043_MVP_IMPROVEMENTS -#if WARP_CU_BANK - , - WARP_PARAM_BANK *best_level_warp_bank -#endif // WARP_CU_BANK + LevelBanksRDO *level_banks +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK ) { const AV1_COMMON *const cm = &cpi->common; PartitionBlkParams blk_params = part_search_state->part_blk_params; @@ -4495,13 +4852,25 @@ const BLOCK_SIZE bsize = blk_params.bsize; assert(bsize < BLOCK_SIZES_ALL); +#if CONFIG_EXT_RECUR_PARTITIONS + if (is_part_pruned_by_forced_partition(part_search_state, PARTITION_NONE)) { + return; + } +#endif // CONFIG_EXT_RECUR_PARTITIONS // Set PARTITION_NONE allowed flag. set_part_none_allowed_flag(cpi, #if CONFIG_EXT_RECUR_PARTITIONS x->e_mbd.tree_type, #endif // CONFIG_EXT_RECUR_PARTITIONS part_search_state); - if (!part_search_state->partition_none_allowed) return; + if (!part_search_state->partition_none_allowed) { + return; + } +#if CONFIG_EXT_RECUR_PARTITIONS + if (part_search_state->prune_partition_none) { + return; + } +#endif // CONFIG_EXT_RECUR_PARTITIONS int pt_cost = 0; RD_STATS best_remain_rdcost; @@ -4569,12 +4938,9 @@ *part_none_rd = this_rdc->rdcost; if (this_rdc->rdcost < best_rdc->rdcost) { *best_rdc = *this_rdc; -#if CONFIG_C043_MVP_IMPROVEMENTS - *best_level_bank = x->e_mbd.ref_mv_bank; -#endif // CONFIG_C043_MVP_IMPROVEMENTS -#if WARP_CU_BANK - *best_level_warp_bank = x->e_mbd.warp_param_bank; -#endif // WARP_CU_BANK +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + update_best_level_banks(level_banks, &x->e_mbd); +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK part_search_state->found_best_partition = true; #if !CONFIG_EXT_RECUR_PARTITIONS if 
(blk_params.bsize_at_least_8x8) { @@ -4592,6 +4958,9 @@ } } av1_restore_context(cm, x, x_ctx, mi_row, mi_col, bsize, av1_num_planes(cm)); +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + restore_level_banks(&x->e_mbd, level_banks); +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK } // PARTITION_SPLIT search. @@ -4601,14 +4970,10 @@ SIMPLE_MOTION_DATA_TREE *sms_tree, RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx, PartitionSearchState *part_search_state, RD_STATS *best_rdc, SB_MULTI_PASS_MODE multi_pass_mode, int64_t *part_split_rd -#if CONFIG_C043_MVP_IMPROVEMENTS +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK , - REF_MV_BANK *best_level_bank + LevelBanksRDO *level_banks #endif // CONFIG_C043_MVP_IMPROVEMENTS -#if WARP_CU_BANK - , - WARP_PARAM_BANK *best_level_warp_bank -#endif // WARP_CU_BANK #if CONFIG_EXT_RECUR_PARTITIONS , const PARTITION_TREE *ptree_luma, const PARTITION_TREE *template_tree, @@ -4632,6 +4997,13 @@ !is_square_split_eligible(bsize, cm->sb_size)) { return; } + if (part_search_state->forced_partition != PARTITION_INVALID && + part_search_state->forced_partition != PARTITION_SPLIT) { + return; + } + if (max_recursion_depth < 0) { + return; + } const int num_planes = av1_num_planes(cm); PC_TREE **sub_tree = pc_tree->split; @@ -4677,8 +5049,9 @@ if (pc_tree->split[idx] == NULL) { pc_tree->split[idx] = av1_alloc_pc_tree_node( - mi_row + y_idx, mi_col + x_idx, subsize, pc_tree, PARTITION_SPLIT, - idx, idx == 3, part_search_state->ss_x, part_search_state->ss_y); + x->e_mbd.tree_type, mi_row + y_idx, mi_col + x_idx, subsize, pc_tree, + PARTITION_SPLIT, idx, idx == 3, part_search_state->ss_x, + part_search_state->ss_y); } #if !CONFIG_EXT_RECUR_PARTITIONS int64_t *p_split_rd = &part_search_state->split_rd[idx]; @@ -4751,12 +5124,9 @@ sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); if (sum_rdc.rdcost < best_rdc->rdcost) { *best_rdc = sum_rdc; -#if CONFIG_C043_MVP_IMPROVEMENTS - *best_level_bank = x->e_mbd.ref_mv_bank; -#endif // CONFIG_C043_MVP_IMPROVEMENTS 
-#if WARP_CU_BANK - *best_level_warp_bank = x->e_mbd.warp_param_bank; -#endif // WARP_CU_BANK +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + update_best_level_banks(level_banks, &x->e_mbd); +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK part_search_state->found_best_partition = true; pc_tree->partitioning = PARTITION_SPLIT; } @@ -4774,6 +5144,9 @@ #endif // !CONFIG_EXT_RECUR_PARTITIONS } av1_restore_context(cm, x, x_ctx, mi_row, mi_col, bsize, av1_num_planes(cm)); +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + restore_level_banks(&x->e_mbd, level_banks); +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK } #if CONFIG_EXT_RECUR_PARTITIONS @@ -4799,15 +5172,23 @@ /*!\brief Whether the current partition node uses horizontal type partitions. */ static AOM_INLINE bool node_uses_horz(const PC_TREE *pc_tree) { assert(pc_tree); - return pc_tree->partitioning == PARTITION_HORZ || - pc_tree->partitioning == PARTITION_HORZ_3; + return pc_tree->partitioning == PARTITION_HORZ +#if CONFIG_UNEVEN_4WAY + || pc_tree->partitioning == PARTITION_HORZ_4A || + pc_tree->partitioning == PARTITION_HORZ_4B +#endif // CONFIG_UNEVEN_4WAY + || pc_tree->partitioning == PARTITION_HORZ_3; } /*!\brief Whether the current partition node uses vertical type partitions. */ static AOM_INLINE bool node_uses_vert(const PC_TREE *pc_tree) { assert(pc_tree); - return pc_tree->partitioning == PARTITION_VERT || - pc_tree->partitioning == PARTITION_VERT_3; + return pc_tree->partitioning == PARTITION_VERT +#if CONFIG_UNEVEN_4WAY + || pc_tree->partitioning == PARTITION_VERT_4A || + pc_tree->partitioning == PARTITION_VERT_4B +#endif // CONFIG_UNEVEN_4WAY + || pc_tree->partitioning == PARTITION_VERT_3; } /*!\brief Try searching for an encoding for the given subblock. @@ -4863,43 +5244,1054 @@ return 1; } +/*!\brief Trace out the partition boundaries using the structure in pc_tree. + * + * The results are stored in partition_boundaries. 
The array + * partition_boundaries has a stride of MAX_MIB_SIZE, and the units are in mi. + * The actual values stored is a bitmask, with 1 << HORZ means that there is a + * horizontal boundary, and 1 << VERT means that there is a vertical boundary. + * */ +static AOM_INLINE void trace_partition_boundary(bool *partition_boundaries, + const PC_TREE *pc_tree, + int mi_row, int mi_col, + BLOCK_SIZE bsize) { + mi_row &= MAX_MIB_MASK; + mi_col &= MAX_MIB_MASK; + const PARTITION_TYPE partition = pc_tree->partitioning; + assert(bsize < BLOCK_SIZES_ALL); + const int mi_width = mi_size_wide[bsize]; + const int mi_height = mi_size_high[bsize]; +#if CONFIG_UNEVEN_4WAY + const int ebs_w = mi_size_wide[bsize] / 8; + const int ebs_h = mi_size_high[bsize] / 8; + const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition); +#endif // CONFIG_UNEVEN_4WAY + switch (partition) { + case PARTITION_NONE: + for (int col = 0; col < mi_width; col++) { + partition_boundaries[(mi_row + mi_height - 1) * MAX_MIB_SIZE + + (mi_col + col)] |= (1 << HORZ); + } + for (int row = 0; row < mi_height; row++) { + partition_boundaries[(mi_row + row) * MAX_MIB_SIZE + mi_col + mi_width - + 1] |= (1 << VERT); + } + break; + case PARTITION_HORZ: + trace_partition_boundary(partition_boundaries, pc_tree->horizontal[0], + mi_row, mi_col, + get_partition_subsize(bsize, PARTITION_HORZ)); + trace_partition_boundary(partition_boundaries, pc_tree->horizontal[1], + mi_row + mi_height / 2, mi_col, + get_partition_subsize(bsize, PARTITION_HORZ)); + break; + case PARTITION_VERT: + trace_partition_boundary(partition_boundaries, pc_tree->vertical[0], + mi_row, mi_col, + get_partition_subsize(bsize, PARTITION_VERT)); + trace_partition_boundary(partition_boundaries, pc_tree->vertical[1], + mi_row, mi_col + mi_width / 2, + get_partition_subsize(bsize, PARTITION_VERT)); + break; + case PARTITION_HORZ_3: + trace_partition_boundary( + partition_boundaries, pc_tree->horizontal3[0], mi_row, mi_col, + 
get_h_partition_subsize(bsize, 0, PARTITION_HORZ_3)); + trace_partition_boundary( + partition_boundaries, pc_tree->horizontal3[1], mi_row + mi_height / 4, + mi_col, get_h_partition_subsize(bsize, 1, PARTITION_HORZ_3)); + trace_partition_boundary( + partition_boundaries, pc_tree->horizontal3[2], mi_row + mi_height / 4, + mi_col + mi_width / 2, + get_h_partition_subsize(bsize, 1, PARTITION_HORZ_3)); + trace_partition_boundary( + partition_boundaries, pc_tree->horizontal3[3], + mi_row + 3 * mi_height / 4, mi_col, + get_h_partition_subsize(bsize, 0, PARTITION_HORZ_3)); + break; + case PARTITION_VERT_3: + trace_partition_boundary( + partition_boundaries, pc_tree->vertical3[0], mi_row, mi_col, + get_h_partition_subsize(bsize, 0, PARTITION_VERT_3)); + trace_partition_boundary( + partition_boundaries, pc_tree->vertical3[1], mi_row, + mi_col + mi_width / 4, + get_h_partition_subsize(bsize, 1, PARTITION_VERT_3)); + trace_partition_boundary( + partition_boundaries, pc_tree->vertical3[2], mi_row + mi_height / 2, + mi_col + mi_width / 4, + get_h_partition_subsize(bsize, 1, PARTITION_VERT_3)); + trace_partition_boundary( + partition_boundaries, pc_tree->vertical3[3], mi_row, + mi_col + 3 * mi_width / 4, + get_h_partition_subsize(bsize, 0, PARTITION_VERT_3)); + break; +#if CONFIG_UNEVEN_4WAY + case PARTITION_HORZ_4A: { + const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_HORZ); + const BLOCK_SIZE bsize_med = + get_partition_subsize(bsize_big, PARTITION_HORZ); + assert(subsize == get_partition_subsize(bsize_med, PARTITION_HORZ)); + trace_partition_boundary(partition_boundaries, pc_tree->horizontal4a[0], + mi_row, mi_col, subsize); + trace_partition_boundary(partition_boundaries, pc_tree->horizontal4a[1], + mi_row + ebs_h, mi_col, bsize_med); + trace_partition_boundary(partition_boundaries, pc_tree->horizontal4a[2], + mi_row + 3 * ebs_h, mi_col, bsize_big); + trace_partition_boundary(partition_boundaries, pc_tree->horizontal4a[3], + mi_row + 7 * ebs_h, mi_col, 
subsize); + break; + } + case PARTITION_HORZ_4B: { + const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_HORZ); + const BLOCK_SIZE bsize_med = + get_partition_subsize(bsize_big, PARTITION_HORZ); + assert(subsize == get_partition_subsize(bsize_med, PARTITION_HORZ)); + trace_partition_boundary(partition_boundaries, pc_tree->horizontal4b[0], + mi_row, mi_col, subsize); + trace_partition_boundary(partition_boundaries, pc_tree->horizontal4b[1], + mi_row + ebs_h, mi_col, bsize_big); + trace_partition_boundary(partition_boundaries, pc_tree->horizontal4b[2], + mi_row + 5 * ebs_h, mi_col, bsize_med); + trace_partition_boundary(partition_boundaries, pc_tree->horizontal4b[3], + mi_row + 7 * ebs_h, mi_col, subsize); + break; + } + case PARTITION_VERT_4A: { + const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_VERT); + const BLOCK_SIZE bsize_med = + get_partition_subsize(bsize_big, PARTITION_VERT); + assert(subsize == get_partition_subsize(bsize_med, PARTITION_VERT)); + trace_partition_boundary(partition_boundaries, pc_tree->vertical4a[0], + mi_row, mi_col, subsize); + trace_partition_boundary(partition_boundaries, pc_tree->vertical4a[1], + mi_row, mi_col + ebs_w, bsize_med); + trace_partition_boundary(partition_boundaries, pc_tree->vertical4a[2], + mi_row, mi_col + 3 * ebs_w, bsize_big); + trace_partition_boundary(partition_boundaries, pc_tree->vertical4a[3], + mi_row, mi_col + 7 * ebs_w, subsize); + break; + } + case PARTITION_VERT_4B: { + const BLOCK_SIZE bsize_big = get_partition_subsize(bsize, PARTITION_VERT); + const BLOCK_SIZE bsize_med = + get_partition_subsize(bsize_big, PARTITION_VERT); + assert(subsize == get_partition_subsize(bsize_med, PARTITION_VERT)); + trace_partition_boundary(partition_boundaries, pc_tree->vertical4b[0], + mi_row, mi_col, subsize); + trace_partition_boundary(partition_boundaries, pc_tree->vertical4b[1], + mi_row, mi_col + ebs_w, bsize_big); + trace_partition_boundary(partition_boundaries, pc_tree->vertical4b[2], 
+ mi_row, mi_col + 5 * ebs_w, bsize_med); + trace_partition_boundary(partition_boundaries, pc_tree->vertical4b[3], + mi_row, mi_col + 7 * ebs_w, subsize); + break; + } +#endif // CONFIG_UNEVEN_4WAY + default: assert(0 && "Invalid partition type in trace_partition_boundary!"); + } +} + +/*!\brief Prunes h partitions using the current best partition boundaries. + * + * If the H-shaped partitions don't have any overlap with the current best + * partition boundaries, then they are pruned from the search. + * */ +static AOM_INLINE void prune_part_3_with_partition_boundary( + PartitionSearchState *part_search_state, BLOCK_SIZE bsize, int mi_row, + int mi_col, bool can_search_horz, bool can_search_vert) { + const int mi_width = mi_size_wide[bsize]; + const int mi_height = mi_size_high[bsize]; + const int masked_mi_row = mi_row & MAX_MIB_MASK; + const int masked_mi_col = mi_col & MAX_MIB_MASK; + const bool *partition_boundaries = part_search_state->partition_boundaries; + if (can_search_horz) { + bool keep_horz_3 = false; + for (int col = 0; col < mi_width; col++) { + if (partition_boundaries[(masked_mi_row + mi_height / 4 - 1) * + MAX_MIB_SIZE + + masked_mi_col + col] & + (1 << HORZ)) { + keep_horz_3 = true; + break; + } + } + if (!keep_horz_3) { + for (int col = 0; col < mi_width; col++) { + if (partition_boundaries[(masked_mi_row + 3 * mi_height / 4 - 1) * + MAX_MIB_SIZE + + masked_mi_col + col] & + (1 << HORZ)) { + keep_horz_3 = true; + break; + } + } + } + if (!keep_horz_3) { + for (int row = 0; row < mi_height / 2; row++) { + if (partition_boundaries[(masked_mi_row + mi_height / 4 + row) * + MAX_MIB_SIZE + + masked_mi_col + mi_width / 2 - 1] & + (1 << VERT)) { + keep_horz_3 = true; + break; + } + } + } + part_search_state->prune_partition_3[HORZ] |= !keep_horz_3; + } + if (can_search_vert) { + bool keep_vert_3 = false; + for (int row = 0; row < mi_height; row++) { + if (partition_boundaries[(masked_mi_row + row) * MAX_MIB_SIZE + + masked_mi_col + mi_width / 4 - 1] & 
+ (1 << VERT)) { + keep_vert_3 = true; + break; + } + } + if (!keep_vert_3) { + for (int row = 0; row < mi_height; row++) { + if (partition_boundaries[(masked_mi_row + row) * MAX_MIB_SIZE + + masked_mi_col + 3 * mi_width / 4 - 1] & + (1 << VERT)) { + keep_vert_3 = true; + break; + } + } + } + if (!keep_vert_3) { + for (int col = 0; col < mi_width / 2; col++) { + if (partition_boundaries[(masked_mi_row + mi_height / 2 - 1) * + MAX_MIB_SIZE + + masked_mi_col + mi_width / 4 + col] & + (1 << HORZ)) { + keep_vert_3 = true; + break; + } + } + } + part_search_state->prune_partition_3[VERT] |= !keep_vert_3; + } +} + +#if CONFIG_UNEVEN_4WAY +/*!\brief Prunes 4-way partitions using the current best partition boundaries. + * + * If the 4-way partitions don't have any overlap with the current best + * partition boundaries, then they are pruned from the search. + */ +static AOM_INLINE void prune_part_4_with_partition_boundary( + PartitionSearchState *part_search_state, const bool *partition_boundaries, + BLOCK_SIZE bsize, int mi_row, int mi_col, bool can_search_horz_4a, + bool can_search_horz_4b, bool can_search_vert_4a, bool can_search_vert_4b) { + const int mi_width = mi_size_wide[bsize]; + const int mi_height = mi_size_high[bsize]; + const int masked_mi_row = mi_row & MAX_MIB_MASK; + const int masked_mi_col = mi_col & MAX_MIB_MASK; + bool keep_horz_4a = false, keep_horz_4b = false; + bool keep_vert_4a = false, keep_vert_4b = false; + if (can_search_horz_4a || can_search_horz_4b) { + for (int col = 0; col < mi_width; col++) { + if (partition_boundaries[(masked_mi_row + mi_height / 8 - 1) * + MAX_MIB_SIZE + + masked_mi_col + col] & + (1 << HORZ)) { + keep_horz_4a = true; + keep_horz_4b = true; + break; + } + if (partition_boundaries[(masked_mi_row + 7 * mi_height / 8 - 1) * + MAX_MIB_SIZE + + masked_mi_col + col] & + (1 << HORZ)) { + keep_horz_4a = true; + keep_horz_4b = true; + break; + } + } + if (can_search_horz_4a && !keep_horz_4a) { + for (int col = 0; col < mi_width; 
col++) { + if (partition_boundaries[(masked_mi_row + 3 * mi_height / 8 - 1) * + MAX_MIB_SIZE + + masked_mi_col + col] & + (1 << HORZ)) { + keep_horz_4a = true; + break; + } + } + } + if (can_search_horz_4b && !keep_horz_4b) { + for (int col = 0; col < mi_width; col++) { + if (partition_boundaries[(masked_mi_row + 5 * mi_height / 8 - 1) * + MAX_MIB_SIZE + + masked_mi_col + col] & + (1 << HORZ)) { + keep_horz_4b = true; + break; + } + } + } + part_search_state->prune_partition_4a[HORZ] |= !keep_horz_4a; + part_search_state->prune_partition_4b[HORZ] |= !keep_horz_4b; + } + if (can_search_vert_4a || can_search_vert_4b) { + for (int row = 0; row < mi_height; row++) { + if (partition_boundaries[(masked_mi_row + row) * MAX_MIB_SIZE + + masked_mi_col + mi_width / 8 - 1] & + (1 << VERT)) { + keep_vert_4a = true; + keep_vert_4b = true; + break; + } + if (partition_boundaries[(masked_mi_row + row) * MAX_MIB_SIZE + + masked_mi_col + 7 * mi_width / 8 - 1] & + (1 << VERT)) { + keep_vert_4a = true; + keep_vert_4b = true; + break; + } + } + if (can_search_vert_4a && !keep_vert_4a) { + for (int row = 0; row < mi_height; row++) { + if (partition_boundaries[(masked_mi_row + row) * MAX_MIB_SIZE + + masked_mi_col + 3 * mi_width / 8 - 1] & + (1 << VERT)) { + keep_vert_4a = true; + break; + } + } + } + if (can_search_vert_4b && !keep_vert_4b) { + for (int row = 0; row < mi_height; row++) { + if (partition_boundaries[(masked_mi_row + row) * MAX_MIB_SIZE + + masked_mi_col + 5 * mi_width / 8 - 1] & + (1 << VERT)) { + keep_vert_4b = true; + break; + } + } + } + part_search_state->prune_partition_4a[VERT] |= !keep_vert_4a; + part_search_state->prune_partition_4b[VERT] |= !keep_vert_4b; + } +} + +#endif // CONFIG_UNEVEN_4WAY + +// Pruning logic for PARTITION_HORZ_3 and PARTITION_VERT_3. 
+static AOM_INLINE void prune_ext_partitions_3way( + AV1_COMP *const cpi, PC_TREE *pc_tree, + PartitionSearchState *part_search_state, bool *partition_boundaries) { + const AV1_COMMON *const cm = &cpi->common; + const PARTITION_SPEED_FEATURES *part_sf = &cpi->sf.part_sf; + const PARTITION_TYPE forced_partition = part_search_state->forced_partition; + if (part_search_state->forced_partition != PARTITION_INVALID) { + return; + } + + // Prune horz 3 with speed features + if (part_search_state->partition_3_allowed[HORZ] && + !frame_is_intra_only(cm) && forced_partition != PARTITION_HORZ_3) { + if (part_sf->prune_ext_part_with_part_none && + pc_tree->partitioning == PARTITION_NONE) { + // Prune if the best partition does not split + part_search_state->prune_partition_3[HORZ] = 1; + } + if (part_sf->prune_ext_part_with_part_rect) { + // Prune if the best partition is rect but the subtrees did not further + // split in horz + if (pc_tree->partitioning == PARTITION_HORZ && + !node_uses_horz(pc_tree->horizontal[0]) && + !node_uses_horz(pc_tree->horizontal[1])) { + part_search_state->prune_partition_3[HORZ] = 1; + } + if (pc_tree->partitioning == PARTITION_VERT && + !node_uses_horz(pc_tree->vertical[0]) && + !node_uses_horz(pc_tree->vertical[1])) { + part_search_state->prune_partition_3[HORZ] = 1; + } + } + } + + if (part_search_state->partition_3_allowed[VERT] && + !frame_is_intra_only(cm) && forced_partition != PARTITION_VERT_3) { + if (part_sf->prune_ext_part_with_part_none && + pc_tree->partitioning == PARTITION_NONE) { + // Prune if the best partition does not split + part_search_state->prune_partition_3[VERT] = 1; + } + if (part_sf->prune_ext_part_with_part_rect) { + // Prune if the best partition is rect but the subtrees did not further + // split in vert + if (pc_tree->partitioning == PARTITION_VERT && + !node_uses_vert(pc_tree->vertical[0]) && + !node_uses_vert(pc_tree->vertical[1])) { + part_search_state->prune_partition_3[VERT] = 1; + } + if (pc_tree->partitioning 
== PARTITION_HORZ && + !node_uses_vert(pc_tree->horizontal[0]) && + !node_uses_vert(pc_tree->horizontal[1])) { + part_search_state->prune_partition_3[VERT] = 1; + } + } + } + + const bool can_search_horz = part_search_state->partition_3_allowed[HORZ] && + !part_search_state->prune_partition_3[HORZ]; + const bool can_search_vert = part_search_state->partition_3_allowed[VERT] && + !part_search_state->prune_partition_3[VERT]; + const PartitionBlkParams *blk_params = &part_search_state->part_blk_params; + const int mi_row = blk_params->mi_row, mi_col = blk_params->mi_col, + bsize = blk_params->bsize; + if (part_sf->prune_part_h_with_partition_boundary && + (can_search_horz || can_search_vert) && + part_search_state->found_best_partition) { + if (!part_search_state->partition_boundaries) { + part_search_state->partition_boundaries = partition_boundaries; + trace_partition_boundary(partition_boundaries, pc_tree, mi_row, mi_col, + bsize); + } + prune_part_3_with_partition_boundary(part_search_state, bsize, mi_row, + mi_col, can_search_horz, + can_search_vert); + } +} + +#if CONFIG_UNEVEN_4WAY +// Pruning logic for PARTITION_HORZ_4A/B and PARTITION_VERT_4A/B. 
+static AOM_INLINE void prune_ext_partitions_4way( + AV1_COMP *const cpi, PC_TREE *pc_tree, + PartitionSearchState *part_search_state, bool *partition_boundaries) { + const AV1_COMMON *const cm = &cpi->common; + const PARTITION_SPEED_FEATURES *part_sf = &cpi->sf.part_sf; + const PARTITION_TYPE forced_partition = part_search_state->forced_partition; + if (part_search_state->partition_4a_allowed[HORZ] && + forced_partition != PARTITION_HORZ_4A) { + if (part_sf->prune_ext_part_with_part_none && + pc_tree->partitioning == PARTITION_NONE) { + // Prune if the best partition does not split + part_search_state->prune_partition_4a[HORZ] = 1; + } + if (part_sf->prune_ext_part_with_part_rect && + pc_tree->partitioning == PARTITION_HORZ && + !node_uses_horz(pc_tree->horizontal[0]) && + !node_uses_horz(pc_tree->horizontal[1])) { + // Prune if the best partition is horz but horz did not further split in + // horz + part_search_state->prune_partition_4a[HORZ] = 1; + } + if (part_sf->prune_part_4_with_part_3 && !frame_is_intra_only(cm) && + pc_tree->partitioning == PARTITION_HORZ_3 && + !node_uses_horz(pc_tree->horizontal3[0]) && + !node_uses_horz(pc_tree->horizontal3[3])) { + // Prune is best partition is horizontal H, but first and last + // subpartitions did not further split in horizontal direction. 
+ part_search_state->prune_partition_4a[HORZ] = 1; + } + if (part_sf->prune_part_4_horz_or_vert && !frame_is_intra_only(cm) && + pc_tree->partitioning == PARTITION_VERT && + part_search_state->partition_rect_allowed[HORZ]) { + part_search_state->prune_partition_4a[HORZ] = 1; + } + } + + // Prune HORZ 4B with speed features + if (part_search_state->partition_4b_allowed[HORZ] && + forced_partition != PARTITION_HORZ_4B) { + if (part_sf->prune_ext_part_with_part_none && + pc_tree->partitioning == PARTITION_NONE) { + // Prune if the best partition does not split + part_search_state->prune_partition_4b[HORZ] = 1; + } + if (part_sf->prune_ext_part_with_part_rect && + pc_tree->partitioning == PARTITION_HORZ && + !node_uses_horz(pc_tree->horizontal[0]) && + !node_uses_horz(pc_tree->horizontal[1])) { + // Prune if the best partition is horz but horz did not further split in + // horz + part_search_state->prune_partition_4b[HORZ] = 1; + } + if (part_sf->prune_part_4_with_part_3 && !frame_is_intra_only(cm) && + pc_tree->partitioning == PARTITION_HORZ_3 && + !node_uses_horz(pc_tree->horizontal3[0]) && + !node_uses_horz(pc_tree->horizontal3[3])) { + // Prune is best partition is horizontal H, but first and last + // subpartitions did not further split in horizontal direction. 
+ part_search_state->prune_partition_4b[HORZ] = 1; + } + if (part_sf->prune_part_4_horz_or_vert && !frame_is_intra_only(cm) && + pc_tree->partitioning == PARTITION_VERT && + part_search_state->partition_rect_allowed[HORZ]) { + part_search_state->prune_partition_4b[HORZ] = 1; + } + } + + // Prune VERT_4A with speed features + if (part_search_state->partition_4a_allowed[VERT] && + forced_partition != PARTITION_VERT_4A) { + if (part_sf->prune_ext_part_with_part_none && + pc_tree->partitioning == PARTITION_NONE) { + // Prune if the best partition does not split + part_search_state->prune_partition_4a[VERT] = 1; + } + if (part_sf->prune_ext_part_with_part_rect && + pc_tree->partitioning == PARTITION_VERT && + !node_uses_vert(pc_tree->vertical[0]) && + !node_uses_vert(pc_tree->vertical[1])) { + // Prune if the best partition is vert but vert did not further split in + // vert + part_search_state->prune_partition_4a[VERT] = 1; + } + if (part_sf->prune_part_4_with_part_3 && !frame_is_intra_only(cm) && + pc_tree->partitioning == PARTITION_VERT_3 && + !node_uses_vert(pc_tree->vertical3[0]) && + !node_uses_vert(pc_tree->vertical3[3])) { + // Prune is best partition is vertical H, but first and last + // subpartitions did not further split in vertical direction. 
+ part_search_state->prune_partition_4a[VERT] = 1; + } + if (part_sf->prune_part_4_horz_or_vert && !frame_is_intra_only(cm) && + pc_tree->partitioning == PARTITION_HORZ && + part_search_state->partition_rect_allowed[VERT]) { + part_search_state->prune_partition_4a[VERT] = 1; + } + } + + // Prune VERT_4B with speed features + if (part_search_state->partition_4b_allowed[VERT] && + forced_partition != PARTITION_VERT_4B) { + if (part_sf->prune_ext_part_with_part_none && + pc_tree->partitioning == PARTITION_NONE) { + // Prune if the best partition does not split + part_search_state->prune_partition_4b[VERT] = 1; + } + if (part_sf->prune_ext_part_with_part_rect && + pc_tree->partitioning == PARTITION_VERT && + !node_uses_vert(pc_tree->vertical[0]) && + !node_uses_vert(pc_tree->vertical[1])) { + // Prune if the best partition is vert but vert did not further split in + // vert + part_search_state->prune_partition_4b[VERT] = 1; + } + if (part_sf->prune_part_4_with_part_3 && !frame_is_intra_only(cm) && + pc_tree->partitioning == PARTITION_VERT_3 && + !node_uses_vert(pc_tree->vertical3[0]) && + !node_uses_vert(pc_tree->vertical3[3])) { + // Prune is best partition is vertical H, but first and last + // subpartitions did not further split in vertical direction. 
+ part_search_state->prune_partition_4b[VERT] = 1; + } + if (part_sf->prune_part_4_horz_or_vert && !frame_is_intra_only(cm) && + pc_tree->partitioning == PARTITION_HORZ && + part_search_state->partition_rect_allowed[VERT]) { + part_search_state->prune_partition_4b[VERT] = 1; + } + } + + const bool can_search_horz_4a = + part_search_state->partition_4a_allowed[HORZ] && + !part_search_state->prune_partition_4a[HORZ]; + const bool can_search_horz_4b = + part_search_state->partition_4b_allowed[HORZ] && + !part_search_state->prune_partition_4b[HORZ]; + const bool can_search_vert_4a = + part_search_state->partition_4a_allowed[VERT] && + !part_search_state->prune_partition_4a[VERT]; + const bool can_search_vert_4b = + part_search_state->partition_4b_allowed[VERT] && + !part_search_state->prune_partition_4b[VERT]; + const PartitionBlkParams *blk_params = &part_search_state->part_blk_params; + const int mi_row = blk_params->mi_row, mi_col = blk_params->mi_col, + bsize = blk_params->bsize; + if (part_sf->prune_part_4_with_partition_boundary && + (can_search_horz_4a || can_search_vert_4a || can_search_horz_4b || + can_search_vert_4b) && + part_search_state->found_best_partition) { + if (!part_search_state->partition_boundaries || + pc_tree->partitioning == PARTITION_HORZ_3 || + pc_tree->partitioning == PARTITION_VERT_3) { + part_search_state->partition_boundaries = partition_boundaries; + trace_partition_boundary(partition_boundaries, pc_tree, mi_row, mi_col, + bsize); + } + prune_part_4_with_partition_boundary( + part_search_state, partition_boundaries, bsize, mi_row, mi_col, + can_search_horz_4a, can_search_horz_4b, can_search_vert_4a, + can_search_vert_4b); + } +} + +static INLINE void search_partition_horz_4a( + PartitionSearchState *search_state, AV1_COMP *const cpi, ThreadData *td, + TileDataEnc *tile_data, TokenExtra **tp, RD_STATS *best_rdc, + PC_TREE *pc_tree, const PARTITION_TREE *ptree_luma, + const PARTITION_TREE *template_tree, RD_SEARCH_MACROBLOCK_CONTEXT 
*x_ctx, + const PartitionSearchState *part_search_state, +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + LevelBanksRDO *level_banks, +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + SB_MULTI_PASS_MODE multi_pass_mode, int max_recursion_depth) { + const AV1_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &td->mb; + const int num_planes = av1_num_planes(cm); + MACROBLOCKD *const xd = &x->e_mbd; + const int ss_x = xd->plane[1].subsampling_x; + const int ss_y = xd->plane[1].subsampling_y; + + const PartitionBlkParams *blk_params = &search_state->part_blk_params; + const int mi_row = blk_params->mi_row, mi_col = blk_params->mi_col; + const BLOCK_SIZE bsize = blk_params->bsize; + + if (is_part_pruned_by_forced_partition(part_search_state, + PARTITION_HORZ_4A) || + !part_search_state->partition_4a_allowed[HORZ] || + part_search_state->prune_partition_4a[HORZ]) { + return; + } + + if (search_state->terminate_partition_search || !blk_params->has_rows || + !is_partition_valid(bsize, PARTITION_HORZ_4A) || + !(search_state->do_rectangular_split || + av1_active_h_edge(cpi, mi_row, blk_params->mi_step_h))) { + return; + } + + const int part_h4a_rate = search_state->partition_cost[PARTITION_HORZ_4A]; + if (part_h4a_rate == INT_MAX || + RDCOST(x->rdmult, part_h4a_rate, 0) >= best_rdc->rdcost) { + return; + } + RD_STATS sum_rdc; + av1_init_rd_stats(&sum_rdc); + const int eighth_step = mi_size_high[bsize] / 8; + + sum_rdc.rate = search_state->partition_cost[PARTITION_HORZ_4A]; + sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0); + + const BLOCK_SIZE sml_subsize = + get_partition_subsize(bsize, PARTITION_HORZ_4A); + const BLOCK_SIZE big_subsize = get_partition_subsize(bsize, PARTITION_HORZ); + const BLOCK_SIZE med_subsize = + get_partition_subsize(big_subsize, PARTITION_HORZ); + assert(sml_subsize == get_partition_subsize(med_subsize, PARTITION_HORZ)); + + const int cum_step_multipliers[4] = { 0, 1, 3, 7 }; + const BLOCK_SIZE subblock_sizes[4] = { sml_subsize, 
med_subsize, big_subsize, + sml_subsize }; + + for (int idx = 0; idx < 4; idx++) { + if (pc_tree->horizontal4a[idx]) { + av1_free_pc_tree_recursive(pc_tree->horizontal4a[idx], num_planes, 0, 0); + pc_tree->horizontal4a[idx] = NULL; + } + const int this_mi_row = mi_row + eighth_step * cum_step_multipliers[idx]; + pc_tree->horizontal4a[idx] = av1_alloc_pc_tree_node( + xd->tree_type, this_mi_row, mi_col, subblock_sizes[idx], pc_tree, + PARTITION_HORZ_4A, idx, idx == 3, ss_x, ss_y); + } + + bool skippable = true; + for (int i = 0; i < 4; ++i) { + const int this_mi_row = mi_row + eighth_step * cum_step_multipliers[i]; + + if (i > 0 && this_mi_row >= cm->mi_params.mi_rows) break; + + SUBBLOCK_RDO_DATA rdo_data = { pc_tree->horizontal4a[i], + get_partition_subtree_const(ptree_luma, i), + get_partition_subtree_const(template_tree, + i), + this_mi_row, + mi_col, + subblock_sizes[i], + PARTITION_HORZ_4A }; + if (!rd_try_subblock_new(cpi, td, tile_data, tp, &rdo_data, *best_rdc, + &sum_rdc, multi_pass_mode, &skippable, + max_recursion_depth)) { + av1_invalid_rd_stats(&sum_rdc); + break; + } + } + + av1_rd_cost_update(x->rdmult, &sum_rdc); + if (sum_rdc.rdcost < best_rdc->rdcost) { +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + update_best_level_banks(level_banks, &x->e_mbd); +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + *best_rdc = sum_rdc; + search_state->found_best_partition = true; + pc_tree->partitioning = PARTITION_HORZ_4A; + pc_tree->skippable = skippable; + } + + av1_restore_context(cm, x, x_ctx, mi_row, mi_col, bsize, num_planes); +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + restore_level_banks(&x->e_mbd, level_banks); +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK +} + +static INLINE void search_partition_horz_4b( + PartitionSearchState *search_state, AV1_COMP *const cpi, ThreadData *td, + TileDataEnc *tile_data, TokenExtra **tp, RD_STATS *best_rdc, + PC_TREE *pc_tree, const PARTITION_TREE *ptree_luma, + const PARTITION_TREE *template_tree, 
RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx, + const PartitionSearchState *part_search_state, +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + LevelBanksRDO *level_banks, +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + SB_MULTI_PASS_MODE multi_pass_mode, int max_recursion_depth) { + const AV1_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &td->mb; + const int num_planes = av1_num_planes(cm); + MACROBLOCKD *const xd = &x->e_mbd; + const int ss_x = xd->plane[1].subsampling_x; + const int ss_y = xd->plane[1].subsampling_y; + + const PartitionBlkParams *blk_params = &search_state->part_blk_params; + const int mi_row = blk_params->mi_row, mi_col = blk_params->mi_col; + const BLOCK_SIZE bsize = blk_params->bsize; + + if (is_part_pruned_by_forced_partition(part_search_state, + PARTITION_HORZ_4B) || + !part_search_state->partition_4b_allowed[HORZ] || + part_search_state->prune_partition_4b[HORZ]) { + return; + } + + if (search_state->terminate_partition_search || !blk_params->has_rows || + !is_partition_valid(bsize, PARTITION_HORZ_4B) || + !(search_state->do_rectangular_split || + av1_active_h_edge(cpi, mi_row, blk_params->mi_step_h))) { + return; + } + + const int part_h4b_rate = search_state->partition_cost[PARTITION_HORZ_4B]; + if (part_h4b_rate == INT_MAX || + RDCOST(x->rdmult, part_h4b_rate, 0) >= best_rdc->rdcost) { + return; + } + RD_STATS sum_rdc; + av1_init_rd_stats(&sum_rdc); + const int eighth_step = mi_size_high[bsize] / 8; + + sum_rdc.rate = search_state->partition_cost[PARTITION_HORZ_4B]; + sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0); + + const BLOCK_SIZE sml_subsize = + get_partition_subsize(bsize, PARTITION_HORZ_4B); + const BLOCK_SIZE big_subsize = get_partition_subsize(bsize, PARTITION_HORZ); + const BLOCK_SIZE med_subsize = + get_partition_subsize(big_subsize, PARTITION_HORZ); + assert(sml_subsize == get_partition_subsize(med_subsize, PARTITION_HORZ)); + + const int cum_step_multipliers[4] = { 0, 1, 5, 7 }; + const BLOCK_SIZE subblock_sizes[4] 
= { sml_subsize, big_subsize, med_subsize, + sml_subsize }; + + for (int idx = 0; idx < 4; idx++) { + if (pc_tree->horizontal4b[idx]) { + av1_free_pc_tree_recursive(pc_tree->horizontal4b[idx], num_planes, 0, 0); + pc_tree->horizontal4b[idx] = NULL; + } + const int this_mi_row = mi_row + eighth_step * cum_step_multipliers[idx]; + pc_tree->horizontal4b[idx] = av1_alloc_pc_tree_node( + xd->tree_type, this_mi_row, mi_col, subblock_sizes[idx], pc_tree, + PARTITION_HORZ_4B, idx, idx == 3, ss_x, ss_y); + } + + bool skippable = true; + for (int i = 0; i < 4; ++i) { + const int this_mi_row = mi_row + eighth_step * cum_step_multipliers[i]; + + if (i > 0 && this_mi_row >= cm->mi_params.mi_rows) break; + + SUBBLOCK_RDO_DATA rdo_data = { pc_tree->horizontal4b[i], + get_partition_subtree_const(ptree_luma, i), + get_partition_subtree_const(template_tree, + i), + this_mi_row, + mi_col, + subblock_sizes[i], + PARTITION_HORZ_4B }; + if (!rd_try_subblock_new(cpi, td, tile_data, tp, &rdo_data, *best_rdc, + &sum_rdc, multi_pass_mode, &skippable, + max_recursion_depth)) { + av1_invalid_rd_stats(&sum_rdc); + break; + } + } + + av1_rd_cost_update(x->rdmult, &sum_rdc); + if (sum_rdc.rdcost < best_rdc->rdcost) { +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + update_best_level_banks(level_banks, &x->e_mbd); +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + *best_rdc = sum_rdc; + search_state->found_best_partition = true; + pc_tree->partitioning = PARTITION_HORZ_4B; + pc_tree->skippable = skippable; + } + + av1_restore_context(cm, x, x_ctx, mi_row, mi_col, bsize, num_planes); +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + restore_level_banks(&x->e_mbd, level_banks); +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK +} + +static INLINE void search_partition_vert_4a( + PartitionSearchState *search_state, AV1_COMP *const cpi, ThreadData *td, + TileDataEnc *tile_data, TokenExtra **tp, RD_STATS *best_rdc, + PC_TREE *pc_tree, const PARTITION_TREE *ptree_luma, + const PARTITION_TREE *template_tree, 
RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx, + const PartitionSearchState *part_search_state, +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + LevelBanksRDO *level_banks, +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + SB_MULTI_PASS_MODE multi_pass_mode, int max_recursion_depth) { + const AV1_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &td->mb; + const int num_planes = av1_num_planes(cm); + MACROBLOCKD *const xd = &x->e_mbd; + const int ss_x = xd->plane[1].subsampling_x; + const int ss_y = xd->plane[1].subsampling_y; + + const PartitionBlkParams *blk_params = &search_state->part_blk_params; + const int mi_row = blk_params->mi_row, mi_col = blk_params->mi_col; + const BLOCK_SIZE bsize = blk_params->bsize; + + if (is_part_pruned_by_forced_partition(part_search_state, + PARTITION_VERT_4A) || + !part_search_state->partition_4a_allowed[VERT] || + part_search_state->prune_partition_4a[VERT]) { + return; + } + + if (search_state->terminate_partition_search || !blk_params->has_cols || + !is_partition_valid(bsize, PARTITION_VERT_4A) || + !(search_state->do_rectangular_split || + av1_active_v_edge(cpi, mi_col, blk_params->mi_step_w))) { + return; + } + + const int part_v4a_rate = search_state->partition_cost[PARTITION_VERT_4A]; + if (part_v4a_rate == INT_MAX || + RDCOST(x->rdmult, part_v4a_rate, 0) >= best_rdc->rdcost) { + return; + } + RD_STATS sum_rdc; + av1_init_rd_stats(&sum_rdc); + const int eighth_step = mi_size_wide[bsize] / 8; + + sum_rdc.rate = search_state->partition_cost[PARTITION_VERT_4A]; + sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0); + + const BLOCK_SIZE sml_subsize = + get_partition_subsize(bsize, PARTITION_VERT_4A); + const BLOCK_SIZE big_subsize = get_partition_subsize(bsize, PARTITION_VERT); + const BLOCK_SIZE med_subsize = + get_partition_subsize(big_subsize, PARTITION_VERT); + assert(sml_subsize == get_partition_subsize(med_subsize, PARTITION_VERT)); + + const int cum_step_multipliers[4] = { 0, 1, 3, 7 }; + const BLOCK_SIZE subblock_sizes[4] 
= { sml_subsize, med_subsize, big_subsize, + sml_subsize }; + + for (int idx = 0; idx < 4; idx++) { + if (pc_tree->vertical4a[idx]) { + av1_free_pc_tree_recursive(pc_tree->vertical4a[idx], num_planes, 0, 0); + pc_tree->vertical4a[idx] = NULL; + } + const int this_mi_col = mi_col + eighth_step * cum_step_multipliers[idx]; + pc_tree->vertical4a[idx] = av1_alloc_pc_tree_node( + xd->tree_type, mi_row, this_mi_col, subblock_sizes[idx], pc_tree, + PARTITION_VERT_4A, idx, idx == 3, ss_x, ss_y); + } + + bool skippable = true; + for (int i = 0; i < 4; ++i) { + const int this_mi_col = mi_col + eighth_step * cum_step_multipliers[i]; + + if (i > 0 && this_mi_col >= cm->mi_params.mi_cols) break; + + SUBBLOCK_RDO_DATA rdo_data = { pc_tree->vertical4a[i], + get_partition_subtree_const(ptree_luma, i), + get_partition_subtree_const(template_tree, + i), + mi_row, + this_mi_col, + subblock_sizes[i], + PARTITION_VERT_4A }; + if (!rd_try_subblock_new(cpi, td, tile_data, tp, &rdo_data, *best_rdc, + &sum_rdc, multi_pass_mode, &skippable, + max_recursion_depth)) { + av1_invalid_rd_stats(&sum_rdc); + break; + } + } + + av1_rd_cost_update(x->rdmult, &sum_rdc); + if (sum_rdc.rdcost < best_rdc->rdcost) { +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + update_best_level_banks(level_banks, &x->e_mbd); +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + *best_rdc = sum_rdc; + search_state->found_best_partition = true; + pc_tree->partitioning = PARTITION_VERT_4A; + pc_tree->skippable = skippable; + } + + av1_restore_context(cm, x, x_ctx, mi_row, mi_col, bsize, num_planes); +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + restore_level_banks(&x->e_mbd, level_banks); +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK +} + +static INLINE void search_partition_vert_4b( + PartitionSearchState *search_state, AV1_COMP *const cpi, ThreadData *td, + TileDataEnc *tile_data, TokenExtra **tp, RD_STATS *best_rdc, + PC_TREE *pc_tree, const PARTITION_TREE *ptree_luma, + const PARTITION_TREE *template_tree, 
RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx, + const PartitionSearchState *part_search_state, +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + LevelBanksRDO *level_banks, +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + SB_MULTI_PASS_MODE multi_pass_mode, int max_recursion_depth) { + const AV1_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &td->mb; + const int num_planes = av1_num_planes(cm); + MACROBLOCKD *const xd = &x->e_mbd; + const int ss_x = xd->plane[1].subsampling_x; + const int ss_y = xd->plane[1].subsampling_y; + + const PartitionBlkParams *blk_params = &search_state->part_blk_params; + const int mi_row = blk_params->mi_row, mi_col = blk_params->mi_col; + const BLOCK_SIZE bsize = blk_params->bsize; + + if (is_part_pruned_by_forced_partition(part_search_state, + PARTITION_VERT_4B) || + !part_search_state->partition_4b_allowed[VERT] || + part_search_state->prune_partition_4b[VERT]) { + return; + } + + if (search_state->terminate_partition_search || !blk_params->has_cols || + !is_partition_valid(bsize, PARTITION_VERT_4B) || + !(search_state->do_rectangular_split || + av1_active_v_edge(cpi, mi_col, blk_params->mi_step_w))) { + return; + } + + const int part_v4b_rate = search_state->partition_cost[PARTITION_VERT_4B]; + if (part_v4b_rate == INT_MAX || + RDCOST(x->rdmult, part_v4b_rate, 0) >= best_rdc->rdcost) { + return; + } + RD_STATS sum_rdc; + av1_init_rd_stats(&sum_rdc); + const int eighth_step = mi_size_wide[bsize] / 8; + + sum_rdc.rate = search_state->partition_cost[PARTITION_VERT_4B]; + sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0); + + const BLOCK_SIZE sml_subsize = + get_partition_subsize(bsize, PARTITION_VERT_4B); + const BLOCK_SIZE big_subsize = get_partition_subsize(bsize, PARTITION_VERT); + const BLOCK_SIZE med_subsize = + get_partition_subsize(big_subsize, PARTITION_VERT); + assert(sml_subsize == get_partition_subsize(med_subsize, PARTITION_VERT)); + + const int cum_step_multipliers[4] = { 0, 1, 5, 7 }; + const BLOCK_SIZE subblock_sizes[4] 
= { sml_subsize, big_subsize, med_subsize, + sml_subsize }; + + for (int idx = 0; idx < 4; idx++) { + if (pc_tree->vertical4b[idx]) { + av1_free_pc_tree_recursive(pc_tree->vertical4b[idx], num_planes, 0, 0); + pc_tree->vertical4b[idx] = NULL; + } + const int this_mi_col = mi_col + eighth_step * cum_step_multipliers[idx]; + pc_tree->vertical4b[idx] = av1_alloc_pc_tree_node( + xd->tree_type, mi_row, this_mi_col, subblock_sizes[idx], pc_tree, + PARTITION_VERT_4B, idx, idx == 3, ss_x, ss_y); + } + + bool skippable = true; + for (int i = 0; i < 4; ++i) { + const int this_mi_col = mi_col + eighth_step * cum_step_multipliers[i]; + + if (i > 0 && this_mi_col >= cm->mi_params.mi_cols) break; + + SUBBLOCK_RDO_DATA rdo_data = { pc_tree->vertical4b[i], + get_partition_subtree_const(ptree_luma, i), + get_partition_subtree_const(template_tree, + i), + mi_row, + this_mi_col, + subblock_sizes[i], + PARTITION_VERT_4B }; + if (!rd_try_subblock_new(cpi, td, tile_data, tp, &rdo_data, *best_rdc, + &sum_rdc, multi_pass_mode, &skippable, + max_recursion_depth)) { + av1_invalid_rd_stats(&sum_rdc); + break; + } + } + + av1_rd_cost_update(x->rdmult, &sum_rdc); + if (sum_rdc.rdcost < best_rdc->rdcost) { +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + update_best_level_banks(level_banks, &x->e_mbd); +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + *best_rdc = sum_rdc; + search_state->found_best_partition = true; + pc_tree->partitioning = PARTITION_VERT_4B; + pc_tree->skippable = skippable; + } + + av1_restore_context(cm, x, x_ctx, mi_row, mi_col, bsize, num_planes); +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + restore_level_banks(&x->e_mbd, level_banks); +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK +} +#endif // CONFIG_UNEVEN_4WAY + /*!\brief Performs rdopt on PARTITION_HORZ_3. 
*/ static INLINE void search_partition_horz_3( PartitionSearchState *search_state, AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data, TokenExtra **tp, RD_STATS *best_rdc, PC_TREE *pc_tree, const PARTITION_TREE *ptree_luma, const PARTITION_TREE *template_tree, RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx, -#if CONFIG_C043_MVP_IMPROVEMENTS - REF_MV_BANK *best_level_bank, -#endif // CONFIG_C043_MVP_IMPROVEMENTS -#if WARP_CU_BANK - WARP_PARAM_BANK *best_level_warp_bank, -#endif // WARP_CU_BANK + const PartitionSearchState *part_search_state, +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + LevelBanksRDO *level_banks, +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK SB_MULTI_PASS_MODE multi_pass_mode, int max_recursion_depth) { const AV1_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &td->mb; const int num_planes = av1_num_planes(cm); -#if CONFIG_EXT_RECUR_PARTITIONS MACROBLOCKD *const xd = &x->e_mbd; const int ss_x = xd->plane[1].subsampling_x; const int ss_y = xd->plane[1].subsampling_y; -#endif // CONFIG_EXT_RECUR_PARTITIONS const PartitionBlkParams *blk_params = &search_state->part_blk_params; const int mi_row = blk_params->mi_row, mi_col = blk_params->mi_col; const BLOCK_SIZE bsize = blk_params->bsize; + if (is_part_pruned_by_forced_partition(part_search_state, PARTITION_HORZ_3) || + !part_search_state->partition_3_allowed[HORZ] || + part_search_state->prune_partition_3[HORZ]) { + return; + } + if (search_state->terminate_partition_search || !blk_params->has_rows || !is_partition_valid(bsize, PARTITION_HORZ_3) || !(search_state->do_rectangular_split || av1_active_h_edge(cpi, mi_row, blk_params->mi_step_h))) { return; } -#if CONFIG_H_PARTITION // TODO(yuec): set default partition modes for the edge directly by ruling out // h partitions from the syntax if the 2nd middle block is not in the frame. 
if (mi_col + (mi_size_wide[bsize] >> 1) >= cm->mi_params.mi_cols) return; -#endif // CONFIG_H_PARTITION const int part_h3_rate = search_state->partition_cost[PARTITION_HORZ_3]; if (part_h3_rate == INT_MAX || @@ -4913,7 +6305,6 @@ sum_rdc.rate = search_state->partition_cost[PARTITION_HORZ_3]; sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0); -#if CONFIG_H_PARTITION const BLOCK_SIZE sml_subsize = get_h_partition_subsize(bsize, 0, PARTITION_HORZ_3); const BLOCK_SIZE big_subsize = @@ -4930,59 +6321,26 @@ } pc_tree->horizontal3[idx] = av1_alloc_pc_tree_node( - mi_row + offset_mr[idx], mi_col + offset_mc[idx], subblock_sizes[idx], - pc_tree, PARTITION_HORZ_3, idx, idx == 3, ss_x, ss_y); + xd->tree_type, mi_row + offset_mr[idx], mi_col + offset_mc[idx], + subblock_sizes[idx], pc_tree, PARTITION_HORZ_3, idx, idx == 3, ss_x, + ss_y); } -#else // CONFIG_H_PARTITION - const BLOCK_SIZE sml_subsize = get_partition_subsize(bsize, PARTITION_HORZ_3); - const BLOCK_SIZE big_subsize = get_partition_subsize(bsize, PARTITION_HORZ); - const int step_multipliers[3] = { 0, 1, 2 }; - const BLOCK_SIZE subblock_sizes[3] = { sml_subsize, big_subsize, - sml_subsize }; - - for (int idx = 0; idx < 3; idx++) { - if (pc_tree->horizontal3[idx]) { - av1_free_pc_tree_recursive(pc_tree->horizontal3[idx], num_planes, 0, 0); - pc_tree->horizontal3[idx] = NULL; - } - } - pc_tree->horizontal3[0] = - av1_alloc_pc_tree_node(mi_row, mi_col, subblock_sizes[0], pc_tree, - PARTITION_HORZ_3, 0, 0, ss_x, ss_y); - pc_tree->horizontal3[1] = - av1_alloc_pc_tree_node(mi_row + quarter_step, mi_col, subblock_sizes[1], - pc_tree, PARTITION_HORZ_3, 1, 0, ss_x, ss_y); - pc_tree->horizontal3[2] = av1_alloc_pc_tree_node( - mi_row + quarter_step * 3, mi_col, subblock_sizes[2], pc_tree, - PARTITION_HORZ_3, 2, 1, ss_x, ss_y); -#endif // CONFIG_H_PARTITION bool skippable = true; -#if CONFIG_H_PARTITION for (int i = 0; i < 4; ++i) { const int this_mi_row = mi_row + offset_mr[i]; const int this_mi_col = mi_col + offset_mc[i]; 
-#else // CONFIG_H_PARTITION - int this_mi_row = mi_row; - for (int i = 0; i < 3; ++i) { - this_mi_row += quarter_step * step_multipliers[i]; -#endif // CONFIG_H_PARTITION if (i > 0 && this_mi_row >= cm->mi_params.mi_rows) break; - SUBBLOCK_RDO_DATA rdo_data = { - pc_tree->horizontal3[i], - get_partition_subtree_const(ptree_luma, i), - get_partition_subtree_const(template_tree, i), - this_mi_row, -#if CONFIG_H_PARTITION - this_mi_col, -#else - mi_col, -#endif // CONFIG_H_PARTITION - subblock_sizes[i], - PARTITION_HORZ_3 - }; + SUBBLOCK_RDO_DATA rdo_data = { pc_tree->horizontal3[i], + get_partition_subtree_const(ptree_luma, i), + get_partition_subtree_const(template_tree, + i), + this_mi_row, + this_mi_col, + subblock_sizes[i], + PARTITION_HORZ_3 }; if (!rd_try_subblock_new(cpi, td, tile_data, tp, &rdo_data, *best_rdc, &sum_rdc, multi_pass_mode, &skippable, max_recursion_depth)) { @@ -4993,12 +6351,9 @@ av1_rd_cost_update(x->rdmult, &sum_rdc); if (sum_rdc.rdcost < best_rdc->rdcost) { -#if CONFIG_C043_MVP_IMPROVEMENTS - *best_level_bank = x->e_mbd.ref_mv_bank; -#endif // CONFIG_C043_MVP_IMPROVEMENTS -#if WARP_CU_BANK - *best_level_warp_bank = x->e_mbd.warp_param_bank; -#endif // WARP_CU_BANK +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + update_best_level_banks(level_banks, &x->e_mbd); +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK *best_rdc = sum_rdc; search_state->found_best_partition = true; pc_tree->partitioning = PARTITION_HORZ_3; @@ -5006,6 +6361,9 @@ } av1_restore_context(cm, x, x_ctx, mi_row, mi_col, bsize, num_planes); +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + restore_level_banks(&x->e_mbd, level_banks); +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK } /*!\brief Performs rdopt on PARTITION_VERT_3. 
*/ @@ -5014,37 +6372,37 @@ TileDataEnc *tile_data, TokenExtra **tp, RD_STATS *best_rdc, PC_TREE *pc_tree, const PARTITION_TREE *ptree_luma, const PARTITION_TREE *template_tree, RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx, -#if CONFIG_C043_MVP_IMPROVEMENTS - REF_MV_BANK *best_level_bank, -#endif // CONFIG_C043_MVP_IMPROVEMENTS -#if WARP_CU_BANK - WARP_PARAM_BANK *best_level_warp_bank, -#endif // WARP_CU_BANK + const PartitionSearchState *part_search_state, +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + LevelBanksRDO *level_banks, +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK SB_MULTI_PASS_MODE multi_pass_mode, int max_recursion_depth) { const AV1_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &td->mb; const int num_planes = av1_num_planes(cm); -#if CONFIG_EXT_RECUR_PARTITIONS MACROBLOCKD *const xd = &x->e_mbd; const int ss_x = xd->plane[1].subsampling_x; const int ss_y = xd->plane[1].subsampling_y; -#endif // CONFIG_EXT_RECUR_PARTITIONS const PartitionBlkParams *blk_params = &search_state->part_blk_params; const int mi_row = blk_params->mi_row, mi_col = blk_params->mi_col; const BLOCK_SIZE bsize = blk_params->bsize; + if (is_part_pruned_by_forced_partition(part_search_state, PARTITION_VERT_3) || + !part_search_state->partition_3_allowed[VERT] || + part_search_state->prune_partition_3[VERT]) { + return; + } + if (search_state->terminate_partition_search || !blk_params->has_cols || !is_partition_valid(bsize, PARTITION_VERT_3) || !(search_state->do_rectangular_split || av1_active_v_edge(cpi, mi_col, blk_params->mi_step_w))) { return; } -#if CONFIG_H_PARTITION // TODO(yuec): set default partition modes for the edge directly by ruling out // h partitions from the syntax if the 2nd middle block is not in the frame. 
if (mi_row + (mi_size_high[bsize] >> 1) >= cm->mi_params.mi_rows) return; -#endif // CONFIG_H_PARTITION const int part_v3_rate = search_state->partition_cost[PARTITION_VERT_3]; if (part_v3_rate == INT_MAX || @@ -5059,7 +6417,6 @@ sum_rdc.rate = search_state->partition_cost[PARTITION_VERT_3]; sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0); -#if CONFIG_H_PARTITION const BLOCK_SIZE sml_subsize = get_h_partition_subsize(bsize, 0, PARTITION_VERT_3); const BLOCK_SIZE big_subsize = @@ -5076,59 +6433,26 @@ } pc_tree->vertical3[idx] = av1_alloc_pc_tree_node( - mi_row + offset_mr[idx], mi_col + offset_mc[idx], subblock_sizes[idx], - pc_tree, PARTITION_VERT_3, idx, idx == 3, ss_x, ss_y); + xd->tree_type, mi_row + offset_mr[idx], mi_col + offset_mc[idx], + subblock_sizes[idx], pc_tree, PARTITION_VERT_3, idx, idx == 3, ss_x, + ss_y); } -#else - const BLOCK_SIZE sml_subsize = get_partition_subsize(bsize, PARTITION_VERT_3); - const BLOCK_SIZE big_subsize = get_partition_subsize(bsize, PARTITION_VERT); - const int step_multipliers[3] = { 0, 1, 2 }; - const BLOCK_SIZE subblock_sizes[3] = { sml_subsize, big_subsize, - sml_subsize }; - - for (int idx = 0; idx < 3; idx++) { - if (pc_tree->vertical3[idx]) { - av1_free_pc_tree_recursive(pc_tree->vertical3[idx], num_planes, 0, 0); - pc_tree->vertical3[idx] = NULL; - } - } - pc_tree->vertical3[0] = - av1_alloc_pc_tree_node(mi_row, mi_col, subblock_sizes[0], pc_tree, - PARTITION_VERT_3, 0, 0, ss_x, ss_y); - pc_tree->vertical3[1] = - av1_alloc_pc_tree_node(mi_row, mi_col + quarter_step, subblock_sizes[1], - pc_tree, PARTITION_VERT_3, 1, 0, ss_x, ss_y); - pc_tree->vertical3[2] = av1_alloc_pc_tree_node( - mi_row, mi_col + quarter_step * 3, subblock_sizes[2], pc_tree, - PARTITION_VERT_3, 2, 1, ss_x, ss_y); -#endif // CONFIG_H_PARTITION bool skippable = true; -#if CONFIG_H_PARTITION for (int i = 0; i < 4; ++i) { const int this_mi_row = mi_row + offset_mr[i]; const int this_mi_col = mi_col + offset_mc[i]; -#else - int this_mi_col = mi_col; 
- for (int i = 0; i < 3; ++i) { - this_mi_col += quarter_step * step_multipliers[i]; -#endif // CONFIG_H_PARTITION if (i > 0 && this_mi_col >= cm->mi_params.mi_cols) break; - SUBBLOCK_RDO_DATA rdo_data = { - pc_tree->vertical3[i], - get_partition_subtree_const(ptree_luma, i), - get_partition_subtree_const(template_tree, i), -#if CONFIG_H_PARTITION - this_mi_row, -#else // CONFIG_H_PARTITION - mi_row, -#endif // CONFIG_H_PARTITION - this_mi_col, - subblock_sizes[i], - PARTITION_VERT_3 - }; + SUBBLOCK_RDO_DATA rdo_data = { pc_tree->vertical3[i], + get_partition_subtree_const(ptree_luma, i), + get_partition_subtree_const(template_tree, + i), + this_mi_row, + this_mi_col, + subblock_sizes[i], + PARTITION_VERT_3 }; if (!rd_try_subblock_new(cpi, td, tile_data, tp, &rdo_data, *best_rdc, &sum_rdc, multi_pass_mode, &skippable, max_recursion_depth)) { @@ -5139,19 +6463,20 @@ av1_rd_cost_update(x->rdmult, &sum_rdc); if (sum_rdc.rdcost < best_rdc->rdcost) { -#if CONFIG_C043_MVP_IMPROVEMENTS - *best_level_bank = x->e_mbd.ref_mv_bank; -#endif // CONFIG_C043_MVP_IMPROVEMENTS -#if WARP_CU_BANK - *best_level_warp_bank = x->e_mbd.warp_param_bank; -#endif // WARP_CU_BANK +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + update_best_level_banks(level_banks, &x->e_mbd); +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK *best_rdc = sum_rdc; search_state->found_best_partition = true; pc_tree->partitioning = PARTITION_VERT_3; pc_tree->skippable = skippable; } av1_restore_context(cm, x, x_ctx, mi_row, mi_col, bsize, num_planes); +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + restore_level_banks(&x->e_mbd, level_banks); +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK } + #endif // CONFIG_EXT_RECUR_PARTITIONS static AOM_INLINE int get_partition_depth(const PC_TREE *pc_tree, @@ -5194,12 +6519,144 @@ get_partition_depth(pc_tree->vertical3[idx], curr_depth + 1)); } break; +#if CONFIG_UNEVEN_4WAY + case PARTITION_HORZ_4A: + for (int idx = 0; idx < 4; idx++) { + max_depth = AOMMAX( + max_depth, + 
get_partition_depth(pc_tree->horizontal4a[idx], curr_depth + 1)); + } + break; + case PARTITION_HORZ_4B: + for (int idx = 0; idx < 4; idx++) { + max_depth = AOMMAX( + max_depth, + get_partition_depth(pc_tree->horizontal4b[idx], curr_depth + 1)); + } + break; + case PARTITION_VERT_4A: + for (int idx = 0; idx < 4; idx++) { + max_depth = AOMMAX( + max_depth, + get_partition_depth(pc_tree->vertical4a[idx], curr_depth + 1)); + } + break; + case PARTITION_VERT_4B: + for (int idx = 0; idx < 4; idx++) { + max_depth = AOMMAX( + max_depth, + get_partition_depth(pc_tree->vertical4b[idx], curr_depth + 1)); + } + break; +#endif // CONFIG_UNEVEN_4WAY default: assert(0); break; } return max_depth; } #if CONFIG_EXT_RECUR_PARTITIONS +static AOM_INLINE bool try_none_after_rect( + const MACROBLOCKD *xd, const CommonModeInfoParams *mi_params, + BLOCK_SIZE bsize, int mi_row, int mi_col) { + if (!is_partition_point(bsize)) { + return false; + } + const int tree_idx = av1_get_sdp_idx(xd->tree_type); + // This speed feature is not applicable if either the above or left block is + // unavailable. + if (tree_idx == 0 && !(xd->up_available && xd->left_available)) { + return false; + } + if (tree_idx == 1 && + !(xd->chroma_up_available && xd->chroma_left_available)) { + return false; + } + // Scan for the maximum and minimum dimension of the above and left blocks. 
+ const int mi_stride = xd->mi_stride; + int min_left_dim_log2 = INT_MAX, min_above_dim_log2 = INT_MAX; + int max_left_dim_log2 = 0, max_above_dim_log2 = 0; + const int mi_height = + AOMMIN(mi_size_high[bsize], mi_params->mi_rows - mi_row); + const int mi_width = AOMMIN(mi_size_wide[bsize], mi_params->mi_cols - mi_col); + for (int row = 0; row < mi_height;) { + const MB_MODE_INFO *mi = xd->mi[row * mi_stride - 1]; + const BLOCK_SIZE left_bsize = mi->sb_type[tree_idx]; + + min_left_dim_log2 = + AOMMIN(min_left_dim_log2, mi_size_high_log2[left_bsize]); + max_left_dim_log2 = + AOMMAX(max_left_dim_log2, mi_size_high_log2[left_bsize]); + const int row_step = + tree_idx == 0 + ? mi_size_high[left_bsize] - AOMMAX(mi_row - mi->mi_row_start, 0) + : mi_size_high[left_bsize] - + AOMMAX(mi_row - mi->chroma_mi_row_start, 0); + row += row_step; + assert(row_step > 0); + } + for (int col = 0; col < mi_width;) { + const MB_MODE_INFO *mi = xd->mi[-1 * mi_stride + col]; + const BLOCK_SIZE above_bsize = mi->sb_type[tree_idx]; + + min_above_dim_log2 = + AOMMIN(min_above_dim_log2, mi_size_wide_log2[above_bsize]); + max_above_dim_log2 = + AOMMAX(max_above_dim_log2, mi_size_wide_log2[above_bsize]); + const int col_step = + tree_idx == 0 + ? mi_size_wide[above_bsize] - AOMMAX(mi_col - mi->mi_col_start, 0) + : mi_size_wide[above_bsize] - + AOMMAX(mi_col - mi->chroma_mi_col_start, 0); + col += col_step; + assert(col_step > 0); + } + // Delay the search for partition none if the above width and left height + // are not bigger than the current block dimension AND at least one of the + // dimensions if smaller than the current block by a factor of 4. + if ((mi_size_high_log2[bsize] > max_left_dim_log2 + 1 && + mi_size_wide_log2[bsize] >= min_above_dim_log2) || + (mi_size_wide_log2[bsize] > max_above_dim_log2 + 1 && + mi_size_high_log2[bsize] >= min_left_dim_log2)) { + return true; + } + return false; +} + +/*!\brief Prune PARTITION_NONE search if rect partitions split deeper. 
+ */ +static AOM_INLINE void prune_none_with_rect_results( + PartitionSearchState *part_search_state, const PC_TREE *pc_tree) { + if (!part_search_state->found_best_partition) { + return; + } + + const PARTITION_TYPE cur_best_partition = pc_tree->partitioning; + PC_TREE *const *tree = NULL; + int num_sub_parts = 0; + if (cur_best_partition == PARTITION_SPLIT) { + tree = pc_tree->split; + num_sub_parts = SUB_PARTITIONS_SPLIT; + } else if (cur_best_partition == PARTITION_HORZ) { + tree = pc_tree->horizontal; + num_sub_parts = NUM_RECT_PARTS; + } else if (cur_best_partition == PARTITION_VERT) { + tree = pc_tree->vertical; + num_sub_parts = NUM_RECT_PARTS; + } else { + assert(0 && + "Unexpected best partition type in prune_none_with_rect_results."); + } + // Give up on PARTITION_NONE if either of the subtrees decided to split + // further. + for (int idx = 0; idx < num_sub_parts; idx++) { + if (!tree[idx]) { + break; + } + part_search_state->prune_partition_none |= + tree[idx]->partitioning != PARTITION_NONE; + } +} + /*!\brief AV1 block partition search (full search). * * \ingroup partition_search @@ -5309,14 +6766,12 @@ // Initialization of state variables used in partition search. 
init_partition_search_state_params(x, cpi, &part_search_state, #if CONFIG_EXT_RECUR_PARTITIONS - pc_tree, + pc_tree, ptree_luma, template_tree, + max_recursion_depth, #endif // CONFIG_EXT_RECUR_PARTITIONS mi_row, mi_col, bsize); PartitionBlkParams blk_params = part_search_state.part_blk_params; #if CONFIG_EXT_RECUR_PARTITIONS - PARTITION_TYPE forced_partition = - get_forced_partition_type(cm, x, mi_row, mi_col, bsize, template_tree, - ptree_luma, &pc_tree->chroma_ref_info); if (sms_tree != NULL) #endif // CONFIG_EXT_RECUR_PARTITIONS sms_tree->partitioning = PARTITION_NONE; @@ -5334,11 +6789,12 @@ if (counterpart_block->rd_cost.rate != INT_MAX) { av1_copy_pc_tree_recursive(cm, pc_tree, counterpart_block, part_search_state.ss_x, part_search_state.ss_y, - &td->shared_coeff_buf, num_planes); + &td->shared_coeff_buf, xd->tree_type, + num_planes); *rd_cost = pc_tree->rd_cost; -#if CONFIG_C043_MVP_IMPROVEMENTS +#if CONFIG_MVP_IMPROVEMENT x->e_mbd.ref_mv_bank = counterpart_block->ref_mv_bank; -#endif // CONFIG_C043_MVP_IMPROVEMENTS +#endif // CONFIG_MVP_IMPROVEMENT #if WARP_CU_BANK x->e_mbd.warp_param_bank = counterpart_block->warp_param_bank; #endif // WARP_CU_BANK @@ -5377,14 +6833,12 @@ #endif #endif +#if !CONFIG_EXT_RECUR_PARTITIONS // Override partition costs at the edges of the frame in the same // way as in read_partition (see decodeframe.c). if (!(blk_params.has_rows && blk_params.has_cols)) - set_partition_cost_for_edge_blk(cm, xd, -#if CONFIG_EXT_RECUR_PARTITIONS - &pc_tree->chroma_ref_info, -#endif // CONFIG_EXT_RECUR_PARTITIONS - &part_search_state); + set_partition_cost_for_edge_blk(cm, xd, &part_search_state); +#endif // !CONFIG_EXT_RECUR_PARTITIONS // Disable rectangular partitions for inner blocks when the current block is // forced to only use square partitions. 
@@ -5412,6 +6866,17 @@ av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize, &pc_tree->chroma_ref_info); + bool search_none_after_rect = false; +#if CONFIG_EXT_RECUR_PARTITIONS + if (part_search_state.forced_partition == PARTITION_INVALID) { + if (cpi->sf.part_sf.adaptive_partition_search_order) { + search_none_after_rect = + try_none_after_rect(xd, &cm->mi_params, bsize, mi_row, mi_col); + } + search_none_after_rect |= bsize == BLOCK_256X256; + } +#endif // CONFIG_EXT_RECUR_PARTITIONS + // Save rdmult before it might be changed, so it can be restored later. const int orig_rdmult = x->rdmult; setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, NO_AQ, NULL); @@ -5428,6 +6893,16 @@ xd->left_txfm_context = xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK); av1_save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); +#if CONFIG_MVP_IMPROVEMENT + LevelBanksRDO level_banks = { + x->e_mbd.ref_mv_bank, /* curr_level_bank*/ + x->e_mbd.ref_mv_bank, /* best_level_bank*/ +#if WARP_CU_BANK + x->e_mbd.warp_param_bank, /* curr_level_warp_bank*/ + x->e_mbd.warp_param_bank, /* best_level_warp_bank*/ +#endif // WARP_CU_BANK + }; +#endif // CONFIG_MVP_IMPROVEMENT #if CONFIG_EXT_RECUR_PARTITIONS { SimpleMotionData *sms_data = @@ -5439,11 +6914,11 @@ int *partition_horz_allowed = &part_search_state.partition_rect_allowed[HORZ]; int *partition_vert_allowed = &part_search_state.partition_rect_allowed[VERT]; #if CONFIG_EXT_RECUR_PARTITIONS - if (forced_partition == PARTITION_INVALID && + if (part_search_state.forced_partition == PARTITION_INVALID && is_bsize_gt(bsize, x->sb_enc.min_partition_size)) { #endif // CONFIG_EXT_RECUR_PARTITIONS - int *prune_horz = &part_search_state.prune_rect_part[HORZ]; - int *prune_vert = &part_search_state.prune_rect_part[VERT]; + bool *prune_horz = &part_search_state.prune_rect_part[HORZ]; + bool *prune_vert = &part_search_state.prune_rect_part[VERT]; #if CONFIG_EXT_RECUR_PARTITIONS int do_square_split = true; int *sqr_split_ptr = 
&do_square_split; @@ -5458,9 +6933,9 @@ partition_vert_allowed, &part_search_state.do_rectangular_split, sqr_split_ptr, prune_horz, prune_vert, pc_tree); #if CONFIG_EXT_RECUR_PARTITIONS - forced_partition = get_forced_partition_type( + part_search_state.forced_partition = get_forced_partition_type( cm, x, blk_params.mi_row, blk_params.mi_col, blk_params.bsize, - template_tree, ptree_luma, &pc_tree->chroma_ref_info); + ptree_luma, template_tree, &pc_tree->chroma_ref_info); } #endif // CONFIG_EXT_RECUR_PARTITIONS @@ -5476,34 +6951,7 @@ #endif int luma_split_flag = 0; -#if CONFIG_EXT_RECUR_PARTITIONS - int horz_3_allowed_sdp = 1; - int vert_3_allowed_sdp = 1; - if (is_luma_chroma_share_same_partition(xd->tree_type, ptree_luma, bsize)) { - PARTITION_TYPE derived_partition_mode = sdp_chroma_part_from_luma( - bsize, ptree_luma->partition, part_search_state.ss_x, - part_search_state.ss_y); - - if (derived_partition_mode != PARTITION_NONE) - part_search_state.partition_none_allowed = BLOCK_INVALID; - if (derived_partition_mode != PARTITION_HORZ) - part_search_state.partition_rect_allowed[HORZ] = 0; - if (derived_partition_mode != PARTITION_VERT) - part_search_state.partition_rect_allowed[VERT] = 0; - if (derived_partition_mode != PARTITION_HORZ_3) horz_3_allowed_sdp = 0; - if (derived_partition_mode != PARTITION_VERT_3) vert_3_allowed_sdp = 0; - - // TODO(yuec): Need to make sure there is at least one valid partition - // mode - assert(IMPLIES( - is_luma_chroma_share_same_partition(xd->tree_type, ptree_luma, bsize), - forced_partition == PARTITION_INVALID || - forced_partition == sdp_chroma_part_from_luma( - bsize, ptree_luma->partition, - cpi->common.seq_params.subsampling_x, - cpi->common.seq_params.subsampling_x))); - } -#else // CONFIG_EXT_RECUR_PARTITIONS +#if !CONFIG_EXT_RECUR_PARTITIONS const CommonModeInfoParams *const mi_params = &cm->mi_params; const int parent_block_width = block_size_wide[bsize]; if (xd->tree_type == CHROMA_PART && parent_block_width >= 
SHARED_PART_SIZE) { @@ -5515,7 +6963,7 @@ part_search_state.partition_rect_allowed[HORZ] = 0; part_search_state.partition_rect_allowed[VERT] = 0; } -#endif // CONFIG_EXT_RECUR_PARTITIONS +#endif // !CONFIG_EXT_RECUR_PARTITIONS // Partition search BEGIN_PARTITION_SEARCH: @@ -5525,8 +6973,7 @@ if (x->must_find_valid_partition) { #if CONFIG_EXT_RECUR_PARTITIONS init_allowed_partitions(&part_search_state, &cpi->oxcf.part_cfg, - &pc_tree->chroma_ref_info, &cm->mi_params, - xd->tree_type); + &pc_tree->chroma_ref_info, xd->tree_type); #else reset_part_limitations(cpi, &part_search_state); #endif // CONFIG_EXT_RECUR_PARTITIONS @@ -5537,41 +6984,21 @@ // PARTITION_NONE search stage. int64_t part_none_rd = INT64_MAX; -#if CONFIG_C043_MVP_IMPROVEMENTS - REF_MV_BANK curr_level_bank = x->e_mbd.ref_mv_bank; - REF_MV_BANK best_level_bank = x->e_mbd.ref_mv_bank; -#endif // CONFIG_C043_MVP_IMPROVEMENTS -#if WARP_CU_BANK - WARP_PARAM_BANK curr_level_warp_bank = x->e_mbd.warp_param_bank; - WARP_PARAM_BANK best_level_warp_bank = x->e_mbd.warp_param_bank; -#endif // WARP_CU_BANK -#if CONFIG_EXT_RECUR_PARTITIONS - if (IS_FORCED_PARTITION_TYPE(PARTITION_NONE) && - (forced_partition == PARTITION_NONE || bsize != BLOCK_256X256)) { -#endif // CONFIG_EXT_RECUR_PARTITIONS + if (!search_none_after_rect) { none_partition_search(cpi, td, tile_data, x, pc_tree, sms_tree, &x_ctx, &part_search_state, &best_rdc, &pb_source_variance, none_rd, &part_none_rd -#if CONFIG_C043_MVP_IMPROVEMENTS +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK , - &best_level_bank -#endif // CONFIG_C043_MVP_IMPROVEMENTS -#if WARP_CU_BANK - , - &best_level_warp_bank -#endif // WARP_CU_BANK + &level_banks +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK ); -#if CONFIG_C043_MVP_IMPROVEMENTS - x->e_mbd.ref_mv_bank = curr_level_bank; -#endif // CONFIG_C043_MVP_IMPROVEMENTS -#if WARP_CU_BANK - x->e_mbd.warp_param_bank = curr_level_warp_bank; -#endif // WARP_CU_BANK -#if CONFIG_EXT_RECUR_PARTITIONS } +#if CONFIG_EXT_RECUR_PARTITIONS 
if (cpi->sf.part_sf.end_part_search_after_consec_failures && x->is_whole_sb && - !frame_is_intra_only(cm) && forced_partition == PARTITION_INVALID && + !frame_is_intra_only(cm) && + part_search_state.forced_partition == PARTITION_INVALID && pc_tree->parent && pc_tree->parent->parent) { if (pc_tree->none_rd.rate == INT_MAX && pc_tree->parent->none_rd.rate == INT_MAX && @@ -5585,33 +7012,20 @@ // PARTITION_SPLIT search stage. int64_t part_split_rd = INT64_MAX; - if (IS_FORCED_PARTITION_TYPE(PARTITION_SPLIT) && max_recursion_depth > 0 && - !frame_is_intra_only(cm)) { - split_partition_search(cpi, td, tile_data, tp, x, pc_tree, sms_tree, &x_ctx, - &part_search_state, &best_rdc, multi_pass_mode, - &part_split_rd -#if CONFIG_C043_MVP_IMPROVEMENTS - , - &best_level_bank -#endif // CONFIG_C043_MVP_IMPROVEMENTS -#if WARP_CU_BANK - , - &best_level_warp_bank -#endif // WARP_CU_BANK + split_partition_search(cpi, td, tile_data, tp, x, pc_tree, sms_tree, &x_ctx, + &part_search_state, &best_rdc, multi_pass_mode, + &part_split_rd +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + , + &level_banks +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK #if CONFIG_EXT_RECUR_PARTITIONS - , - ptree_luma, template_tree, max_recursion_depth - 1 + , + ptree_luma, template_tree, max_recursion_depth - 1 #endif // CONFIG_EXT_RECUR_PARTITIONS - ); - } -#if CONFIG_C043_MVP_IMPROVEMENTS - x->e_mbd.ref_mv_bank = curr_level_bank; -#endif // CONFIG_C043_MVP_IMPROVEMENTS -#if WARP_CU_BANK - x->e_mbd.warp_param_bank = curr_level_warp_bank; -#endif // WARP_CU_BANK -#if !CONFIG_EXT_RECUR_PARTITIONS + ); +#if !CONFIG_EXT_RECUR_PARTITIONS // Terminate partition search for child partition, // when NONE and SPLIT partition rd_costs are INT64_MAX. 
if (cpi->sf.part_sf.early_term_after_none_split && @@ -5626,15 +7040,18 @@ #endif // !CONFIG_EXT_RECUR_PARTITIONS #if CONFIG_EXT_RECUR_PARTITIONS bool prune_none = false; - if (forced_partition == PARTITION_INVALID && bsize == BLOCK_256X256) { + if (part_search_state.forced_partition == PARTITION_INVALID && + bsize == BLOCK_256X256) { + assert(pc_tree->partitioning == PARTITION_SPLIT); for (int idx = 0; idx < 4; idx++) { const int depth = get_partition_depth(pc_tree->split[idx], 0); prune_none |= depth > 0; } } if (cpi->sf.part_sf.prune_rect_with_split_depth && !frame_is_intra_only(cm) && - forced_partition == PARTITION_INVALID && pc_tree->split[0] && - pc_tree->split[1] && pc_tree->split[2] && pc_tree->split[3]) { + part_search_state.forced_partition == PARTITION_INVALID && + pc_tree->split[0] && pc_tree->split[1] && pc_tree->split[2] && + pc_tree->split[3]) { int min_depth = INT_MAX, max_depth = 0; for (int idx = 0; idx < 4; idx++) { const int depth = get_partition_depth(pc_tree->split[idx], 0); @@ -5649,52 +7066,35 @@ } #endif // CONFIG_EXT_RECUR_PARTITIONS #if CONFIG_EXT_RECUR_PARTITIONS - if (forced_partition == PARTITION_INVALID && bsize == BLOCK_256X256 && - !prune_none) { + bool none_searched = false; + if (part_search_state.forced_partition == PARTITION_INVALID && + bsize == BLOCK_256X256 && !prune_none) { #endif // CONFIG_EXT_RECUR_PARTITIONS none_partition_search(cpi, td, tile_data, x, pc_tree, sms_tree, &x_ctx, &part_search_state, &best_rdc, &pb_source_variance, none_rd, &part_none_rd -#if CONFIG_C043_MVP_IMPROVEMENTS +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK , - &best_level_bank -#endif // CONFIG_C043_MVP_IMPROVEMENTS -#if WARP_CU_BANK - , - &best_level_warp_bank -#endif // WARP_CU_BANK + &level_banks +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK ); -#if CONFIG_C043_MVP_IMPROVEMENTS - x->e_mbd.ref_mv_bank = curr_level_bank; -#endif // CONFIG_C043_MVP_IMPROVEMENTS -#if WARP_CU_BANK - x->e_mbd.warp_param_bank = curr_level_warp_bank; -#endif // 
WARP_CU_BANK #if CONFIG_EXT_RECUR_PARTITIONS + none_searched = true; } #endif // CONFIG_EXT_RECUR_PARTITIONS // Rectangular partitions search stage. + rectangular_partition_search( + cpi, td, tile_data, tp, x, pc_tree, &x_ctx, &part_search_state, &best_rdc, #if CONFIG_EXT_RECUR_PARTITIONS - if (max_recursion_depth > 0) { + multi_pass_mode, ptree_luma, template_tree, max_recursion_depth - 1, #endif // CONFIG_EXT_RECUR_PARTITIONS - rectangular_partition_search(cpi, td, tile_data, tp, x, pc_tree, &x_ctx, - &part_search_state, &best_rdc, -#if CONFIG_EXT_RECUR_PARTITIONS - multi_pass_mode, ptree_luma, template_tree, - max_recursion_depth - 1, -#endif // CONFIG_EXT_RECUR_PARTITIONS - rect_part_win_info, -#if CONFIG_C043_MVP_IMPROVEMENTS - &best_level_bank, &curr_level_bank, -#endif // CONFIG_C043_MVP_IMPROVEMENTS -#if WARP_CU_BANK - &best_level_warp_bank, &curr_level_warp_bank, -#endif // WARP_CU_BANK - part_none_rd); -#if CONFIG_EXT_RECUR_PARTITIONS - } -#endif // CONFIG_EXT_RECUR_PARTITIONS + rect_part_win_info, +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + &level_banks, +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + part_none_rd); + if (pb_source_variance == UINT_MAX) { av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, NULL); pb_source_variance = av1_high_get_sby_perpixel_variance( @@ -5703,6 +7103,19 @@ assert(IMPLIES(!cpi->oxcf.part_cfg.enable_rect_partitions, !part_search_state.do_rectangular_split)); +#if CONFIG_EXT_RECUR_PARTITIONS + if (search_none_after_rect && !none_searched) { + prune_none_with_rect_results(&part_search_state, pc_tree); + none_partition_search(cpi, td, tile_data, x, pc_tree, sms_tree, &x_ctx, + &part_search_state, &best_rdc, &pb_source_variance, + none_rd, &part_none_rd +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + , + &level_banks +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + ); + } +#endif // CONFIG_EXT_RECUR_PARTITIONS #if !CONFIG_EXT_RECUR_PARTITIONS const int ext_partition_allowed = @@ -5714,14 +7127,10 @@ 
ab_partitions_search(cpi, td, tile_data, tp, x, &x_ctx, pc_tree, &part_search_state, &best_rdc, rect_part_win_info, pb_source_variance, ext_partition_allowed -#if CONFIG_C043_MVP_IMPROVEMENTS +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK , - &best_level_bank, &curr_level_bank -#endif // CONFIG_C043_MVP_IMPROVEMENTS -#if WARP_CU_BANK - , - &best_level_warp_bank, &curr_level_warp_bank -#endif // WARP_CU_BANK + &level_banks +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK ); // 4-way partitions search stage. @@ -5754,14 +7163,10 @@ rd_pick_4partition(cpi, td, tile_data, tp, x, &x_ctx, pc_tree, pc_tree->horizontal4, &part_search_state, &best_rdc, inc_step, PARTITION_HORZ_4 -#if CONFIG_C043_MVP_IMPROVEMENTS +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK , - &best_level_bank, &curr_level_bank -#endif // CONFIG_C043_MVP_IMPROVEMENTS -#if WARP_CU_BANK - , - &best_level_warp_bank, &curr_level_warp_bank -#endif // WARP_CU_BANK + &level_banks +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK ); } @@ -5778,132 +7183,88 @@ rd_pick_4partition(cpi, td, tile_data, tp, x, &x_ctx, pc_tree, pc_tree->vertical4, &part_search_state, &best_rdc, inc_step, PARTITION_VERT_4 -#if CONFIG_C043_MVP_IMPROVEMENTS +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK , - &best_level_bank, &curr_level_bank -#endif // CONFIG_C043_MVP_IMPROVEMENTS -#if WARP_CU_BANK - , - &best_level_warp_bank, &curr_level_warp_bank -#endif // WARP_CU_BANK + &level_banks +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK ); } #endif // !CONFIG_EXT_RECUR_PARTITIONS #if CONFIG_EXT_RECUR_PARTITIONS - const int ext_partition_allowed = !is_partition_implied_at_boundary( - &cm->mi_params, xd->tree_type, part_search_state.ss_x, - part_search_state.ss_y, mi_row, mi_col, bsize, &pc_tree->chroma_ref_info, - NULL); - const int partition_3_allowed = ext_partition_allowed && - max_recursion_depth > 0 && - cpi->oxcf.part_cfg.enable_ext_partitions; - const int is_wide_block = block_size_wide[bsize] > block_size_high[bsize]; - const int 
is_tall_block = block_size_wide[bsize] < block_size_high[bsize]; - const PARTITION_SPEED_FEATURES *part_sf = &cpi->sf.part_sf; - - int horz_3_allowed = - partition_3_allowed && !is_wide_block && horz_3_allowed_sdp && - check_is_chroma_size_valid(xd->tree_type, PARTITION_HORZ_3, bsize, mi_row, - mi_col, part_search_state.ss_x, - part_search_state.ss_y, - &pc_tree->chroma_ref_info) && - is_bsize_geq(get_partition_subsize(bsize, PARTITION_HORZ_3), - blk_params.min_partition_size); - // Prune horz 3 with speed features - if (horz_3_allowed && !frame_is_intra_only(cm) && - forced_partition != PARTITION_HORZ_3) { - if (part_sf->prune_part_3_with_part_none && - pc_tree->partitioning == PARTITION_NONE) { - // Prune if the best partition does not split - horz_3_allowed = 0; - } - if (part_sf->prune_part_3_with_part_rect && - pc_tree->partitioning == PARTITION_HORZ && - !node_uses_horz(pc_tree->horizontal[0]) && - !node_uses_horz(pc_tree->horizontal[1])) { - // Prune if the best partition is horz but horz did not further split in - // horz - horz_3_allowed = 0; - } - } - - int vert_3_allowed = - partition_3_allowed && !is_tall_block && vert_3_allowed_sdp && - check_is_chroma_size_valid(xd->tree_type, PARTITION_VERT_3, bsize, mi_row, - mi_col, part_search_state.ss_x, - part_search_state.ss_y, - &pc_tree->chroma_ref_info) && - is_bsize_geq(get_partition_subsize(bsize, PARTITION_VERT_3), - blk_params.min_partition_size); - - if (vert_3_allowed && !frame_is_intra_only(cm) && - forced_partition != PARTITION_VERT_3) { - if (part_sf->prune_part_3_with_part_none && - pc_tree->partitioning == PARTITION_NONE) { - // Prune if the best partition does not split - vert_3_allowed = 0; - } - if (part_sf->prune_part_3_with_part_rect && - pc_tree->partitioning == PARTITION_VERT && - !node_uses_vert(pc_tree->vertical[0]) && - !node_uses_vert(pc_tree->vertical[1])) { - // Prune if the best partition is vert but vert did not further split in - // vert - vert_3_allowed = 0; - } - } + bool 
partition_boundaries[MAX_MIB_SQUARE] = { 0 }; + prune_ext_partitions_3way(cpi, pc_tree, &part_search_state, + partition_boundaries); const int ext_recur_depth = AOMMIN(max_recursion_depth - 1, cpi->sf.part_sf.ext_recur_depth); + const bool track_ptree_luma = + is_luma_chroma_share_same_partition(xd->tree_type, ptree_luma, bsize); // PARTITION_HORZ_3 - if (IS_FORCED_PARTITION_TYPE(PARTITION_HORZ_3) && horz_3_allowed) { - search_partition_horz_3( - &part_search_state, cpi, td, tile_data, tp, &best_rdc, pc_tree, - (ptree_luma && ptree_luma->partition == PARTITION_HORZ_3) ? ptree_luma - : NULL, - - template_tree, &x_ctx, -#if CONFIG_C043_MVP_IMPROVEMENTS - &best_level_bank, -#endif // CONFIG_C043_MVP_IMPROVEMENTS -#if WARP_CU_BANK - &best_level_warp_bank, -#endif // WARP_CU_BANK - multi_pass_mode, ext_recur_depth); -#if CONFIG_C043_MVP_IMPROVEMENTS - x->e_mbd.ref_mv_bank = curr_level_bank; -#endif // CONFIG_C043_MVP_IMPROVEMENTS -#if WARP_CU_BANK - x->e_mbd.warp_param_bank = curr_level_warp_bank; -#endif // WARP_CU_BANK - } + search_partition_horz_3(&part_search_state, cpi, td, tile_data, tp, &best_rdc, + pc_tree, track_ptree_luma ? ptree_luma : NULL, + template_tree, &x_ctx, &part_search_state, +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + &level_banks, +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + multi_pass_mode, ext_recur_depth); // PARTITION_VERT_3 - if (IS_FORCED_PARTITION_TYPE(PARTITION_VERT_3) && vert_3_allowed) { - search_partition_vert_3( - &part_search_state, cpi, td, tile_data, tp, &best_rdc, pc_tree, - (ptree_luma && ptree_luma->partition == PARTITION_VERT_3) ? 
ptree_luma - : NULL, - - template_tree, &x_ctx, -#if CONFIG_C043_MVP_IMPROVEMENTS - &best_level_bank, -#endif // CONFIG_C043_MVP_IMPROVEMENTS -#if WARP_CU_BANK - &best_level_warp_bank, -#endif // WARP_CU_BANK - multi_pass_mode, ext_recur_depth); -#if CONFIG_C043_MVP_IMPROVEMENTS - x->e_mbd.ref_mv_bank = curr_level_bank; -#endif // CONFIG_C043_MVP_IMPROVEMENTS -#if WARP_CU_BANK - x->e_mbd.warp_param_bank = curr_level_warp_bank; -#endif // WARP_CU_BANK - } + search_partition_vert_3(&part_search_state, cpi, td, tile_data, tp, &best_rdc, + pc_tree, track_ptree_luma ? ptree_luma : NULL, + template_tree, &x_ctx, &part_search_state, +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + &level_banks, +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + multi_pass_mode, ext_recur_depth); #endif // CONFIG_EXT_RECUR_PARTITIONS +#if CONFIG_UNEVEN_4WAY + prune_ext_partitions_4way(cpi, pc_tree, &part_search_state, + partition_boundaries); + + // PARTITION_HORZ_4A + search_partition_horz_4a(&part_search_state, cpi, td, tile_data, tp, + &best_rdc, pc_tree, + track_ptree_luma ? ptree_luma : NULL, template_tree, + &x_ctx, &part_search_state, +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + &level_banks, +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + multi_pass_mode, ext_recur_depth); + + // PARTITION_HORZ_4B + search_partition_horz_4b(&part_search_state, cpi, td, tile_data, tp, + &best_rdc, pc_tree, + track_ptree_luma ? ptree_luma : NULL, template_tree, + &x_ctx, &part_search_state, +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + &level_banks, +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + multi_pass_mode, ext_recur_depth); + + // PARTITION_VERT_4A + search_partition_vert_4a(&part_search_state, cpi, td, tile_data, tp, + &best_rdc, pc_tree, + track_ptree_luma ? 
ptree_luma : NULL, template_tree, + &x_ctx, &part_search_state, +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + &level_banks, +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + multi_pass_mode, ext_recur_depth); + + // PARTITION_VERT_4B + search_partition_vert_4b(&part_search_state, cpi, td, tile_data, tp, + &best_rdc, pc_tree, + track_ptree_luma ? ptree_luma : NULL, template_tree, + &x_ctx, &part_search_state, +#if CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + &level_banks, +#endif // CONFIG_MVP_IMPROVEMENT || WARP_CU_BANK + multi_pass_mode, ext_recur_depth); +#endif // CONFIG_UNEVEN_4WAY + if (bsize == cm->sb_size && !part_search_state.found_best_partition) { if (x->must_find_valid_partition) { aom_internal_error( @@ -5924,23 +7285,27 @@ pc_tree->partitioning != template_tree->partition) { assert(0); printf("Mismatch with template at fr: %d, mi: (%d, %d), BLOCK_%dX%d\n", - cm->current_frame.order_hint, mi_row, mi_col, block_size_wide[bsize], - block_size_high[bsize]); +#if CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + cm->current_frame.display_order_hint, +#else + cm->current_frame.order_hint, +#endif // CONFIG_EXPLICIT_TEMPORAL_DIST_CALC + mi_row, mi_col, block_size_wide[bsize], block_size_high[bsize]); } #endif // CONFIG_EXT_RECUR_PARTITIONS && !defined(NDEBUG) // Store the final rd cost *rd_cost = best_rdc; -#if CONFIG_C043_MVP_IMPROVEMENTS - x->e_mbd.ref_mv_bank = best_level_bank; +#if CONFIG_MVP_IMPROVEMENT + x->e_mbd.ref_mv_bank = level_banks.best_level_bank; #if CONFIG_EXT_RECUR_PARTITIONS - pc_tree->ref_mv_bank = best_level_bank; + pc_tree->ref_mv_bank = level_banks.best_level_bank; #endif // CONFIG_EXT_RECUR_PARTITIONS -#endif // CONFIG_C043_MVP_IMPROVEMENTS +#endif // CONFIG_MVP_IMPROVEMENT #if WARP_CU_BANK - x->e_mbd.warp_param_bank = best_level_warp_bank; + x->e_mbd.warp_param_bank = level_banks.best_level_warp_bank; #if CONFIG_EXT_RECUR_PARTITIONS - pc_tree->warp_param_bank = best_level_warp_bank; + pc_tree->warp_param_bank = level_banks.best_level_warp_bank; 
#endif // CONFIG_EXT_RECUR_PARTITIONS #endif // WARP_CU_BANK pc_tree->rd_cost = best_rdc;
diff --git a/av1/encoder/partition_strategy.c b/av1/encoder/partition_strategy.c index 89fb2d7..cc8ea99 100644 --- a/av1/encoder/partition_strategy.c +++ b/av1/encoder/partition_strategy.c
@@ -253,6 +253,9 @@ int *partition_horz_allowed, int *partition_vert_allowed, int *do_rectangular_split, int *do_square_split) { aom_clear_system_state(); + (void)partition_horz_allowed; + (void)partition_vert_allowed; + (void)do_rectangular_split; const AV1_COMMON *const cm = &cpi->common; const int bsize_idx = convert_bsize_to_idx(bsize); @@ -511,7 +514,11 @@ void av1_simple_motion_search_prune_rect( AV1_COMP *const cpi, MACROBLOCK *x, SIMPLE_MOTION_DATA_TREE *sms_tree, int mi_row, int mi_col, BLOCK_SIZE bsize, int partition_horz_allowed, - int partition_vert_allowed, int *prune_horz, int *prune_vert) { + int partition_vert_allowed, bool *prune_horz, bool *prune_vert) { + // TODO(urvang): Need to change for CONFIG_UNEVEN_4WAY. +#if CONFIG_UNEVEN_4WAY + assert(0 && "Not implemented"); +#endif // CONFIG_UNEVEN_4WAY aom_clear_system_state(); const AV1_COMMON *const cm = &cpi->common; const int bsize_idx = convert_bsize_to_idx(bsize); @@ -924,8 +931,8 @@ void av1_ml_prune_rect_partition(const AV1_COMP *const cpi, const MACROBLOCK *const x, BLOCK_SIZE bsize, int64_t best_rd, int64_t none_rd, - int64_t *split_rd, int *const dst_prune_horz, - int *const dst_prune_vert) { + int64_t *split_rd, bool *const dst_prune_horz, + bool *const dst_prune_vert) { if (bsize < BLOCK_8X8 || best_rd >= 1000000000) return; best_rd = AOMMAX(best_rd, 1); const NN_CONFIG *nn_config = NULL; @@ -1091,6 +1098,7 @@ } } +#if !CONFIG_EXT_RECUR_PARTITIONS #define FEATURES 18 #define LABELS 4 // Use a ML model to predict if horz4 and vert4 should be considered. 
@@ -1229,6 +1237,8 @@ #undef FEATURES #undef LABELS +#endif // !CONFIG_EXT_RECUR_PARTITIONS + #define FEATURES 4 int av1_ml_predict_breakout(const AV1_COMP *const cpi, BLOCK_SIZE bsize, const MACROBLOCK *const x, @@ -1300,7 +1310,7 @@ BLOCK_SIZE bsize, SIMPLE_MOTION_DATA_TREE *const sms_tree, int *partition_none_allowed, int *partition_horz_allowed, int *partition_vert_allowed, int *do_rectangular_split, - int *do_square_split, int *prune_horz, int *prune_vert, + int *do_square_split, bool *prune_horz, bool *prune_vert, const PC_TREE *pc_tree) { const AV1_COMMON *const cm = &cpi->common; const CommonModeInfoParams *const mi_params = &cm->mi_params; @@ -1342,27 +1352,15 @@ do_square_split); #if CONFIG_EXT_RECUR_PARTITIONS if (!*partition_none_allowed) { - if (!pc_tree->parent || pc_tree != pc_tree->parent->horizontal3[1]) { - av1_cache_best_partition(x->sms_bufs, mi_row, mi_col, bsize, - cm->sb_size, PARTITION_HORZ); - const int mi_step = block_size_high[bsize] / 2; - BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_HORZ); - av1_cache_best_partition(x->sms_bufs, mi_row, mi_col, subsize, - cm->sb_size, PARTITION_VERT); - av1_cache_best_partition(x->sms_bufs, mi_row + mi_step, mi_col, subsize, - cm->sb_size, PARTITION_VERT); - } else if (pc_tree != pc_tree->parent->vertical[1]) { - av1_cache_best_partition(x->sms_bufs, mi_row, mi_col, bsize, - cm->sb_size, PARTITION_VERT); - const int mi_step = block_size_wide[bsize] / 2; - BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_VERT); - av1_cache_best_partition(x->sms_bufs, mi_row, mi_col, subsize, - cm->sb_size, PARTITION_HORZ); - av1_cache_best_partition(x->sms_bufs, mi_row, mi_col + mi_step, subsize, - cm->sb_size, PARTITION_HORZ); - } + av1_cache_best_partition(x->sms_bufs, mi_row, mi_col, bsize, cm->sb_size, + PARTITION_HORZ); + const int mi_step = block_size_high[bsize] / 2; + BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_HORZ); + av1_cache_best_partition(x->sms_bufs, mi_row, 
mi_col, subsize, + cm->sb_size, PARTITION_VERT); + av1_cache_best_partition(x->sms_bufs, mi_row + mi_step, mi_col, subsize, + cm->sb_size, PARTITION_VERT); } -#else (void)pc_tree; #endif // CONFIG_EXT_RECUR_PARTITIONS } @@ -1631,6 +1629,24 @@ } // Gets the linear index corresponds to the current block. + +#if CONFIG_UNEVEN_4WAY +static INLINE int get_sms_arr_1d_idx(int mi_bsize, int mi_in_sb) { + int idx = -1; + if (mi_bsize <= 2) { + idx = mi_in_sb; + } else if (mi_bsize <= 8) { + assert(mi_in_sb % (mi_bsize / 4) == 0); + idx = mi_in_sb / (mi_bsize / 4); + } else { + assert(mi_in_sb % (mi_bsize / 2) == 0); + idx = mi_in_sb / (mi_bsize / 2); + } + assert(idx >= 0 && idx < get_sms_count_from_length(mi_bsize)); + + return idx; +} +#else static INLINE int get_sms_arr_1d_idx(int mi_bsize, int mi_in_sb) { int idx = -1; if (mi_bsize == 1) { @@ -1643,6 +1659,7 @@ return idx; } +#endif // CONFIG_UNEVEN_4WAY #define MAKE_SMS_ARR_SWITCH_CASE(width, height) \ case BLOCK_##width##X##height: { \ @@ -1817,42 +1834,59 @@ SimpleMotionDataBufs *sms_bufs, int mi_row, int mi_col, BLOCK_SIZE bsize, BLOCK_SIZE sb_size, PARTITION_TYPE partition, MV start_mv) { assert(bsize < BLOCK_SIZES_ALL); - const int quarter_step_h = block_size_high[bsize] / 4; - const int quarter_step_w = block_size_wide[bsize] / 4; + const int eighth_step_h = block_size_high[bsize] / 8; + const int eighth_step_w = block_size_wide[bsize] / 8; static const int subblock_count[ALL_PARTITION_TYPES] = { 1, // PARTITION_NONE 2, // PARTITION_HORZ 2, // PARTITION_VERT -#if CONFIG_H_PARTITION 4, // PARTITION_HORZ_3 4, // PARTITION_VERT_3 -#else - 3, // PARTITION_HORZ_3 - 3, // PARTITION_VERT_3 -#endif // CONFIG_H_PARTITION +#if CONFIG_UNEVEN_4WAY + 4, // PARTITION_HORZ_4A + 4, // PARTITION_HORZ_4B + 4, // PARTITION_VERT_4A + 4, // PARTITION_VERT_4B +#endif // CONFIG_UNEVEN_4WAY 4, // PARTITION_SPLIT }; // PARTITION x NUM_SUBBLOCKS x (ROW and COL) static const int step_multiplier[ALL_PARTITION_TYPES][4][2] = { { { 0, 0 }, { 
0, 0 }, { 0, 0 }, { 0, 0 } }, // PARTITION_NONE - { { 0, 0 }, { 2, 0 }, { 0, 0 }, { 0, 0 } }, // PARTITION_HORZ - { { 0, 0 }, { 0, 2 }, { 0, 0 }, { 0, 0 } }, // PARTITION_VERT -#if CONFIG_H_PARTITION - { { 0, 0 }, { 1, 0 }, { 1, 2 }, { 3, 0 } }, // PARTITION_HORZ_3 - { { 0, 0 }, { 0, 1 }, { 2, 1 }, { 0, 3 } }, // PARTITION_VERT_3 -#else - { { 0, 0 }, { 1, 0 }, { 3, 0 }, { 0, 0 } }, // PARTITION_HORZ_3 - { { 0, 0 }, { 0, 1 }, { 0, 3 }, { 0, 0 } }, // PARTITION_VERT_3 -#endif // CONFIG_H_PARTITION - { { 0, 0 }, { 0, 2 }, { 2, 0 }, { 2, 2 } }, // PARTITION_SPLIT + { { 0, 0 }, { 4, 0 }, { 0, 0 }, { 0, 0 } }, // PARTITION_HORZ + { { 0, 0 }, { 0, 4 }, { 0, 0 }, { 0, 0 } }, // PARTITION_VERT + { { 0, 0 }, { 2, 0 }, { 2, 4 }, { 6, 0 } }, // PARTITION_HORZ_3 + { { 0, 0 }, { 0, 2 }, { 4, 2 }, { 0, 6 } }, // PARTITION_VERT_3 +#if CONFIG_UNEVEN_4WAY + { { 0, 0 }, { 1, 0 }, { 3, 0 }, { 7, 0 } }, // PARTITION_HORZ_4A + { { 0, 0 }, { 1, 0 }, { 5, 0 }, { 7, 0 } }, // PARTITION_HORZ_4B + { { 0, 0 }, { 0, 1 }, { 0, 3 }, { 0, 7 } }, // PARTITION_VERT_4A + { { 0, 0 }, { 0, 1 }, { 0, 5 }, { 0, 7 } }, // PARTITION_VERT_4B +#endif // CONFIG_UNEVEN_4WAY + { { 0, 0 }, { 0, 4 }, { 4, 0 }, { 4, 4 } }, // PARTITION_SPLIT }; + // Sizes of subblocks. 
const BLOCK_SIZE part_subsize = get_partition_subsize(bsize, partition); if (part_subsize == BLOCK_INVALID) return; BLOCK_SIZE subsizes[4] = { part_subsize, part_subsize, part_subsize, part_subsize }; -#if CONFIG_H_PARTITION +#if CONFIG_UNEVEN_4WAY + if (partition == PARTITION_HORZ_4A) { + subsizes[2] = get_partition_subsize(bsize, PARTITION_HORZ); + subsizes[1] = get_partition_subsize(subsizes[2], PARTITION_HORZ); + } else if (partition == PARTITION_HORZ_4B) { + subsizes[1] = get_partition_subsize(bsize, PARTITION_HORZ); + subsizes[2] = get_partition_subsize(subsizes[1], PARTITION_HORZ); + } else if (partition == PARTITION_VERT_4A) { + subsizes[2] = get_partition_subsize(bsize, PARTITION_VERT); + subsizes[1] = get_partition_subsize(subsizes[2], PARTITION_VERT); + } else if (partition == PARTITION_VERT_4B) { + subsizes[1] = get_partition_subsize(bsize, PARTITION_VERT); + subsizes[2] = get_partition_subsize(subsizes[1], PARTITION_VERT); + } +#endif // CONFIG_UNEVEN_4WAY if (partition == PARTITION_HORZ_3) { subsizes[1] = get_h_partition_subsize(sb_size, 1, PARTITION_HORZ_3); subsizes[2] = get_h_partition_subsize(sb_size, 2, PARTITION_HORZ_3); @@ -1860,12 +1894,12 @@ subsizes[1] = get_h_partition_subsize(sb_size, 1, PARTITION_VERT_3); subsizes[2] = get_h_partition_subsize(sb_size, 2, PARTITION_VERT_3); } -#endif // CONFIG_H_PARTITION + for (int idx = 0; idx < subblock_count[partition]; idx++) { const int sub_row = - mi_row + step_multiplier[partition][idx][0] * quarter_step_h / 4; + mi_row + step_multiplier[partition][idx][0] * eighth_step_h / 4; const int sub_col = - mi_col + step_multiplier[partition][idx][1] * quarter_step_w / 4; + mi_col + step_multiplier[partition][idx][1] * eighth_step_w / 4; SimpleMotionData *subblock = av1_get_sms_data_entry( sms_bufs, sub_row, sub_col, subsizes[idx], sb_size); add_start_mv_to_block(subblock, start_mv); @@ -1989,15 +2023,10 @@ // Whether we are in the middle of a PARTITION_3 subblock const PC_TREE *parent = pc_tree->parent; 
-#if CONFIG_H_PARTITION ml_features[num_features++] = parent && (parent->horizontal3[1] == pc_tree || parent->horizontal3[2] == pc_tree); ml_features[num_features++] = parent && (parent->vertical3[1] == pc_tree || parent->vertical3[2] == pc_tree); -#else - ml_features[num_features++] = parent && parent->horizontal3[1] == pc_tree; - ml_features[num_features++] = parent && parent->vertical3[1] == pc_tree; -#endif // CONFIG_H_PARTITION assert(num_features == 19); } #endif // CONFIG_EXT_RECUR_PARTITIONS
diff --git a/av1/encoder/partition_strategy.h b/av1/encoder/partition_strategy.h index 4c59fb9..63fc974 100644 --- a/av1/encoder/partition_strategy.h +++ b/av1/encoder/partition_strategy.h
@@ -87,7 +87,7 @@ void av1_simple_motion_search_prune_rect( AV1_COMP *const cpi, MACROBLOCK *x, SIMPLE_MOTION_DATA_TREE *sms_tree, int mi_row, int mi_col, BLOCK_SIZE bsize, int partition_horz_allowed, - int partition_vert_allowed, int *prune_horz, int *prune_vert); + int partition_vert_allowed, bool *prune_horz, bool *prune_vert); // Early terminates PARTITION_NONE using simple_motion_search features and the // rate, distortion, and rdcost of PARTITION_NONE. This is only called when: @@ -130,8 +130,8 @@ void av1_ml_prune_rect_partition(const AV1_COMP *const cpi, const MACROBLOCK *const x, BLOCK_SIZE bsize, int64_t best_rd, int64_t none_rd, - int64_t *split_rd, int *const dst_prune_horz, - int *const dst_prune_vert); + int64_t *split_rd, bool *const dst_prune_horz, + bool *const dst_prune_vert); // Use a ML model to predict if horz_a, horz_b, vert_a, and vert_b should be // considered. @@ -165,7 +165,7 @@ BLOCK_SIZE bsize, SIMPLE_MOTION_DATA_TREE *const sms_tree, int *partition_none_allowed, int *partition_horz_allowed, int *partition_vert_allowed, int *do_rectangular_split, - int *do_square_split, int *prune_horz, int *prune_vert, + int *do_square_split, bool *prune_horz, bool *prune_vert, const PC_TREE *pc_tree); // Prune out partitions that lead to coding block sizes outside the min and max
diff --git a/av1/encoder/pickccso.c b/av1/encoder/pickccso.c index 2653a09..1dd7be9 100644 --- a/av1/encoder/pickccso.c +++ b/av1/encoder/pickccso.c
@@ -281,12 +281,13 @@ return ssd; } /* Compute SSE */ -void compute_distortion(const uint16_t *org, const int org_stride, - const uint16_t *rec16, const int rec_stride, - const int log2_filter_unit_size, const int height, - const int width, uint64_t *distortion_buf, - const int distortion_buf_stride, - uint64_t *total_distortion) { +static void compute_distortion(const uint16_t *org, const int org_stride, + const uint16_t *rec16, const int rec_stride, + const int log2_filter_unit_size, + const int height, const int width, + uint64_t *distortion_buf, + const int distortion_buf_stride, + uint64_t *total_distortion) { for (int y = 0; y < height; y += (1 << log2_filter_unit_size)) { for (int x = 0; x < width; x += (1 << log2_filter_unit_size)) { const uint64_t ssd =
diff --git a/av1/encoder/pickccso.h b/av1/encoder/pickccso.h index 725311d..d455980 100644 --- a/av1/encoder/pickccso.h +++ b/av1/encoder/pickccso.h
@@ -46,13 +46,6 @@ const uint8_t shift_bits); #endif -void compute_distortion(const uint16_t *org, const int org_stride, - const uint16_t *rec16, const int rec_stride, - const int log2_filter_unit_size, const int height, - const int width, uint64_t *distortion_buf, - const int distortion_buf_stride, - uint64_t *total_distortion); - void derive_ccso_filter(AV1_COMMON *cm, const int plane, MACROBLOCKD *xd, const uint16_t *org_uv, const uint16_t *ext_rec_y, const uint16_t *rec_uv, int rdmult);
diff --git a/av1/encoder/picklpf.c b/av1/encoder/picklpf.c index 113bf17..452ab8d 100644 --- a/av1/encoder/picklpf.c +++ b/av1/encoder/picklpf.c
@@ -321,6 +321,17 @@ cpi->td.mb.rdmult = cpi->rd.RDMULT; + double no_deblocking_cost[MAX_MB_PLANE] = { DBL_MAX, DBL_MAX, DBL_MAX }; + + for (int i = 0; i < num_planes; i++) { + const int chroma_lambda_mult = i ? CHROMA_LAMBDA_MULT : 1; + const int64_t no_deblocking_sse = + aom_get_sse_plane(cpi->source, &cm->cur_frame->buf, i); + no_deblocking_cost[i] = RDCOST_DBL_WITH_NATIVE_BD_DIST( + cpi->td.mb.rdmult * chroma_lambda_mult, 0, no_deblocking_sse, + cm->seq_params.bit_depth); + } + if (method == LPF_PICK_MINIMAL_LPF) { lf->filter_level[0] = 0; lf->filter_level[1] = 0; @@ -416,7 +427,12 @@ last_frame_offsets[2] = lf->delta_q_luma[1] = lf->delta_side_luma[1]; #endif // DF_TWO_PARAM - if (best_single_cost < best_dual_cost) { + if (no_deblocking_cost[0] < AOMMIN(best_single_cost, best_dual_cost)) { + lf->filter_level[0] = 0; + lf->filter_level[1] = 0; + lf->delta_q_luma[0] = lf->delta_side_luma[0] = lf->delta_q_luma[1] = + lf->delta_side_luma[1] = 0; + } else if (best_single_cost < best_dual_cost) { lf->delta_q_luma[0] = last_frame_offsets[0] = best_single_offsets[0]; lf->delta_side_luma[0] = last_frame_offsets[1] = best_single_offsets[1]; lf->delta_q_luma[1] = last_frame_offsets[2] = best_single_offsets[2]; @@ -424,10 +440,12 @@ } if (num_planes > 1) { + double best_cost_u = DBL_MAX; + double best_cost_v = DBL_MAX; // Cb last_frame_offsets[5] = lf->delta_side_u = search_filter_offsets(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, - last_frame_offsets, NULL, 1, 1, dir); + last_frame_offsets, &best_cost_u, 1, 1, dir); #if DF_TWO_PARAM last_frame_offsets[4] = lf->delta_q_u = search_filter_offsets(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, @@ -438,7 +456,7 @@ last_frame_offsets[5] = lf->delta_side_u = search_filter_offsets(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, - last_frame_offsets, NULL, 1, 1, dir); + last_frame_offsets, &best_cost_u, 1, 1, dir); #if DF_TWO_PARAM last_frame_offsets[4] = lf->delta_q_u = search_filter_offsets(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, @@ 
-446,10 +464,16 @@ #else last_frame_offsets[4] = lf->delta_q_u = lf->delta_side_u; #endif // DF_TWO_PARAM + + if (no_deblocking_cost[1] < best_cost_u) { + lf->filter_level_u = 0; + lf->delta_q_u = lf->delta_side_u = 0; + } + // Cr last_frame_offsets[7] = lf->delta_side_v = search_filter_offsets(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, - last_frame_offsets, NULL, 2, 1, dir); + last_frame_offsets, &best_cost_v, 2, 1, dir); #if DF_TWO_PARAM last_frame_offsets[6] = lf->delta_q_v = search_filter_offsets(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, @@ -459,7 +483,7 @@ #endif // DF_TWO_PARAM last_frame_offsets[7] = lf->delta_side_v = search_filter_offsets(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, - last_frame_offsets, NULL, 2, 1, dir); + last_frame_offsets, &best_cost_v, 2, 1, dir); #if DF_TWO_PARAM last_frame_offsets[6] = lf->delta_q_v = search_filter_offsets(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, @@ -468,6 +492,11 @@ last_frame_offsets[6] = lf->delta_q_v = lf->delta_side_v; #endif // DF_TWO_PARAM + if (no_deblocking_cost[2] < best_cost_v) { + lf->filter_level_v = 0; + lf->delta_q_v = lf->delta_side_v = 0; + } + // to switch off filters if offsets are zero if (!df_quant_from_qindex(cm->quant_params.base_qindex + cm->lf.delta_q_luma[0] * DF_DELTA_SCALE,
diff --git a/av1/encoder/pickrst.c b/av1/encoder/pickrst.c index 2e7c780..5c49b56 100644 --- a/av1/encoder/pickrst.c +++ b/av1/encoder/pickrst.c
@@ -180,6 +180,10 @@ Vector *unit_indices; #endif // CONFIG_LR_MERGE_COEFFS +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + // To indicate whether it's encoder process for cross-component wiener filter + bool is_cross_filter_round; +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER AV1PixelRect tile_rect; } RestSearchCtxt; @@ -224,12 +228,17 @@ #endif // CONFIG_LR_MERGE_COEFFS static AOM_INLINE void reset_all_banks(RestSearchCtxt *rsc) { - av1_reset_wiener_bank(&rsc->wiener_bank); + av1_reset_wiener_bank(&rsc->wiener_bank, rsc->plane != AOM_PLANE_Y); av1_reset_sgrproj_bank(&rsc->sgrproj_bank); #if CONFIG_WIENER_NONSEP av1_reset_wienerns_bank(&rsc->wienerns_bank, rsc->cm->quant_params.base_qindex, - rsc->num_filter_classes, rsc->plane != AOM_PLANE_Y); + rsc->num_filter_classes, rsc->plane != AOM_PLANE_Y +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + , + rsc->is_cross_filter_round +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + ); #endif // CONFIG_WIENER_NONSEP } @@ -302,13 +311,34 @@ // TODO(yunqing): For now, only use optimized LR filter in decoder. Can be // also used in encoder. 
const int optimized_lr = 0; +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + if (rsc->is_cross_filter_round) { + // copy the pre-filtered data to dst buffer, this implementation could be + // improved + int unit_h = limits->v_end - limits->v_start; + int unit_w = limits->h_end - limits->h_start; + uint16_t *data_tl = fts->buffers[plane] + + limits->v_start * fts->strides[is_uv] + limits->h_start; + uint16_t *dst_tl = rsc->dst->buffers[plane] + + limits->v_start * rsc->dst->strides[is_uv] + + limits->h_start; + copy_tile(unit_w, unit_h, data_tl, fts->strides[is_uv], dst_tl, + rsc->dst->strides[is_uv]); - av1_loop_restoration_filter_unit( - limits, rui, &rsi->boundaries, &rlbs, tile_rect, rsc->tile_stripe0, - is_uv && cm->seq_params.subsampling_x, - is_uv && cm->seq_params.subsampling_y, bit_depth, fts->buffers[plane], - fts->strides[is_uv], rsc->dst->buffers[plane], rsc->dst->strides[is_uv], - cm->rst_tmpbuf, optimized_lr); + av1_wiener_ns_cross_filter_unit( + limits, rui, &rsi->boundaries, &rlbs, tile_rect, rsc->tile_stripe0, + is_uv && cm->seq_params.subsampling_x, + is_uv && cm->seq_params.subsampling_y, bit_depth, fts->buffers[plane], + fts->strides[is_uv], rsc->dst->buffers[plane], rsc->dst->strides[is_uv], + cm->rst_tmpbuf, optimized_lr); + } else +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + av1_loop_restoration_filter_unit( + limits, rui, &rsi->boundaries, &rlbs, tile_rect, rsc->tile_stripe0, + is_uv && cm->seq_params.subsampling_x, + is_uv && cm->seq_params.subsampling_y, bit_depth, fts->buffers[plane], + fts->strides[is_uv], rsc->dst->buffers[plane], rsc->dst->strides[is_uv], + cm->rst_tmpbuf, optimized_lr); return sse_restoration_unit(limits, rsc->src, rsc->dst, plane); } @@ -637,7 +667,7 @@ // Iterate over the stripe in blocks of width pu_width for (int j = 0; j < width; j += pu_width) { const int w = AOMMIN(pu_width, width - j); - const int ret = av1_selfguided_restoration( + const int ret = av1_selfguided_restoration_c( dat_row + j, w, h, dat_stride, 
flt0_row + j, flt1_row + j, flt_stride, sgr_params_idx, bit_depth); (void)ret; @@ -1685,6 +1715,9 @@ const AV1PixelRect *tile, RestorationUnitInfo *rui) { int64_t err = 0; +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + if (rsc->is_cross_filter_round) rui->wienerns_cross_info = rui->wienerns_info; +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER #if CONFIG_LR_MERGE_COEFFS if (limits != NULL) { err = try_restoration_unit(rsc, limits, tile, rui); @@ -1749,7 +1782,12 @@ #ifndef NDEBUG { const WienernsFilterParameters *nsfilter_params = get_wienerns_parameters( - rsc->cm->quant_params.base_qindex, rsc->plane != AOM_PLANE_Y); + rsc->cm->quant_params.base_qindex, rsc->plane != AOM_PLANE_Y +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + , + rsc->is_cross_filter_round +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + ); assert(check_wienerns_eq(&rui->wienerns_info, &last_unit_filters, nsfilter_params->ncoeffs, ALL_WIENERNS_CLASSES)); } @@ -2981,7 +3019,11 @@ } copy_nsfilter_taps(&rui->wienerns_info, &best); - +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + if (rsc->is_cross_filter_round) { + rui->wienerns_cross_info = rui->wienerns_info; + } +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER #if CONFIG_LR_MERGE_COEFFS (void)count_wienerns_bits_set(rsc->plane, &x->mode_costs, &rui->wienerns_info, ref_wienerns_bank, nsfilter_params, @@ -3114,6 +3156,9 @@ } static int64_t compute_stats_for_wienerns_filter( +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + RestSearchCtxt *rsc, +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER const uint16_t *dgd_hbd, const uint16_t *src_hbd, const RestorationTileLimits *limits, int dgd_stride, int src_stride, const RestorationUnitInfo *rui, int bit_depth, double *A, double *b, @@ -3137,9 +3182,14 @@ int is_uv = (rui->plane != AOM_PLANE_Y); const int(*wienerns_config2)[3] = is_uv ? nsfilter_params->nsfilter_config.config2 : NULL; +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + const int end_pixel = is_uv && !rsc->is_cross_filter_round + ? 
nsfilter_params->nsfilter_config.num_pixels + +#else const int end_pixel = is_uv ? nsfilter_params->nsfilter_config.num_pixels + - nsfilter_params->nsfilter_config.num_pixels2 - : nsfilter_params->nsfilter_config.num_pixels; +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + nsfilter_params->nsfilter_config.num_pixels2 + : nsfilter_params->nsfilter_config.num_pixels; #else const int end_pixel = nsfilter_params->nsfilter_config.num_pixels; #endif // CONFIG_WIENER_NONSEP_CROSS_FILT @@ -3157,8 +3207,14 @@ memset(buf, 0, sizeof(buf)); for (int k = 0; k < end_pixel; ++k) { #if CONFIG_WIENER_NONSEP_CROSS_FILT +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + int cross = + rsc->is_cross_filter_round || + (is_uv && k >= nsfilter_params->nsfilter_config.num_pixels); +#else const int cross = (is_uv && k >= nsfilter_params->nsfilter_config.num_pixels); +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER #else const int cross = 0; #endif // CONFIG_WIENER_NONSEP_CROSS_FILT @@ -3176,14 +3232,32 @@ bit_depth); } else { #if CONFIG_WIENER_NONSEP_CROSS_FILT - const int k2 = k - nsfilter_params->nsfilter_config.num_pixels; - const int pos = wienerns_config2[k2][WIENERNS_BUF_POS]; - const int r = wienerns_config2[k2][WIENERNS_ROW_ID]; - const int c = wienerns_config2[k2][WIENERNS_COL_ID]; - buf[pos] += clip_base( - (int16_t)luma_hbd[(i + r) * rui->luma_stride + (j + c)] - - (int16_t)luma_hbd[luma_id], - bit_depth); +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + if (rsc->is_cross_filter_round) { + const int pos = wienerns_config[k][WIENERNS_BUF_POS]; + const int r = wienerns_config[k][WIENERNS_ROW_ID]; + const int c = wienerns_config[k][WIENERNS_COL_ID]; + int sign = k % 2 ? 
-1 : 1; + buf[pos] += + clip_base( + (int16_t)luma_hbd[(i + r) * rui->luma_stride + (j + c)] - + (int16_t)luma_hbd[luma_id], + bit_depth) * + sign; + } else { +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + const int k2 = k - nsfilter_params->nsfilter_config.num_pixels; + const int pos = wienerns_config2[k2][WIENERNS_BUF_POS]; + const int r = wienerns_config2[k2][WIENERNS_ROW_ID]; + const int c = wienerns_config2[k2][WIENERNS_COL_ID]; + + buf[pos] += clip_base( + (int16_t)luma_hbd[(i + r) * rui->luma_stride + (j + c)] - + (int16_t)luma_hbd[luma_id], + bit_depth); +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + } +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER #else assert(0 && "Incorrect CONFIG_WIENER_NONSEP configuration"); #endif // CONFIG_WIENER_NONSEP_CROSS_FILT @@ -3507,12 +3581,20 @@ initialize_rui_for_nonsep_search(rsc, &rui); rui.restoration_type = RESTORE_WIENER_NONSEP; const WienernsFilterParameters *nsfilter_params = get_wienerns_parameters( - rsc->cm->quant_params.base_qindex, rsc->plane != AOM_PLANE_Y); + rsc->cm->quant_params.base_qindex, rsc->plane != AOM_PLANE_Y +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + , + rsc->is_cross_filter_round +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + ); assert(rsc->num_filter_classes == rsc->wienerns_bank.filter[0].num_classes); // Calculate and save this RU's stats. 
RstUnitStats unit_stats; unit_stats.real_sse = compute_stats_for_wienerns_filter( +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + rsc, +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER rsc->dgd_buffer, rsc->src_buffer, limits, rsc->dgd_stride, rsc->src_stride, &rui, rsc->cm->seq_params.bit_depth, unit_stats.A, unit_stats.b, nsfilter_params, rsc->num_stat_classes); @@ -3547,8 +3629,16 @@ RestorationUnitInfo rui; initialize_rui_for_nonsep_search(rsc, &rui); rui.restoration_type = RESTORE_WIENER_NONSEP; +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + rui.cross_restoration_type = RESTORE_WIENER_NONSEP; +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER const WienernsFilterParameters *nsfilter_params = get_wienerns_parameters( - rsc->cm->quant_params.base_qindex, rsc->plane != AOM_PLANE_Y); + rsc->cm->quant_params.base_qindex, rsc->plane != AOM_PLANE_Y +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + , + rsc->is_cross_filter_round +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + ); const RstUnitStats *unit_stats = (const RstUnitStats *)aom_vector_const_get( rsc->wienerns_stats, rest_unit_idx_in_rutile); @@ -3578,6 +3668,11 @@ const int num_classes = rsc->num_filter_classes; assert(num_classes == rsc->wienerns_bank.filter[0].num_classes); if (num_classes > 1) { +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + if (rsc->is_cross_filter_round) { + rui.wienerns_cross_info = rui.wienerns_info; + } +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER rui.wiener_class_id_restrict = -1; calc_finer_tile_search_error(rsc, limits, &rsc->tile_rect, &rui); } @@ -3949,7 +4044,12 @@ const MACROBLOCK *const x = rsc->x; #if CONFIG_WIENER_NONSEP const WienernsFilterParameters *nsfilter_params = get_wienerns_parameters( - rsc->cm->quant_params.base_qindex, rsc->plane != AOM_PLANE_Y); + rsc->cm->quant_params.base_qindex, rsc->plane != AOM_PLANE_Y +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + , + rsc->is_cross_filter_round +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + ); #endif // CONFIG_WIENER_NONSEP const int 
wiener_win = (rsc->plane == AOM_PLANE_Y) ? WIENER_WIN : WIENER_WIN_CHROMA; @@ -4083,7 +4183,12 @@ } else if (best_rtype == RESTORE_WIENER_NONSEP) { #if CONFIG_LR_MERGE_COEFFS const WienernsFilterParameters *nsfilter_params = get_wienerns_parameters( - rsc->cm->quant_params.base_qindex, rsc->plane != AOM_PLANE_Y); + rsc->cm->quant_params.base_qindex, rsc->plane != AOM_PLANE_Y +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + , + rsc->is_cross_filter_round +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + ); int equal_ref_for_class[WIENERNS_MAX_CLASSES] = { 0 }; for (int c_id = 0; c_id < rusi->wienerns_info.num_classes; ++c_id) { const int is_equal = check_wienerns_bank_eq( @@ -4207,7 +4312,12 @@ rui->wienerns_info = rusi->wienerns_info; #if CONFIG_LR_MERGE_COEFFS const WienernsFilterParameters *nsfilter_params = get_wienerns_parameters( - rsc->cm->quant_params.base_qindex, rsc->plane != AOM_PLANE_Y); + rsc->cm->quant_params.base_qindex, rsc->plane != AOM_PLANE_Y +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + , + rsc->is_cross_filter_round +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + ); int equal_ref_for_class[WIENERNS_MAX_CLASSES] = { 0 }; count_wienerns_bits_set(rsc->plane, mode_costs, &rui->wienerns_info, &rsc->wienerns_bank, nsfilter_params, @@ -4352,15 +4462,24 @@ RestSearchCtxt *rsc = (RestSearchCtxt *)priv; const RestUnitSearchInfo *rusi = &rsc->rusi[rest_unit_idx]; const RestorationInfo *rsi = &rsc->cm->rst_info[rsc->plane]; - copy_unit_info(rsi->frame_restoration_type, rusi, - &rsi->unit_info[rest_unit_idx], rsc); + +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + rsi->unit_info[rest_unit_idx].restoration_type = RESTORE_NONE; + rsi->unit_info[rest_unit_idx].cross_restoration_type = RESTORE_NONE; + if (rsi->frame_restoration_type != RESTORE_NONE) +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + copy_unit_info(rsi->frame_restoration_type, rusi, + &rsi->unit_info[rest_unit_idx], rsc); } static void finalize_frame_and_unit_info(RestorationType frame_rtype, 
RestorationInfo *rsi, RestSearchCtxt *rsc) { rsi->frame_restoration_type = frame_rtype; - if (frame_rtype != RESTORE_NONE) { +#if !CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + if (frame_rtype != RESTORE_NONE) +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + { process_by_rutile(rsc, copy_unit_info_visitor); } } @@ -4370,6 +4489,31 @@ return rsi->units_per_tile; } +#if CONFIG_FLEXIBLE_RU_SIZE +// Set the value of number of units, for a given unit size. +void av1_reset_restoration_struct(AV1_COMMON *cm, RestorationInfo *rsi, + int is_uv) { + const AV1PixelRect tile_rect = av1_whole_frame_rect(cm, is_uv); + const int max_tile_w = tile_rect.right - tile_rect.left; + const int max_tile_h = tile_rect.bottom - tile_rect.top; + + // To calculate hpertile and vpertile (horizontal and vertical units per + // tile), we basically want to divide the largest tile width or height by the + // size of a restoration unit. Rather than rounding up unconditionally as you + // might expect, we round to nearest, which models the way a right or bottom + // restoration unit can extend to up to 150% its normal width or height. The + // max with 1 is to deal with tiles that are smaller than half of a + // restoration unit. 
+ const int unit_size = rsi->restoration_unit_size; + const int hpertile = av1_lr_count_units_in_tile(unit_size, max_tile_w); + const int vpertile = av1_lr_count_units_in_tile(unit_size, max_tile_h); + + rsi->units_per_tile = hpertile * vpertile; + rsi->horz_units_per_tile = hpertile; + rsi->vert_units_per_tile = vpertile; +} +#endif // CONFIG_FLEXIBLE_RU_SIZE + void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi) { AV1_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->td.mb; @@ -4425,11 +4569,20 @@ dgd->buffers[AOM_PLANE_Y], dgd->crop_heights[AOM_PLANE_Y], dgd->crop_widths[AOM_PLANE_Y], dgd->strides[AOM_PLANE_Y], &luma, dgd->crop_heights[1], dgd->crop_widths[1], WIENERNS_UV_BRD, - rsc.luma_stride, cm->seq_params.bit_depth); + rsc.luma_stride, cm->seq_params.bit_depth +#if WIENERNS_CROSS_FILT_LUMA_TYPE == 2 + , + cm->seq_params.enable_cfl_ds_filter == 1 +#endif + ); assert(luma_buf != NULL); rsc.luma = luma; #endif // CONFIG_WIENER_NONSEP_CROSS_FILT #endif // CONFIG_WIENER_NONSEP + +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + rsc.is_cross_filter_round = 0; +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER for (int plane = plane_start; plane <= plane_end; ++plane) { init_rsc(src, &cpi->common, x, &cpi->sf.lpf_sf, plane, rusi, #if CONFIG_LR_MERGE_COEFFS @@ -4437,21 +4590,49 @@ #endif // CONFIG_LR_MERGE_COEFFS &cpi->trial_frame_rst, &rsc); +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + cm->rst_info[plane].frame_restoration_type = RESTORE_NONE; + cm->rst_info[plane].frame_cross_restoration_type = RESTORE_NONE; +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER + const int plane_ntiles = ntiles[plane > 0]; const RestorationType num_rtypes = (plane_ntiles > 1) ? 
RESTORE_TYPES : RESTORE_SWITCHABLE_TYPES; +#if CONFIG_FLEXIBLE_RU_SIZE + double best_cost = DBL_MAX; +#else double best_cost = 0; +#endif // CONFIG_FLEXIBLE_RU_SIZE RestorationType best_rtype = RESTORE_NONE; - if (!cpi->sf.lpf_sf.disable_loop_restoration_chroma || !plane) { - av1_extend_frame(rsc.dgd_buffer, rsc.plane_width, rsc.plane_height, - rsc.dgd_stride, RESTORATION_BORDER, RESTORATION_BORDER); +#if CONFIG_FLEXIBLE_RU_SIZE + RestorationInfo *rsi = &cm->rst_info[plane]; + const int max_unit_size = rsi->max_restoration_unit_size; + const int min_unit_size = rsi->min_restoration_unit_size; - for (RestorationType r = 0; r < num_rtypes; ++r) { + int best_unit_size = min_unit_size; + + for (int unit_size = min_unit_size; unit_size <= max_unit_size; + unit_size <<= 1) { + if (plane == 2 && unit_size != cm->rst_info[1].restoration_unit_size) { + continue; + } + aom_vector_clear(&wienerns_stats); + + rsi->restoration_unit_size = unit_size; + + av1_reset_restoration_struct(cm, rsi, plane > 0); +#endif // CONFIG_FLEXIBLE_RU_SIZE + if (!cpi->sf.lpf_sf.disable_loop_restoration_chroma || !plane) { + av1_extend_frame(rsc.dgd_buffer, rsc.plane_width, rsc.plane_height, + rsc.dgd_stride, RESTORATION_BORDER, + RESTORATION_BORDER); + + for (RestorationType r = 0; r < num_rtypes; ++r) { #if CONFIG_LR_FLEX_SYNTAX - if (cpi->common.features.lr_tools_disable_mask[plane > 0] & (1 << r)) - continue; + if (cpi->common.features.lr_tools_disable_mask[plane > 0] & (1 << r)) + continue; #else const ToolCfg *const tool_cfg = &cpi->oxcf.tool_cfg; switch (r) { @@ -4476,36 +4657,57 @@ #endif // CONFIG_LR_FLEX_SYNTAX #if CONFIG_PC_WIENER - if (plane != AOM_PLANE_Y && r == RESTORE_PC_WIENER) continue; + if (plane != AOM_PLANE_Y && r == RESTORE_PC_WIENER) continue; #endif // CONFIG_PC_WIENER - gather_stats_rest_type(&rsc, r); + gather_stats_rest_type(&rsc, r); #if CONFIG_WIENER_NONSEP - if (r == RESTORE_WIENER_NONSEP) { - rsc.num_filter_classes = rsc.plane == AOM_PLANE_Y - ? 
NUM_WIENERNS_CLASS_INIT_LUMA - : NUM_WIENERNS_CLASS_INIT_CHROMA; - } + if (r == RESTORE_WIENER_NONSEP) { + rsc.num_filter_classes = rsc.plane == AOM_PLANE_Y + ? NUM_WIENERNS_CLASS_INIT_LUMA + : NUM_WIENERNS_CLASS_INIT_CHROMA; + } #endif // CONFIG_WIENER_NONSEP - double cost = search_rest_type(&rsc, r); + double cost = search_rest_type(&rsc, r); +#if CONFIG_FLEXIBLE_RU_SIZE + if (cost < best_cost) { + best_cost = cost; + best_rtype = r; + best_unit_size = unit_size; + } +#else if (r == 0 || cost < best_cost) { best_cost = cost; best_rtype = r; } +#endif // CONFIG_FLEXIBLE_RU_SIZE + } + } +#if CONFIG_FLEXIBLE_RU_SIZE + if (rsi->restoration_unit_size == min_unit_size || + best_unit_size == rsi->restoration_unit_size) { +#endif // CONFIG_FLEXIBLE_RU_SIZE + finalize_frame_and_unit_info(best_rtype, &cm->rst_info[plane], &rsc); +#if CONFIG_FLEXIBLE_RU_SIZE } } - - finalize_frame_and_unit_info(best_rtype, &cm->rst_info[plane], &rsc); - +#endif // CONFIG_FLEXIBLE_RU_SIZE #if CONFIG_LR_FLEX_SYNTAX assert(IMPLIES( cm->features.lr_tools_count[plane] < 2, cm->rst_info[plane].frame_restoration_type != RESTORE_SWITCHABLE)); #endif // CONFIG_LR_FLEX_SYNTAX +#if CONFIG_FLEXIBLE_RU_SIZE + rsi->restoration_unit_size = best_unit_size; + av1_reset_restoration_struct(cm, rsi, plane > 0); + int ru_num = rest_tiles_in_plane(cm, plane > 0); + adjust_frame_rtype(&cm->rst_info[plane], ru_num, &rsc, &cpi->oxcf.tool_cfg); +#else adjust_frame_rtype(&cm->rst_info[plane], plane_ntiles, &rsc, &cpi->oxcf.tool_cfg); +#endif // CONFIG_FLEXIBLE_RU_SIZE } #if CONFIG_WIENER_NONSEP_CROSS_FILT @@ -4521,3 +4723,230 @@ aom_vector_destroy(&unit_indices); #endif // CONFIG_LR_MERGE_COEFFS } + +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER +static AOM_INLINE void copy_unit_cross_filter_info( + RestorationType frame_cross_rtype, const RestUnitSearchInfo *rusi, + RestorationUnitInfo *rui, RestSearchCtxt *rsc) { +#if CONFIG_LR_MERGE_COEFFS + const ModeCosts *mode_costs = &rsc->x->mode_costs; +#else + (void)rsc; +#endif 
// CONFIG_LR_MERGE_COEFFS + assert(frame_cross_rtype > 0); + rui->cross_restoration_type = frame_cross_rtype == RESTORE_NONE + ? RESTORE_NONE + : rusi->best_rtype[frame_cross_rtype - 1]; + if (rui->cross_restoration_type == RESTORE_WIENER_NONSEP) { + rui->wienerns_cross_info = rusi->wienerns_info; +#if CONFIG_LR_MERGE_COEFFS + const WienernsFilterParameters *nsfilter_params = get_wienerns_parameters( + rsc->cm->quant_params.base_qindex, rsc->plane != AOM_PLANE_Y, + rsc->is_cross_filter_round); + + int equal_ref_for_class[WIENERNS_MAX_CLASSES] = { 0 }; + count_wienerns_bits_set(rsc->plane, mode_costs, &rui->wienerns_cross_info, + &rsc->wienerns_bank, nsfilter_params, + ALL_WIENERNS_CLASSES); + for (int c_id = 0; c_id < rui->wienerns_cross_info.num_classes; ++c_id) { + const int is_equal = check_wienerns_bank_eq( + &rsc->wienerns_bank, &rui->wienerns_cross_info, + nsfilter_params->ncoeffs, c_id, equal_ref_for_class); + if (is_equal == -1) { + av1_add_to_wienerns_bank(&rsc->wienerns_bank, &rui->wienerns_cross_info, + c_id); + } + } +#endif // CONFIG_LR_MERGE_COEFFS + } else if (rui->cross_restoration_type == RESTORE_NONE) { + // do nothing + } else { + assert(0); + } +} + +// copy cross-component filter data from rusi to rsi for one RU +static void copy_unit_cross_filter_info_visitor( + const RestorationTileLimits *limits, const AV1PixelRect *tile_rect, + int rest_unit_idx, int rest_unit_idx_seq, void *priv, int32_t *tmpbuf, + RestorationLineBuffers *rlbs) { + (void)limits; + (void)tile_rect; + (void)rest_unit_idx_seq; + (void)tmpbuf; + (void)rlbs; + + RestSearchCtxt *rsc = (RestSearchCtxt *)priv; + const RestUnitSearchInfo *rusi = &rsc->rusi[rest_unit_idx]; + const RestorationInfo *rsi = &rsc->cm->rst_info[rsc->plane]; + + rsi->unit_info[rest_unit_idx].cross_restoration_type = RESTORE_NONE; + if (rsi->frame_cross_restoration_type != RESTORE_NONE) + copy_unit_cross_filter_info(rsi->frame_cross_restoration_type, rusi, + &rsi->unit_info[rest_unit_idx], rsc); + 
rsi->unit_info[rest_unit_idx].wienerns_cross_info.is_cross_filter = 0; +} + +// copy cross-component filter data from rusi to rsi for one frame +static void finalize_frame_and_unit_cross_filter_info( + RestorationType frame_cross_rtype, RestorationInfo *rsi, + RestSearchCtxt *rsc) { + rsi->frame_cross_restoration_type = frame_cross_rtype; + process_by_rutile(rsc, copy_unit_cross_filter_info_visitor); +} + +// RD process to find the best mode of cross-component wiener filter +// for each RU within the current frame +void av1_pick_cross_filter_restoration(const YV12_BUFFER_CONFIG *src, + AV1_COMP *cpi) { + AV1_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->td.mb; + const int num_planes = av1_num_planes(cm); + assert(!cm->features.all_lossless); + if (num_planes <= 1) return; + + av1_fill_lr_rates(&x->mode_costs, x->e_mbd.tile_ctx); + + int ntiles = rest_tiles_in_plane(cm, 1); + + RestUnitSearchInfo *rusi = + (RestUnitSearchInfo *)aom_memalign(16, sizeof(*rusi) * ntiles); + + // If the restoration unit dimensions are not multiples of + // rsi->restoration_unit_size then some elements of the rusi array may be + // left uninitialised when we reach copy_unit_info(...). This is not a + // problem, as these elements are ignored later, but in order to quiet + // Valgrind's warnings we initialise the array below. 
+ memset(rusi, 0, sizeof(*rusi) * ntiles); + x->rdmult = cpi->rd.RDMULT; + +#if CONFIG_LR_MERGE_COEFFS + Vector unit_stack; + aom_vector_setup(&unit_stack, + 1, // resizable capacity + sizeof(struct RstUnitSnapshot)); // element size + Vector unit_indices; + aom_vector_setup(&unit_indices, + 1, // resizable capacity + sizeof(int)); // element size +#endif // CONFIG_LR_MERGE_COEFFS + + RestSearchCtxt rsc; + const int plane_start = AOM_PLANE_U; + const int plane_end = AOM_PLANE_V; + + Vector wienerns_stats; + aom_vector_setup(&wienerns_stats, + 1, // resizable capacity + sizeof(struct RstUnitStats)); // element size + rsc.wienerns_stats = &wienerns_stats; + + uint16_t *luma = NULL; + uint16_t *luma_buf; + const YV12_BUFFER_CONFIG *dgd = &cpi->common.cur_frame->buf; + rsc.luma_stride = dgd->crop_widths[1] + 2 * WIENERNS_UV_BRD; + luma_buf = wienerns_copy_luma_highbd( + dgd->buffers[AOM_PLANE_Y], dgd->crop_heights[AOM_PLANE_Y], + dgd->crop_widths[AOM_PLANE_Y], dgd->strides[AOM_PLANE_Y], &luma, + dgd->crop_heights[1], dgd->crop_widths[1], WIENERNS_UV_BRD, + rsc.luma_stride, cm->seq_params.bit_depth +#if WIENERNS_CROSS_FILT_LUMA_TYPE == 2 + , + cm->seq_params.enable_cfl_ds_filter == 1 +#endif + ); + assert(luma_buf != NULL); + rsc.luma = luma; + + rsc.is_cross_filter_round = 1; + + for (int plane = plane_start; plane <= plane_end; ++plane) { + init_rsc(src, &cpi->common, x, &cpi->sf.lpf_sf, plane, rusi, +#if CONFIG_LR_MERGE_COEFFS + &unit_stack, &unit_indices, +#endif // CONFIG_LR_MERGE_COEFFS + &cpi->trial_frame_rst, &rsc); + + const int plane_ntiles = ntiles; + const RestorationType num_rtypes = + (plane_ntiles > 1) ? 
RESTORE_TYPES : RESTORE_SWITCHABLE_TYPES; + + double best_cost = DBL_MAX; + RestorationType best_cross_rtype = RESTORE_NONE; + +#if CONFIG_FLEXIBLE_RU_SIZE + RestorationInfo *rsi = &cm->rst_info[plane]; + int min_unit_size = rsi->restoration_unit_size; + int max_unit_size = rsi->restoration_unit_size; + int best_unit_size = + min_unit_size; // the best unit_size has been determined at the RD of + // restoring filter, to be optimized. + for (int unit_size = min_unit_size; unit_size <= max_unit_size; + unit_size <<= 1) { + assert(rsi->restoration_unit_size == unit_size); + assert(cm->rst_info[1].restoration_unit_size == + cm->rst_info[2].restoration_unit_size); + + aom_vector_clear(&wienerns_stats); + + av1_reset_restoration_struct(cm, rsi, plane > 0); +#endif // CONFIG_FLEXIBLE_RU_SIZE + if (!cpi->sf.lpf_sf.disable_loop_restoration_chroma || !plane) { + av1_extend_frame(rsc.dgd_buffer, rsc.plane_width, rsc.plane_height, + rsc.dgd_stride, RESTORATION_BORDER, + RESTORATION_BORDER); + + for (RestorationType r = 0; r < num_rtypes; ++r) { + //??????????? to be updated with tool on/off setting + if (r != RESTORE_NONE && r != RESTORE_WIENER_NONSEP) { + // to be updated with flexible tool on/off setting. + continue; + }; + + gather_stats_rest_type(&rsc, r); + + // if (r == RESTORE_WIENER_NONSEP) { + rsc.num_filter_classes = rsc.plane == AOM_PLANE_Y + ? 
NUM_WIENERNS_CLASS_INIT_LUMA + : NUM_WIENERNS_CLASS_INIT_CHROMA; + // } + + double cost = search_rest_type(&rsc, r); + +#if CONFIG_FLEXIBLE_RU_SIZE + if (cost < best_cost) { + best_cost = cost; + best_cross_rtype = r; + best_unit_size = unit_size; + } +#else + if (r == 0 || cost < best_cost) { + best_cost = cost; + best_cross_rtype = r; + } +#endif // CONFIG_FLEXIBLE_RU_SIZE + } + } +#if CONFIG_FLEXIBLE_RU_SIZE + if (rsi->restoration_unit_size == min_unit_size || + best_unit_size == rsi->restoration_unit_size) { + assert(rsi->restoration_unit_size == min_unit_size); +#endif // CONFIG_FLEXIBLE_RU_SIZE + cm->rst_info[plane].frame_cross_restoration_type = best_cross_rtype; + finalize_frame_and_unit_cross_filter_info(best_cross_rtype, + &cm->rst_info[plane], &rsc); +#if CONFIG_FLEXIBLE_RU_SIZE + } + } +#endif // CONFIG_FLEXIBLE_RU_SIZE + } + free(luma_buf); + aom_free(rusi); + aom_vector_destroy(&wienerns_stats); + +#if CONFIG_LR_MERGE_COEFFS + aom_vector_destroy(&unit_stack); + aom_vector_destroy(&unit_indices); +#endif // CONFIG_LR_MERGE_COEFFS +} +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER
diff --git a/av1/encoder/pickrst.h b/av1/encoder/pickrst.h index 8fa2461..d536deb 100644 --- a/av1/encoder/pickrst.h +++ b/av1/encoder/pickrst.h
@@ -191,7 +191,10 @@ * */ void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi); - +#if CONFIG_HIGH_PASS_CROSS_WIENER_FILTER +void av1_pick_cross_filter_restoration(const YV12_BUFFER_CONFIG *sd, + AV1_COMP *cpi); +#endif // CONFIG_HIGH_PASS_CROSS_WIENER_FILTER #ifdef __cplusplus } // extern "C" #endif
diff --git a/av1/encoder/rd.c b/av1/encoder/rd.c index 4a70347..23eb416 100644 --- a/av1/encoder/rd.c +++ b/av1/encoder/rd.c
@@ -64,12 +64,12 @@ static const int use_intra_ext_tx_for_txsize[EXT_TX_SETS_INTRA] [EXT_TX_SIZES] = { { 1, 1, 1, 1 }, // unused -#if CONFIG_ATC_NEWTXSETS +#if CONFIG_ATC { 1, 1, 1, 0 }, #else { 1, 1, 0, 0 }, { 0, 0, 1, 0 }, -#endif // CONFIG_ATC_NEWTXSETS +#endif // CONFIG_ATC }; static const int use_inter_ext_tx_for_txsize[EXT_TX_SETS_INTER] @@ -85,12 +85,12 @@ { // Intra EXT_TX_SET_DCTONLY, -#if CONFIG_ATC_NEWTXSETS +#if CONFIG_ATC EXT_NEW_TX_SET, #else EXT_TX_SET_DTT4_IDTX_1DDCT, EXT_TX_SET_DTT4_IDTX, -#endif // CONFIG_ATC_NEWTXSETS +#endif // CONFIG_ATC }, { // Inter @@ -133,6 +133,17 @@ av1_cost_tokens_from_cdf( mode_costs->do_ext_partition_cost[plane_index][rect_type][i], fc->do_ext_partition_cdf[plane_index][rect_type][i], NULL); +#if CONFIG_UNEVEN_4WAY + av1_cost_tokens_from_cdf( + mode_costs + ->do_uneven_4way_partition_cost[plane_index][rect_type][i], + fc->do_uneven_4way_partition_cdf[plane_index][rect_type][i], NULL); + av1_cost_tokens_from_cdf( + mode_costs + ->uneven_4way_partition_type_cost[plane_index][rect_type][i], + fc->uneven_4way_partition_type_cdf[plane_index][rect_type][i], + NULL); +#endif // CONFIG_UNEVEN_4WAY } } } @@ -164,15 +175,36 @@ mode_costs->partition_cost[plane_index][ctx][part] += mode_costs->rect_type_cost[plane_index][ctx][rect_type]; } - const bool disable_ext_part = !cm->seq_params.enable_ext_partitions; const bool ext_partition_allowed = - !disable_ext_part && + cm->seq_params.enable_ext_partitions && is_ext_partition_allowed(bsize, rect_type, tree_type); if (ext_partition_allowed) { const bool do_ext_partition = (part >= PARTITION_HORZ_3); mode_costs->partition_cost[plane_index][ctx][part] += mode_costs->do_ext_partition_cost[plane_index][rect_type][ctx] [do_ext_partition]; +#if CONFIG_UNEVEN_4WAY + if (do_ext_partition) { + const bool uneven_4way_partition_allowed = + is_uneven_4way_partition_allowed(bsize, rect_type, tree_type); + if (uneven_4way_partition_allowed) { + const bool do_uneven_4way_partition = + (part >= 
PARTITION_HORZ_4A); + mode_costs->partition_cost[plane_index][ctx][part] += + mode_costs->do_uneven_4way_partition_cost + [plane_index][rect_type][ctx][do_uneven_4way_partition]; + if (do_uneven_4way_partition) { + const UNEVEN_4WAY_PART_TYPE uneven_4way_type = + (part == PARTITION_HORZ_4A || part == PARTITION_VERT_4A) + ? UNEVEN_4A + : UNEVEN_4B; + mode_costs->partition_cost[plane_index][ctx][part] += + mode_costs->uneven_4way_partition_type_cost + [plane_index][rect_type][ctx][uneven_4way_type]; + } + } + } +#endif // CONFIG_UNEVEN_4WAY } } } @@ -200,7 +232,14 @@ fc->skip_txfm_cdfs[i], NULL); } +#if CONFIG_EXT_DIR + for (i = 0; i < MRL_INDEX_CONTEXTS; ++i) { + av1_cost_tokens_from_cdf(mode_costs->mrl_index_cost[i], + fc->mrl_index_cdf[i], NULL); + } +#else av1_cost_tokens_from_cdf(mode_costs->mrl_index_cost, fc->mrl_index_cdf, NULL); +#endif // CONFIG_EXT_DIR for (i = 0; i < FSC_MODE_CONTEXTS; ++i) { for (j = 0; j < FSC_BSIZE_CONTEXTS; ++j) { @@ -278,7 +317,7 @@ } } -#if CONFIG_NEW_COLOR_MAP_CODING +#if CONFIG_PALETTE_IMPROVEMENTS for (i = 0; i < PALETTE_ROW_FLAG_CONTEXTS; ++i) { av1_cost_tokens_from_cdf(mode_costs->palette_y_row_flag_cost[i], fc->identity_row_cdf_y[i], NULL); @@ -344,6 +383,24 @@ for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { int s; +#if CONFIG_ATC_DCTX_ALIGNED + int k; + for (k = 0; k < EOB_TX_CTXS; ++k) { + for (s = 1; s < EXT_TX_SETS_INTER; ++s) { +#if CONFIG_ATC_REDUCED_TXSET + if (cm->features.reduced_tx_set_used || + use_inter_ext_tx_for_txsize[s][i]) { +#else + if (use_inter_ext_tx_for_txsize[s][i]) { +#endif // CONFIG_ATC_REDUCED_TXSET + av1_cost_tokens_from_cdf( + mode_costs->inter_tx_type_costs[s][k][i], + fc->inter_ext_tx_cdf[s][k][i], + av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[1][s]]); + } + } + } +#else for (s = 1; s < EXT_TX_SETS_INTER; ++s) { #if CONFIG_ATC_REDUCED_TXSET if (cm->features.reduced_tx_set_used || @@ -356,17 +413,18 @@ av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[1][s]]); } } +#endif // CONFIG_ATC_DCTX_ALIGNED for (s = 1; s 
< EXT_TX_SETS_INTRA; ++s) { -#if CONFIG_ATC_NEWTXSETS +#if CONFIG_ATC int tx_set_type = av1_ext_tx_set_idx_to_type[0][s]; #if CONFIG_ATC_REDUCED_TXSET const int cdf_offset = cm->features.reduced_tx_set_used ? 1 : 0; #endif // CONFIG_ATC_REDUCED_TXSET -#endif // CONFIG_ATC_NEWTXSETS +#endif // CONFIG_ATC if (use_intra_ext_tx_for_txsize[s][i]) { for (j = 0; j < INTRA_MODES; ++j) { av1_cost_tokens_from_cdf( -#if CONFIG_ATC_NEWTXSETS +#if CONFIG_ATC mode_costs->intra_tx_type_costs[s][i][j], #if CONFIG_ATC_REDUCED_TXSET fc->intra_ext_tx_cdf[s + cdf_offset][i][j], @@ -380,7 +438,7 @@ mode_costs->intra_tx_type_costs[s][i][j], fc->intra_ext_tx_cdf[s][i][j], av1_ext_tx_inv_intra[av1_ext_tx_set_idx_to_type[0][s]]); -#endif // CONFIG_ATC_NEWTXSETS +#endif // CONFIG_ATC } } } @@ -402,14 +460,14 @@ #else av1_cost_tokens_from_cdf(mode_costs->intrabc_cost, fc->intrabc_cdf, NULL); #endif // CONFIG_NEW_CONTEXT_MODELING -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT av1_cost_tokens_from_cdf(mode_costs->intrabc_mode_cost, fc->intrabc_mode_cdf, NULL); for (i = 0; i < MAX_REF_BV_STACK_SIZE - 1; ++i) { av1_cost_tokens_from_cdf(mode_costs->intrabc_drl_idx_cost[i], fc->intrabc_drl_idx_cdf[i], NULL); } -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT for (i = 0; i < TX_SIZES; ++i) { av1_cost_tokens_from_cdf(mode_costs->stx_flag_cost[i], fc->stx_cdf[i], @@ -467,7 +525,7 @@ } } -#if CONFIG_CONTEXT_DERIVATION +#if CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT for (j = 0; j < INTRA_INTER_SKIP_TXFM_CONTEXTS; ++j) { for (i = 0; i < INTRA_INTER_CONTEXTS; ++i) { av1_cost_tokens_from_cdf(mode_costs->intra_inter_cost[j][i], @@ -479,7 +537,7 @@ av1_cost_tokens_from_cdf(mode_costs->intra_inter_cost[i], fc->intra_inter_cdf[i], NULL); } -#endif // CONFIG_CONTEXT_DERIVATION +#endif // CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT for (i = 0; i < INTER_SINGLE_MODE_CONTEXTS; ++i) { av1_cost_tokens_from_cdf(mode_costs->inter_single_mode_cost[i], @@ -502,12 +560,12 
@@ fc->drl_cdf[2][i], NULL); } -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT for (i = 0; i < 3; ++i) { av1_cost_tokens_from_cdf(mode_costs->skip_drl_mode_cost[i], fc->skip_drl_cdf[i], NULL); } -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT #if CONFIG_OPTFLOW_REFINEMENT for (i = 0; i < INTER_COMPOUND_MODE_CONTEXTS; ++i) @@ -570,6 +628,14 @@ av1_cost_tokens_from_cdf(mode_costs->wedge_interintra_cost[i], fc->wedge_interintra_cdf[i], NULL); } + +#if CONFIG_REFINEMV + for (i = 0; i < NUM_REFINEMV_CTX; ++i) { + av1_cost_tokens_from_cdf(mode_costs->refinemv_flag_cost[i], + fc->refinemv_flag_cdf[i], NULL); + } +#endif // CONFIG_REFINEMV + #if CONFIG_EXTENDED_WARP_PREDICTION for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) { av1_cost_tokens_from_cdf(mode_costs->obmc_cost[i], fc->obmc_cdf[i], NULL); @@ -584,6 +650,12 @@ fc->warped_causal_warpmv_cdf[i], NULL); } #endif // CONFIG_WARPMV +#if CONFIG_CWG_D067_IMPROVED_WARP + for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) { + av1_cost_tokens_from_cdf(mode_costs->warpmv_with_mvd_flag_cost[i], + fc->warpmv_with_mvd_flag_cdf[i], NULL); + } +#endif // CONFIG_CWG_D067_IMPROVED_WARP #if CONFIG_WARP_REF_LIST for (i = 0; i < 3; i++) { @@ -625,6 +697,14 @@ av1_cost_tokens_from_cdf(mode_costs->comp_group_idx_cost[i], fc->comp_group_idx_cdf[i], NULL); } +#if CONFIG_CWP + for (j = 0; j < MAX_CWP_CONTEXTS; j++) { + for (i = 0; i < MAX_CWP_NUM - 1; ++i) { + av1_cost_tokens_from_cdf(mode_costs->cwp_idx_cost[j][i], + fc->cwp_idx_cdf[j][i], NULL); + } + } +#endif // CONFIG_CWP } } @@ -868,6 +948,20 @@ for (int plane = 0; plane < nplanes; ++plane) { LV_MAP_EOB_COST *pcost = &coeff_costs->eob_costs[eob_multi_size][plane]; +#if CONFIG_ATC_DCTX_ALIGNED + aom_cdf_prob *pcdf; + switch (eob_multi_size) { + case 0: pcdf = fc->eob_flag_cdf16[plane]; break; + case 1: pcdf = fc->eob_flag_cdf32[plane]; break; + case 2: pcdf = fc->eob_flag_cdf64[plane]; break; + case 3: pcdf = 
fc->eob_flag_cdf128[plane]; break; + case 4: pcdf = fc->eob_flag_cdf256[plane]; break; + case 5: pcdf = fc->eob_flag_cdf512[plane]; break; + case 6: pcdf = fc->eob_flag_cdf1024[plane]; break; + default: assert(0 && "Invalid eob_multi_size"); + } + av1_cost_tokens_from_cdf(pcost->eob_cost, pcdf, NULL); +#else for (int ctx = 0; ctx < 2; ++ctx) { aom_cdf_prob *pcdf; switch (eob_multi_size) { @@ -882,6 +976,7 @@ } av1_cost_tokens_from_cdf(pcost->eob_cost[ctx], pcdf, NULL); } +#endif // CONFIG_ATC_DCTX_ALIGNED } } for (int tx_size = 0; tx_size < TX_SIZES; ++tx_size) { @@ -900,7 +995,7 @@ av1_cost_tokens_from_cdf(pcost->base_eob_cost[ctx], fc->coeff_base_eob_cdf[tx_size][plane][ctx], NULL); -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC for (int ctx = 0; ctx < SIG_COEF_CONTEXTS_EOB; ++ctx) av1_cost_tokens_from_cdf(pcost->base_lf_eob_cost[ctx], fc->coeff_base_lf_eob_cdf[tx_size][plane][ctx], @@ -929,7 +1024,12 @@ pcost->base_cost[ctx][7] = pcost->base_cost[ctx][3] - pcost->base_cost[ctx][2]; } -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC +#if CONFIG_ATC_DCTX_ALIGNED + for (int ctx = 0; ctx < SIG_COEF_CONTEXTS_BOB; ++ctx) + av1_cost_tokens_from_cdf(pcost->base_bob_cost[ctx], + fc->coeff_base_bob_cdf[ctx], NULL); +#endif // CONFIG_ATC_DCTX_ALIGNED for (int ctx = 0; ctx < EOB_COEF_CONTEXTS; ++ctx) av1_cost_tokens_from_cdf(pcost->eob_extra_cost[ctx], fc->eob_extra_cdf[tx_size][plane][ctx], NULL); @@ -949,7 +1049,7 @@ } #endif // CONFIG_CONTEXT_DERIVATION -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC for (int ctx = 0; ctx < LF_LEVEL_CONTEXTS; ++ctx) { int br_lf_rate[BR_CDF_SIZE]; int prev_cost_lf = 0; @@ -972,17 +1072,17 @@ pcost->lps_lf_cost[ctx][i] - pcost->lps_lf_cost[ctx][i - 1]; } } -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC for (int ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) { int br_rate[BR_CDF_SIZE]; int prev_cost = 0; int i, j; av1_cost_tokens_from_cdf( -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC br_rate, fc->coeff_br_cdf[plane][ctx], #else br_rate, 
fc->coeff_br_cdf[AOMMIN(tx_size, TX_32X32)][plane][ctx], -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC NULL); // printf("br_rate: "); // for(j = 0; j < BR_CDF_SIZE; j++) @@ -1099,7 +1199,7 @@ #endif ); -#if CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT // Copy the pointer of the dv cost to the mvcost mv_costs->dv_joint_cost = &dv_costs->joint_mv[0]; mv_costs->dv_nmv_cost[0] = dv_costs->dv_costs[0]; @@ -1108,7 +1208,7 @@ (void)mv_costs; #endif } -#elif CONFIG_BVCOST_UPDATE +#elif CONFIG_IBC_BV_IMPROVEMENT void av1_fill_dv_costs(const FRAME_CONTEXT *fc, IntraBCMVCosts *dv_costs) { int *dvcost[2] = { &dv_costs->mv_component[0][MV_MAX], &dv_costs->mv_component[1][MV_MAX] }; @@ -1223,14 +1323,14 @@ #endif if (cm->features.allow_screen_content_tools && -#if !CONFIG_BVCOST_UPDATE +#if !CONFIG_IBC_BV_IMPROVEMENT frame_is_intra_only(cm) && -#endif // !CONFIG_BVCOST_UPDATE +#endif // !CONFIG_IBC_BV_IMPROVEMENT !is_stat_generation_stage(cpi)) { #if CONFIG_FLEX_MVRES fill_dv_costs(&x->dv_costs, cm->fc, mv_costs); #else -#if CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT IntraBCMVCosts *const dv_costs = &x->dv_costs; #else IntraBCMVCosts *const dv_costs = &cpi->dv_costs; @@ -1244,16 +1344,6 @@ dvcost, &cm->fc->ndvc, MV_SUBPEL_NONE); #endif } - - if (!is_stat_generation_stage(cpi)) { - for (int i = 0; i < TRANS_TYPES; ++i) - // IDENTITY: 1 bit - // TRANSLATION: 3 bits - // ROTZOOM: 2 bits - // AFFINE: 3 bits - cpi->gm_info.type_cost[i] = (1 + (i > 0 ? (i == ROTZOOM ? 
1 : 2) : 0)) - << AV1_PROB_COST_SHIFT; - } } static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) { @@ -1635,10 +1725,19 @@ } #endif // CONFIG_TIP const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, NONE_FRAME }; + +#if CONFIG_SEP_COMP_DRL + const MB_MODE_INFO *mbmi = x->e_mbd.mi[0]; + const int_mv ref_mv = + av1_get_ref_mv_from_stack(0, ref_frames, 0, x->mbmi_ext, mbmi); + const int_mv ref_mv1 = + av1_get_ref_mv_from_stack(0, ref_frames, 1, x->mbmi_ext, mbmi); +#else const int_mv ref_mv = av1_get_ref_mv_from_stack(0, ref_frames, 0, x->mbmi_ext); const int_mv ref_mv1 = av1_get_ref_mv_from_stack(0, ref_frames, 1, x->mbmi_ext); +#endif // CONFIG_SEP_COMP_DRL MV pred_mv[MAX_MV_REF_CANDIDATES + 1]; int num_mv_refs = 0; pred_mv[num_mv_refs++] = ref_mv.as_mv;
diff --git a/av1/encoder/rd.h b/av1/encoder/rd.h index 3620853..be21280 100644 --- a/av1/encoder/rd.h +++ b/av1/encoder/rd.h
@@ -81,7 +81,7 @@ double r0; } RD_OPT; -#if !CONFIG_FLEX_MVRES && !CONFIG_BVCOST_UPDATE +#if !CONFIG_FLEX_MVRES && !CONFIG_IBC_BV_IMPROVEMENT typedef struct { // Cost of transmitting the actual motion vector. // mv_component[0][i] is the cost of motion vector with horizontal component @@ -154,6 +154,9 @@ static INLINE void av1_merge_rd_stats(RD_STATS *rd_stats_dst, const RD_STATS *rd_stats_src) { assert(rd_stats_dst->rate != INT_MAX && rd_stats_src->rate != INT_MAX); +#if CONFIG_ATC_DCTX_ALIGNED + if (rd_stats_src->dist == INT64_MAX || rd_stats_src->rate == INT_MAX) return; +#endif // CONFIG_ATC_DCTX_ALIGNED rd_stats_dst->rate = (int)AOMMIN( ((int64_t)rd_stats_dst->rate + (int64_t)rd_stats_src->rate), INT_MAX); if (!rd_stats_dst->zero_rate) @@ -375,7 +378,7 @@ #if CONFIG_FLEX_MVRES void fill_dv_costs(IntraBCMvCosts *dv_costs, const FRAME_CONTEXT *fc, MvCosts *mv_costs); -#elif CONFIG_BVCOST_UPDATE +#elif CONFIG_IBC_BV_IMPROVEMENT void av1_fill_dv_costs(const FRAME_CONTEXT *fc, IntraBCMVCosts *dv_costs); #endif
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c index 88c474d..9c33f8b 100644 --- a/av1/encoder/rdopt.c +++ b/av1/encoder/rdopt.c
@@ -584,7 +584,7 @@ REF_FRAMES * sizeof((*ref_costs_comp)[0])); } else { int intra_inter_ctx = av1_get_intra_inter_context(xd); -#if CONFIG_CONTEXT_DERIVATION +#if CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT const int skip_txfm = xd->mi[0]->skip_txfm[xd->tree_type == CHROMA_PART]; ref_costs_single[INTRA_FRAME_INDEX] = mode_costs->intra_inter_cost[skip_txfm][intra_inter_ctx][0]; @@ -594,7 +594,7 @@ ref_costs_single[INTRA_FRAME_INDEX] = mode_costs->intra_inter_cost[intra_inter_ctx][0]; unsigned int base_cost = mode_costs->intra_inter_cost[intra_inter_ctx][1]; -#endif // CONFIG_CONTEXT_DERIVATION +#endif // CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT #if CONFIG_TIP if (cm->features.tip_frame_mode) { @@ -786,9 +786,12 @@ if (xd->tree_type != CHROMA_PART) av1_copy_mbmi_ext_to_mbmi_ext_frame( &ctx->mbmi_ext_best, x->mbmi_ext, -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SEP_COMP_DRL + xd->mi[0], +#endif // CONFIG_SEP_COMP_DRL +#if CONFIG_SKIP_MODE_ENHANCEMENT xd->mi[0]->skip_mode, -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT av1_ref_frame_type(xd->mi[0]->ref_frame)); ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE]; ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE]; @@ -826,9 +829,9 @@ av1_setup_pred_block(xd, yv12_mb[ref_frame_idx], yv12, sf, sf, num_planes); } -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT if (mbmi->skip_mode) return; -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT // Gets an initial list of candidate vectors from neighbours and orders them av1_find_mv_refs( @@ -1006,7 +1009,10 @@ const PREDICTION_MODE this_mode = mbmi->mode; const MV_REFERENCE_FRAME refs[2] = { COMPACT_INDEX0_NRS(mbmi->ref_frame[0]), COMPACT_INDEX1_NRS(mbmi->ref_frame[1]) }; + +#if !CONFIG_SEP_COMP_DRL const int ref_mv_idx = mbmi->ref_mv_idx; +#endif // !CONFIG_SEP_COMP_DRL #if CONFIG_FLEX_MVRES const MvSubpelPrecision pb_mv_precision = 
mbmi->pb_mv_precision; #endif @@ -1017,7 +1023,12 @@ int valid_precision_mv0 = NUM_MV_PRECISIONS; for (int prev_mv_precision = pb_mv_precision; prev_mv_precision <= mbmi->max_mv_precision; prev_mv_precision++) { +#if CONFIG_SEP_COMP_DRL + if (args->single_newmv_valid[prev_mv_precision][get_ref_mv_idx(mbmi, 0)] + [refs[0]]) { +#else if (args->single_newmv_valid[prev_mv_precision][ref_mv_idx][refs[0]]) { +#endif // CONFIG_SEP_COMP_DRL valid_mv0_found = 1; valid_precision_mv0 = prev_mv_precision; break; @@ -1028,7 +1039,12 @@ int valid_precision_mv1 = NUM_MV_PRECISIONS; for (int prev_mv_precision = pb_mv_precision; prev_mv_precision <= mbmi->max_mv_precision; prev_mv_precision++) { +#if CONFIG_SEP_COMP_DRL + if (args->single_newmv_valid[prev_mv_precision][get_ref_mv_idx(mbmi, 1)] + [refs[1]]) { +#else if (args->single_newmv_valid[prev_mv_precision][ref_mv_idx][refs[1]]) { +#endif // CONFIG_SEP_COMP_DRL valid_mv1_found = 1; valid_precision_mv1 = prev_mv_precision; break; @@ -1043,64 +1059,37 @@ #if CONFIG_OPTFLOW_REFINEMENT if (this_mode == NEW_NEWMV || this_mode == NEW_NEWMV_OPTFLOW) { +#if CONFIG_SKIP_ME_FOR_OPFL_MODES + if (this_mode == NEW_NEWMV_OPTFLOW && + args->comp_newmv_valid[av1_ref_frame_type(mbmi->ref_frame)] +#if CONFIG_SEP_COMP_DRL + [av1_ref_mv_idx_type(mbmi, mbmi->ref_mv_idx)] #else - if (this_mode == NEW_NEWMV) { -#endif // CONFIG_OPTFLOW_REFINEMENT - if (valid_mv0) { -#if CONFIG_FLEX_MVRES + [mbmi->ref_mv_idx] +#endif + [pb_mv_precision]) { cur_mv[0].as_int = - args->single_newmv[valid_precision_mv0][ref_mv_idx][refs[0]].as_int; + args->comp_newmv[av1_ref_frame_type(mbmi->ref_frame)] +#if CONFIG_SEP_COMP_DRL + [av1_ref_mv_idx_type(mbmi, mbmi->ref_mv_idx)] #else - cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int; + [mbmi->ref_mv_idx] #endif - -#if CONFIG_FLEX_MVRES - lower_mv_precision(&cur_mv[0].as_mv, pb_mv_precision); -#endif - clamp_mv_in_range(x, &cur_mv[0], 0 -#if CONFIG_FLEX_MVRES - , - pb_mv_precision -#endif - - ); - } - if 
(valid_mv1) { -#if CONFIG_FLEX_MVRES + [pb_mv_precision][0] + .as_int; cur_mv[1].as_int = - args->single_newmv[valid_precision_mv1][ref_mv_idx][refs[1]].as_int; + args->comp_newmv[av1_ref_frame_type(mbmi->ref_frame)] +#if CONFIG_SEP_COMP_DRL + [av1_ref_mv_idx_type(mbmi, mbmi->ref_mv_idx)] #else - cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int; + [mbmi->ref_mv_idx] #endif -#if CONFIG_FLEX_MVRES - lower_mv_precision(&cur_mv[1].as_mv, pb_mv_precision); -#endif - clamp_mv_in_range(x, &cur_mv[1], 1 -#if CONFIG_FLEX_MVRES - , - pb_mv_precision -#endif - ); - } + [pb_mv_precision][1] + .as_int; - // aomenc1 - if (cpi->sf.inter_sf.comp_inter_joint_search_thresh <= bsize || - !valid_mv0 || !valid_mv1) { - // uint8_t mask_value = 32; - av1_joint_motion_search(cpi, x, bsize, cur_mv, NULL, 0, rate_mv); - } else { *rate_mv = 0; for (int i = 0; i < 2; ++i) { const int_mv ref_mv = av1_get_ref_mv(x, i); -#if CONFIG_C071_SUBBLK_WARPMV - update_mv_precision(ref_mv.as_mv, -#if CONFIG_FLEX_MVRES - pb_mv_precision, -#else - cm->features.allow_high_precision_mv, -#endif - &cur_mv[i].as_mv); -#endif // CONFIG_C071_SUBBLK_WARPMV #if CONFIG_FLEX_MVRES *rate_mv += av1_mv_bit_cost(&cur_mv[i].as_mv, &ref_mv.as_mv, pb_mv_precision, @@ -1116,7 +1105,122 @@ x->mv_costs.mv_cost_stack, MV_COST_WEIGHT); #endif } + } else { +#endif // CONFIG_SKIP_ME_FOR_OPFL_MODES +#else + if (this_mode == NEW_NEWMV) { +#endif // CONFIG_OPTFLOW_REFINEMENT + if (valid_mv0) { +#if CONFIG_FLEX_MVRES + cur_mv[0].as_int = +#if CONFIG_SEP_COMP_DRL + args->single_newmv[valid_precision_mv0][get_ref_mv_idx(mbmi, 0)] + [refs[0]] + .as_int; +#else + args->single_newmv[valid_precision_mv0][ref_mv_idx][refs[0]].as_int; +#endif // CONFIG_SEP_COMP_DRL +#else + cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int; +#endif + +#if CONFIG_FLEX_MVRES + lower_mv_precision(&cur_mv[0].as_mv, pb_mv_precision); +#endif + clamp_mv_in_range(x, &cur_mv[0], 0 +#if CONFIG_FLEX_MVRES + , + pb_mv_precision +#endif + 
+ ); + } + if (valid_mv1) { +#if CONFIG_FLEX_MVRES + cur_mv[1].as_int = +#if CONFIG_SEP_COMP_DRL + args->single_newmv[valid_precision_mv1][get_ref_mv_idx(mbmi, 1)] + [refs[1]] + .as_int; +#else + args->single_newmv[valid_precision_mv1][ref_mv_idx][refs[1]].as_int; +#endif // CONFIG_SEP_COMP_DRL +#else + cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int; +#endif +#if CONFIG_FLEX_MVRES + lower_mv_precision(&cur_mv[1].as_mv, pb_mv_precision); +#endif + clamp_mv_in_range(x, &cur_mv[1], 1 +#if CONFIG_FLEX_MVRES + , + pb_mv_precision +#endif + ); + } + + // aomenc1 + if (cpi->sf.inter_sf.comp_inter_joint_search_thresh <= bsize || + !valid_mv0 || !valid_mv1) { + // uint8_t mask_value = 32; + av1_joint_motion_search(cpi, x, bsize, cur_mv, NULL, 0, rate_mv); + } else { + *rate_mv = 0; + for (int i = 0; i < 2; ++i) { + const int_mv ref_mv = av1_get_ref_mv(x, i); +#if CONFIG_C071_SUBBLK_WARPMV + update_mv_precision(ref_mv.as_mv, +#if CONFIG_FLEX_MVRES + pb_mv_precision, +#else + cm->features.allow_high_precision_mv, +#endif + &cur_mv[i].as_mv); +#endif // CONFIG_C071_SUBBLK_WARPMV +#if CONFIG_FLEX_MVRES + *rate_mv += + av1_mv_bit_cost(&cur_mv[i].as_mv, &ref_mv.as_mv, + pb_mv_precision, &x->mv_costs, MV_COST_WEIGHT +#if CONFIG_ADAPTIVE_MVD + , + 0 +#endif + ); +#else + *rate_mv += av1_mv_bit_cost( + &cur_mv[i].as_mv, &ref_mv.as_mv, x->mv_costs.nmv_joint_cost, + x->mv_costs.mv_cost_stack, MV_COST_WEIGHT); +#endif + } + } +#if CONFIG_SKIP_ME_FOR_OPFL_MODES + if (this_mode == NEW_NEWMV) { + args->comp_newmv_valid[av1_ref_frame_type(mbmi->ref_frame)] +#if CONFIG_SEP_COMP_DRL + [av1_ref_mv_idx_type(mbmi, mbmi->ref_mv_idx)] +#else + [mbmi->ref_mv_idx] +#endif + [pb_mv_precision] = 1; + args->comp_newmv[av1_ref_frame_type(mbmi->ref_frame)] +#if CONFIG_SEP_COMP_DRL + [av1_ref_mv_idx_type(mbmi, mbmi->ref_mv_idx)] +#else + [mbmi->ref_mv_idx] +#endif + [pb_mv_precision][0] + .as_int = cur_mv[0].as_int; + args->comp_newmv[av1_ref_frame_type(mbmi->ref_frame)] +#if 
CONFIG_SEP_COMP_DRL + [av1_ref_mv_idx_type(mbmi, mbmi->ref_mv_idx)] +#else + [mbmi->ref_mv_idx] +#endif + [pb_mv_precision][1] + .as_int = cur_mv[1].as_int; + } } +#endif // CONFIG_SKIP_ME_FOR_OPFL_MODES #if CONFIG_OPTFLOW_REFINEMENT } else if (this_mode == NEAR_NEWMV || this_mode == NEAR_NEWMV_OPTFLOW) { #else @@ -1125,7 +1229,13 @@ if (valid_mv1) { #if CONFIG_FLEX_MVRES cur_mv[1].as_int = - args->single_newmv[valid_precision_mv1][ref_mv_idx][refs[1]].as_int; +#if CONFIG_SEP_COMP_DRL + args->single_newmv[valid_precision_mv1][get_ref_mv_idx(mbmi, 1)] + [refs[1]] + .as_int; +#else + args->single_newmv[valid_precision_mv1][ref_mv_idx][refs[1]].as_int; +#endif // CONFIG_SEP_COMP_DRL #else cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int; #endif @@ -1161,7 +1271,7 @@ #if CONFIG_FLEX_MVRES pb_mv_precision, #else - cm->features.allow_high_precision_mv, + cm->features.allow_high_precision_mv, #endif &cur_mv[1].as_mv); #endif // CONFIG_C071_SUBBLK_WARPMV @@ -1201,8 +1311,8 @@ #if IMPROVED_AMVD if (first_ref_dist != sec_ref_dist) return INT64_MAX; #else - if (first_ref_dist > 2 * sec_ref_dist) return INT64_MAX; - if (sec_ref_dist > 2 * first_ref_dist) return INT64_MAX; + if (first_ref_dist > 2 * sec_ref_dist) return INT64_MAX; + if (sec_ref_dist > 2 * first_ref_dist) return INT64_MAX; #endif // IMPROVED_AMVD const int jmvd_base_ref_list = get_joint_mvd_base_ref_list(cm, mbmi); @@ -1217,10 +1327,15 @@ #if CONFIG_FLEX_MVRES args->single_newmv[jmvd_base_ref_list == 0 ? 
valid_precision_mv0 : valid_precision_mv1] - [ref_mv_idx][refs[jmvd_base_ref_list]] +#if CONFIG_SEP_COMP_DRL + [get_ref_mv_idx(mbmi, 1)] +#else + [ref_mv_idx] +#endif // CONFIG_SEP_COMP_DRL + [refs[jmvd_base_ref_list]] .as_int; #else - args->single_newmv[ref_mv_idx][refs[jmvd_base_ref_list]].as_int; + args->single_newmv[ref_mv_idx][refs[jmvd_base_ref_list]].as_int; #endif #if CONFIG_FLEX_MVRES @@ -1247,7 +1362,13 @@ if (valid_mv0) { #if CONFIG_FLEX_MVRES cur_mv[0].as_int = - args->single_newmv[valid_precision_mv0][ref_mv_idx][refs[0]].as_int; +#if CONFIG_SEP_COMP_DRL + args->single_newmv[valid_precision_mv0][get_ref_mv_idx(mbmi, 0)] + [refs[0]] + .as_int; +#else + args->single_newmv[valid_precision_mv0][ref_mv_idx][refs[0]].as_int; +#endif // CONFIG_SEP_COMP_DRL #else cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int; #endif @@ -1285,7 +1406,7 @@ #if CONFIG_FLEX_MVRES pb_mv_precision, #else - cm->features.allow_high_precision_mv, + cm->features.allow_high_precision_mv, #endif &cur_mv[0].as_mv); #endif // CONFIG_C071_SUBBLK_WARPMV @@ -1338,7 +1459,13 @@ int valid_mv0_found = 0; for (int prev_mv_precision = pb_mv_precision; prev_mv_precision <= mbmi->max_mv_precision; prev_mv_precision++) { - if (args->single_newmv_valid[prev_mv_precision][ref_mv_idx][refs[0]]) { +#if CONFIG_SEP_COMP_DRL + assert(get_ref_mv_idx(mbmi, 1) == get_ref_mv_idx(mbmi, 0)); + if (args->single_newmv_valid[prev_mv_precision][get_ref_mv_idx(mbmi, 0)] + [refs[0]]) { +#else + if (args->single_newmv_valid[prev_mv_precision][ref_mv_idx][refs[0]]) { +#endif // CONFIG_SEP_COMP_DRL valid_mv0_found = 1; valid_precision_mv0 = prev_mv_precision; break; @@ -1353,7 +1480,13 @@ assert(valid_precision_mv0 > pb_mv_precision && valid_precision_mv0 < NUM_MV_PRECISIONS); start_mv.as_int = - args->single_newmv[valid_precision_mv0][ref_mv_idx][refs[0]].as_int; +#if CONFIG_SEP_COMP_DRL + args->single_newmv[valid_precision_mv0][get_ref_mv_idx(mbmi, 0)] + [refs[0]] + .as_int; +#else + 
args->single_newmv[valid_precision_mv0][ref_mv_idx][refs[0]].as_int; +#endif // CONFIG_SEP_COMP_DRL lower_mv_precision(&start_mv.as_mv, pb_mv_precision); clamp_mv_in_range(x, &start_mv, 0, pb_mv_precision); @@ -1363,15 +1496,27 @@ } else { #endif int search_range = INT_MAX; - if (cpi->sf.mv_sf.reduce_search_range && mbmi->ref_mv_idx > 0) { +#if CONFIG_SEP_COMP_DRL + if (cpi->sf.mv_sf.reduce_search_range && mbmi->ref_mv_idx[0] > 0) { +#else + if (cpi->sf.mv_sf.reduce_search_range && mbmi->ref_mv_idx > 0) { +#endif // CONFIG_SEP_COMP_DRL const MV ref_mv = av1_get_ref_mv(x, ref_idx).as_mv; int min_mv_diff = INT_MAX; int best_match = -1; MV best_mv1 = { 0 }; - for (int idx = 0; idx < mbmi->ref_mv_idx; ++idx) { +#if CONFIG_SEP_COMP_DRL + assert(ref_idx == 0); + for (int idx = 0; idx < mbmi->ref_mv_idx[ref_idx]; ++idx) { MV prev_ref_mv = av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame, - idx, x->mbmi_ext) + idx, x->mbmi_ext, mbmi) .as_mv; +#else + for (int idx = 0; idx < mbmi->ref_mv_idx; ++idx) { + MV prev_ref_mv = av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame, + idx, x->mbmi_ext) + .as_mv; +#endif // CONFIG_SEP_COMP_DRL const int ref_mv_diff = AOMMAX(abs(ref_mv.row - prev_ref_mv.row), abs(ref_mv.col - prev_ref_mv.col)); @@ -1411,7 +1556,13 @@ } } av1_single_motion_search(cpi, x, bsize, ref_idx, rate_mv, search_range, - mode_info, &best_mv); + mode_info, &best_mv +#if CONFIG_WARPMV && CONFIG_CWG_D067_IMPROVED_WARP + , + NULL +#endif // CONFIG_WARPMV && CONFIG_CWG_D067_IMPROVED_WARP + + ); #if CONFIG_FLEX_MVRES } @@ -1420,9 +1571,18 @@ if (best_mv.as_int == INVALID_MV) return INT64_MAX; #if CONFIG_FLEX_MVRES - args->single_newmv[pb_mv_precision][ref_mv_idx][refs[0]] = best_mv; - args->single_newmv_rate[pb_mv_precision][ref_mv_idx][refs[0]] = *rate_mv; - args->single_newmv_valid[pb_mv_precision][ref_mv_idx][refs[0]] = 1; +#if CONFIG_SEP_COMP_DRL + args->single_newmv[pb_mv_precision][get_ref_mv_idx(mbmi, 0)][refs[0]] = + best_mv; + 
args->single_newmv_rate[pb_mv_precision][get_ref_mv_idx(mbmi, 0)][refs[0]] = + *rate_mv; + args->single_newmv_valid[pb_mv_precision][get_ref_mv_idx(mbmi, 0)] + [refs[0]] = 1; +#else + args->single_newmv[pb_mv_precision][ref_mv_idx][refs[0]] = best_mv; + args->single_newmv_rate[pb_mv_precision][ref_mv_idx][refs[0]] = *rate_mv; + args->single_newmv_valid[pb_mv_precision][ref_mv_idx][refs[0]] = 1; +#endif // CONFIG_SEP_COMP_DRL #else args->single_newmv[ref_mv_idx][refs[0]] = best_mv; args->single_newmv_rate[ref_mv_idx][refs[0]] = *rate_mv; @@ -1451,7 +1611,11 @@ const ModeCosts *mode_costs) { #if CONFIG_WARP_REF_LIST (void)xd; - if (!allow_warp_parameter_signaling(mbmi)) { + if (!allow_warp_parameter_signaling( +#if CONFIG_CWG_D067_IMPROVED_WARP + cm, +#endif // CONFIG_CWG_D067_IMPROVED_WARP + mbmi)) { return 0; } #endif // CONFIG_WARP_REF_LIST @@ -1650,7 +1814,11 @@ #if CONFIG_CROSS_CHROMA_TX CctxType best_cctx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE]; #endif // CONFIG_CROSS_CHROMA_TX - const int rate_mv0 = *rate_mv; + const int rate_mv0 = +#if CONFIG_WARPMV + this_mode == WARPMV ? 
0 : +#endif + *rate_mv; #if !CONFIG_EXTENDED_WARP_PREDICTION const int interintra_allowed = cm->seq_params.enable_interintra_compound && is_interintra_allowed(mbmi); @@ -1670,6 +1838,9 @@ #if CONFIG_WARP_REF_LIST mbmi->warp_ref_idx = 0; mbmi->max_num_warp_candidates = 0; +#if CONFIG_CWG_D067_IMPROVED_WARP + mbmi->warpmv_with_mvd_flag = 0; +#endif // CONFIG_CWG_D067_IMPROVED_WARP #endif // CONFIG_WARP_REF_LIST #if CONFIG_EXTENDED_WARP_PREDICTION int allowed_motion_modes = motion_mode_allowed( @@ -1700,6 +1871,13 @@ } #endif // CONFIG_EXTENDED_WARP_PREDICTION +#if CONFIG_CWG_D067_IMPROVED_WARP + int_mv previous_mvs[MAX_WARP_REF_CANDIDATES]; + for (int w_ref_idx = 0; w_ref_idx < MAX_WARP_REF_CANDIDATES; w_ref_idx++) { + previous_mvs[w_ref_idx].as_int = INVALID_MV; + } +#endif // CONFIG_CWG_D067_IMPROVED_WARP + int num_rd_check = 0; const MB_MODE_INFO base_mbmi = *mbmi; MB_MODE_INFO best_mbmi; #if CONFIG_C071_SUBBLK_WARPMV @@ -1763,10 +1941,14 @@ || is_warpmv_warp_causal #endif // CONFIG_WARPMV ) { - max_warp_ref_idx = - (base_mbmi.mode == GLOBALMV || base_mbmi.mode == NEARMV) - ? 1 - : MAX_WARP_REF_CANDIDATES; + max_warp_ref_idx = (base_mbmi.mode == GLOBALMV || base_mbmi.mode == NEARMV +#if CONFIG_CWG_D067_IMPROVED_WARP + || base_mbmi.mode == AMVDNEWMV +#endif // CONFIG_CWG_D067_IMPROVED_WARP + + ) + ? 
1 + : MAX_WARP_REF_CANDIDATES; #if CONFIG_WARPMV if (is_warpmv_warp_causal) { max_warp_ref_idx = MAX_WARP_REF_CANDIDATES; @@ -1797,33 +1979,38 @@ if (is_warpmv_warp_causal && warp_ref_idx >= valid_num_candidates) continue; #endif // CONFIG_WARPMV +#if CONFIG_CWG_D067_IMPROVED_WARP + for (int warpmv_with_mvd_flag = 0; + warpmv_with_mvd_flag < (1 + (base_mbmi.mode == WARPMV)); + warpmv_with_mvd_flag++) { +#endif // CONFIG_CWG_D067_IMPROVED_WARP #endif // CONFIG_WARP_REF_LIST - int tmp_rate2 = rate2_nocoeff; - int tmp_rate_mv = rate_mv0; + int tmp_rate2 = rate2_nocoeff; + int tmp_rate_mv = rate_mv0; - *mbmi = base_mbmi; + *mbmi = base_mbmi; #if CONFIG_C071_SUBBLK_WARPMV - update_submi(xd, cm, base_submi, bsize); + update_submi(xd, cm, base_submi, bsize); #endif // CONFIG_C071_SUBBLK_WARPMV #if CONFIG_WARP_REF_LIST - mbmi->warp_ref_idx = warp_ref_idx; - mbmi->max_num_warp_candidates = (mode_index == WARP_DELTA + mbmi->warp_ref_idx = warp_ref_idx; + mbmi->max_num_warp_candidates = (mode_index == WARP_DELTA #if CONFIG_WARPMV - || is_warpmv_warp_causal + || is_warpmv_warp_causal #endif // CONFIG_WARPMV - ) - ? max_warp_ref_idx - : 0; - assert(valid_num_candidates <= mbmi->max_num_warp_candidates); + ) + ? 
max_warp_ref_idx + : 0; + assert(valid_num_candidates <= mbmi->max_num_warp_candidates); #endif // CONFIG_WARP_REF_LIST #if CONFIG_EXTENDED_WARP_PREDICTION - mbmi->motion_mode = (MOTION_MODE)mode_index; - if (mbmi->motion_mode != INTERINTRA) { - assert(mbmi->ref_frame[1] != INTRA_FRAME); - } + mbmi->motion_mode = (MOTION_MODE)mode_index; + if (mbmi->motion_mode != INTERINTRA) { + assert(mbmi->ref_frame[1] != INTRA_FRAME); + } #else if (is_interintra_mode) { // Only use SIMPLE_TRANSLATION for interintra @@ -1834,312 +2021,279 @@ } #endif // CONFIG_EXTENDED_WARP_PREDICTION +#if CONFIG_CWG_D067_IMPROVED_WARP + if (warpmv_with_mvd_flag && !allow_warpmv_with_mvd_coding(cm, mbmi)) + continue; + mbmi->warpmv_with_mvd_flag = warpmv_with_mvd_flag; +#endif // CONFIG_CWG_D067_IMPROVED_WARP + #if CONFIG_WARPMV - // Only WARP_DELTA and WARPED_CAUSAL are supported for WARPMV mode - assert(IMPLIES(mbmi->mode == WARPMV, - mbmi->motion_mode == WARP_DELTA || is_warpmv_warp_causal)); + // Only WARP_DELTA and WARPED_CAUSAL are supported for WARPMV mode + assert(IMPLIES(mbmi->mode == WARPMV, mbmi->motion_mode == WARP_DELTA || + is_warpmv_warp_causal)); #endif // CONFIG_WARPMV - // Do not search OBMC if the probability of selecting it is below a - // predetermined threshold for this update_type and block size. - const FRAME_UPDATE_TYPE update_type = - get_frame_update_type(&cpi->gf_group); - const int prune_obmc = cpi->frame_probs.obmc_probs[update_type][bsize] < - cpi->sf.inter_sf.prune_obmc_prob_thresh; + // Do not search OBMC if the probability of selecting it is below a + // predetermined threshold for this update_type and block size. 
+ const FRAME_UPDATE_TYPE update_type = + get_frame_update_type(&cpi->gf_group); + const int prune_obmc = cpi->frame_probs.obmc_probs[update_type][bsize] < + cpi->sf.inter_sf.prune_obmc_prob_thresh; #if CONFIG_EXTENDED_WARP_PREDICTION - bool enable_obmc = - (cm->features.enabled_motion_modes & (1 << OBMC_CAUSAL)) != 0; + bool enable_obmc = + (cm->features.enabled_motion_modes & (1 << OBMC_CAUSAL)) != 0; #else bool enable_obmc = cpi->oxcf.motion_mode_cfg.enable_obmc; #endif // CONFIG_EXTENDED_WARP_PREDICTION - if ((!enable_obmc || cpi->sf.inter_sf.disable_obmc || prune_obmc) && - mbmi->motion_mode == OBMC_CAUSAL) - continue; + if ((!enable_obmc || cpi->sf.inter_sf.disable_obmc || prune_obmc) && + mbmi->motion_mode == OBMC_CAUSAL) + continue; - if (is_warp_mode(mbmi->motion_mode)) { - mbmi->interp_fltr = av1_unswitchable_filter(interp_filter); - } + if (is_warp_mode(mbmi->motion_mode)) { + mbmi->interp_fltr = av1_unswitchable_filter(interp_filter); + } #if CONFIG_EXTENDED_WARP_PREDICTION - if (mbmi->motion_mode == SIMPLE_TRANSLATION) { + if (mbmi->motion_mode == SIMPLE_TRANSLATION) { #else if (mbmi->motion_mode == SIMPLE_TRANSLATION && !is_interintra_mode) { #endif // CONFIG_EXTENDED_WARP_PREDICTION // SIMPLE_TRANSLATION mode: no need to recalculate. 
// The prediction is calculated before motion_mode_rd() is called in // handle_inter_mode() - } else if (mbmi->motion_mode == OBMC_CAUSAL) { - // OBMC_CAUSAL not allowed for compound prediction - assert(!is_comp_pred); - if (this_mode == NEWMV) { - av1_single_motion_search(cpi, x, bsize, 0, &tmp_rate_mv, INT_MAX, - NULL, &mbmi->mv[0]); - tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv; - } - // Build the inter predictor by blending the predictor corresponding to - // this MV, and the neighboring blocks using the OBMC model - av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize, - 0, av1_num_planes(cm) - 1); - av1_build_obmc_inter_prediction( - cm, xd, args->above_pred_buf, args->above_pred_stride, - args->left_pred_buf, args->left_pred_stride); - } else if (mbmi->motion_mode == WARPED_CAUSAL) { - int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE]; + } else if (mbmi->motion_mode == OBMC_CAUSAL) { + // OBMC_CAUSAL not allowed for compound prediction + assert(!is_comp_pred); + if (this_mode == NEWMV) { + av1_single_motion_search(cpi, x, bsize, 0, &tmp_rate_mv, INT_MAX, + NULL, &mbmi->mv[0] +#if CONFIG_CWG_D067_IMPROVED_WARP && CONFIG_WARPMV + , + NULL +#endif // CONFIG_CWG_D067_IMPROVED_WARP && CONFIG_WARPMV + ); + tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv; + } + // Build the inter predictor by blending the predictor + // corresponding to this MV, and the neighboring blocks using the + // OBMC model + av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize, + 0, av1_num_planes(cm) - 1); + av1_build_obmc_inter_prediction( + cm, xd, args->above_pred_buf, args->above_pred_stride, + args->left_pred_buf, args->left_pred_stride); + } else if (mbmi->motion_mode == WARPED_CAUSAL) { + int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE]; #if CONFIG_EXTENDED_WARP_PREDICTION - mbmi->wm_params[0].wmtype = DEFAULT_WMTYPE; + mbmi->wm_params[0].wmtype = DEFAULT_WMTYPE; #else mbmi->wm_params.wmtype = DEFAULT_WMTYPE; #endif // 
CONFIG_EXTENDED_WARP_PREDICTION #if CONFIG_WARPMV +#if CONFIG_CWG_D067_IMPROVED_WARP + int_mv warp_ref_mv = mbmi->mv[0]; +#endif // CONFIG_CWG_D067_IMPROVED_WARP // Build the motion vector of the WARPMV mode - if (mbmi->mode == WARPMV) { - WarpedMotionParams ref_model = - mbmi_ext - ->warp_param_stack[av1_ref_frame_type(mbmi->ref_frame)] - [mbmi->warp_ref_idx] - .wm_params; - mbmi->mv[0] = get_mv_from_wrl(xd, &ref_model, + if (mbmi->mode == WARPMV) { + WarpedMotionParams ref_model = + mbmi_ext + ->warp_param_stack[av1_ref_frame_type(mbmi->ref_frame)] + [mbmi->warp_ref_idx] + .wm_params; + mbmi->mv[0] = get_mv_from_wrl( + xd, &ref_model, #if CONFIG_FLEX_MVRES - MV_PRECISION_ONE_EIGHTH_PEL, +#if CONFIG_CWG_D067_IMPROVED_WARP + mbmi->warpmv_with_mvd_flag ? mbmi->pb_mv_precision : +#endif // CONFIG_CWG_D067_IMPROVED_WARP + MV_PRECISION_ONE_EIGHTH_PEL, #else - 1, 0, + 1, 0, #endif - bsize, xd->mi_col, xd->mi_row); - if (!is_warp_candidate_inside_of_frame(cm, xd, mbmi->mv[0])) continue; + bsize, xd->mi_col, xd->mi_row); + + if (!is_warp_candidate_inside_of_frame(cm, xd, mbmi->mv[0])) + continue; #if CONFIG_FLEX_MVRES - assert(mbmi->pb_mv_precision == mbmi->max_mv_precision); + assert(mbmi->pb_mv_precision == mbmi->max_mv_precision); #endif - } + +#if CONFIG_CWG_D067_IMPROVED_WARP + warp_ref_mv.as_int = mbmi->mv[0].as_int; + // search MVD if mbmi->warpmv_with_mvd_flag is used. 
+ if (mbmi->warpmv_with_mvd_flag) { + if (previous_mvs[mbmi->warp_ref_idx].as_int == INVALID_MV) { + int tmp_trans_ratemv = 0; + av1_single_motion_search(cpi, x, bsize, 0, &tmp_trans_ratemv, + 16, NULL, &mbmi->mv[0], &warp_ref_mv); + previous_mvs[mbmi->warp_ref_idx].as_int = mbmi->mv[0].as_int; + } else { + mbmi->mv[0].as_int = previous_mvs[mbmi->warp_ref_idx].as_int; + } + } +#endif // CONFIG_CWG_D067_IMPROVED_WARP + } #endif // CONFIG_WARPMV - memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0)); - memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0)); - // Select the samples according to motion vector difference - if (mbmi->num_proj_ref > 1) { - mbmi->num_proj_ref = av1_selectSamples( - &mbmi->mv[0].as_mv, pts, pts_inref, mbmi->num_proj_ref, bsize); - } + memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0)); + memcpy(pts_inref, pts_inref0, + total_samples * 2 * sizeof(*pts_inref0)); + // Select the samples according to motion vector difference + if (mbmi->num_proj_ref > 1) { + mbmi->num_proj_ref = av1_selectSamples( + &mbmi->mv[0].as_mv, pts, pts_inref, mbmi->num_proj_ref, bsize); + } - // Compute the warped motion parameters with a least squares fit - // using the collected samples + // Compute the warped motion parameters with a least squares fit + // using the collected samples #if CONFIG_EXTENDED_WARP_PREDICTION - if (!av1_find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize, - mbmi->mv[0].as_mv, &mbmi->wm_params[0], mi_row, - mi_col)) { + if (!av1_find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize, + mbmi->mv[0].as_mv, &mbmi->wm_params[0], + mi_row, mi_col)) { #else if (!av1_find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize, mbmi->mv[0].as_mv, &mbmi->wm_params, mi_row, mi_col)) { #endif // CONFIG_EXTENDED_WARP_PREDICTION - assert(!is_comp_pred); - if (this_mode == NEWMV + assert(!is_comp_pred); + if ((this_mode == NEWMV #if CONFIG_FLEX_MVRES - && (mbmi->pb_mv_precision >= MV_PRECISION_ONE_PEL) + && (mbmi->pb_mv_precision 
>= MV_PRECISION_ONE_PEL)) #endif - ) { - // Refine MV for NEWMV mode - const int_mv mv0 = mbmi->mv[0]; +#if CONFIG_CWG_D067_IMPROVED_WARP + || mbmi->warpmv_with_mvd_flag +#endif // CONFIG_CWG_D067_IMPROVED_WARP + ) { + // Refine MV for NEWMV mode + const int_mv mv0 = +#if CONFIG_CWG_D067_IMPROVED_WARP + mbmi->mode == WARPMV ? warp_ref_mv : +#endif // CONFIG_CWG_D067_IMPROVED_WARP + mbmi->mv[0]; + const int_mv ref_mv = +#if CONFIG_CWG_D067_IMPROVED_WARP + mbmi->warpmv_with_mvd_flag ? warp_ref_mv : +#endif // CONFIG_CWG_D067_IMPROVED_WARP - const int_mv ref_mv = av1_get_ref_mv(x, 0); + av1_get_ref_mv(x, 0); #if CONFIG_FLEX_MVRES - const MvSubpelPrecision pb_mv_precision = mbmi->pb_mv_precision; + const MvSubpelPrecision pb_mv_precision = mbmi->pb_mv_precision; #endif - SUBPEL_MOTION_SEARCH_PARAMS ms_params; - av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize, - &ref_mv.as_mv, + SUBPEL_MOTION_SEARCH_PARAMS ms_params; + av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize, + &ref_mv.as_mv, #if CONFIG_FLEX_MVRES - pb_mv_precision, + pb_mv_precision, #endif - NULL); - - // Refine MV in a small range. - av1_refine_warped_mv(xd, cm, &ms_params, bsize, pts0, pts_inref0, - total_samples, - cpi->sf.mv_sf.warp_search_method, - cpi->sf.mv_sf.warp_search_iters); - - if (mv0.as_int != mbmi->mv[0].as_int) { - // Keep the refined MV and WM parameters. + NULL); + // Refine MV in a small range. + av1_refine_warped_mv(xd, cm, &ms_params, bsize, pts0, pts_inref0, + total_samples, + cpi->sf.mv_sf.warp_search_method, + cpi->sf.mv_sf.warp_search_iters); + if (mv0.as_int != mbmi->mv[0].as_int +#if CONFIG_CWG_D067_IMPROVED_WARP + || mbmi->warpmv_with_mvd_flag +#endif // CONFIG_CWG_D067_IMPROVED_WARP + ) { + // Keep the refined MV and WM parameters. 
#if CONFIG_FLEX_MVRES - tmp_rate_mv = - av1_mv_bit_cost(&mbmi->mv[0].as_mv, &ref_mv.as_mv, - pb_mv_precision, &x->mv_costs, MV_COST_WEIGHT + tmp_rate_mv = av1_mv_bit_cost( + &mbmi->mv[0].as_mv, &ref_mv.as_mv, pb_mv_precision, + &x->mv_costs, MV_COST_WEIGHT #if CONFIG_ADAPTIVE_MVD - , - ms_params.mv_cost_params.is_adaptive_mvd + , + ms_params.mv_cost_params.is_adaptive_mvd #endif - ); + ); #else tmp_rate_mv = av1_mv_bit_cost( &mbmi->mv[0].as_mv, &ref_mv.as_mv, x->mv_costs.nmv_joint_cost, x->mv_costs.mv_cost_stack, MV_COST_WEIGHT); #endif - tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv; + tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv; +#if CONFIG_CWG_D067_IMPROVED_WARP + assert( + IMPLIES(mbmi->mode == WARPMV, mbmi->warpmv_with_mvd_flag)); +#endif // CONFIG_CWG_D067_IMPROVED_WARP + } } - } #if CONFIG_C071_SUBBLK_WARPMV #if CONFIG_EXTENDED_WARP_PREDICTION - assign_warpmv(cm, xd->submi, bsize, &mbmi->wm_params[0], mi_row, - mi_col); + assign_warpmv(cm, xd->submi, bsize, &mbmi->wm_params[0], mi_row, + mi_col); #else assign_warpmv(cm, xd->submi, bsize, &mbmi->wm_params, mi_row, mi_col); #endif // CONFIG_EXTENDED_WARP_PREDICTION #endif // CONFIG_C071_SUBBLK_WARPMV // Build the warped predictor - av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0, - av1_num_planes(cm) - 1); - } else { - continue; - } + av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, + 0, av1_num_planes(cm) - 1); + } else { + continue; + } + #if CONFIG_EXTENDED_WARP_PREDICTION - } else if (mbmi->motion_mode == INTERINTRA) { + } else if (mbmi->motion_mode == INTERINTRA) { #else } else if (is_interintra_mode) { #endif // CONFIG_EXTENDED_WARP_PREDICTION - const int ret = - av1_handle_inter_intra_mode(cpi, x, bsize, mbmi, args, ref_best_rd, - &tmp_rate_mv, &tmp_rate2, orig_dst); - if (ret < 0) continue; + const int ret = av1_handle_inter_intra_mode(cpi, x, bsize, mbmi, args, + ref_best_rd, &tmp_rate_mv, + &tmp_rate2, orig_dst); + if (ret < 0) continue; #if 
CONFIG_EXTENDED_WARP_PREDICTION - } else if (mbmi->motion_mode == WARP_DELTA) { + } else if (mbmi->motion_mode == WARP_DELTA) { #if CONFIG_FLEX_MVRES - if (mbmi->mode == NEWMV && - mbmi->pb_mv_precision < MV_PRECISION_ONE_PEL) { - // Don't bother with warp modes for MV precisions >1px - continue; - } -#endif -#if CONFIG_WARPMV - // Build the motion vector of the WARPMV mode - if (mbmi->mode == WARPMV) { - WarpedMotionParams ref_model = - mbmi_ext - ->warp_param_stack[av1_ref_frame_type(mbmi->ref_frame)] - [mbmi->warp_ref_idx] - .wm_params; - mbmi->mv[0] = get_mv_from_wrl(xd, &ref_model, -#if CONFIG_FLEX_MVRES - MV_PRECISION_ONE_EIGHTH_PEL, -#else - 1, 0, -#endif - bsize, xd->mi_col, xd->mi_row); -#if CONFIG_FLEX_MVRES - assert(mbmi->pb_mv_precision == mbmi->max_mv_precision); -#endif - if (!is_warp_candidate_inside_of_frame(cm, xd, mbmi->mv[0])) continue; - } -#endif // CONFIG_WARPMV - int_mv mv0 = mbmi->mv[0]; - const int_mv ref_mv = av1_get_ref_mv(x, 0); - SUBPEL_MOTION_SEARCH_PARAMS ms_params; - av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize, - &ref_mv.as_mv, -#if CONFIG_FLEX_MVRES - mbmi->pb_mv_precision, -#endif - NULL); - int valid = 0; -#if CONFIG_WARP_REF_LIST - if (!allow_warp_parameter_signaling(mbmi)) { - // Default parameters are not searched if the delta is not signalled - if (mbmi_ext - ->warp_param_stack[av1_ref_frame_type(mbmi->ref_frame)] - [mbmi->warp_ref_idx] - .proj_type == PROJ_DEFAULT) - continue; - valid = av1_refine_mv_for_base_param_warp_model( - cm, xd, mbmi, mbmi_ext, &ms_params, - cpi->sf.mv_sf.warp_search_method, - cpi->sf.mv_sf.warp_search_iters); - } else { -#endif // CONFIG_WARP_REF_LIST - - valid = av1_pick_warp_delta( - cm, xd, mbmi, mbmi_ext, &ms_params, &x->mode_costs -#if CONFIG_WARP_REF_LIST - , - mbmi_ext->warp_param_stack[av1_ref_frame_type(mbmi->ref_frame)] -#endif // CONFIG_WARP_REF_LIST - - ); - -#if CONFIG_WARP_REF_LIST - } -#endif // CONFIG_WARP_REF_LIST - - if (!valid) { - continue; - } - - // If we changed 
the MV, update costs - if (mv0.as_int != mbmi->mv[0].as_int) { - // Keep the refined MV and WM parameters. -#if CONFIG_FLEX_MVRES - tmp_rate_mv = av1_mv_bit_cost(&mbmi->mv[0].as_mv, &ref_mv.as_mv, - mbmi->pb_mv_precision, &x->mv_costs, - MV_COST_WEIGHT -#if CONFIG_ADAPTIVE_MVD - , - ms_params.mv_cost_params.is_adaptive_mvd -#endif - ); -#else - tmp_rate_mv = av1_mv_bit_cost( - &mbmi->mv[0].as_mv, &ref_mv.as_mv, x->mv_costs.nmv_joint_cost, - x->mv_costs.mv_cost_stack, MV_COST_WEIGHT); -#endif - - tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv; -#if CONFIG_WARPMV - assert(mbmi->mode == NEWMV); -#endif // CONFIG_WARPMV - } -#if CONFIG_C071_SUBBLK_WARPMV - assign_warpmv(cm, xd->submi, bsize, &mbmi->wm_params[0], mi_row, - mi_col); -#endif // CONFIG_C071_SUBBLK_WARPMV - av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0, - av1_num_planes(cm) - 1); - } else if (mbmi->motion_mode == WARP_EXTEND) { -#if CONFIG_FLEX_MVRES - if (mbmi->mode == NEWMV && - mbmi->pb_mv_precision < MV_PRECISION_ONE_PEL) { - // Don't bother with warp modes for MV precisions >1px - continue; - } -#endif - - CANDIDATE_MV *neighbor = - &mbmi_ext->ref_mv_stack[mbmi->ref_frame[0]][mbmi->ref_mv_idx]; - POSITION base_pos = { 0, 0 }; - if (!get_extend_base_pos(cm, xd, mbmi, neighbor->row_offset, - neighbor->col_offset, &base_pos)) { - continue; - } - const MB_MODE_INFO *neighbor_mi = - xd->mi[base_pos.row * xd->mi_stride + base_pos.col]; - - if (mbmi->mode == NEARMV) { - assert(is_warp_mode(neighbor_mi->motion_mode)); - if (neighbor_mi->wm_params[0].invalid) { - // Skip invalid models + if (mbmi->mode == NEWMV && + mbmi->pb_mv_precision < MV_PRECISION_ONE_PEL) { + // Don't bother with warp modes for MV precisions >1px continue; } - mbmi->wm_params[0] = neighbor_mi->wm_params[0]; - } else { - assert(mbmi->mode == NEWMV); +#endif +#if CONFIG_WARPMV +#if CONFIG_CWG_D067_IMPROVED_WARP + int_mv wrl_ref_mv = mbmi->mv[0]; +#endif // CONFIG_CWG_D067_IMPROVED_WARP - bool neighbor_is_above = - 
xd->up_available && (base_pos.row == -1 && base_pos.col >= 0); + // Build the motion vector of the WARPMV mode + if (mbmi->mode == WARPMV) { + WarpedMotionParams ref_model = + mbmi_ext + ->warp_param_stack[av1_ref_frame_type(mbmi->ref_frame)] + [mbmi->warp_ref_idx] + .wm_params; + mbmi->mv[0] = get_mv_from_wrl( + xd, &ref_model, +#if CONFIG_FLEX_MVRES +#if CONFIG_CWG_D067_IMPROVED_WARP + mbmi->warpmv_with_mvd_flag ? mbmi->pb_mv_precision : +#endif // CONFIG_CWG_D067_IMPROVED_WARP - WarpedMotionParams neighbor_params; - av1_get_neighbor_warp_model(cm, xd, neighbor_mi, &neighbor_params); - - const int_mv ref_mv = av1_get_ref_mv(x, 0); + MV_PRECISION_ONE_EIGHTH_PEL, +#else + 1, 0, +#endif + bsize, xd->mi_col, xd->mi_row); +#if CONFIG_FLEX_MVRES + assert(mbmi->pb_mv_precision == mbmi->max_mv_precision); +#endif + if (!is_warp_candidate_inside_of_frame(cm, xd, mbmi->mv[0])) + continue; +#if CONFIG_CWG_D067_IMPROVED_WARP + wrl_ref_mv = mbmi->mv[0]; +#endif // CONFIG_CWG_D067_IMPROVED_WARP + } +#endif // CONFIG_WARPMV + int_mv mv0 = mbmi->mv[0]; + const int_mv ref_mv = +#if CONFIG_CWG_D067_IMPROVED_WARP + (mbmi->mode == WARPMV) ? wrl_ref_mv : +#endif // CONFIG_CWG_D067_IMPROVED_WARP + av1_get_ref_mv(x, 0); SUBPEL_MOTION_SEARCH_PARAMS ms_params; av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize, &ref_mv.as_mv, @@ -2147,97 +2301,62 @@ mbmi->pb_mv_precision, #endif NULL); - const SubpelMvLimits *mv_limits = &ms_params.mv_limits; - - // Note: The warp filter is only able to accept small deviations from - // the identity transform, up to 1/4 pel of shift per pixel. - // Especially for small blocks, it is likely that the motion vector - // estimated by the newmv search will be too distant from the - // neighbor's motion vectors for the warp filter to be applied. - // However, we don't want to give up the benefits of a good initial - // MV in the cases where a suitable one has already been found. 
- // - // To get the best of both worlds, we run an initial test to see - // if the motion vector found by newmv search gives a valid motion - // model. If so, we use that as the starting point for refinement. - // Otherwise, we use the MV which is predicted by the neighbor's - // warp model - // TODO(rachelbarker): Do we need this logic? - - // Backup initial motion vector and resulting warp params - int_mv mv0 = mbmi->mv[0]; - WarpedMotionParams wm_params0; - if (!av1_extend_warp_model(neighbor_is_above, bsize, - &mbmi->mv[0].as_mv, mi_row, mi_col, - &neighbor_params, &wm_params0)) { - // NEWMV search produced a valid model - mbmi->wm_params[0] = wm_params0; + int valid = 0; +#if CONFIG_WARP_REF_LIST + if (!allow_warp_parameter_signaling( +#if CONFIG_CWG_D067_IMPROVED_WARP + cm, +#endif // CONFIG_CWG_D067_IMPROVED_WARP + mbmi)) { + // Default parameters are not searched if the delta is not + // signalled + if (mbmi_ext + ->warp_param_stack[av1_ref_frame_type(mbmi->ref_frame)] + [mbmi->warp_ref_idx] + .proj_type == PROJ_DEFAULT) + continue; +#if CONFIG_CWG_D067_IMPROVED_WARP + // search MVD if mbmi->warpmv_with_mvd_flag is used. 
+ if (mbmi->mode == WARPMV && mbmi->warpmv_with_mvd_flag) { + if (previous_mvs[mbmi->warp_ref_idx].as_int == INVALID_MV) { + int tmp_trans_ratemv = 0; + av1_single_motion_search(cpi, x, bsize, 0, &tmp_trans_ratemv, + 16, NULL, &mbmi->mv[0], &ref_mv); + previous_mvs[mbmi->warp_ref_idx].as_int = mbmi->mv[0].as_int; + } else { + mbmi->mv[0].as_int = previous_mvs[mbmi->warp_ref_idx].as_int; + } + } +#endif // CONFIG_CWG_D067_IMPROVED_WARP + valid = av1_refine_mv_for_base_param_warp_model( + cm, xd, mbmi, mbmi_ext, &ms_params, + cpi->sf.mv_sf.warp_search_method, + cpi->sf.mv_sf.warp_search_iters); } else { - // NEWMV search did not produce a valid model, so fall back to - // starting with the motion vector predicted by the neighbor's - // warp model (if any) -#if CONFIG_FLEX_MVRES - mbmi->mv[0] = get_warp_motion_vector(xd, &neighbor_params, - mbmi->pb_mv_precision, bsize, - mi_col, mi_row); -#else - mbmi->mv[0] = get_warp_motion_vector( - xd, &neighbor_params, features->allow_high_precision_mv, bsize, - mi_col, mi_row, features->cur_frame_force_integer_mv); -#endif -#if CONFIG_C071_SUBBLK_WARPMV - if ( -#if CONFIG_FLEX_MVRES - mbmi->pb_mv_precision >= MV_PRECISION_HALF_PEL -#else - !cm->features.allow_high_precision_mv -#endif - ) { - FULLPEL_MV tmp_full_mv = get_fullmv_from_mv(&mbmi->mv[0].as_mv); - MV tmp_sub_mv = get_mv_from_fullmv(&tmp_full_mv); - MV sub_mv_offset = { 0, 0 }; - get_phase_from_mv(ref_mv.as_mv, &sub_mv_offset, -#if CONFIG_FLEX_MVRES - mbmi->pb_mv_precision -#else - cm->features.allow_high_precision_mv -#endif - ); - mbmi->mv[0].as_mv.col = tmp_sub_mv.col + sub_mv_offset.col; - mbmi->mv[0].as_mv.row = tmp_sub_mv.row + sub_mv_offset.row; - } -#endif // CONFIG_C071_SUBBLK_WARPMV - // Check that the prediction is in range - if (!av1_is_subpelmv_in_range(mv_limits, mbmi->mv[0].as_mv)) { - continue; - } +#endif // CONFIG_WARP_REF_LIST - // Regenerate model with this new MV - // - // Note: This should be very close to the neighbor's warp model, - // but may 
be slightly different due to rounding. So it may be - // invalid even if the neighbor's warp model is valid. - // Because an exact copy will already have been tried using the - // NEARMV mode, we can just detect an invalid model and bail out. - // - // TODO(rachelbarker): Is it worth trying to search anyway in - // this case, in order to try to find a valid model? - if (av1_extend_warp_model(neighbor_is_above, bsize, - &mbmi->mv[0].as_mv, mi_row, mi_col, - &neighbor_params, &mbmi->wm_params[0])) { - continue; - } + valid = av1_pick_warp_delta( + cm, xd, mbmi, mbmi_ext, &ms_params, &x->mode_costs +#if CONFIG_WARP_REF_LIST + , + mbmi_ext->warp_param_stack[av1_ref_frame_type(mbmi->ref_frame)] +#endif // CONFIG_WARP_REF_LIST + ); + +#if CONFIG_WARP_REF_LIST + } +#endif // CONFIG_WARP_REF_LIST + + if (!valid) { + continue; } - // Refine motion vector. The final choice of MV and warp model are - // stored directly into `mbmi` - av1_refine_mv_for_warp_extend(cm, xd, &ms_params, neighbor_is_above, - bsize, &neighbor_params, - cpi->sf.mv_sf.warp_search_method, - cpi->sf.mv_sf.warp_search_iters); - // If we changed the MV, update costs - if (mv0.as_int != mbmi->mv[0].as_int) { + if (mv0.as_int != mbmi->mv[0].as_int +#if CONFIG_WARPMV && CONFIG_CWG_D067_IMPROVED_WARP + || mbmi->warpmv_with_mvd_flag +#endif // CONFIG_WARPMV && CONFIG_CWG_D067_IMPROVED_WARP + ) { // Keep the refined MV and WM parameters. 
#if CONFIG_FLEX_MVRES tmp_rate_mv = av1_mv_bit_cost( @@ -2249,129 +2368,316 @@ #endif ); #else + tmp_rate_mv = av1_mv_bit_cost( + &mbmi->mv[0].as_mv, &ref_mv.as_mv, x->mv_costs.nmv_joint_cost, + x->mv_costs.mv_cost_stack, MV_COST_WEIGHT); +#endif + + tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv; +#if CONFIG_WARPMV +#if CONFIG_CWG_D067_IMPROVED_WARP + assert(mbmi->mode == NEWMV || mbmi->warpmv_with_mvd_flag); +#else + assert(mbmi->mode == NEWMV); +#endif // CONFIG_CWG_D067_IMPROVED_WARP +#if CONFIG_CWG_D067_IMPROVED_WARP + assert(IMPLIES(mbmi->mode == WARPMV, rate_mv0 == 0)); +#endif // CONFIG_CWG_D067_IMPROVED_WARP + +#endif // CONFIG_WARPMV + } +#if CONFIG_C071_SUBBLK_WARPMV + assign_warpmv(cm, xd->submi, bsize, &mbmi->wm_params[0], mi_row, + mi_col); +#endif // CONFIG_C071_SUBBLK_WARPMV + av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0, + av1_num_planes(cm) - 1); + } else if (mbmi->motion_mode == WARP_EXTEND) { +#if CONFIG_FLEX_MVRES + if (mbmi->mode == NEWMV && + mbmi->pb_mv_precision < MV_PRECISION_ONE_PEL) { + // Don't bother with warp modes for MV precisions >1px + continue; + } +#endif + + CANDIDATE_MV *neighbor = +#if CONFIG_SEP_COMP_DRL + &mbmi_ext + ->ref_mv_stack[mbmi->ref_frame[0]][get_ref_mv_idx(mbmi, 0)]; +#else + &mbmi_ext->ref_mv_stack[mbmi->ref_frame[0]][mbmi->ref_mv_idx]; +#endif + POSITION base_pos = { 0, 0 }; + if (!get_extend_base_pos(cm, xd, mbmi, neighbor->row_offset, + neighbor->col_offset, &base_pos)) { + continue; + } + const MB_MODE_INFO *neighbor_mi = + xd->mi[base_pos.row * xd->mi_stride + base_pos.col]; + + if (mbmi->mode == NEARMV) { + assert(is_warp_mode(neighbor_mi->motion_mode)); + if (neighbor_mi->wm_params[0].invalid) { + // Skip invalid models + continue; + } + mbmi->wm_params[0] = neighbor_mi->wm_params[0]; + } else { + assert(mbmi->mode == NEWMV); + + bool neighbor_is_above = + xd->up_available && (base_pos.row == -1 && base_pos.col >= 0); + + WarpedMotionParams neighbor_params; + 
av1_get_neighbor_warp_model(cm, xd, neighbor_mi, &neighbor_params); + + const int_mv ref_mv = av1_get_ref_mv(x, 0); + SUBPEL_MOTION_SEARCH_PARAMS ms_params; + av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize, + &ref_mv.as_mv, +#if CONFIG_FLEX_MVRES + mbmi->pb_mv_precision, +#endif + NULL); + const SubpelMvLimits *mv_limits = &ms_params.mv_limits; + + // Note: The warp filter is only able to accept small deviations + // from the identity transform, up to 1/4 pel of shift per + // pixel. Especially for small blocks, it is likely that the + // motion vector estimated by the newmv search will be too + // distant from the neighbor's motion vectors for the warp + // filter to be applied. However, we don't want to give up the + // benefits of a good initial MV in the cases where a suitable + // one has already been found. + // + // To get the best of both worlds, we run an initial test to see + // if the motion vector found by newmv search gives a valid + // motion model. If so, we use that as the starting point for + // refinement. Otherwise, we use the MV which is predicted by + // the neighbor's warp model + // TODO(rachelbarker): Do we need this logic? 
+ + // Backup initial motion vector and resulting warp params + int_mv mv0 = mbmi->mv[0]; + WarpedMotionParams wm_params0; + if (!av1_extend_warp_model(neighbor_is_above, bsize, + &mbmi->mv[0].as_mv, mi_row, mi_col, + &neighbor_params, &wm_params0)) { + // NEWMV search produced a valid model + mbmi->wm_params[0] = wm_params0; + } else { + // NEWMV search did not produce a valid model, so fall back to + // starting with the motion vector predicted by the neighbor's + // warp model (if any) +#if CONFIG_FLEX_MVRES + mbmi->mv[0] = get_warp_motion_vector(xd, &neighbor_params, + mbmi->pb_mv_precision, bsize, + mi_col, mi_row); +#else + mbmi->mv[0] = get_warp_motion_vector( + xd, &neighbor_params, features->allow_high_precision_mv, bsize, + mi_col, mi_row, features->cur_frame_force_integer_mv); +#endif +#if CONFIG_C071_SUBBLK_WARPMV + if ( +#if CONFIG_FLEX_MVRES + mbmi->pb_mv_precision >= MV_PRECISION_HALF_PEL +#else + !cm->features.allow_high_precision_mv +#endif + ) { + FULLPEL_MV tmp_full_mv = get_fullmv_from_mv(&mbmi->mv[0].as_mv); + MV tmp_sub_mv = get_mv_from_fullmv(&tmp_full_mv); + MV sub_mv_offset = { 0, 0 }; + get_phase_from_mv(ref_mv.as_mv, &sub_mv_offset, +#if CONFIG_FLEX_MVRES + mbmi->pb_mv_precision +#else + cm->features.allow_high_precision_mv +#endif + ); + mbmi->mv[0].as_mv.col = tmp_sub_mv.col + sub_mv_offset.col; + mbmi->mv[0].as_mv.row = tmp_sub_mv.row + sub_mv_offset.row; + } +#endif // CONFIG_C071_SUBBLK_WARPMV + // Check that the prediction is in range + if (!av1_is_subpelmv_in_range(mv_limits, mbmi->mv[0].as_mv)) { + continue; + } + + // Regenerate model with this new MV + // + // Note: This should be very close to the neighbor's warp + // model, but may be slightly different due to rounding. So it + // may be invalid even if the neighbor's warp model is valid. + // Because an exact copy will already have been tried using + // the NEARMV mode, we can just detect an invalid model and + // bail out. 
+ // + // TODO(rachelbarker): Is it worth trying to search anyway in + // this case, in order to try to find a valid model? + if (av1_extend_warp_model( + neighbor_is_above, bsize, &mbmi->mv[0].as_mv, mi_row, + mi_col, &neighbor_params, &mbmi->wm_params[0])) { + continue; + } + } + + // Refine motion vector. The final choice of MV and warp model + // are stored directly into `mbmi` + av1_refine_mv_for_warp_extend(cm, xd, &ms_params, neighbor_is_above, + bsize, &neighbor_params, + cpi->sf.mv_sf.warp_search_method, + cpi->sf.mv_sf.warp_search_iters); + + // If we changed the MV, update costs + if (mv0.as_int != mbmi->mv[0].as_int) { + // Keep the refined MV and WM parameters. +#if CONFIG_FLEX_MVRES + tmp_rate_mv = av1_mv_bit_cost( + &mbmi->mv[0].as_mv, &ref_mv.as_mv, mbmi->pb_mv_precision, + &x->mv_costs, MV_COST_WEIGHT +#if CONFIG_ADAPTIVE_MVD + , + ms_params.mv_cost_params.is_adaptive_mvd +#endif + ); +#else tmp_rate_mv = av1_mv_bit_cost( &mbmi->mv[0].as_mv, &ref_mv.as_mv, x->mv_costs.nmv_joint_cost, x->mv_costs.mv_cost_stack, MV_COST_WEIGHT); #endif - tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv; - } else { - // Restore the old MV and WM parameters. - mbmi->mv[0] = mv0; - mbmi->wm_params[0] = wm_params0; + tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv; + } else { + // Restore the old MV and WM parameters. 
+ mbmi->mv[0] = mv0; + mbmi->wm_params[0] = wm_params0; + } } - } #if CONFIG_C071_SUBBLK_WARPMV - assign_warpmv(cm, xd->submi, bsize, &mbmi->wm_params[0], mi_row, - mi_col); + assign_warpmv(cm, xd->submi, bsize, &mbmi->wm_params[0], mi_row, + mi_col); #endif // CONFIG_C071_SUBBLK_WARPMV // Build the warped predictor - av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0, - av1_num_planes(cm) - 1); + av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0, + av1_num_planes(cm) - 1); #endif // CONFIG_EXTENDED_WARP_PREDICTION - } + } - // If we are searching newmv and the mv is the same as refmv, skip the - // current mode - if (!av1_check_newmv_joint_nonzero(cm, x)) continue; + // If we are searching newmv and the mv is the same as refmv, skip + // the current mode + if (!av1_check_newmv_joint_nonzero(cm, x)) continue; - // Update rd_stats for the current motion mode - txfm_info->skip_txfm = 0; - rd_stats->dist = 0; - rd_stats->sse = 0; - rd_stats->skip_txfm = 1; - rd_stats->rate = tmp_rate2; - const ModeCosts *mode_costs = &x->mode_costs; - if (!is_warp_mode(mbmi->motion_mode)) rd_stats->rate += switchable_rate; + // Update rd_stats for the current motion mode + txfm_info->skip_txfm = 0; + rd_stats->dist = 0; + rd_stats->sse = 0; + rd_stats->skip_txfm = 1; + rd_stats->rate = tmp_rate2; + const ModeCosts *mode_costs = &x->mode_costs; + if (!is_warp_mode(mbmi->motion_mode)) rd_stats->rate += switchable_rate; #if CONFIG_BAWP - if (cm->features.enable_bawp && av1_allow_bawp(mbmi, mi_row, mi_col)) - rd_stats->rate += mode_costs->bawp_flg_cost[mbmi->bawp_flag == 1]; + if (cm->features.enable_bawp && av1_allow_bawp(mbmi, mi_row, mi_col)) + rd_stats->rate += mode_costs->bawp_flg_cost[mbmi->bawp_flag == 1]; #endif #if CONFIG_EXTENDED_WARP_PREDICTION - MOTION_MODE motion_mode = mbmi->motion_mode; + MOTION_MODE motion_mode = mbmi->motion_mode; #if CONFIG_WARPMV - bool continue_motion_mode_signaling = (mbmi->mode != WARPMV); + bool 
continue_motion_mode_signaling = (mbmi->mode != WARPMV); #else bool continue_motion_mode_signaling = true; #endif // CONFIG_WARPMV - if (continue_motion_mode_signaling && - allowed_motion_modes & (1 << INTERINTRA)) { - rd_stats->rate += - mode_costs->interintra_cost[size_group_lookup[bsize]] - [motion_mode == INTERINTRA]; - if (motion_mode == INTERINTRA) { - // Note(rachelbarker): Costs for other interintra-related signaling - // are already accounted for by `av1_handle_inter_intra_mode` - continue_motion_mode_signaling = false; - } - } - - if (continue_motion_mode_signaling && - allowed_motion_modes & (1 << OBMC_CAUSAL)) { - rd_stats->rate += - mode_costs->obmc_cost[bsize][motion_mode == OBMC_CAUSAL]; - if (motion_mode == OBMC_CAUSAL) { - continue_motion_mode_signaling = false; - } - } - - if (continue_motion_mode_signaling && - allowed_motion_modes & (1 << WARP_EXTEND)) { - const int ctx1 = av1_get_warp_extend_ctx1(xd, mbmi); - const int ctx2 = av1_get_warp_extend_ctx2(xd, mbmi); - rd_stats->rate += - mode_costs - ->warp_extend_cost[ctx1][ctx2][motion_mode == WARP_EXTEND]; - if (motion_mode == WARP_EXTEND) { - continue_motion_mode_signaling = false; - } - } - - if (continue_motion_mode_signaling && - allowed_motion_modes & (1 << WARPED_CAUSAL)) { - rd_stats->rate += - mode_costs->warped_causal_cost[bsize][motion_mode == WARPED_CAUSAL]; - if (motion_mode == WARPED_CAUSAL) { - continue_motion_mode_signaling = false; - } - } - - if (continue_motion_mode_signaling && - allowed_motion_modes & (1 << WARP_DELTA)) { - rd_stats->rate += - mode_costs->warp_delta_cost[bsize][motion_mode == WARP_DELTA]; - } - -#if CONFIG_WARPMV - if (mbmi->mode == WARPMV) { - if (allowed_motion_modes & (1 << WARPED_CAUSAL)) { + if (continue_motion_mode_signaling && + allowed_motion_modes & (1 << INTERINTRA)) { rd_stats->rate += - mode_costs->warped_causal_warpmv_cost[bsize][motion_mode == - WARPED_CAUSAL]; - } else { - assert(motion_mode == WARP_DELTA); + 
mode_costs->interintra_cost[size_group_lookup[bsize]] + [motion_mode == INTERINTRA]; + if (motion_mode == INTERINTRA) { + // Note(rachelbarker): Costs for other interintra-related + // signaling are already accounted for by + // `av1_handle_inter_intra_mode` + continue_motion_mode_signaling = false; + } } - } + + if (continue_motion_mode_signaling && + allowed_motion_modes & (1 << OBMC_CAUSAL)) { + rd_stats->rate += + mode_costs->obmc_cost[bsize][motion_mode == OBMC_CAUSAL]; + if (motion_mode == OBMC_CAUSAL) { + continue_motion_mode_signaling = false; + } + } + + if (continue_motion_mode_signaling && + allowed_motion_modes & (1 << WARP_EXTEND)) { + const int ctx1 = av1_get_warp_extend_ctx1(xd, mbmi); + const int ctx2 = av1_get_warp_extend_ctx2(xd, mbmi); + rd_stats->rate += + mode_costs + ->warp_extend_cost[ctx1][ctx2][motion_mode == WARP_EXTEND]; + if (motion_mode == WARP_EXTEND) { + continue_motion_mode_signaling = false; + } + } + + if (continue_motion_mode_signaling && + allowed_motion_modes & (1 << WARPED_CAUSAL)) { + rd_stats->rate += + mode_costs + ->warped_causal_cost[bsize][motion_mode == WARPED_CAUSAL]; + if (motion_mode == WARPED_CAUSAL) { + continue_motion_mode_signaling = false; + } + } + + if (continue_motion_mode_signaling && + allowed_motion_modes & (1 << WARP_DELTA)) { + rd_stats->rate += + mode_costs->warp_delta_cost[bsize][motion_mode == WARP_DELTA]; + } + +#if CONFIG_WARPMV + if (mbmi->mode == WARPMV) { + if (allowed_motion_modes & (1 << WARPED_CAUSAL)) { + rd_stats->rate += + mode_costs->warped_causal_warpmv_cost[bsize][motion_mode != + WARP_DELTA]; + + } else { + assert(motion_mode == WARP_DELTA); + } +#if CONFIG_CWG_D067_IMPROVED_WARP + if (allow_warpmv_with_mvd_coding(cm, mbmi)) { + rd_stats->rate += + mode_costs + ->warpmv_with_mvd_flag_cost[bsize] + [mbmi->warpmv_with_mvd_flag]; + } +#endif // CONFIG_CWG_D067_IMPROVED_WARP + } #endif // CONFIG_WARPMV - if (motion_mode == WARP_DELTA + if (motion_mode == WARP_DELTA #if CONFIG_WARPMV - || 
(motion_mode == WARPED_CAUSAL && mbmi->mode == WARPMV) + || ((motion_mode == WARPED_CAUSAL) && mbmi->mode == WARPMV) #endif // CONFIG_WARPMV - ) { + ) { #if CONFIG_WARP_REF_LIST - rd_stats->rate += get_warp_ref_idx_cost(mbmi, x); + rd_stats->rate += get_warp_ref_idx_cost(mbmi, x); #endif // CONFIG_WARP_REF_LIST - rd_stats->rate += - av1_cost_warp_delta(cm, xd, mbmi, mbmi_ext, mode_costs); - // The following line is commented out to remove a spurious - // static analysis warning. Uncomment when adding a new motion mode - // continue_motion_mode_signaling = false; - } + rd_stats->rate += + av1_cost_warp_delta(cm, xd, mbmi, mbmi_ext, mode_costs); + // The following line is commented out to remove a spurious + // static analysis warning. Uncomment when adding a new motion + // mode continue_motion_mode_signaling = false; + } #else if (interintra_allowed) { rd_stats->rate += @@ -2390,45 +2696,57 @@ } #endif // CONFIG_EXTENDED_WARP_PREDICTION - if (!do_tx_search) { - // Avoid doing a transform search here to speed up the overall mode - // search. It will be done later in the mode search if the current - // motion mode seems promising. 
- int64_t curr_sse = -1; - int64_t sse_y = -1; - int est_residue_cost = 0; - int64_t est_dist = 0; - int64_t est_rd = 0; - if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) { - curr_sse = get_sse(cpi, x, &sse_y); - const int has_est_rd = get_est_rate_dist( - tile_data, bsize, curr_sse, &est_residue_cost, &est_dist); - (void)has_est_rd; - assert(has_est_rd); - } else if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 2) { - model_rd_sb_fn[MODELRD_TYPE_MOTION_MODE_RD]( - cpi, bsize, x, xd, 0, num_planes - 1, &est_residue_cost, - &est_dist, NULL, &curr_sse, NULL, NULL, NULL); - sse_y = x->pred_sse[COMPACT_INDEX0_NRS(xd->mi[0]->ref_frame[0])]; - } - est_rd = RDCOST(x->rdmult, rd_stats->rate + est_residue_cost, est_dist); - if (est_rd * 0.80 > *best_est_rd) { - mbmi->ref_frame[1] = ref_frame_1; - continue; - } - const int mode_rate = rd_stats->rate; - rd_stats->rate += est_residue_cost; - rd_stats->dist = est_dist; - rd_stats->rdcost = est_rd; - if (rd_stats->rdcost < *best_est_rd) { - *best_est_rd = rd_stats->rdcost; - assert(sse_y >= 0); - ref_skip_rd[1] = cpi->sf.inter_sf.txfm_rd_gate_level - ? RDCOST(x->rdmult, mode_rate, (sse_y << 4)) - : INT64_MAX; - } - if (cm->current_frame.reference_mode == SINGLE_REFERENCE) { - if (!is_comp_pred) { + if (!do_tx_search) { + // Avoid doing a transform search here to speed up the overall + // mode search. It will be done later in the mode search if the + // current motion mode seems promising. 
+ int64_t curr_sse = -1; + int64_t sse_y = -1; + int est_residue_cost = 0; + int64_t est_dist = 0; + int64_t est_rd = 0; + if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) { + curr_sse = get_sse(cpi, x, &sse_y); + const int has_est_rd = get_est_rate_dist( + tile_data, bsize, curr_sse, &est_residue_cost, &est_dist); + (void)has_est_rd; + assert(has_est_rd); + } else if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 2) { + model_rd_sb_fn[MODELRD_TYPE_MOTION_MODE_RD]( + cpi, bsize, x, xd, 0, num_planes - 1, &est_residue_cost, + &est_dist, NULL, &curr_sse, NULL, NULL, NULL); + sse_y = x->pred_sse[COMPACT_INDEX0_NRS(xd->mi[0]->ref_frame[0])]; + } + est_rd = + RDCOST(x->rdmult, rd_stats->rate + est_residue_cost, est_dist); + if (est_rd * 0.80 > *best_est_rd) { + mbmi->ref_frame[1] = ref_frame_1; + continue; + } + const int mode_rate = rd_stats->rate; + rd_stats->rate += est_residue_cost; + rd_stats->dist = est_dist; + rd_stats->rdcost = est_rd; + if (rd_stats->rdcost < *best_est_rd) { + *best_est_rd = rd_stats->rdcost; + assert(sse_y >= 0); + ref_skip_rd[1] = cpi->sf.inter_sf.txfm_rd_gate_level + ? 
RDCOST(x->rdmult, mode_rate, (sse_y << 4)) + : INT64_MAX; + } + if (cm->current_frame.reference_mode == SINGLE_REFERENCE) { + if (!is_comp_pred) { + assert(curr_sse >= 0); + inter_modes_info_push(inter_modes_info, mode_rate, curr_sse, + rd_stats->rdcost, rd_stats, rd_stats_y, + rd_stats_uv, mbmi +#if CONFIG_C071_SUBBLK_WARPMV + , + xd, cm +#endif // CONFIG_C071_SUBBLK_WARPMV + ); + } + } else { assert(curr_sse >= 0); inter_modes_info_push(inter_modes_info, mode_rate, curr_sse, rd_stats->rdcost, rd_stats, rd_stats_y, @@ -2439,95 +2757,95 @@ #endif // CONFIG_C071_SUBBLK_WARPMV ); } + mbmi->skip_txfm[xd->tree_type == CHROMA_PART] = 0; } else { - assert(curr_sse >= 0); - inter_modes_info_push(inter_modes_info, mode_rate, curr_sse, - rd_stats->rdcost, rd_stats, rd_stats_y, - rd_stats_uv, mbmi -#if CONFIG_C071_SUBBLK_WARPMV - , - xd, cm -#endif // CONFIG_C071_SUBBLK_WARPMV - ); - } - mbmi->skip_txfm[xd->tree_type == CHROMA_PART] = 0; - } else { - // Perform full transform search - int64_t skip_rd = INT64_MAX; - int64_t skip_rdy = INT64_MAX; - if (cpi->sf.inter_sf.txfm_rd_gate_level) { - // Check if the mode is good enough based on skip RD - int64_t sse_y = INT64_MAX; - int64_t curr_sse = get_sse(cpi, x, &sse_y); - skip_rd = RDCOST(x->rdmult, rd_stats->rate, curr_sse); - skip_rdy = RDCOST(x->rdmult, rd_stats->rate, (sse_y << 4)); - int eval_txfm = - check_txfm_eval(x, bsize, ref_skip_rd[0], skip_rd, - cpi->sf.inter_sf.txfm_rd_gate_level, 0); - if (!eval_txfm) continue; - } - - // Do transform search - if (!av1_txfm_search(cpi, x, bsize, rd_stats, rd_stats_y, rd_stats_uv, - rd_stats->rate, ref_best_rd)) { - if (rd_stats_y->rate == INT_MAX && mode_index == 0) { - return INT64_MAX; + // Perform full transform search + int64_t skip_rd = INT64_MAX; + int64_t skip_rdy = INT64_MAX; + if (cpi->sf.inter_sf.txfm_rd_gate_level) { + // Check if the mode is good enough based on skip RD + int64_t sse_y = INT64_MAX; + int64_t curr_sse = get_sse(cpi, x, &sse_y); + skip_rd = 
RDCOST(x->rdmult, rd_stats->rate, curr_sse); + skip_rdy = RDCOST(x->rdmult, rd_stats->rate, (sse_y << 4)); + int eval_txfm = + check_txfm_eval(x, bsize, ref_skip_rd[0], skip_rd, + cpi->sf.inter_sf.txfm_rd_gate_level, 0); + if (!eval_txfm) continue; } - continue; + + // Do transform search + if (!av1_txfm_search(cpi, x, bsize, rd_stats, rd_stats_y, rd_stats_uv, + rd_stats->rate, ref_best_rd)) { + if (rd_stats_y->rate == INT_MAX && mode_index == 0) { + return INT64_MAX; + } + continue; + } + const int64_t curr_rd = + RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist); + + if (curr_rd < ref_best_rd) { + ref_best_rd = curr_rd; + ref_skip_rd[0] = skip_rd; + ref_skip_rd[1] = skip_rdy; + } + if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) { + const int skip_ctx = av1_get_skip_txfm_context(xd); + inter_mode_data_push( + tile_data, mbmi->sb_type[PLANE_TYPE_Y], rd_stats->sse, + rd_stats->dist, + rd_stats_y->rate + rd_stats_uv->rate + + mode_costs->skip_txfm_cost[skip_ctx] + [mbmi->skip_txfm[xd->tree_type == + CHROMA_PART]]); + } } - const int64_t curr_rd = + + if (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV) { + if (is_nontrans_global_motion(xd, xd->mi[0])) { + mbmi->interp_fltr = av1_unswitchable_filter(interp_filter); + } + } + + const int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist); - if (curr_rd < ref_best_rd) { - ref_best_rd = curr_rd; - ref_skip_rd[0] = skip_rd; - ref_skip_rd[1] = skip_rdy; + if (num_rd_check == 0) { +#if CONFIG_SEP_COMP_DRL + args->simple_rd[this_mode][get_ref_mv_idx(mbmi, 0)] +#else + args->simple_rd[this_mode][mbmi->ref_mv_idx] +#endif + [COMPACT_INDEX0_NRS(mbmi->ref_frame[0])] = tmp_rd; } - if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) { - const int skip_ctx = av1_get_skip_txfm_context(xd); - inter_mode_data_push( - tile_data, mbmi->sb_type[PLANE_TYPE_Y], rd_stats->sse, - rd_stats->dist, - rd_stats_y->rate + rd_stats_uv->rate + - mode_costs->skip_txfm_cost[skip_ctx] - [mbmi->skip_txfm[xd->tree_type 
== - CHROMA_PART]]); - } - } - if (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV) { - if (is_nontrans_global_motion(xd, xd->mi[0])) { - mbmi->interp_fltr = av1_unswitchable_filter(interp_filter); - } - } - - const int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist); - if (mode_index == 0) { - args->simple_rd[this_mode][mbmi->ref_mv_idx] - [COMPACT_INDEX0_NRS(mbmi->ref_frame[0])] = tmp_rd; - } - if (mode_index == 0 || tmp_rd < best_rd) { - // Update best_rd data if this is the best motion mode so far - best_mbmi = *mbmi; + if (num_rd_check == 0 || tmp_rd < best_rd) { + // Update best_rd data if this is the best motion mode so far + best_mbmi = *mbmi; #if CONFIG_C071_SUBBLK_WARPMV - if (is_warp_mode(mbmi->motion_mode)) { - store_submi(xd, cm, best_submi, bsize); - } + if (is_warp_mode(mbmi->motion_mode)) { + store_submi(xd, cm, best_submi, bsize); + } #endif // CONFIG_C071_SUBBLK_WARPMV - best_rd = tmp_rd; - best_rd_stats = *rd_stats; - best_rd_stats_y = *rd_stats_y; - best_rate_mv = tmp_rate_mv; - if (num_planes > 1) best_rd_stats_uv = *rd_stats_uv; - memcpy(best_blk_skip, txfm_info->blk_skip, - sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width); - av1_copy_array(best_tx_type_map, xd->tx_type_map, - xd->height * xd->width); + best_rd = tmp_rd; + best_rd_stats = *rd_stats; + best_rd_stats_y = *rd_stats_y; + best_rate_mv = tmp_rate_mv; + if (num_planes > 1) best_rd_stats_uv = *rd_stats_uv; + memcpy(best_blk_skip, txfm_info->blk_skip, + sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width); + av1_copy_array(best_tx_type_map, xd->tx_type_map, + xd->height * xd->width); #if CONFIG_CROSS_CHROMA_TX - av1_copy_array(best_cctx_type_map, xd->cctx_type_map, - xd->height * xd->width); + av1_copy_array(best_cctx_type_map, xd->cctx_type_map, + xd->height * xd->width); #endif // CONFIG_CROSS_CHROMA_TX - best_xskip_txfm = mbmi->skip_txfm[xd->tree_type == CHROMA_PART]; + best_xskip_txfm = mbmi->skip_txfm[xd->tree_type == CHROMA_PART]; + } + 
num_rd_check++; +#if CONFIG_CWG_D067_IMPROVED_WARP } +#endif // CONFIG_CWG_D067_IMPROVED_WARP #if CONFIG_WARP_REF_LIST } #endif // CONFIG_WARP_REF_LIST @@ -2605,8 +2923,17 @@ static INLINE int check_repeat_ref_mv(const MB_MODE_INFO_EXT *mbmi_ext, int ref_idx, const MV_REFERENCE_FRAME *ref_frame, +#if CONFIG_SEP_COMP_DRL + PREDICTION_MODE this_mode, +#endif PREDICTION_MODE single_mode) { +#if CONFIG_SEP_COMP_DRL + const int8_t ref_frame_type = has_second_drl_by_mode(this_mode, ref_frame) + ? ref_frame[ref_idx] + : av1_ref_frame_type(ref_frame); +#else const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame); +#endif #if CONFIG_TIP if (is_tip_ref_frame(ref_frame_type)) return 0; #endif // CONFIG_TIP @@ -2632,7 +2959,11 @@ for (int ref_mv_idx = 0; ref_mv_idx < stack_size; ref_mv_idx++) { int_mv this_mv; +#if CONFIG_SEP_COMP_DRL + if (ref_idx == 0 || has_second_drl_by_mode(this_mode, ref_frame)) +#else if (ref_idx == 0) +#endif this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv; else this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv; @@ -2654,7 +2985,12 @@ this_mv->as_int = INVALID_MV; } else if (single_mode == GLOBALMV) { if (skip_repeated_ref_mv && +#if CONFIG_SEP_COMP_DRL + check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, this_mode, + single_mode)) +#else check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode)) +#endif return 0; *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]]; } @@ -2668,7 +3004,13 @@ else { assert(single_mode == NEARMV); const int ref_mv_offset = ref_mv_idx; +#if CONFIG_SEP_COMP_DRL + const int8_t ref_frame_type = has_second_drl_by_mode(this_mode, ref_frame) + ? 
ref_frame[ref_idx] + : av1_ref_frame_type(ref_frame); +#else const int8_t ref_frame_type = av1_ref_frame_type(ref_frame); +#endif if (ref_frame_type > NONE_FRAME && ref_mv_offset < mbmi_ext->ref_mv_count[ref_frame_type]) { assert(ref_mv_offset >= 0); @@ -2677,11 +3019,21 @@ mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].this_mv; } else { *this_mv = - mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].comp_mv; +#if CONFIG_SEP_COMP_DRL + has_second_drl_by_mode(this_mode, ref_frame) + ? mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].this_mv + : +#endif + mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].comp_mv; } } else { if (skip_repeated_ref_mv && +#if CONFIG_SEP_COMP_DRL + check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, this_mode, + single_mode)) +#else check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode)) +#endif return 0; #if CONFIG_TIP if (is_tip_ref_frame(ref_frame_type)) { @@ -2705,14 +3057,24 @@ const MB_MODE_INFO *mbmi = xd->mi[0]; const int is_comp_pred = has_second_ref(mbmi); -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT if (mbmi->skip_mode) { int ret = 1; +#if CONFIG_SEP_COMP_DRL + assert(get_ref_mv_idx(mbmi, 0) < xd->skip_mvp_candidate_list.ref_mv_count); + assert(get_ref_mv_idx(mbmi, 1) == get_ref_mv_idx(mbmi, 0)); +#else assert(mbmi->ref_mv_idx < xd->skip_mvp_candidate_list.ref_mv_count); +#endif int_mv this_mv; this_mv.as_int = INVALID_MV; this_mv = +#if CONFIG_SEP_COMP_DRL + xd->skip_mvp_candidate_list.ref_mv_stack[get_ref_mv_idx(mbmi, 0)] + .this_mv; +#else xd->skip_mvp_candidate_list.ref_mv_stack[mbmi->ref_mv_idx].this_mv; +#endif cur_mv[0] = this_mv; #if !CONFIG_C071_SUBBLK_WARPMV @@ -2726,7 +3088,12 @@ ret &= clamp_and_check_mv(cur_mv, this_mv, cm, x); this_mv = +#if CONFIG_SEP_COMP_DRL + xd->skip_mvp_candidate_list.ref_mv_stack[get_ref_mv_idx(mbmi, 1)] + .comp_mv; +#else xd->skip_mvp_candidate_list.ref_mv_stack[mbmi->ref_mv_idx].comp_mv; +#endif cur_mv[1] = this_mv; #if 
!CONFIG_C071_SUBBLK_WARPMV #if CONFIG_FLEX_MVRES @@ -2740,23 +3107,39 @@ return ret; } -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT int ret = 1; for (int i = 0; i < is_comp_pred + 1; ++i) { int_mv this_mv; this_mv.as_int = INVALID_MV; +#if CONFIG_SEP_COMP_DRL + int ref_mv_idx = get_ref_mv_idx(mbmi, i); + ret = get_this_mv(&this_mv, this_mode, i, ref_mv_idx, +#else ret = get_this_mv(&this_mv, this_mode, i, mbmi->ref_mv_idx, +#endif skip_repeated_ref_mv, mbmi->ref_frame, x->mbmi_ext); if (!ret) return 0; const PREDICTION_MODE single_mode = get_single_mode(this_mode, i); if (single_mode == NEWMV) { const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame); +#if CONFIG_SEP_COMP_DRL + if (has_second_drl(mbmi)) + cur_mv[i] = + x->mbmi_ext->ref_mv_stack[mbmi->ref_frame[i]][ref_mv_idx].this_mv; + else + cur_mv[i] = + (i == 0) + ? x->mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv + : x->mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv; +#else cur_mv[i] = (i == 0) ? 
x->mbmi_ext->ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx] .this_mv : x->mbmi_ext->ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx] .comp_mv; +#endif } else { ret &= clamp_and_check_mv(cur_mv + i, this_mv, cm, x); } @@ -2774,13 +3157,38 @@ #if IMPROVED_AMVD if (mbmi->mode == AMVDNEWMV) max_drl_bits = AOMMIN(max_drl_bits, 1); #endif // IMPROVED_AMVD +#if CONFIG_SEP_COMP_DRL + assert(get_ref_mv_idx(mbmi, 0) < max_drl_bits + 1); + assert(get_ref_mv_idx(mbmi, 1) < max_drl_bits + 1); +#else assert(mbmi->ref_mv_idx < max_drl_bits + 1); +#endif if (!have_drl_index(mbmi->mode)) { return 0; } int16_t mode_ctx_pristine = av1_mode_context_pristine(mbmi_ext->mode_context, mbmi->ref_frame); int cost = 0; +#if CONFIG_SEP_COMP_DRL + for (int ref_idx = 0; ref_idx < 1 + has_second_drl(mbmi); ref_idx++) { + for (int idx = 0; idx < max_drl_bits; ++idx) { + int drl_ctx = av1_drl_ctx(mode_ctx_pristine); + int ref_mv_idx = get_ref_mv_idx(mbmi, ref_idx); + switch (idx) { + case 0: + cost += x->mode_costs.drl_mode_cost[0][drl_ctx][ref_mv_idx != idx]; + break; + case 1: + cost += x->mode_costs.drl_mode_cost[1][drl_ctx][ref_mv_idx != idx]; + break; + default: + cost += x->mode_costs.drl_mode_cost[2][drl_ctx][ref_mv_idx != idx]; + break; + } + if (ref_mv_idx == idx) break; + } + } +#else for (int idx = 0; idx < max_drl_bits; ++idx) { int drl_ctx = av1_drl_ctx(mode_ctx_pristine); switch (idx) { @@ -2799,18 +3207,40 @@ } if (mbmi->ref_mv_idx == idx) return cost; } +#endif return cost; } -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT static INLINE int get_skip_drl_cost(int max_drl_bits, const MB_MODE_INFO *mbmi, const MACROBLOCK *x) { +#if CONFIG_SEP_COMP_DRL + assert(get_ref_mv_idx(mbmi, 0) < max_drl_bits + 1); +#else assert(mbmi->ref_mv_idx < max_drl_bits + 1); +#endif assert(mbmi->skip_mode); if (!have_drl_index(mbmi->mode)) { return 0; } int cost = 0; +#if CONFIG_SEP_COMP_DRL + int ref_mv_idx = get_ref_mv_idx(mbmi, 0); + for (int idx = 0; idx < max_drl_bits; ++idx) 
{ + switch (idx) { + case 0: + cost += x->mode_costs.skip_drl_mode_cost[0][ref_mv_idx != idx]; + break; + case 1: + cost += x->mode_costs.skip_drl_mode_cost[1][ref_mv_idx != idx]; + break; + default: + cost += x->mode_costs.skip_drl_mode_cost[2][ref_mv_idx != idx]; + break; + } + if (ref_mv_idx == idx) return cost; + } +#else for (int idx = 0; idx < max_drl_bits; ++idx) { switch (idx) { case 0: @@ -2825,9 +3255,10 @@ } if (mbmi->ref_mv_idx == idx) return cost; } +#endif return cost; } -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT static INLINE int is_single_newmv_valid(const HandleInterModeArgs *const args, const MB_MODE_INFO *const mbmi, @@ -2837,8 +3268,13 @@ const MV_REFERENCE_FRAME ref = mbmi->ref_frame[ref_idx]; if (single_mode == NEWMV && #if CONFIG_FLEX_MVRES +#if CONFIG_SEP_COMP_DRL + args->single_newmv_valid[mbmi->pb_mv_precision] + [get_ref_mv_idx(mbmi, ref_idx)][ref] == 0) { +#else args->single_newmv_valid[mbmi->pb_mv_precision][mbmi->ref_mv_idx] [ref] == 0) { +#endif #else args->single_newmv_valid[mbmi->ref_mv_idx][ref] == 0) { #endif @@ -2850,7 +3286,12 @@ static int get_drl_refmv_count(int max_drl_bits, const MACROBLOCK *const x, const MV_REFERENCE_FRAME *ref_frame, - PREDICTION_MODE mode) { + PREDICTION_MODE mode +#if CONFIG_SEP_COMP_DRL + , + int ref_idx +#endif +) { MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; int has_drl = have_drl_index(mode); if (!has_drl) { @@ -2861,17 +3302,25 @@ #endif // CONFIG_WARPMV return 1; } +#if CONFIG_SEP_COMP_DRL + MB_MODE_INFO *mbmi = x->e_mbd.mi[0]; + if (has_second_drl(mbmi)) { + return AOMMIN(max_drl_bits + 1, mbmi_ext->ref_mv_count[ref_frame[ref_idx]]); + } +#endif + const int8_t ref_frame_type = av1_ref_frame_type(ref_frame); + int ref_mv_count = ref_frame_type > NONE_FRAME ? 
mbmi_ext->ref_mv_count[ref_frame_type] : 0; #if IMPROVED_AMVD if (mode == AMVDNEWMV) ref_mv_count = AOMMIN(ref_mv_count, 2); #endif // IMPROVED_AMVD -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT if (x->e_mbd.mi[0]->skip_mode) ref_mv_count = mbmi_ext->skip_mvp_candidate_list.ref_mv_count; -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT return AOMMIN(max_drl_bits + 1, ref_mv_count); } @@ -2882,7 +3331,11 @@ const AV1_COMP *const cpi, const RefFrameDistanceInfo *const ref_frame_dist_info, MACROBLOCK *x, const HandleInterModeArgs *const args, int64_t ref_best_rd, +#if CONFIG_SEP_COMP_DRL + int *ref_mv_idx) { +#else int ref_mv_idx) { +#endif (void)ref_frame_dist_info; const AV1_COMMON *const cm = &cpi->common; const SPEED_FEATURES *const sf = &cpi->sf; @@ -2891,7 +3344,12 @@ const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame); const int is_comp_pred = has_second_ref(mbmi); +#if CONFIG_SEP_COMP_DRL + if (sf->inter_sf.reduce_inter_modes && + (ref_mv_idx[0] > 0 || ref_mv_idx[1] > 0)) { +#else if (sf->inter_sf.reduce_inter_modes && ref_mv_idx > 0) { +#endif // NOTE: This section changes the stats. int ranks[2][2], dir[2] = { -1, -1 }; if (mbmi->ref_frame[0] != INTRA_FRAME) @@ -2900,9 +3358,21 @@ dir[1] = get_dir_rank(cm, mbmi->ref_frame[1], ranks[1]); if ((dir[0] != -1 && ranks[0][dir[0]] > 3) || (dir[1] != -1 && ranks[1][dir[1]] > 2)) { +#if CONFIG_SEP_COMP_DRL //????????? 
to be updated + if (has_second_drl(mbmi)) { + if (mbmi_ext->weight[mbmi->ref_frame[0]][ref_mv_idx[0]] < + REF_CAT_LEVEL && + mbmi_ext->weight[mbmi->ref_frame[1]][ref_mv_idx[1]] < REF_CAT_LEVEL) + return true; + } else { + if (mbmi_ext->weight[ref_frame_type][ref_mv_idx[0]] < REF_CAT_LEVEL) + return true; + } +#else if (mbmi_ext->weight[ref_frame_type][ref_mv_idx] < REF_CAT_LEVEL) { return true; } +#endif } // TODO(any): Experiment with reduce_inter_modes for compound prediction if (sf->inter_sf.reduce_inter_modes >= 2 && !is_comp_pred && @@ -2911,14 +3381,32 @@ mbmi->ref_frame[0] != cm->ref_frames_info.future_refs[0]) && (cm->ref_frames_info.num_past_refs == 0 || mbmi->ref_frame[0] != cm->ref_frames_info.past_refs[0])) { +#if CONFIG_SEP_COMP_DRL //????????? to be updated + if (has_second_drl(mbmi)) { + if (mbmi_ext->weight[mbmi->ref_frame[0]][ref_mv_idx[0]] < + REF_CAT_LEVEL && + mbmi_ext->weight[mbmi->ref_frame[1]][ref_mv_idx[1]] < + REF_CAT_LEVEL) + return true; + } else { + if (mbmi_ext->weight[ref_frame_type][ref_mv_idx[0]] < REF_CAT_LEVEL) + return true; + } +#else if (mbmi_ext->weight[ref_frame_type][ref_mv_idx] < REF_CAT_LEVEL) { return true; } +#endif } } } +#if CONFIG_SEP_COMP_DRL + mbmi->ref_mv_idx[0] = ref_mv_idx[0]; + mbmi->ref_mv_idx[1] = ref_mv_idx[1]; +#else mbmi->ref_mv_idx = ref_mv_idx; +#endif if (is_comp_pred && (!is_single_newmv_valid(args, mbmi, mbmi->mode))) { return true; } @@ -2936,7 +3424,11 @@ static int64_t simple_translation_pred_rd(AV1_COMP *const cpi, MACROBLOCK *x, RD_STATS *rd_stats, HandleInterModeArgs *args, +#if CONFIG_SEP_COMP_DRL + int *ref_mv_idx, +#else int ref_mv_idx, +#endif inter_mode_info *mode_info, int64_t ref_best_rd, BLOCK_SIZE bsize #if CONFIG_FLEX_MVRES @@ -2968,16 +3460,27 @@ mbmi->num_proj_ref = 0; mbmi->motion_mode = SIMPLE_TRANSLATION; +#if CONFIG_SEP_COMP_DRL + mbmi->ref_mv_idx[0] = ref_mv_idx[0]; + mbmi->ref_mv_idx[1] = ref_mv_idx[1]; + int ref_mv_idx_type = av1_ref_mv_idx_type(mbmi, ref_mv_idx); +#else 
mbmi->ref_mv_idx = ref_mv_idx; +#endif rd_stats->rate += args->ref_frame_cost + args->single_comp_cost; #if CONFIG_FLEX_MVRES rd_stats->rate += flex_mv_cost; #endif + const int drl_cost = get_drl_cost(cpi->common.features.max_drl_bits, mbmi, mbmi_ext, x); rd_stats->rate += drl_cost; +#if CONFIG_SEP_COMP_DRL + mode_info[ref_mv_idx_type].drl_cost = drl_cost; +#else mode_info[ref_mv_idx].drl_cost = drl_cost; +#endif int_mv cur_mv[2]; if (!build_cur_mv(cur_mv, mbmi->mode, cm, x, 0)) { @@ -3018,6 +3521,9 @@ } mbmi->motion_mode = SIMPLE_TRANSLATION; +#if CONFIG_CWP + mbmi->cwp_idx = CWP_EQUAL; +#endif // CONFIG_CWP mbmi->num_proj_ref = 0; if (is_comp_pred) { // Only compound_average @@ -3032,6 +3538,7 @@ const int mi_row = xd->mi_row; const int mi_col = xd->mi_col; + av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize, AOM_PLANE_Y, AOM_PLANE_Y); int est_rate; @@ -3060,36 +3567,74 @@ if (is_pb_mv_precision_active(cm, mbmi, bsize) && (mbmi->pb_mv_precision < mbmi->max_mv_precision) && +#if CONFIG_SEP_COMP_DRL + (mbmi->ref_mv_idx[0] > 0 || mbmi->ref_mv_idx[1] > 0)) { +#else mbmi->ref_mv_idx > 0) { +#endif const int is_comp_pred = has_second_ref(mbmi); const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame); int_mv this_refmv[2]; this_refmv[0].as_int = 0; this_refmv[1].as_int = 0; for (int i = 0; i < is_comp_pred + 1; ++i) { +#if CONFIG_SEP_COMP_DRL + if (has_second_drl(mbmi)) + this_refmv[i] = + mbmi_ext->ref_mv_stack[mbmi->ref_frame[i]][mbmi->ref_mv_idx[i]] + .this_mv; + else + this_refmv[i] = + (i == 0) + ? mbmi_ext->ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx[0]] + .this_mv + : mbmi_ext->ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx[0]] + .comp_mv; +#else this_refmv[i] = (i == 0) ? 
mbmi_ext->ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx].this_mv : mbmi_ext->ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx] .comp_mv; +#endif #if CONFIG_C071_SUBBLK_WARPMV if (mbmi->pb_mv_precision < MV_PRECISION_HALF_PEL) #endif // CONFIG_C071_SUBBLK_WARPMV lower_mv_precision(&this_refmv[i].as_mv, mbmi->pb_mv_precision); } +#if CONFIG_SEP_COMP_DRL + const uint8_t ref_mv_idx_type = av1_ref_mv_idx_type(mbmi, mbmi->ref_mv_idx); + for (int prev_ref_mv_idx = 0; prev_ref_mv_idx < ref_mv_idx_type; + prev_ref_mv_idx++) { +#else for (int prev_ref_mv_idx = 0; prev_ref_mv_idx < mbmi->ref_mv_idx; prev_ref_mv_idx++) { +#endif int_mv prev_refmv[2]; prev_refmv[0].as_int = INVALID_MV; prev_refmv[1].as_int = INVALID_MV; for (int i = 0; i < is_comp_pred + 1; ++i) { +#if CONFIG_SEP_COMP_DRL + if (has_second_drl(mbmi)) { + int temp_idx[2]; + av1_set_ref_mv_idx(temp_idx, prev_ref_mv_idx); + prev_refmv[i] = + mbmi_ext->ref_mv_stack[mbmi->ref_frame[i]][temp_idx[i]].this_mv; + } else + prev_refmv[i] = + (i == 0) ? mbmi_ext->ref_mv_stack[ref_frame_type][prev_ref_mv_idx] + .this_mv + : mbmi_ext->ref_mv_stack[ref_frame_type][prev_ref_mv_idx] + .comp_mv; +#else prev_refmv[i] = (i == 0) ? mbmi_ext->ref_mv_stack[ref_frame_type][prev_ref_mv_idx] .this_mv : mbmi_ext->ref_mv_stack[ref_frame_type][prev_ref_mv_idx] .comp_mv; +#endif #if CONFIG_C071_SUBBLK_WARPMV if (mbmi->pb_mv_precision < MV_PRECISION_HALF_PEL) #endif // CONFIG_C071_SUBBLK_WARPMV @@ -3115,7 +3660,12 @@ RD_STATS *rd_stats, HandleInterModeArgs *const args, int64_t ref_best_rd, inter_mode_info *mode_info, - BLOCK_SIZE bsize, const int ref_set + BLOCK_SIZE bsize, +#if CONFIG_SEP_COMP_DRL + const int *ref_set +#else + const int ref_set +#endif #if CONFIG_FLEX_MVRES , const int flex_mv_cost @@ -3133,22 +3683,36 @@ // Only search indices if they have some chance of being good. 
int good_indices = 0; +#if CONFIG_SEP_COMP_DRL + int ref_mv_idx[2]; + for (ref_mv_idx[1] = 0; ref_mv_idx[1] < ref_set[1]; ++ref_mv_idx[1]) { + for (ref_mv_idx[0] = 0; ref_mv_idx[0] < ref_set[0]; ++ref_mv_idx[0]) { + int i = av1_ref_mv_idx_type(mbmi, ref_mv_idx); + if (ref_mv_idx_early_breakout(cpi, &cpi->ref_frame_dist_info, x, args, + ref_best_rd, ref_mv_idx)) { + continue; + } +#else for (int i = 0; i < ref_set; ++i) { if (ref_mv_idx_early_breakout(cpi, &cpi->ref_frame_dist_info, x, args, ref_best_rd, i)) { continue; } - mask_set_bit(&good_indices, i); +#endif + mask_set_bit(&good_indices, i); + } +#if CONFIG_SEP_COMP_DRL } +#endif // Always have at least one motion vector searched. if (!good_indices) { good_indices = 0x1; } - // Only prune in NEARMV mode, if the speed feature is set, and the block size - // is large enough. If these conditions are not met, return all good indices - // found so far. + // Only prune in NEARMV mode, if the speed feature is set, and the block + // size is large enough. If these conditions are not met, return all good + // indices found so far. if (!cpi->sf.inter_sf.prune_mode_search_simple_translation) return good_indices; if (!have_nearmv_in_inter_mode(this_mode)) return good_indices; @@ -3162,6 +3726,29 @@ } // Calculate the RD cost for the motion vectors using simple translation. +#if CONFIG_SEP_COMP_DRL + int64_t idx_rdcost[MAX_REF_MV_SEARCH * MAX_REF_MV_SEARCH]; + for (int i = 0; i < MAX_REF_MV_SEARCH * MAX_REF_MV_SEARCH; i++) + idx_rdcost[i] = INT64_MAX; + + for (ref_mv_idx[1] = 0; ref_mv_idx[1] < ref_set[1]; ++ref_mv_idx[1]) { + for (ref_mv_idx[0] = 0; ref_mv_idx[0] < ref_set[0]; ++ref_mv_idx[0]) { + int i = av1_ref_mv_idx_type(mbmi, ref_mv_idx); + + // If this index is bad, ignore it. 
+ if (!mask_check_bit(good_indices, i)) { + continue; + } + idx_rdcost[i] = simple_translation_pred_rd( + cpi, x, rd_stats, args, ref_mv_idx, mode_info, ref_best_rd, bsize +#if CONFIG_FLEX_MVRES + , + flex_mv_cost +#endif + ); + } + } +#else int64_t idx_rdcost[MAX_REF_MV_SEARCH]; for (int i = 0; i < MAX_REF_MV_SEARCH; i++) idx_rdcost[i] = INT64_MAX; for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) { @@ -3177,6 +3764,7 @@ #endif ); } +#endif // Find the index with the best RD cost. int best_idx = 0; // Find the 2nd best motion vector and search motion vectors within a @@ -3203,13 +3791,22 @@ // best RD, skip it. Note that the cutoff is derived experimentally. const double ref_dth = 5; int result = 0; +#if CONFIG_SEP_COMP_DRL + for (ref_mv_idx[1] = 0; ref_mv_idx[1] < ref_set[1]; ++ref_mv_idx[1]) { + for (ref_mv_idx[0] = 0; ref_mv_idx[0] < ref_set[0]; ++ref_mv_idx[0]) { + int i = av1_ref_mv_idx_type(mbmi, ref_mv_idx); +#else for (int i = 0; i < ref_set; ++i) { - if (mask_check_bit(good_indices, i) && - (1.0 * idx_rdcost[i]) < idx_rdcost[best_idx] * dth && - (1.0 * idx_rdcost[i]) < ref_best_rd * ref_dth) { - mask_set_bit(&result, i); +#endif + if (mask_check_bit(good_indices, i) && + (1.0 * idx_rdcost[i]) < idx_rdcost[best_idx] * dth && + (1.0 * idx_rdcost[i]) < ref_best_rd * ref_dth) { + mask_set_bit(&result, i); + } } +#if CONFIG_SEP_COMP_DRL } +#endif return result; } @@ -3408,8 +4005,8 @@ // Thresholds used for pruning: // Lower value indicates aggressive pruning and higher value indicates // conservative pruning which is set based on ref_mv_idx and speed feature. - // 'prune_index' 0, 1, 2 corresponds to ref_mv indices 0, 1 and 2. prune_index - // 3 corresponds to GLOBALMV/GLOBAL_GLOBALMV + // 'prune_index' 0, 1, 2 corresponds to ref_mv indices 0, 1 and 2. 
+ // prune_index 3 corresponds to GLOBALMV/GLOBAL_GLOBALMV static const int tpl_inter_mode_prune_mul_factor[2][MAX_REF_MV_SEARCH + 1] = { { 3, 3, 3, 2, 2, 2, 2, 2 }, { 3, 2, 2, 2, 2, 2, 2, 2 } }; @@ -3458,9 +4055,18 @@ RD_STATS *best_rd_stats_uv, inter_mode_info *mode_info, HandleInterModeArgs *args, int drl_cost, const MV_REFERENCE_FRAME *refs, int_mv *cur_mv, int64_t *best_rd, const BUFFER_SET orig_dst, +#if CONFIG_SEP_COMP_DRL + int ref_mv_idx[2]) { +#else int ref_mv_idx) { +#endif // This feature only works for NEWMV when a previous mv has been searched +#if CONFIG_SEP_COMP_DRL + if (this_mode != NEWMV || (ref_mv_idx[0] == 0 && ref_mv_idx[1] == 0)) + return 0; +#else if (this_mode != NEWMV || ref_mv_idx == 0) return 0; +#endif MACROBLOCKD *xd = &x->e_mbd; const AV1_COMMON *cm = &cpi->common; const int num_planes = av1_num_planes(cm); @@ -3478,105 +4084,154 @@ int skip = 0; int this_rate_mv = 0; int i; +#if CONFIG_SEP_COMP_DRL + int ref_mv_idx_type = av1_ref_mv_idx_type(mbmi, ref_mv_idx); + int temp_mv_idx[2]; + for (temp_mv_idx[1] = 0; temp_mv_idx[1] <= ref_mv_idx[1]; ++temp_mv_idx[1]) { + for (temp_mv_idx[0] = 0; temp_mv_idx[0] <= ref_mv_idx[0]; + ++temp_mv_idx[0]) { + if (temp_mv_idx[0] == ref_mv_idx[0] && temp_mv_idx[1] == ref_mv_idx[1]) + continue; + i = av1_ref_mv_idx_type(mbmi, temp_mv_idx); +#else for (i = 0; i < ref_mv_idx; ++i) { - // Check if the motion search result same as previous results +#endif + // Check if the motion search result same as previous results #if CONFIG_FLEX_MVRES - if (cur_mv[0].as_int == - args->single_newmv[pb_mv_precision][i][refs[0]].as_int && - args->single_newmv_valid[pb_mv_precision][i][refs[0]]) { +#if CONFIG_SEP_COMP_DRL + if (cur_mv[0].as_int == + args->single_newmv[pb_mv_precision][temp_mv_idx[0]][refs[0]] + .as_int && + args->single_newmv_valid[pb_mv_precision][temp_mv_idx[0]][refs[0]]) { +#else + if (cur_mv[0].as_int == + args->single_newmv[pb_mv_precision][i][refs[0]].as_int && + 
args->single_newmv_valid[pb_mv_precision][i][refs[0]]) { +#endif #else if (cur_mv[0].as_int == args->single_newmv[i][refs[0]].as_int && args->single_newmv_valid[i][refs[0]]) { #endif - // If the compared mode has no valid rd, it is unlikely this - // mode will be the best mode - if (mode_info[i].rd == INT64_MAX) { - skip = 1; - break; - } - // Compare the cost difference including drl cost and mv cost - if (mode_info[i].mv.as_int != INVALID_MV) { - const int compare_cost = mode_info[i].rate_mv + mode_info[i].drl_cost; - const int_mv ref_mv = av1_get_ref_mv(x, 0); + // If the compared mode has no valid rd, it is unlikely this + // mode will be the best mode + if (mode_info[i].rd == INT64_MAX) { + skip = 1; + break; + } + // Compare the cost difference including drl cost and mv cost + if (mode_info[i].mv.as_int != INVALID_MV) { + const int compare_cost = mode_info[i].rate_mv + mode_info[i].drl_cost; + const int_mv ref_mv = av1_get_ref_mv(x, 0); #if CONFIG_FLEX_MVRES - // Check if this MV is within mv_limit - SubpelMvLimits mv_limits; - av1_set_subpel_mv_search_range(&mv_limits, &x->mv_limits, &ref_mv.as_mv, - pb_mv_precision); - if (!av1_is_subpelmv_in_range(&mv_limits, mode_info[i].mv.as_mv)) - continue; + // Check if this MV is within mv_limit + SubpelMvLimits mv_limits; + av1_set_subpel_mv_search_range(&mv_limits, &x->mv_limits, + &ref_mv.as_mv, pb_mv_precision); + if (!av1_is_subpelmv_in_range(&mv_limits, mode_info[i].mv.as_mv)) + continue; - this_rate_mv = - av1_mv_bit_cost(&mode_info[i].mv.as_mv, &ref_mv.as_mv, - pb_mv_precision, &x->mv_costs, MV_COST_WEIGHT + this_rate_mv = + av1_mv_bit_cost(&mode_info[i].mv.as_mv, &ref_mv.as_mv, + pb_mv_precision, &x->mv_costs, MV_COST_WEIGHT #if CONFIG_ADAPTIVE_MVD - , - is_adaptive_mvd + , + is_adaptive_mvd #endif - ); + ); #else this_rate_mv = av1_mv_bit_cost( &mode_info[i].mv.as_mv, &ref_mv.as_mv, x->mv_costs.nmv_joint_cost, x->mv_costs.mv_cost_stack, MV_COST_WEIGHT); #endif - const int this_cost = this_rate_mv + 
drl_cost; + const int this_cost = this_rate_mv + drl_cost; - if (compare_cost <= this_cost) { - // Skip this mode if it is more expensive as the previous result - // for this MV - skip = 1; - break; - } else { - // If the cost is less than current best result, make this - // the best and update corresponding variables unless the - // best_mv is the same as ref_mv. In this case we skip and - // rely on NEAR(EST)MV instead + if (compare_cost <= this_cost) { + // Skip this mode if it is more expensive as the previous result + // for this MV + skip = 1; + break; + } else { + // If the cost is less than current best result, make this + // the best and update corresponding variables unless the + // best_mv is the same as ref_mv. In this case we skip and + // rely on NEAR(EST)MV instead +#if CONFIG_SEP_COMP_DRL + if (av1_ref_mv_idx_type(best_mbmi, best_mbmi->ref_mv_idx) == i && +#else if (best_mbmi->ref_mv_idx == i && - best_mbmi->mv[0].as_int != ref_mv.as_int +#endif + best_mbmi->mv[0].as_int != ref_mv.as_int #if CONFIG_FLEX_MVRES - && best_mbmi->pb_mv_precision == pb_mv_precision + && best_mbmi->pb_mv_precision == pb_mv_precision #endif #if CONFIG_BAWP - && best_mbmi->bawp_flag == bawp_flag + && best_mbmi->bawp_flag == bawp_flag #endif - ) { - assert(*best_rd != INT64_MAX); - assert(best_mbmi->mv[0].as_int == mode_info[i].mv.as_int); + ) { + assert(*best_rd != INT64_MAX); + assert(best_mbmi->mv[0].as_int == mode_info[i].mv.as_int); +#if CONFIG_SEP_COMP_DRL + best_mbmi->ref_mv_idx[0] = ref_mv_idx[0]; + best_mbmi->ref_mv_idx[1] = ref_mv_idx[1]; +#else best_mbmi->ref_mv_idx = ref_mv_idx; - motion_mode_cand->rate_mv = this_rate_mv; - best_rd_stats->rate += this_cost - compare_cost; - *best_rd = - RDCOST(x->rdmult, best_rd_stats->rate, best_rd_stats->dist); - // We also need to update mode_info here because we are setting - // (ref_)best_rd here. So we will not be able to search the same - // mode again with the current configuration. 
+#endif + motion_mode_cand->rate_mv = this_rate_mv; + best_rd_stats->rate += this_cost - compare_cost; + *best_rd = + RDCOST(x->rdmult, best_rd_stats->rate, best_rd_stats->dist); + // We also need to update mode_info here because we are setting + // (ref_)best_rd here. So we will not be able to search the same + // mode again with the current configuration. +#if CONFIG_SEP_COMP_DRL + mode_info[ref_mv_idx_type].mv.as_int = best_mbmi->mv[0].as_int; + mode_info[ref_mv_idx_type].rate_mv = this_rate_mv; + mode_info[ref_mv_idx_type].rd = *best_rd; +#else mode_info[ref_mv_idx].mv.as_int = best_mbmi->mv[0].as_int; mode_info[ref_mv_idx].rate_mv = this_rate_mv; mode_info[ref_mv_idx].rd = *best_rd; - if (*best_rd < *ref_best_rd) *ref_best_rd = *best_rd; - break; +#endif + if (*best_rd < *ref_best_rd) *ref_best_rd = *best_rd; + break; + } } } } } +#if CONFIG_SEP_COMP_DRL } +#endif if (skip) { // Collect mode stats for multiwinner mode processing store_winner_mode_stats( &cpi->common, x, best_mbmi, best_rd_stats, best_rd_stats_y, best_rd_stats_uv, refs, best_mbmi->mode, NULL, bsize, *best_rd, cpi->sf.winner_mode_sf.multi_winner_mode_type, do_tx_search); +#if CONFIG_SEP_COMP_DRL + args->modelled_rd[this_mode][ref_mv_idx[0]][refs[0]] = + args->modelled_rd[this_mode][i][refs[0]]; + args->simple_rd[this_mode][ref_mv_idx[0]][refs[0]] = + args->simple_rd[this_mode][i][refs[0]]; + mode_info[ref_mv_idx_type].rd = mode_info[i].rd; + mode_info[ref_mv_idx_type].rate_mv = this_rate_mv; +#else args->modelled_rd[this_mode][ref_mv_idx][refs[0]] = args->modelled_rd[this_mode][i][refs[0]]; args->simple_rd[this_mode][ref_mv_idx][refs[0]] = args->simple_rd[this_mode][i][refs[0]]; mode_info[ref_mv_idx].rd = mode_info[i].rd; mode_info[ref_mv_idx].rate_mv = this_rate_mv; +#endif #if CONFIG_FLEX_MVRES int_mv temp_mv = mode_info[i].mv; clamp_mv_in_range(x, &temp_mv, 0, pb_mv_precision); +#if CONFIG_SEP_COMP_DRL + mode_info[ref_mv_idx_type].mv.as_int = temp_mv.as_int; +#else 
mode_info[ref_mv_idx].mv.as_int = temp_mv.as_int; +#endif #else mode_info[ref_mv_idx].mv.as_int = mode_info[i].mv.as_int; #endif @@ -3589,7 +4244,8 @@ /*!\brief High level function to select parameters for compound mode. * * \ingroup inter_mode_search - * The main search functionality is done in the call to av1_compound_type_rd(). + * The main search functionality is done in the call to + av1_compound_type_rd(). * * \param[in] cpi Top-level encoder structure. * \param[in] x Pointer to struct holding all the data for @@ -3611,27 +4267,38 @@ * search. * \param[in,out] orig_dst A prediction buffer to hold a computed * prediction. This will eventually hold the - * final prediction, and the tmp_dst info will + * final prediction, and the tmp_dst info + will * be copied here. * \param[in] tmp_dst A temporary prediction buffer to hold a * computed prediction. - * \param[in,out] rate_mv The rate associated with the motion vectors. - * This will be modified if a motion search is + * \param[in,out] rate_mv The rate associated with the motion + vectors. + * This will be modified if a motion search + is * done in the motion mode search. * \param[in,out] rd_stats Struct to keep track of the overall RD * information. - * \param[in,out] skip_rd An array of length 2 where skip_rd[0] is the + * \param[in,out] skip_rd An array of length 2 where skip_rd[0] is + the * best total RD for a skip mode so far, and - * skip_rd[1] is the best RD for a skip mode so - * far in luma. This is used as a speed feature - * to skip the transform search if the computed + * skip_rd[1] is the best RD for a skip mode + so + * far in luma. This is used as a speed + feature + * to skip the transform search if the + computed * skip RD for the current mode is not better * than the best skip_rd so far. - * \param[in,out] skip_build_pred Indicates whether or not to build the inter - * predictor. 
If this is 0, the inter predictor - * has already been built and thus we can avoid + * \param[in,out] skip_build_pred Indicates whether or not to build the + inter + * predictor. If this is 0, the inter + predictor + * has already been built and thus we can + avoid * repeating computation. - * \return Returns 1 if this mode is worse than one already seen and 0 if it is + * \return Returns 1 if this mode is worse than one already seen and 0 if it + is * a viable candidate. */ static int process_compound_inter_mode( @@ -3643,11 +4310,22 @@ MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = xd->mi[0]; const AV1_COMMON *cm = &cpi->common; - const int masked_compound_used = is_any_masked_compound_used(bsize) && - cm->seq_params.enable_masked_compound; + const int masked_compound_used = + is_any_masked_compound_used(bsize) && + cm->seq_params.enable_masked_compound +#if CONFIG_REFINEMV + && (!mbmi->refinemv_flag || !switchable_refinemv_flag(cm, mbmi)) +#endif // CONFIG_REFINEMV + ; int mode_search_mask = (1 << COMPOUND_AVERAGE) | (1 << COMPOUND_WEDGE) | (1 << COMPOUND_DIFFWTD); +#if CONFIG_CWP + if (get_cwp_idx(mbmi) != CWP_EQUAL) { + mode_search_mask = (1 << COMPOUND_AVERAGE); + } +#endif // CONFIG_CWP + const int num_planes = av1_num_planes(cm); const int mi_row = xd->mi_row; const int mi_col = xd->mi_col; @@ -3697,7 +4375,11 @@ // Speed feature to prune out MVs that are similar to previous MVs if they // don't achieve the best RD advantage. static int prune_ref_mv_idx_search(const FeatureFlags *const features, +#if CONFIG_SEP_COMP_DRL + int ref_mv_idx[2], int best_ref_mv_idx[2], +#else int ref_mv_idx, int best_ref_mv_idx, +#endif int_mv save_mv[MAX_REF_MV_SEARCH - 1][2], MB_MODE_INFO *mbmi, int pruning_factor) { (void)features; @@ -3706,6 +4388,40 @@ const int thr = (1 + is_comp_pred) << (pruning_factor + 1); // Skip the evaluation if an MV match is found. 
+#if CONFIG_SEP_COMP_DRL + if (ref_mv_idx[0] > 0 || ref_mv_idx[1] > 0) { + int idx[2]; + for (idx[1] = 0; idx[1] <= ref_mv_idx[1]; ++idx[1]) { + for (idx[0] = 0; idx[0] <= ref_mv_idx[0]; ++idx[0]) { + if (idx[1] == ref_mv_idx[1] && idx[0] == ref_mv_idx[0]) continue; + + int idx_type = av1_ref_mv_idx_type(mbmi, idx); + + if (save_mv[idx_type][0].as_int == INVALID_MV) continue; + + int mv_diff = 0; + for (i = 0; i < 1 + is_comp_pred; ++i) { + mv_diff += + abs(save_mv[idx_type][i].as_mv.row - mbmi->mv[i].as_mv.row) + + abs(save_mv[idx_type][i].as_mv.col - mbmi->mv[i].as_mv.col); + } + + // If this mode is not the best one, and current MV is similar to + // previous stored MV, terminate this ref_mv_idx evaluation. + if ((best_ref_mv_idx[0] == -1 || best_ref_mv_idx[1] == -1) && + mv_diff <= thr) + return 1; + } + } + } + + if (ref_mv_idx[0] < features->max_drl_bits && + ref_mv_idx[1] < features->max_drl_bits) { + for (i = 0; i < is_comp_pred + 1; ++i) + save_mv[av1_ref_mv_idx_type(mbmi, ref_mv_idx)][i].as_int = + mbmi->mv[i].as_int; + } +#else if (ref_mv_idx > 0) { for (int idx = 0; idx < ref_mv_idx; ++idx) { if (save_mv[idx][0].as_int == INVALID_MV) continue; @@ -3726,16 +4442,120 @@ for (i = 0; i < is_comp_pred + 1; ++i) save_mv[ref_mv_idx][i].as_int = mbmi->mv[i].as_int; } - +#endif return 0; } +#if CONFIG_CWP +// Calculate SSE when using compound weighted prediction +uint64_t av1_cwp_sse_from_residuals_c(const int16_t *r1, const int16_t *d, + const int8_t *m, int N) { + uint64_t csse = 0; + int i; + + for (i = 0; i < N; i++) { + int32_t t = (1 << WEDGE_WEIGHT_BITS) * r1[i] + m[i] * d[i]; + t = clamp(t, INT16_MIN, INT16_MAX); + csse += t * t; + } + return ROUND_POWER_OF_TWO(csse, 2 * WEDGE_WEIGHT_BITS); +} + +// Select a subset of cwp weighting factors +static void set_cwp_search_mask(const AV1_COMP *const cpi, MACROBLOCK *const x, + const BLOCK_SIZE bsize, uint16_t *const p0, + uint16_t *const p1, int16_t *residual1, + int16_t *diff10, int stride, int *mask) { + 
MACROBLOCKD *xd = &x->e_mbd; + const int bw = block_size_wide[bsize]; + const int bh = block_size_high[bsize]; + // get inter predictors to use for masked compound modes + av1_build_inter_predictor_single_buf_y(xd, bsize, 0, p0, stride); + av1_build_inter_predictor_single_buf_y(xd, bsize, 1, p1, stride); + const struct buf_2d *const src = &x->plane[0].src; + + aom_highbd_subtract_block(bh, bw, residual1, bw, src->buf, src->stride, p1, + bw, xd->bd); + aom_highbd_subtract_block(bh, bw, diff10, bw, p1, bw, p0, bw, xd->bd); + + MB_MODE_INFO *const mbmi = xd->mi[0]; + + const AV1_COMMON *const cm = &cpi->common; + const int same_side = is_ref_frame_same_side(cm, mbmi); + + const int N = 1 << num_pels_log2_lookup[bsize]; + int rate; + int64_t dist; + int cwp_index; + int64_t best_rd = INT64_MAX; + const int bd_round = (xd->bd - 8) * 2; + + const int8_t *tmp_mask; + int rate_cwp_idx; + + int idx_list[MAX_CWP_NUM]; + int64_t cost_list[MAX_CWP_NUM]; + + for (int i = 0; i < MAX_CWP_NUM; i++) { + idx_list[i] = i; + cost_list[i] = INT64_MAX; + } + + for (cwp_index = 0; cwp_index < MAX_CWP_NUM; cwp_index++) { + if (cwp_index == 0) continue; + + tmp_mask = av1_get_cwp_mask(same_side, cwp_index); + + // compute rd for mask + uint64_t sse = av1_cwp_sse_from_residuals_c(residual1, diff10, tmp_mask, N); + sse = ROUND_POWER_OF_TWO(sse, bd_round); + + model_rd_sse_fn[MODELRD_TYPE_MASKED_COMPOUND](cpi, x, bsize, 0, sse, N, + &rate, &dist); + int8_t cur_cwp = cwp_weighting_factor[same_side][cwp_index]; + rate_cwp_idx = av1_get_cwp_idx_cost(cur_cwp, cm, x); + const int64_t rd0 = RDCOST(x->rdmult, rate + rate_cwp_idx, dist); + if (rd0 < best_rd) { + best_rd = rd0; + } + + cost_list[cwp_index] = rd0; + } + + // sort cwp in ascending order + for (int i = 0; i < MAX_CWP_NUM - 1; i++) { + for (int j = 0; j < (MAX_CWP_NUM - 1) - i; j++) { + if (cost_list[j] > cost_list[j + 1]) { + int64_t tmp_cost = cost_list[j]; + cost_list[j] = cost_list[j + 1]; + cost_list[j + 1] = tmp_cost; + + int 
tmp_idx = idx_list[j]; + idx_list[j] = idx_list[j + 1]; + idx_list[j + 1] = tmp_idx; + } + } + } + + int th = 2; + for (int i = 0; i < MAX_CWP_NUM; i++) { + if (i < th) { + mask[idx_list[i]] = 1; + } else { + mask[idx_list[i]] = 0; + } + } + + return; +} +#endif // CONFIG_CWP + /*!\brief AV1 inter mode RD computation * * \ingroup inter_mode_search - * Do the RD search for a given inter mode and compute all information relevant - * to the input mode. It will compute the best MV, - * compound parameters (if the mode is a compound mode) and interpolation filter + * Do the RD search for a given inter mode and compute all information + * relevant to the input mode. It will compute the best MV, compound + * parameters (if the mode is a compound mode) and interpolation filter * parameters. * * \param[in] cpi Top-level encoder structure. @@ -3769,31 +4589,25 @@ * \param[in] do_tx_search Parameter to indicate whether or not to do * a full transform search. This will compute * an estimated RD for the modes without the - * transform search and later perform the full - * transform search on the best candidates. - * \param[in,out] inter_modes_info InterModesInfo struct to hold inter mode - * information to perform a full transform - * search only on winning candidates searched - * with an estimate for transform coding RD. - * \param[in,out] motion_mode_cand A motion_mode_candidate struct to store - * motion mode information used in a speed - * feature to search motion modes other than - * SIMPLE_TRANSLATION only on winning - * candidates. - * \param[in,out] skip_rd A length 2 array, where skip_rd[0] is the - * best total RD for a skip mode so far, and - * skip_rd[1] is the best RD for a skip mode so - * far in luma. This is used as a speed feature - * to skip the transform search if the computed - * skip RD for the current mode is not better - * than the best skip_rd so far. - * \param[in] best_ref_mode Parameter to indicate the best mode so far. 
- * This is used as a speed feature to skip the + * transform search and later perform the + * full transform search on the best candidates. \param[in,out] + * inter_modes_info InterModesInfo struct to hold inter mode information to + * perform a full transform search only on winning candidates searched with an + * estimate for transform coding RD. \param[in,out] motion_mode_cand A + * motion_mode_candidate struct to store motion mode information used in a + * speed feature to search motion modes other than SIMPLE_TRANSLATION only on + * winning candidates. \param[in,out] skip_rd A length 2 array, + * where skip_rd[0] is the best total RD for a skip mode so far, and + * skip_rd[1] is the best RD for a skip mode + * so far in luma. This is used as a speed feature to skip the transform + * search if the computed skip RD for the current mode is not better than the + * best skip_rd so far. \param[in] best_ref_mode Parameter to indicate + * the best mode so far. This is used as a speed feature to skip the * additional scaling factors for joint mvd * coding mode. * \param[in] inter_cost_info_from_tpl A PruneInfoFromTpl struct used to - * narrow down the search based on data - * collected in the TPL model. + * narrow down the search based on + * data collected in the TPL model. * * \return The RD cost for the mode being searched. 
*/ @@ -3861,12 +4675,21 @@ int64_t newmv_ret_val = INT64_MAX; #if CONFIG_FLEX_MVRES #if CONFIG_BAWP +#if CONFIG_SEP_COMP_DRL + inter_mode_info mode_info[2][NUM_MV_PRECISIONS] + [MAX_REF_MV_SEARCH * MAX_REF_MV_SEARCH]; +#else inter_mode_info mode_info[2][NUM_MV_PRECISIONS][MAX_REF_MV_SEARCH]; +#endif // initialize mode_info for (int bawp = 0; bawp < 2; bawp++) { for (int prec = 0; prec < NUM_MV_PRECISIONS; prec++) { +#if CONFIG_SEP_COMP_DRL + for (int idx = 0; idx < MAX_REF_MV_SEARCH * MAX_REF_MV_SEARCH; idx++) { +#else for (int idx = 0; idx < MAX_REF_MV_SEARCH; idx++) { +#endif mode_info[bawp][prec][idx].full_search_mv.as_int = INVALID_MV; mode_info[bawp][prec][idx].mv.as_int = INVALID_MV; mode_info[bawp][prec][idx].rd = INT64_MAX; @@ -3921,6 +4744,10 @@ #if CONFIG_BAWP mbmi->bawp_flag = 0; #endif + +#if CONFIG_REFINEMV + mbmi->refinemv_flag = 0; +#endif // CONFIG_REFINEMV // Do not prune the mode based on inter cost from tpl if the current ref // frame is the winner ref in neighbouring blocks. int ref_match_found_in_above_nb = 0; @@ -3935,20 +4762,50 @@ // First, perform a simple translation search for each of the indices. If // an index performs well, it will be fully searched in the main loop // of this function. 
+#if CONFIG_SEP_COMP_DRL + int ref_set[2]; + ref_set[0] = get_drl_refmv_count(cm->features.max_drl_bits, x, + mbmi->ref_frame, this_mode, 0); + ref_set[1] = 1; + if (has_second_drl(mbmi)) { + ref_set[1] = get_drl_refmv_count(cm->features.max_drl_bits, x, + mbmi->ref_frame, this_mode, 1); + + if (mbmi->mode == NEAR_NEWMV) { + ref_set[0] = AOMMIN(ref_set[0], SEP_COMP_DRL_SIZE); + ref_set[1] = AOMMIN(ref_set[1], SEP_COMP_DRL_SIZE); + } else { + assert(mbmi->mode == NEAR_NEARMV); + } + } +#else const int ref_set = get_drl_refmv_count(cm->features.max_drl_bits, x, mbmi->ref_frame, this_mode); +#endif #if CONFIG_WARPMV +#if CONFIG_SEP_COMP_DRL + assert(IMPLIES(this_mode == WARPMV, ref_set[0] == 1)); +#else assert(IMPLIES(this_mode == WARPMV, ref_set == 1)); +#endif #endif // CONFIG_WARPMV // Save MV results from first 2 ref_mv_idx. #if CONFIG_FLEX_MVRES +#if CONFIG_SEP_COMP_DRL + int_mv save_mv[NUM_MV_PRECISIONS][MAX_REF_MV_SEARCH * MAX_REF_MV_SEARCH][2]; +#else int_mv save_mv[NUM_MV_PRECISIONS][MAX_REF_MV_SEARCH - 1][2]; +#endif #else int_mv save_mv[MAX_REF_MV_SEARCH - 1][2]; #endif +#if CONFIG_SEP_COMP_DRL + int best_ref_mv_idx[2] = { -1, -1 }; +#else int best_ref_mv_idx = -1; +#endif const int16_t mode_ctx = av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame); #if !CONFIG_FLEX_MVRES @@ -3986,7 +4843,11 @@ #if CONFIG_FLEX_MVRES for (int pb_mv_precision = mbmi->max_mv_precision; pb_mv_precision >= MV_PRECISION_8_PEL; pb_mv_precision--) { +#if CONFIG_SEP_COMP_DRL + for (i = 0; i < MAX_REF_MV_SEARCH * MAX_REF_MV_SEARCH - 1; ++i) { +#else for (i = 0; i < MAX_REF_MV_SEARCH - 1; ++i) { +#endif save_mv[pb_mv_precision][i][0].as_int = INVALID_MV; save_mv[pb_mv_precision][i][1].as_int = INVALID_MV; } @@ -4029,7 +4890,6 @@ idx_mask[0][pb_mv_precision] = ref_mv_idx_to_search( cpi, x, rd_stats, args, ref_best_rd, mode_info[0][pb_mv_precision], bsize, ref_set, flex_mv_cost[pb_mv_precision]); - if (cm->features.enable_bawp && av1_allow_bawp(mbmi, xd->mi_row, 
xd->mi_col)) { mbmi->bawp_flag = 1; @@ -4079,6 +4939,10 @@ // 4.) Build the inter predictor // 5.) Pick the motion mode // 6.) Update stats if best so far +#if CONFIG_REFINEMV + mbmi->refinemv_flag = + 0; // initialize to 0; later on the default value is assigned +#endif // CONFIG_REFINEMV #if CONFIG_IMPROVED_JMVD const int jmvd_scaling_factor_num = is_joint_mvd_coding_mode(mbmi->mode) ? JOINT_NEWMV_SCALE_FACTOR_CNT : 1; @@ -4096,270 +4960,412 @@ continue; } #endif // CONFIG_IMPROVED_JMVD - for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) { -#if CONFIG_IMPROVED_JMVD - // apply early termination method to jmvd scaling factors - if (cpi->sf.inter_sf.early_terminate_jmvd_scale_factor) { - if (scale_index > 0 && ref_mv_idx > 0 && - best_mbmi.jmvd_scale_mode == 0 && best_mbmi.ref_mv_idx < ref_mv_idx) - continue; - } -#endif // CONFIG_IMPROVED_JMVD -#if CONFIG_FLEX_MVRES - - // Initialize compound mode data - mbmi->interinter_comp.type = COMPOUND_AVERAGE; - mbmi->comp_group_idx = 0; - if (mbmi->ref_frame[1] == INTRA_FRAME) mbmi->ref_frame[1] = NONE_FRAME; - - mbmi->num_proj_ref = 0; - mbmi->motion_mode = SIMPLE_TRANSLATION; - mbmi->ref_mv_idx = ref_mv_idx; - set_mv_precision(mbmi, mbmi->max_mv_precision); - if ( -#if CONFIG_WARPMV - mbmi->mode != WARPMV && -#endif // CONFIG_WARPMV - prune_modes_based_on_tpl && !ref_match_found_in_above_nb && - !ref_match_found_in_left_nb && (ref_best_rd != INT64_MAX)) { - // Skip mode if TPL model indicates it will not be beneficial. 
- if (prune_modes_based_on_tpl_stats( - &cm->features, inter_cost_info_from_tpl, refs, ref_mv_idx, - this_mode, cpi->sf.inter_sf.prune_inter_modes_based_on_tpl)) - continue; - } - const int drl_cost = - get_drl_cost(cm->features.max_drl_bits, mbmi, mbmi_ext, x); - -#if CONFIG_FLEX_MVRES - MvSubpelPrecision best_precision_so_far = mbmi->max_mv_precision; - int64_t best_precision_rd_so_far = INT64_MAX; - set_precision_set(cm, xd, mbmi, bsize, ref_mv_idx); - set_most_probable_mv_precision(cm, mbmi, bsize); - const PRECISION_SET *precision_def = - &av1_mv_precision_sets[mbmi->mb_precision_set]; - for (int precision_dx = precision_def->num_precisions - 1; - precision_dx >= 0; precision_dx--) { - MvSubpelPrecision pb_mv_precision = - precision_def->precision[precision_dx]; - mbmi->pb_mv_precision = pb_mv_precision; - if (!is_pb_mv_precision_active(cm, mbmi, bsize) && - (pb_mv_precision != mbmi->max_mv_precision)) { - continue; - } - assert(pb_mv_precision <= mbmi->max_mv_precision); +#if CONFIG_CWP + int best_cwp_idx = CWP_EQUAL; + int64_t best_cwp_cost = INT64_MAX; +#endif // CONFIG_CWP +#if CONFIG_SEP_COMP_DRL + int ref_mv_idx[2]; + for (ref_mv_idx[1] = 0; ref_mv_idx[1] < ref_set[1]; ++ref_mv_idx[1]) { + for (ref_mv_idx[0] = 0; ref_mv_idx[0] < ref_set[0]; ++ref_mv_idx[0]) { +#else + for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) { +#endif // CONFIG_SEP_COMP_DRL #if CONFIG_IMPROVED_JMVD // apply early termination method to jmvd scaling factors if (cpi->sf.inter_sf.early_terminate_jmvd_scale_factor) { - if (scale_index > 0 && (!is_inter_compound_mode(best_ref_mode)) && - mbmi->pb_mv_precision <= MV_PRECISION_HALF_PEL && +#if CONFIG_SEP_COMP_DRL + if (scale_index > 0 && (ref_mv_idx[0] > 0 || ref_mv_idx[1] > 0) && best_mbmi.jmvd_scale_mode == 0 && - best_mbmi.pb_mv_precision > MV_PRECISION_HALF_PEL) + (best_mbmi.ref_mv_idx[0] < ref_mv_idx[0] || + best_mbmi.ref_mv_idx[1] < ref_mv_idx[1])) +#else + if (scale_index > 0 && ref_mv_idx > 0 && + 
best_mbmi.jmvd_scale_mode == 0 && + best_mbmi.ref_mv_idx < ref_mv_idx) +#endif // CONFIG_SEP_COMP_DRL continue; } #endif // CONFIG_IMPROVED_JMVD - - if (is_pb_mv_precision_active(cm, mbmi, bsize)) { - if (cpi->sf.flexmv_sf.terminate_early_4_pel_precision && - pb_mv_precision < MV_PRECISION_FOUR_PEL && - best_precision_so_far >= MV_PRECISION_QTR_PEL) - continue; - if (mbmi->ref_mv_idx) { - if (cpi->sf.flexmv_sf.do_not_search_8_pel_precision && - mbmi->pb_mv_precision == MV_PRECISION_8_PEL) - continue; - - if (cpi->sf.flexmv_sf.do_not_search_4_pel_precision && - mbmi->pb_mv_precision == MV_PRECISION_FOUR_PEL) - continue; - } - } - -#endif -#endif - -#if !CONFIG_FLEX_MVRES && !CONFIG_BAWP - mode_info[ref_mv_idx].full_search_mv.as_int = INVALID_MV; - mode_info[ref_mv_idx].mv.as_int = INVALID_MV; - mode_info[ref_mv_idx].rd = INT64_MAX; - if ( -#if CONFIG_WARPMV - mbmi->mode != WARPMV && -#endif // CONFIG_WARPMV - - !mask_check_bit(idx_mask, ref_mv_idx)) { - // MV did not perform well in simple translation search. Skip it. - continue; - } -#endif // !CONFIG_FLEX_MVRES && !CONFIG_BAWP -#if !CONFIG_FLEX_MVRES - if (prune_modes_based_on_tpl && !ref_match_found_in_above_nb && - !ref_match_found_in_left_nb && (ref_best_rd != INT64_MAX)) { - // Skip mode if TPL model indicates it will not be beneficial. 
- if (prune_modes_based_on_tpl_stats( - &cm->features, inter_cost_info_from_tpl, refs, ref_mv_idx, - this_mode, cpi->sf.inter_sf.prune_inter_modes_based_on_tpl)) - continue; - } - av1_init_rd_stats(rd_stats); - // Initialize compound mode data - mbmi->interinter_comp.type = COMPOUND_AVERAGE; - mbmi->comp_group_idx = 0; - if (mbmi->ref_frame[1] == INTRA_FRAME) mbmi->ref_frame[1] = NONE_FRAME; - - mbmi->num_proj_ref = 0; - mbmi->motion_mode = SIMPLE_TRANSLATION; - mbmi->ref_mv_idx = ref_mv_idx; - // Compute cost for signalling this DRL index - rd_stats->rate = base_rate; - const int drl_cost = - get_drl_cost(cm->features.max_drl_bits, mbmi, mbmi_ext, x); - - rd_stats->rate += drl_cost; -#if CONFIG_BAWP - mode_info[0][ref_mv_idx].drl_cost = drl_cost; - mode_info[1][ref_mv_idx].drl_cost = drl_cost; +#if CONFIG_CWP + mbmi->cwp_idx = CWP_EQUAL; + const int same_side = is_ref_frame_same_side(cm, mbmi); + int cwp_loop_num = cm->features.enable_cwp ? MAX_CWP_NUM : 1; +#if CONFIG_SEP_COMP_DRL + if (best_cwp_idx == CWP_EQUAL && + (ref_mv_idx[0] > 0 || ref_mv_idx[1] > 0)) + cwp_loop_num = 1; #else - mode_info[ref_mv_idx].drl_cost = drl_cost; -#endif -#endif //! CONFIG_FLEX_MVRES + if (best_cwp_idx == CWP_EQUAL && ref_mv_idx > 0) cwp_loop_num = 1; +#endif // CONFIG_SEP_COMP_DRL - int rs = 0; - int compmode_interinter_cost = 0; - int_mv cur_mv[2]; - // TODO(Cherma): Extend this speed feature to support compound mode - int skip_repeated_ref_mv = - is_comp_pred ? 
0 : cpi->sf.inter_sf.skip_repeated_ref_mv; - // Generate the current mv according to the prediction mode - if ( -#if CONFIG_WARPMV - mbmi->mode != WARPMV && -#endif // CONFIG_WARPMV - !build_cur_mv(cur_mv, this_mode, cm, x, skip_repeated_ref_mv)) { - continue; - } -#if CONFIG_WARPMV - // For WARPMV mode we will build MV in the later stage - // Currently initialize to 0 - if (mbmi->mode == WARPMV) { - cur_mv[0].as_int = 0; - cur_mv[1].as_int = 0; - assert(ref_mv_idx == 0); - } -#endif // CONFIG_WARPMV + int cwp_search_mask[MAX_CWP_NUM] = { 0 }; + av1_zero(cwp_search_mask); + // Loop all supported weighting factors for CWP + for (int cwp_search_idx = 0; cwp_search_idx < cwp_loop_num; + cwp_search_idx++) { +#if CONFIG_SEP_COMP_DRL + mbmi->ref_mv_idx[1] = ref_mv_idx[1]; + mbmi->ref_mv_idx[0] = ref_mv_idx[0]; +#else + mbmi->ref_mv_idx = ref_mv_idx; +#endif // CONFIG_SEP_COMP_DRL + mbmi->interinter_comp.type = COMPOUND_AVERAGE; + mbmi->comp_group_idx = 0; + mbmi->motion_mode = SIMPLE_TRANSLATION; + mbmi->cwp_idx = cwp_weighting_factor[same_side][cwp_search_idx]; + + if (mbmi->cwp_idx != CWP_EQUAL) { + if (!is_cwp_allowed(mbmi)) break; + if (cwp_search_mask[cwp_search_idx] == 0) { + continue; + } + } + if (mbmi->cwp_idx == -1) { + break; + } +#endif // CONFIG_CWP #if CONFIG_FLEX_MVRES -#if !CONFIG_BAWP - mode_info[mbmi->pb_mv_precision][ref_mv_idx].full_search_mv.as_int = - INVALID_MV; - mode_info[mbmi->pb_mv_precision][ref_mv_idx].mv.as_int = INVALID_MV; - mode_info[mbmi->pb_mv_precision][ref_mv_idx].rd = INT64_MAX; - mode_info[mbmi->pb_mv_precision][ref_mv_idx].drl_cost = drl_cost; - if ( -#if CONFIG_WARPMV - mbmi->mode != WARPMV && -#endif // CONFIG_WARPMV - !mask_check_bit(idx_mask[mbmi->pb_mv_precision], ref_mv_idx)) { - // MV did not perform well in simple translation search. Skip it. 
- continue; - } -#endif - if ( -#if CONFIG_WARPMV - mbmi->mode != WARPMV && -#endif // CONFIG_WARPMV - cpi->sf.flexmv_sf.skip_similar_ref_mv && - skip_similar_ref_mv(cpi, x, bsize)) { - continue; - } + // Initialize compound mode data + mbmi->interinter_comp.type = COMPOUND_AVERAGE; + mbmi->comp_group_idx = 0; + if (mbmi->ref_frame[1] == INTRA_FRAME) + mbmi->ref_frame[1] = NONE_FRAME; -#if CONFIG_WARPMV - assert(IMPLIES(mbmi->mode == WARPMV, - mbmi->pb_mv_precision == mbmi->max_mv_precision)); -#endif // CONFIG_WARPMV -#endif - -#if CONFIG_BAWP - int_mv bawp_off_mv[2]; - int64_t bawp_off_newmv_ret_val = 0; - for (i = 0; i < is_comp_pred + 1; ++i) { - bawp_off_mv[i].as_int = cur_mv[i].as_int; - } - int bawp_eanbled = cm->features.enable_bawp && - av1_allow_bawp(mbmi, xd->mi_row, xd->mi_col); - for (int bawp_flag = 0; bawp_flag <= bawp_eanbled; bawp_flag++) { - mbmi->bawp_flag = bawp_flag; - -#if CONFIG_FLEX_MVRES - mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx] - .full_search_mv.as_int = INVALID_MV; - mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx].mv.as_int = - INVALID_MV; - mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx].rd = - INT64_MAX; - mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx].drl_cost = - drl_cost; - + mbmi->num_proj_ref = 0; + mbmi->motion_mode = SIMPLE_TRANSLATION; +#if CONFIG_SEP_COMP_DRL + mbmi->ref_mv_idx[1] = ref_mv_idx[1]; + mbmi->ref_mv_idx[0] = ref_mv_idx[0]; + int ref_mv_idx_type = av1_ref_mv_idx_type(mbmi, ref_mv_idx); +#else + mbmi->ref_mv_idx = ref_mv_idx; +#endif // CONFIG_SEP_COMP_DRL + set_mv_precision(mbmi, mbmi->max_mv_precision); if ( #if CONFIG_WARPMV mbmi->mode != WARPMV && #endif // CONFIG_WARPMV - !mask_check_bit(idx_mask[bawp_flag][mbmi->pb_mv_precision], - ref_mv_idx)) { - // MV did not perform well in simple translation search. Skip it. 
- continue; - } + prune_modes_based_on_tpl && !ref_match_found_in_above_nb && + !ref_match_found_in_left_nb && (ref_best_rd != INT64_MAX)) { + // Skip mode if TPL model indicates it will not be beneficial. + if (prune_modes_based_on_tpl_stats( +#if CONFIG_SEP_COMP_DRL + &cm->features, inter_cost_info_from_tpl, refs, + ref_mv_idx[0], #else - mode_info[bawp_flag][ref_mv_idx].full_search_mv.as_int = INVALID_MV; - mode_info[bawp_flag][ref_mv_idx].mv.as_int = INVALID_MV; - mode_info[bawp_flag][ref_mv_idx].rd = INT64_MAX; - mode_info[bawp_flag][ref_mv_idx].drl_cost = drl_cost; + &cm->features, inter_cost_info_from_tpl, refs, ref_mv_idx, +#endif // CONFIG_SEP_COMP_DRL + this_mode, cpi->sf.inter_sf.prune_inter_modes_based_on_tpl)) + continue; + } + const int drl_cost = + get_drl_cost(cm->features.max_drl_bits, mbmi, mbmi_ext, x); - if ( -#if CONFIG_WARPMV - mbmi->mode != WARPMV && -#endif // CONFIG_WARPMV - !mask_check_bit(idx_mask[bawp_flag], ref_mv_idx)) { - // MV did not perform well in simple translation search. Skip it. 
- continue; - } -#endif // CONFIG_FLEX_MVRES - if (mbmi->bawp_flag == 1) { - for (i = 0; i < is_comp_pred + 1; ++i) { - mbmi->mv[i].as_int = bawp_off_mv[i].as_int; - cur_mv[i].as_int = bawp_off_mv[i].as_int; +#if CONFIG_FLEX_MVRES + MvSubpelPrecision best_precision_so_far = mbmi->max_mv_precision; + int64_t best_precision_rd_so_far = INT64_MAX; + set_precision_set(cm, xd, mbmi, bsize, ref_mv_idx); + set_most_probable_mv_precision(cm, mbmi, bsize); + const PRECISION_SET *precision_def = + &av1_mv_precision_sets[mbmi->mb_precision_set]; + for (int precision_dx = precision_def->num_precisions - 1; + precision_dx >= 0; precision_dx--) { + MvSubpelPrecision pb_mv_precision = + precision_def->precision[precision_dx]; + mbmi->pb_mv_precision = pb_mv_precision; + if (!is_pb_mv_precision_active(cm, mbmi, bsize) && + (pb_mv_precision != mbmi->max_mv_precision)) { + continue; + } + assert(pb_mv_precision <= mbmi->max_mv_precision); +#if CONFIG_IMPROVED_JMVD + // apply early termination method to jmvd scaling factors + if (cpi->sf.inter_sf.early_terminate_jmvd_scale_factor) { + if (scale_index > 0 && (!is_inter_compound_mode(best_ref_mode)) && + mbmi->pb_mv_precision <= MV_PRECISION_HALF_PEL && + best_mbmi.jmvd_scale_mode == 0 && + best_mbmi.pb_mv_precision > MV_PRECISION_HALF_PEL) + continue; + } +#endif // CONFIG_IMPROVED_JMVD + + if (is_pb_mv_precision_active(cm, mbmi, bsize)) { + if (cpi->sf.flexmv_sf.terminate_early_4_pel_precision && + pb_mv_precision < MV_PRECISION_FOUR_PEL && + best_precision_so_far >= MV_PRECISION_QTR_PEL) + continue; +#if CONFIG_SEP_COMP_DRL + if (mbmi->ref_mv_idx[0] || mbmi->ref_mv_idx[1]) { +#else + if (mbmi->ref_mv_idx) { +#endif // CONFIG_SEP_COMP_DRL + if (cpi->sf.flexmv_sf.do_not_search_8_pel_precision && + mbmi->pb_mv_precision == MV_PRECISION_8_PEL) + continue; + + if (cpi->sf.flexmv_sf.do_not_search_4_pel_precision && + mbmi->pb_mv_precision == MV_PRECISION_FOUR_PEL) + continue; + } } -#if CONFIG_FLEX_MVRES - 
mode_info[1][mbmi->pb_mv_precision][ref_mv_idx] - .full_search_mv.as_int = - mode_info[0][mbmi->pb_mv_precision][ref_mv_idx] - .full_search_mv.as_int; - mode_info[1][mbmi->pb_mv_precision][ref_mv_idx].full_mv_rate = - mode_info[0][mbmi->pb_mv_precision][ref_mv_idx].full_mv_rate; +#endif +#endif + +#if CONFIG_REFINEMV + // Get the default value of DMVR flag based on mode + mbmi->refinemv_flag = get_default_refinemv_flag(cm, mbmi); +#endif // CONFIG_REFINEMV + +#if !CONFIG_FLEX_MVRES && !CONFIG_BAWP + mode_info[ref_mv_idx].full_search_mv.as_int = INVALID_MV; + mode_info[ref_mv_idx].mv.as_int = INVALID_MV; + mode_info[ref_mv_idx].rd = INT64_MAX; + if ( +#if CONFIG_WARPMV + mbmi->mode != WARPMV && +#endif // CONFIG_WARPMV + + !mask_check_bit(idx_mask, ref_mv_idx)) { + // MV did not perform well in simple translation search. Skip it. + continue; + } +#endif // !CONFIG_FLEX_MVRES && !CONFIG_BAWP +#if !CONFIG_FLEX_MVRES + if (prune_modes_based_on_tpl && !ref_match_found_in_above_nb && + !ref_match_found_in_left_nb && (ref_best_rd != INT64_MAX)) { + // Skip mode if TPL model indicates it will not be beneficial. 
+ if (prune_modes_based_on_tpl_stats( + &cm->features, inter_cost_info_from_tpl, refs, ref_mv_idx, + this_mode, + cpi->sf.inter_sf.prune_inter_modes_based_on_tpl)) + continue; + } + av1_init_rd_stats(rd_stats); + // Initialize compound mode data + mbmi->interinter_comp.type = COMPOUND_AVERAGE; + mbmi->comp_group_idx = 0; + if (mbmi->ref_frame[1] == INTRA_FRAME) + mbmi->ref_frame[1] = NONE_FRAME; + + mbmi->num_proj_ref = 0; + mbmi->motion_mode = SIMPLE_TRANSLATION; + mbmi->ref_mv_idx = ref_mv_idx; + // Compute cost for signalling this DRL index + rd_stats->rate = base_rate; + const int drl_cost = + get_drl_cost(cm->features.max_drl_bits, mbmi, mbmi_ext, x); + + rd_stats->rate += drl_cost; +#if CONFIG_BAWP + mode_info[0][ref_mv_idx].drl_cost = drl_cost; + mode_info[1][ref_mv_idx].drl_cost = drl_cost; #else - mode_info[1][ref_mv_idx].full_search_mv.as_int = - mode_info[0][ref_mv_idx].full_search_mv.as_int; - mode_info[1][ref_mv_idx].full_mv_rate = - mode_info[0][ref_mv_idx].full_mv_rate; + mode_info[ref_mv_idx].drl_cost = drl_cost; +#endif +#endif //! CONFIG_FLEX_MVRES + + int rs = 0; + int compmode_interinter_cost = 0; + int_mv cur_mv[2]; + // TODO(Cherma): Extend this speed feature to support compound mode + int skip_repeated_ref_mv = + is_comp_pred ? 
0 : cpi->sf.inter_sf.skip_repeated_ref_mv; + // Generate the current mv according to the prediction mode + if ( +#if CONFIG_WARPMV + mbmi->mode != WARPMV && +#endif // CONFIG_WARPMV + !build_cur_mv(cur_mv, this_mode, cm, x, skip_repeated_ref_mv)) { + continue; + } +#if CONFIG_WARPMV + // For WARPMV mode we will build MV in the later stage + // Currently initialize to 0 + if (mbmi->mode == WARPMV) { + cur_mv[0].as_int = 0; + cur_mv[1].as_int = 0; + +#if CONFIG_SEP_COMP_DRL + assert(ref_mv_idx[0] == 0 && ref_mv_idx[1] == 0); +#else + assert(ref_mv_idx == 0); +#endif // CONFIG_SEP_COMP_DRL + } +#endif // CONFIG_WARPMV + +#if CONFIG_FLEX_MVRES +#if !CONFIG_BAWP + mode_info[mbmi->pb_mv_precision][ref_mv_idx].full_search_mv.as_int = + INVALID_MV; + mode_info[mbmi->pb_mv_precision][ref_mv_idx].mv.as_int = INVALID_MV; + mode_info[mbmi->pb_mv_precision][ref_mv_idx].rd = INT64_MAX; + mode_info[mbmi->pb_mv_precision][ref_mv_idx].drl_cost = drl_cost; + if ( +#if CONFIG_WARPMV + mbmi->mode != WARPMV && +#endif // CONFIG_WARPMV + !mask_check_bit(idx_mask[mbmi->pb_mv_precision], ref_mv_idx)) { + // MV did not perform well in simple translation search. Skip it. 
+ continue; + } +#endif + + if ( +#if CONFIG_WARPMV + mbmi->mode != WARPMV && +#endif // CONFIG_WARPMV + cpi->sf.flexmv_sf.skip_similar_ref_mv && + skip_similar_ref_mv(cpi, x, bsize)) { + continue; + } + +#if CONFIG_WARPMV + assert(IMPLIES(mbmi->mode == WARPMV, + mbmi->pb_mv_precision == mbmi->max_mv_precision)); +#endif // CONFIG_WARPMV +#endif + +#if CONFIG_BAWP + int_mv bawp_off_mv[2]; + int64_t bawp_off_newmv_ret_val = 0; +#if BAWP_BUGFIX + int bawp_off_rate_mv = 0; +#endif + for (i = 0; i < is_comp_pred + 1; ++i) { + bawp_off_mv[i].as_int = cur_mv[i].as_int; + } + int bawp_eanbled = cm->features.enable_bawp && +#if CONFIG_REFINEMV + !mbmi->refinemv_flag && +#endif // CONFIG_REFINEMV + av1_allow_bawp(mbmi, xd->mi_row, xd->mi_col); + for (int bawp_flag = 0; bawp_flag <= bawp_eanbled; bawp_flag++) { + mbmi->bawp_flag = bawp_flag; + +#if CONFIG_FLEX_MVRES +#if CONFIG_SEP_COMP_DRL + mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx_type] + .full_search_mv.as_int = INVALID_MV; + mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx_type] + .mv.as_int = INVALID_MV; + mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx_type].rd = + INT64_MAX; + mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx_type] + .drl_cost = drl_cost; + if ( +#if CONFIG_WARPMV + mbmi->mode != WARPMV && +#endif // CONFIG_WARPMV +#if CONFIG_REFINEMV + !mbmi->refinemv_flag && +#endif // CONFIG_REFINEMV + !mask_check_bit(idx_mask[bawp_flag][mbmi->pb_mv_precision], + ref_mv_idx_type)) { + // MV did not perform well in simple translation search. Skip + // it. 
+ continue; + } +#else + mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx] + .full_search_mv.as_int = INVALID_MV; + mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx] + .mv.as_int = INVALID_MV; + mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx].rd = + INT64_MAX; + mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx].drl_cost = + drl_cost; + + if ( +#if CONFIG_WARPMV + mbmi->mode != WARPMV && +#endif // CONFIG_WARPMV +#if CONFIG_REFINEMV + !mbmi->refinemv_flag && +#endif // CONFIG_REFINEMV + !mask_check_bit(idx_mask[bawp_flag][mbmi->pb_mv_precision], + ref_mv_idx)) { + // MV did not perform well in simple translation search. Skip + // it. + continue; + } +#endif // CONFIG_SEP_COMP_DRL +#else + mode_info[bawp_flag][ref_mv_idx].full_search_mv.as_int = INVALID_MV; + mode_info[bawp_flag][ref_mv_idx].mv.as_int = INVALID_MV; + mode_info[bawp_flag][ref_mv_idx].rd = INT64_MAX; + mode_info[bawp_flag][ref_mv_idx].drl_cost = drl_cost; + + if ( +#if CONFIG_WARPMV + mbmi->mode != WARPMV && +#endif // CONFIG_WARPMV +#if CONFIG_REFINEMV + !mbmi->refinemv_flag && +#endif // CONFIG_REFINEMV + !mask_check_bit(idx_mask[bawp_flag], ref_mv_idx)) { + // MV did not perform well in simple translation search. Skip it. 
+ continue; + } #endif // CONFIG_FLEX_MVRES - if (bawp_off_newmv_ret_val != 0) continue; - } else { + +#if CONFIG_REFINEMV + assert(!(mbmi->bawp_flag && mbmi->refinemv_flag)); +#endif // CONFIG_REFINEMV + + if (mbmi->bawp_flag == 1) { + for (i = 0; i < is_comp_pred + 1; ++i) { + mbmi->mv[i].as_int = bawp_off_mv[i].as_int; + cur_mv[i].as_int = bawp_off_mv[i].as_int; + } + +#if CONFIG_FLEX_MVRES +#if CONFIG_SEP_COMP_DRL + mode_info[1][mbmi->pb_mv_precision][ref_mv_idx_type] + .full_search_mv.as_int = + mode_info[0][mbmi->pb_mv_precision][ref_mv_idx_type] + .full_search_mv.as_int; + mode_info[1][mbmi->pb_mv_precision][ref_mv_idx_type] + .full_mv_rate = + mode_info[0][mbmi->pb_mv_precision][ref_mv_idx_type] + .full_mv_rate; +#else + mode_info[1][mbmi->pb_mv_precision][ref_mv_idx] + .full_search_mv.as_int = + mode_info[0][mbmi->pb_mv_precision][ref_mv_idx] + .full_search_mv.as_int; + mode_info[1][mbmi->pb_mv_precision][ref_mv_idx].full_mv_rate = + mode_info[0][mbmi->pb_mv_precision][ref_mv_idx] + .full_mv_rate; +#endif // CONFIG_SEP_COMP_DRL +#else + mode_info[1][ref_mv_idx].full_search_mv.as_int = + mode_info[0][ref_mv_idx].full_search_mv.as_int; + mode_info[1][ref_mv_idx].full_mv_rate = + mode_info[0][ref_mv_idx].full_mv_rate; +#endif // CONFIG_FLEX_MVRES +#if BAWP_BUGFIX + rate_mv = bawp_off_rate_mv; #endif - // The above call to build_cur_mv does not handle NEWMV modes. Build - // the mv here if we have NEWMV for any predictors. - if (have_newmv_in_inter_mode(this_mode)) { + if (bawp_off_newmv_ret_val != 0) continue; + } else { +#endif // CONFIG_BAWP + // The above call to build_cur_mv does not handle NEWMV modes. + // Build the mv here if we have NEWMV for any predictors. 
+ if (have_newmv_in_inter_mode(this_mode)) { #if CONFIG_COLLECT_COMPONENT_TIMING - start_timing(cpi, handle_newmv_time); + start_timing(cpi, handle_newmv_time); #endif - newmv_ret_val = - handle_newmv(cpi, x, bsize, cur_mv, &rate_mv, args, + newmv_ret_val = + handle_newmv(cpi, x, bsize, cur_mv, &rate_mv, args, #if CONFIG_FLEX_MVRES #if CONFIG_BAWP - mode_info[bawp_flag][mbmi->pb_mv_precision]); + mode_info[bawp_flag][mbmi->pb_mv_precision]); #else - mode_info[mbmi->pb_mv_precision]); + mode_info[mbmi->pb_mv_precision]); #endif #else #if CONFIG_BAWP @@ -4369,67 +5375,71 @@ #endif #endif #if CONFIG_COLLECT_COMPONENT_TIMING - end_timing(cpi, handle_newmv_time); + end_timing(cpi, handle_newmv_time); #endif #if CONFIG_BAWP - for (i = 0; i < is_comp_pred + 1; ++i) { - bawp_off_mv[i].as_int = cur_mv[i].as_int; +#if BAWP_BUGFIX + bawp_off_rate_mv = rate_mv; +#endif + for (i = 0; i < is_comp_pred + 1; ++i) { + bawp_off_mv[i].as_int = cur_mv[i].as_int; + } + bawp_off_newmv_ret_val = newmv_ret_val; + if (newmv_ret_val != 0) continue; + } } - bawp_off_newmv_ret_val = newmv_ret_val; - if (newmv_ret_val != 0) continue; - } - } - if (have_newmv_in_inter_mode(this_mode)) { + if (have_newmv_in_inter_mode(this_mode)) { #else if (newmv_ret_val != 0) continue; #endif #if CONFIG_C071_SUBBLK_WARPMV && CONFIG_FLEX_MVRES - int mv_outlim = 0; - for (int ref = 0; ref < is_comp_pred + 1; ref++) { - const PREDICTION_MODE single_mode = - get_single_mode(this_mode, ref); - if (single_mode == NEWMV) { - SUBPEL_MOTION_SEARCH_PARAMS ms_params; - MV ref_mv = av1_get_ref_mv(x, ref).as_mv; - if (mbmi->pb_mv_precision < MV_PRECISION_HALF_PEL) - lower_mv_precision(&ref_mv, mbmi->pb_mv_precision); - av1_make_default_subpel_ms_params( - &ms_params, cpi, x, bsize, &ref_mv, pb_mv_precision, NULL); - if (!av1_is_subpelmv_in_range(&ms_params.mv_limits, - cur_mv[ref].as_mv)) { - mv_outlim = 1; - break; + int mv_outlim = 0; + for (int ref = 0; ref < is_comp_pred + 1; ref++) { + const PREDICTION_MODE 
single_mode = + get_single_mode(this_mode, ref); + if (single_mode == NEWMV) { + SUBPEL_MOTION_SEARCH_PARAMS ms_params; + MV ref_mv = av1_get_ref_mv(x, ref).as_mv; + if (mbmi->pb_mv_precision < MV_PRECISION_HALF_PEL) + lower_mv_precision(&ref_mv, mbmi->pb_mv_precision); + av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize, + &ref_mv, pb_mv_precision, + NULL); + if (!av1_is_subpelmv_in_range(&ms_params.mv_limits, + cur_mv[ref].as_mv)) { + mv_outlim = 1; + break; + } + } } - } - } - if (mv_outlim) continue; + if (mv_outlim) continue; #endif // CONFIG_C071_SUBBLK_WARPMV && CONFIG_FLEX_MVRES - // skip NEWMV mode in drl if the motion search result is the same - // as a previous result + // skip NEWMV mode in drl if the motion search result is the + // same as a previous result #if CONFIG_FLEX_MVRES - int skip_new_mv = - cpi->sf.inter_sf.skip_repeated_newmv || - (mbmi->pb_mv_precision != mbmi->max_mv_precision && - cpi->sf.flexmv_sf.skip_repeated_newmv_low_prec); - if (skip_new_mv && - skip_repeated_newmv( - cpi, x, bsize, do_tx_search, this_mode, - mbmi->pb_mv_precision, + int skip_new_mv = + cpi->sf.inter_sf.skip_repeated_newmv || + (mbmi->pb_mv_precision != mbmi->max_mv_precision && + cpi->sf.flexmv_sf.skip_repeated_newmv_low_prec); + if (skip_new_mv && + skip_repeated_newmv( + cpi, x, bsize, do_tx_search, this_mode, + mbmi->pb_mv_precision, #if CONFIG_BAWP - mbmi->bawp_flag, + mbmi->bawp_flag, #endif - &best_mbmi, motion_mode_cand, &ref_best_rd, &best_rd_stats, - &best_rd_stats_y, + &best_mbmi, motion_mode_cand, &ref_best_rd, + &best_rd_stats, &best_rd_stats_y, #if CONFIG_BAWP - &best_rd_stats_uv, - mode_info[bawp_flag][mbmi->pb_mv_precision], args, + &best_rd_stats_uv, + mode_info[bawp_flag][mbmi->pb_mv_precision], args, #else - &best_rd_stats_uv, mode_info[mbmi->pb_mv_precision], args, + &best_rd_stats_uv, mode_info[mbmi->pb_mv_precision], args, #endif - drl_cost, refs, cur_mv, &best_rd, orig_dst, ref_mv_idx)) + drl_cost, refs, cur_mv, &best_rd, orig_dst, 
ref_mv_idx)) #else if (cpi->sf.inter_sf.skip_repeated_newmv && skip_repeated_newmv( @@ -4446,52 +5456,99 @@ #endif args, drl_cost, refs, cur_mv, &best_rd, orig_dst, ref_mv_idx)) #endif - continue; - } + continue; + } + +#if CONFIG_REFINEMV + const MB_MODE_INFO base_mbmi = *mbmi; + for (int refinemv_loop = 0; refinemv_loop < REFINEMV_NUM_MODES; + refinemv_loop++) { + *mbmi = base_mbmi; + int_mv tmp_cur_mv[2]; + for (i = 0; i < 2; ++i) { + tmp_cur_mv[i].as_int = cur_mv[i].as_int; + } + int tmp_rate_mv = rate_mv; +#endif // CONFIG_REFINEMV #if CONFIG_FLEX_MVRES || CONFIG_BAWP - av1_init_rd_stats(rd_stats); - // Initialize compound mode data - mbmi->interinter_comp.type = COMPOUND_AVERAGE; - mbmi->comp_group_idx = 0; - if (mbmi->ref_frame[1] == INTRA_FRAME) - mbmi->ref_frame[1] = NONE_FRAME; + av1_init_rd_stats(rd_stats); + // Initialize compound mode data + mbmi->interinter_comp.type = COMPOUND_AVERAGE; + mbmi->comp_group_idx = 0; + if (mbmi->ref_frame[1] == INTRA_FRAME) + mbmi->ref_frame[1] = NONE_FRAME; - mbmi->num_proj_ref = 0; - mbmi->motion_mode = SIMPLE_TRANSLATION; - mbmi->ref_mv_idx = ref_mv_idx; + mbmi->num_proj_ref = 0; + mbmi->motion_mode = SIMPLE_TRANSLATION; +#if CONFIG_SEP_COMP_DRL + mbmi->ref_mv_idx[0] = ref_mv_idx[0]; + mbmi->ref_mv_idx[1] = ref_mv_idx[1]; +#else + mbmi->ref_mv_idx = ref_mv_idx; +#endif // CONFIG_SEP_COMP_DRL - // Compute cost for signalling this DRL index - rd_stats->rate = base_rate; + // Compute cost for signalling this DRL index + rd_stats->rate = base_rate; #if CONFIG_FLEX_MVRES - rd_stats->rate += flex_mv_cost[mbmi->pb_mv_precision]; + rd_stats->rate += flex_mv_cost[mbmi->pb_mv_precision]; #endif - rd_stats->rate += drl_cost; + rd_stats->rate += drl_cost; #endif +#if CONFIG_REFINEMV + if (refinemv_loop && !switchable_refinemv_flag(cm, mbmi)) + continue; + mbmi->refinemv_flag = switchable_refinemv_flag(cm, mbmi) + ? 
refinemv_loop + : get_default_refinemv_flag(cm, mbmi); + if (mbmi->refinemv_flag && + !is_refinemv_allowed(cm, mbmi, bsize)) { + continue; + } +#if CONFIG_CWP + if (mbmi->refinemv_flag && mbmi->cwp_idx != CWP_EQUAL) continue; +#endif +#endif // CONFIG_REFINEMV + #if CONFIG_IMPROVED_JMVD && CONFIG_JOINT_MVD - if (is_joint_mvd_coding_mode(mbmi->mode)) { - int jmvd_scale_mode_cost = + if (is_joint_mvd_coding_mode(mbmi->mode)) { + int jmvd_scale_mode_cost = #if CONFIG_ADAPTIVE_MVD - is_joint_amvd_coding_mode(mbmi->mode) - ? mode_costs - ->jmvd_amvd_scale_mode_cost[mbmi->jmvd_scale_mode] - : + is_joint_amvd_coding_mode(mbmi->mode) + ? mode_costs->jmvd_amvd_scale_mode_cost + [mbmi->jmvd_scale_mode] + : #endif // CONFIG_ADAPTIVE_MVD - mode_costs->jmvd_scale_mode_cost[mbmi->jmvd_scale_mode]; - rd_stats->rate += jmvd_scale_mode_cost; - } + mode_costs + ->jmvd_scale_mode_cost[mbmi->jmvd_scale_mode]; + rd_stats->rate += jmvd_scale_mode_cost; + } #endif // CONFIG_IMPROVED_JMVD && CONFIG_JOINT_MVD - rd_stats->rate += rate_mv; +#if CONFIG_REFINEMV + rd_stats->rate += tmp_rate_mv; + if (switchable_refinemv_flag(cm, mbmi)) { + rd_stats->rate += + mode_costs->refinemv_flag_cost[av1_get_refinemv_context( + cm, xd, bsize)][mbmi->refinemv_flag]; + } +#else + rd_stats->rate += rate_mv; +#endif // CONFIG_REFINEMV - // Copy the motion vector for this mode into mbmi struct - for (i = 0; i < is_comp_pred + 1; ++i) { - mbmi->mv[i].as_int = cur_mv[i].as_int; - } + // Copy the motion vector for this mode into mbmi struct + for (i = 0; i < is_comp_pred + 1; ++i) { +#if CONFIG_REFINEMV + mbmi->mv[i].as_int = tmp_cur_mv[i].as_int; +#else + + mbmi->mv[i].as_int = cur_mv[i].as_int; +#endif // CONFIG_REFINEMV + } #if CONFIG_C071_SUBBLK_WARPMV #if CONFIG_FLEX_MVRES - assert(check_mv_precision(cm, mbmi, x)); + assert(check_mv_precision(cm, mbmi, x)); #endif #else #if CONFIG_FLEX_MVRES @@ -4499,59 +5556,101 @@ #endif #endif // CONFIG_C071_SUBBLK_WARPMV - const int like_nearest = (mbmi->mode == NEARMV 
|| + const int like_nearest = (mbmi->mode == NEARMV || #if CONFIG_WARPMV - mbmi->mode == WARPMV || + mbmi->mode == WARPMV || #endif // CONFIG_WARPMV #if CONFIG_OPTFLOW_REFINEMENT - mbmi->mode == NEAR_NEARMV_OPTFLOW || + mbmi->mode == NEAR_NEARMV_OPTFLOW || #endif // CONFIG_OPTFLOW_REFINEMENT - mbmi->mode == NEAR_NEARMV) && - mbmi->ref_mv_idx == 0; - if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd && - !like_nearest) { - continue; - } + mbmi->mode == NEAR_NEARMV) && +#if CONFIG_SEP_COMP_DRL + mbmi->ref_mv_idx[0] == 0 && + mbmi->ref_mv_idx[1] == 0; +#else + mbmi->ref_mv_idx == 0; +#endif // CONFIG_SEP_COMP_DRL + if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd && + !like_nearest) { + continue; + } - // Skip the rest of the search if prune_ref_mv_idx_search speed - // feature is enabled, and the current MV is similar to a previous - // one. - if (cpi->sf.inter_sf.prune_ref_mv_idx_search && is_comp_pred && - prune_ref_mv_idx_search(&cm->features, ref_mv_idx, - best_ref_mv_idx, + // Skip the rest of the search if prune_ref_mv_idx_search speed + // feature is enabled, and the current MV is similar to a + // previous one. 
+ if (cpi->sf.inter_sf.prune_ref_mv_idx_search && is_comp_pred && + prune_ref_mv_idx_search( + &cm->features, ref_mv_idx, best_ref_mv_idx, #if CONFIG_FLEX_MVRES - save_mv[mbmi->pb_mv_precision], mbmi, + save_mv[mbmi->pb_mv_precision], mbmi, #else - save_mv, mbmi, + save_mv, mbmi, #endif - cpi->sf.inter_sf.prune_ref_mv_idx_search)) - continue; + cpi->sf.inter_sf.prune_ref_mv_idx_search)) + continue; #if CONFIG_COLLECT_COMPONENT_TIMING - start_timing(cpi, compound_type_rd_time); + start_timing(cpi, compound_type_rd_time); #endif - int skip_build_pred = 0; - const int mi_row = xd->mi_row; - const int mi_col = xd->mi_col; + int skip_build_pred = 0; + const int mi_row = xd->mi_row; + const int mi_col = xd->mi_col; - // Handle a compound predictor, continue if it is determined this - // cannot be the best compound mode - if (is_comp_pred +#if CONFIG_CWP + // set cwp_search_mask + if (is_cwp_allowed(mbmi) && mbmi->cwp_idx == CWP_EQUAL) { + set_cwp_search_mask(cpi, x, bsize, rd_buffers->pred0, + rd_buffers->pred1, rd_buffers->residual1, + rd_buffers->diff10, + block_size_wide[bsize], cwp_search_mask); + } +#endif // CONFIG_CWP + + // Handle a compound predictor, continue if it is determined + // this cannot be the best compound mode + if (is_comp_pred #if IMPROVED_AMVD && CONFIG_JOINT_MVD - && !is_joint_amvd_coding_mode(mbmi->mode) + && !is_joint_amvd_coding_mode(mbmi->mode) #endif // IMPROVED_AMVD && CONFIG_JOINT_MVD - ) { - const int not_best_mode = process_compound_inter_mode( - cpi, x, args, ref_best_rd, cur_mv, bsize, - &compmode_interinter_cost, rd_buffers, &orig_dst, &tmp_dst, - &rate_mv, rd_stats, skip_rd, &skip_build_pred); - if (not_best_mode) continue; - } +#if CONFIG_REFINEMV + && (!mbmi->refinemv_flag || + !switchable_refinemv_flag(cm, mbmi)) +#endif // CONFIG_REFINEMV + ) { + const int not_best_mode = process_compound_inter_mode( + cpi, x, args, ref_best_rd, +#if CONFIG_REFINEMV + tmp_cur_mv, +#else + cur_mv, +#endif // CONFIG_REFINEMV + bsize, 
&compmode_interinter_cost, rd_buffers, &orig_dst, + &tmp_dst, +#if CONFIG_REFINEMV + &tmp_rate_mv, +#else + + &rate_mv, +#endif // CONFIG_REFINEMV + + rd_stats, skip_rd, &skip_build_pred); + if (not_best_mode) continue; + } + +#if CONFIG_CWP + if (cm->features.enable_cwp && is_comp_pred && + is_joint_amvd_coding_mode(mbmi->mode)) { + if (is_cwp_allowed(mbmi)) { + compmode_interinter_cost = + av1_get_cwp_idx_cost(mbmi->cwp_idx, cm, x); + } + } +#endif // CONFIG_CWP #if CONFIG_C071_SUBBLK_WARPMV #if CONFIG_FLEX_MVRES - assert(check_mv_precision(cm, mbmi, x)); + assert(check_mv_precision(cm, mbmi, x)); #endif #else #if CONFIG_FLEX_MVRES @@ -4560,20 +5659,20 @@ #endif // CONFIG_C071_SUBBLK_WARPMV #if CONFIG_COLLECT_COMPONENT_TIMING - end_timing(cpi, compound_type_rd_time); + end_timing(cpi, compound_type_rd_time); #endif #if CONFIG_COLLECT_COMPONENT_TIMING - start_timing(cpi, interpolation_filter_search_time); + start_timing(cpi, interpolation_filter_search_time); #endif - // Determine the interpolation filter for this mode - ret_val = av1_interpolation_filter_search( - x, cpi, tile_data, bsize, &tmp_dst, &orig_dst, &rd, &rs, - &skip_build_pred, args, ref_best_rd); + // Determine the interpolation filter for this mode + ret_val = av1_interpolation_filter_search( + x, cpi, tile_data, bsize, &tmp_dst, &orig_dst, &rd, &rs, + &skip_build_pred, args, ref_best_rd); #if CONFIG_C071_SUBBLK_WARPMV #if CONFIG_FLEX_MVRES - assert(check_mv_precision(cm, mbmi, x)); + assert(check_mv_precision(cm, mbmi, x)); #endif #else #if CONFIG_FLEX_MVRES @@ -4581,87 +5680,111 @@ #endif #endif // CONFIG_C071_SUBBLK_WARPMV #if CONFIG_COLLECT_COMPONENT_TIMING - end_timing(cpi, interpolation_filter_search_time); + end_timing(cpi, interpolation_filter_search_time); #endif - if (args->modelled_rd != NULL && !is_comp_pred) { - args->modelled_rd[this_mode][ref_mv_idx][refs[0]] = rd; - } + if (args->modelled_rd != NULL && !is_comp_pred) { +#if CONFIG_SEP_COMP_DRL + 
args->modelled_rd[this_mode][ref_mv_idx_type][refs[0]] = rd; +#else + args->modelled_rd[this_mode][ref_mv_idx][refs[0]] = rd; +#endif // CONFIG_SEP_COMP_DRL + } #if CONFIG_WARPMV - if (mbmi->mode != WARPMV) { + if (mbmi->mode != WARPMV) { #endif // CONFIG_WARPMV - if (ret_val != 0) { - restore_dst_buf(xd, orig_dst, num_planes); - continue; - } else if (cpi->sf.inter_sf - .model_based_post_interp_filter_breakout && - ref_best_rd != INT64_MAX && - (rd >> 3) * 3 > ref_best_rd) { - restore_dst_buf(xd, orig_dst, num_planes); - continue; - } + if (ret_val != 0) { + restore_dst_buf(xd, orig_dst, num_planes); + continue; + } else if (cpi->sf.inter_sf + .model_based_post_interp_filter_breakout && + ref_best_rd != INT64_MAX && + (rd >> 3) * 3 > ref_best_rd) { + restore_dst_buf(xd, orig_dst, num_planes); + continue; + } #if CONFIG_WARPMV - } + } #endif // CONFIG_WARPMV // Compute modelled RD if enabled - if (args->modelled_rd != NULL) { + if (args->modelled_rd != NULL) { #if CONFIG_OPTFLOW_REFINEMENT - if (is_comp_pred && this_mode < NEAR_NEARMV_OPTFLOW) { + if (is_comp_pred && this_mode < NEAR_NEARMV_OPTFLOW) { #else if (is_comp_pred) { #endif // CONFIG_OPTFLOW_REFINEMENT - const int mode0 = compound_ref0_mode(this_mode); - const int mode1 = compound_ref1_mode(this_mode); - const int64_t mrd = - AOMMIN(args->modelled_rd[mode0][ref_mv_idx][refs[0]], - args->modelled_rd[mode1][ref_mv_idx][refs[1]]); - - if ((rd >> 3) * 6 > mrd && ref_best_rd < INT64_MAX) { - restore_dst_buf(xd, orig_dst, num_planes); - continue; - } - } - } - rd_stats->rate += compmode_interinter_cost; - if (skip_build_pred != 1 + const int mode0 = compound_ref0_mode(this_mode); + const int mode1 = compound_ref1_mode(this_mode); + const int64_t mrd = +#if CONFIG_SEP_COMP_DRL + AOMMIN(args->modelled_rd[mode0][get_ref_mv_idx(mbmi, 0)] + [refs[0]], + args->modelled_rd[mode1][get_ref_mv_idx(mbmi, 1)] + [refs[1]]); +#else + AOMMIN(args->modelled_rd[mode0][ref_mv_idx][refs[0]], + 
args->modelled_rd[mode1][ref_mv_idx][refs[1]]); +#endif // CONFIG_SEP_COMP_DRL + if ((rd >> 3) * 6 > mrd && ref_best_rd < INT64_MAX) { + restore_dst_buf(xd, orig_dst, num_planes); + continue; + } + } + } + rd_stats->rate += compmode_interinter_cost; + if (skip_build_pred != 1 #if CONFIG_WARPMV - && (mbmi->mode != WARPMV) + && (mbmi->mode != WARPMV) #endif // CONFIG_WARPMV - ) { - // Build this inter predictor if it has not been previously built - av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, - bsize, 0, av1_num_planes(cm) - 1); - } + ) { + // Build this inter predictor if it has not been previously + // built + av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, + &orig_dst, bsize, 0, + av1_num_planes(cm) - 1); + } #if CONFIG_WARPMV - // So far we did not make prediction for WARPMV mode - assert(IMPLIES(mbmi->mode == WARPMV, skip_build_pred != 1)); + // So far we did not make prediction for WARPMV mode + assert(IMPLIES(mbmi->mode == WARPMV, skip_build_pred != 1)); #endif // CONFIG_WARPMV #if CONFIG_COLLECT_COMPONENT_TIMING - start_timing(cpi, motion_mode_rd_time); + start_timing(cpi, motion_mode_rd_time); #endif - int rate2_nocoeff = rd_stats->rate; + int rate2_nocoeff = rd_stats->rate; #if CONFIG_WARPMV - assert(IMPLIES(mbmi->mode == WARPMV, - (rd_stats->rate == base_rate && rate_mv == 0))); +#if CONFIG_REFINEMV + assert( + IMPLIES(mbmi->mode == WARPMV, + (rd_stats->rate == base_rate && tmp_rate_mv == 0))); +#else + assert(IMPLIES(mbmi->mode == WARPMV, + (rd_stats->rate == base_rate && rate_mv == 0))); +#endif #endif // CONFIG_WARPMV // Determine the motion mode. 
This will be one of SIMPLE_TRANSLATION, // OBMC_CAUSAL or WARPED_CAUSAL or WARP_EXTEND or WARP_DELTA - ret_val = motion_mode_rd(cpi, tile_data, x, bsize, rd_stats, - rd_stats_y, rd_stats_uv, args, ref_best_rd, - skip_rd, &rate_mv, &orig_dst, best_est_rd, - do_tx_search, inter_modes_info, 0); + ret_val = motion_mode_rd( + cpi, tile_data, x, bsize, rd_stats, rd_stats_y, rd_stats_uv, + args, ref_best_rd, skip_rd, +#if CONFIG_REFINEMV + &tmp_rate_mv, +#else + &rate_mv, +#endif // CONFIG_REFINEMV + + &orig_dst, best_est_rd, do_tx_search, inter_modes_info, 0); #if CONFIG_COLLECT_COMPONENT_TIMING - end_timing(cpi, motion_mode_rd_time); + end_timing(cpi, motion_mode_rd_time); #endif - assert(IMPLIES(!av1_check_newmv_joint_nonzero(cm, x), - ret_val == INT64_MAX)); + assert(IMPLIES(!av1_check_newmv_joint_nonzero(cm, x), + ret_val == INT64_MAX)); #if CONFIG_C071_SUBBLK_WARPMV #if CONFIG_FLEX_MVRES - assert(check_mv_precision(cm, mbmi, x)); + assert(check_mv_precision(cm, mbmi, x)); #endif #else #if CONFIG_FLEX_MVRES @@ -4669,85 +5792,152 @@ #endif #endif // CONFIG_C071_SUBBLK_WARPMV - if (ret_val != INT64_MAX) { - int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist); + if (ret_val != INT64_MAX) { + int64_t tmp_rd = + RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist); #if CONFIG_FLEX_MVRES - if (is_pb_mv_precision_active(cm, mbmi, bsize) && - tmp_rd < best_precision_rd_so_far) { - best_precision_so_far = mbmi->pb_mv_precision; - best_precision_rd_so_far = tmp_rd; - } + if (is_pb_mv_precision_active(cm, mbmi, bsize) && + tmp_rd < best_precision_rd_so_far) { + best_precision_so_far = mbmi->pb_mv_precision; + best_precision_rd_so_far = tmp_rd; + } #if CONFIG_BAWP - if (tmp_rd < - mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx].rd) { - // Only update mode_info if the new result is actually better. 
- mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx] - .mv.as_int = mbmi->mv[0].as_int; - mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx].rate_mv = - rate_mv; - mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx].rd = - tmp_rd; - } +#if CONFIG_SEP_COMP_DRL + if (tmp_rd < mode_info[bawp_flag][mbmi->pb_mv_precision] + [ref_mv_idx_type] + .rd) { + // Only update mode_info if the new result is actually + // better. + mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx_type] + .mv.as_int = mbmi->mv[0].as_int; +#if CONFIG_REFINEMV + mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx_type] + .rate_mv = tmp_rate_mv; #else - if (tmp_rd < mode_info[mbmi->pb_mv_precision][ref_mv_idx].rd) { - // Only update mode_info if the new result is actually better. - mode_info[mbmi->pb_mv_precision][ref_mv_idx].mv.as_int = - mbmi->mv[0].as_int; - mode_info[mbmi->pb_mv_precision][ref_mv_idx].rate_mv = rate_mv; - mode_info[mbmi->pb_mv_precision][ref_mv_idx].rd = tmp_rd; - } + mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx_type] + .rate_mv = rate_mv; +#endif // CONFIG_REFINEMV + mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx_type] + .rd = tmp_rd; + } +#else + if (tmp_rd < + mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx] + .rd) { + // Only update mode_info if the new result is actually + // better. + mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx] + .mv.as_int = mbmi->mv[0].as_int; +#if CONFIG_REFINEMV + mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx] + .rate_mv = tmp_rate_mv; +#else + mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx] + .rate_mv = rate_mv; +#endif // CONFIG_REFINEMV + mode_info[bawp_flag][mbmi->pb_mv_precision][ref_mv_idx].rd = + tmp_rd; + } +#endif // CONFIG_SEP_COMP_DRL +#else +#if CONFIG_SEP_COMP_DRL + if (tmp_rd < + mode_info[mbmi->pb_mv_precision][ref_mv_idx_type].rd) { + // Only update mode_info if the new result is actually better. 
+ mode_info[mbmi->pb_mv_precision][ref_mv_idx_type].mv.as_int = + mbmi->mv[0].as_int; +#if CONFIG_REFINEMV + mode_info[mbmi->pb_mv_precision][ref_mv_idx_type].rate_mv = + tmp_rate_mv; +#else + + mode_info[mbmi->pb_mv_precision][ref_mv_idx_type].rate_mv = + rate_mv; +#endif // CONFIG_REFINEMV + mode_info[mbmi->pb_mv_precision][ref_mv_idx_type].rd = tmp_rd; + } +#else + if (tmp_rd < mode_info[mbmi->pb_mv_precision][ref_mv_idx].rd) { + // Only update mode_info if the new result is actually better. + mode_info[mbmi->pb_mv_precision][ref_mv_idx].mv.as_int = + mbmi->mv[0].as_int; +#if CONFIG_REFINEMV + mode_info[mbmi->pb_mv_precision][ref_mv_idx].rate_mv = + tmp_rate_mv; +#else + + mode_info[mbmi->pb_mv_precision][ref_mv_idx].rate_mv = + rate_mv; +#endif // CONFIG_REFINEMV + mode_info[mbmi->pb_mv_precision][ref_mv_idx].rd = tmp_rd; + } +#endif // CONFIG_SEP_COMP_DRL #endif #else #if CONFIG_BAWP if (tmp_rd < mode_info[bawp_flag][ref_mv_idx].rd) { // Only update mode_info if the new result is actually better. mode_info[bawp_flag][ref_mv_idx].mv.as_int = mbmi->mv[0].as_int; +#if CONFIG_REFINEMV + mode_info[bawp_flag][ref_mv_idx].rate_mv = tmp_rate_mv; +#else mode_info[bawp_flag][ref_mv_idx].rate_mv = rate_mv; +#endif // CONFIG_REFINEMV mode_info[bawp_flag][ref_mv_idx].rd = tmp_rd; } #else if (tmp_rd < mode_info[ref_mv_idx].rd) { // Only update mode_info if the new result is actually better. 
mode_info[ref_mv_idx].mv.as_int = mbmi->mv[0].as_int; +#if CONFIG_REFINEMV + mode_info[ref_mv_idx].rate_mv = tmp_rate_mv; +#else mode_info[ref_mv_idx].rate_mv = rate_mv; +#endif // CONFIG_REFINEMV mode_info[ref_mv_idx].rd = tmp_rd; } #endif // CONFIG_BAWP #endif // CONFIG_FLEX_MVRES - // Collect mode stats for multiwinner mode processing - store_winner_mode_stats( - &cpi->common, x, mbmi, rd_stats, rd_stats_y, rd_stats_uv, refs, - mbmi->mode, NULL, bsize, tmp_rd, - cpi->sf.winner_mode_sf.multi_winner_mode_type, do_tx_search); - if (tmp_rd < best_rd) { - // Update the best rd stats if we found the best mode so far - best_rd_stats = *rd_stats; - best_rd_stats_y = *rd_stats_y; - best_rd_stats_uv = *rd_stats_uv; - best_rd = tmp_rd; - best_mbmi = *mbmi; + // Collect mode stats for multiwinner mode processing + store_winner_mode_stats( + &cpi->common, x, mbmi, rd_stats, rd_stats_y, rd_stats_uv, + refs, mbmi->mode, NULL, bsize, tmp_rd, + cpi->sf.winner_mode_sf.multi_winner_mode_type, + do_tx_search); + if (tmp_rd < best_rd) { + // Update the best rd stats if we found the best mode so far + best_rd_stats = *rd_stats; + best_rd_stats_y = *rd_stats_y; + best_rd_stats_uv = *rd_stats_uv; + best_rd = tmp_rd; + best_mbmi = *mbmi; #if CONFIG_C071_SUBBLK_WARPMV - if (is_warp_mode(mbmi->motion_mode)) { - store_submi(xd, cm, best_submi, bsize); - } + if (is_warp_mode(mbmi->motion_mode)) { + store_submi(xd, cm, best_submi, bsize); + } #endif // CONFIG_C071_SUBBLK_WARPMV - best_xskip_txfm = txfm_info->skip_txfm; - memcpy(best_blk_skip, txfm_info->blk_skip, - sizeof(best_blk_skip[0]) * xd->height * xd->width); - av1_copy_array(best_tx_type_map, xd->tx_type_map, - xd->height * xd->width); + best_xskip_txfm = txfm_info->skip_txfm; + memcpy(best_blk_skip, txfm_info->blk_skip, + sizeof(best_blk_skip[0]) * xd->height * xd->width); + av1_copy_array(best_tx_type_map, xd->tx_type_map, + xd->height * xd->width); #if CONFIG_CROSS_CHROMA_TX - av1_copy_array(best_cctx_type_map, xd->cctx_type_map, 
- xd->height * xd->width); + av1_copy_array(best_cctx_type_map, xd->cctx_type_map, + xd->height * xd->width); #endif // CONFIG_CROSS_CHROMA_TX - motion_mode_cand->rate_mv = rate_mv; - motion_mode_cand->rate2_nocoeff = rate2_nocoeff; - } + +#if CONFIG_REFINEMV + motion_mode_cand->rate_mv = tmp_rate_mv; +#else + motion_mode_cand->rate_mv = rate_mv; +#endif // CONFIG_REFINEMV + motion_mode_cand->rate2_nocoeff = rate2_nocoeff; + } #if CONFIG_C071_SUBBLK_WARPMV #if CONFIG_FLEX_MVRES - assert(check_mv_precision(cm, mbmi, x)); + assert(check_mv_precision(cm, mbmi, x)); #endif #else #if CONFIG_FLEX_MVRES @@ -4755,19 +5945,41 @@ #endif #endif // CONFIG_C071_SUBBLK_WARPMV - if (tmp_rd < ref_best_rd) { - ref_best_rd = tmp_rd; - best_ref_mv_idx = ref_mv_idx; - } - } - restore_dst_buf(xd, orig_dst, num_planes); +#if CONFIG_CWP + if (is_cwp_allowed(mbmi)) { + if (tmp_rd < best_cwp_cost) { + best_cwp_cost = tmp_rd; + best_cwp_idx = mbmi->cwp_idx; + } + } +#endif // CONFIG_CWP + if (tmp_rd < ref_best_rd) { + ref_best_rd = tmp_rd; +#if CONFIG_SEP_COMP_DRL + best_ref_mv_idx[0] = ref_mv_idx[0]; + best_ref_mv_idx[1] = ref_mv_idx[1]; +#else + best_ref_mv_idx = ref_mv_idx; +#endif // CONFIG_SEP_COMP_DRL + } + } + restore_dst_buf(xd, orig_dst, num_planes); +#if CONFIG_REFINEMV + } +#endif // CONFIG_REFINEMV #if CONFIG_BAWP - } + } #endif #if CONFIG_FLEX_MVRES - } + } #endif +#if CONFIG_CWP + } +#endif // CONFIG_CWP + } +#if CONFIG_SEP_COMP_DRL } +#endif // CONFIG_SEP_COMP_DRL #if CONFIG_IMPROVED_JMVD } #endif // CONFIG_IMPROVED_JMVD @@ -4798,7 +6010,7 @@ return rd_stats->rdcost; } -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT // Check if BV is valid static INLINE int is_bv_valid(const FULLPEL_MV *full_mv, const AV1_COMMON *cm, const MACROBLOCKD *xd, int mi_row, int mi_col, @@ -4926,7 +6138,7 @@ } return 0; } -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT /*!\brief Search for the best intrabc predictor * @@ -4956,7 +6168,7 @@ set_mv_precision(mbmi, 
MV_PRECISION_ONE_PEL); set_default_precision_set(cm, mbmi, bsize); set_most_probable_mv_precision(cm, mbmi, bsize); -#if CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT const int is_ibc_cost = 1; #endif #endif @@ -4965,6 +6177,10 @@ mbmi->bawp_flag = 0; #endif +#if CONFIG_REFINEMV + mbmi->refinemv_flag = 0; +#endif // CONFIG_REFINEMV + const int mi_row = xd->mi_row; const int mi_col = xd->mi_col; const int w = block_size_wide[bsize]; @@ -4996,18 +6212,22 @@ av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame); #if CONFIG_FLEX_MVRES +#if CONFIG_SEP_COMP_DRL + int_mv dv_ref = av1_find_best_ref_mv_from_stack(mbmi_ext, mbmi, ref_frame, +#else int_mv dv_ref = av1_find_best_ref_mv_from_stack(mbmi_ext, ref_frame, +#endif mbmi->pb_mv_precision); #else int_mv dv_ref = av1_find_best_ref_mv_from_stack( /*allow_hp=*/0, mbmi_ext, ref_frame, /*is_integer=*/0); #endif dv_ref.as_int = dv_ref.as_int == INVALID_MV ? 0 : dv_ref.as_int; -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT if (mbmi_ext->ref_mv_count[INTRA_FRAME] == 0) { dv_ref.as_int = 0; } -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT if (dv_ref.as_int == 0) { av1_find_ref_dv(&dv_ref, tile, cm->mib_size, mi_row); } @@ -5046,7 +6266,7 @@ #if CONFIG_FLEX_MVRES av1_make_default_fullpel_ms_params(&fullms_params, cpi, x, bsize, &dv_ref.as_mv, mbmi->pb_mv_precision, -#if CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT is_ibc_cost, #endif lookahead_search_sites, @@ -5056,7 +6276,7 @@ &dv_ref.as_mv, lookahead_search_sites, /*fine_search_interval=*/0); #endif -#if CONFIG_BVCOST_UPDATE && !CONFIG_FLEX_MVRES +#if CONFIG_IBC_BV_IMPROVEMENT && !CONFIG_FLEX_MVRES // The costs for block vector are stored in x->dv_costs. Assign the costs // to mv_cost_params for motion search. 
fullms_params.mv_cost_params.mvjcost = x->dv_costs.joint_mv; @@ -5064,7 +6284,7 @@ (int *)&x->dv_costs.mv_component[0][MV_MAX]; fullms_params.mv_cost_params.mvcost[1] = (int *)&x->dv_costs.mv_component[1][MV_MAX]; -#endif // CONFIG_BVCOST_UPDATE +#endif // CONFIG_IBC_BV_IMPROVEMENT fullms_params.is_intra_mode = 1; #if CONFIG_IBC_SR_EXT @@ -5074,17 +6294,20 @@ fullms_params.mi_col = mi_col; fullms_params.mi_row = mi_row; #endif // CONFIG_IBC_SR_EXT -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT fullms_params.x = x; fullms_params.cm = cm; fullms_params.ref_bv_cnt = mbmi_ext->ref_mv_count[INTRA_FRAME]; mbmi->intrabc_mode = 0; mbmi->intrabc_drl_idx = 0; mbmi->ref_bv.as_int = 0; -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT #if CONFIG_WARP_REF_LIST mbmi->warp_ref_idx = 0; mbmi->max_num_warp_candidates = 0; +#if CONFIG_CWG_D067_IMPROVED_WARP + mbmi->warpmv_with_mvd_flag = 0; +#endif // CONFIG_CWG_D067_IMPROVED_WARP #endif // CONFIG_WARP_REF_LIST mbmi->motion_mode = SIMPLE_TRANSLATION; for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE; @@ -5166,7 +6389,7 @@ assert(fullms_params.mv_limits.row_min >= fullms_params.mv_limits.row_min); assert(fullms_params.mv_limits.row_max <= fullms_params.mv_limits.row_max); -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT FULLPEL_MOTION_SEARCH_PARAMS fullms_params_init = fullms_params; int best_ref_bv_cost = INT_MAX; int_mv best_bv; @@ -5190,7 +6413,7 @@ mbmi->ref_bv = dv_ref; int best_intrabc_drl_idx = mbmi->intrabc_drl_idx; int best_intrabc_mode = mbmi->intrabc_mode; -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT av1_set_mv_search_range(&fullms_params.mv_limits, &dv_ref.as_mv @@ -5212,7 +6435,7 @@ int bestsme = av1_full_pixel_search(start_mv, &fullms_params, step_param, NULL, &best_mv.as_fullmv, NULL); -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT if (bestsme != INT_MAX && is_bv_valid(&best_mv.as_fullmv, cm, xd, mi_row, mi_col, bsize, 
fullms_params)) { int cur_ref_bv_cost = bestsme; @@ -5244,12 +6467,12 @@ best_bv.as_mv = cur_bv.as_mv; } } -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT const int hashsme = av1_intrabc_hash_search( cpi, xd, &fullms_params, intrabc_hash_info, &best_hash_mv.as_fullmv); -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT if (hashsme != INT_MAX && is_bv_valid(&best_hash_mv.as_fullmv, cm, xd, mi_row, mi_col, bsize, fullms_params)) { @@ -5299,7 +6522,7 @@ continue; if (!av1_is_dv_valid(dv, cm, xd, mi_row, mi_col, bsize, cm->mib_size_log2)) continue; -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT // DV should not have sub-pel. assert((dv.col & 7) == 0); @@ -5322,10 +6545,16 @@ mbmi->mv[0].as_mv = dv; mbmi->interp_fltr = BILINEAR; mbmi->skip_txfm[xd->tree_type == CHROMA_PART] = 0; +#if CONFIG_CWP + mbmi->cwp_idx = CWP_EQUAL; +#endif // CONFIG_CWP #if CONFIG_WARP_REF_LIST mbmi->warp_ref_idx = 0; mbmi->max_num_warp_candidates = 0; +#if CONFIG_CWG_D067_IMPROVED_WARP + mbmi->warpmv_with_mvd_flag = 0; +#endif // CONFIG_CWG_D067_IMPROVED_WARP #endif // CONFIG_WARP_REF_LIST mbmi->motion_mode = SIMPLE_TRANSLATION; @@ -5336,7 +6565,7 @@ #if CONFIG_FLEX_MVRES const IntraBCMvCosts *const dv_costs = &x->dv_costs; #else -#if CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT const IntraBCMVCosts *const dv_costs = &x->dv_costs; #else const IntraBCMVCosts *const dv_costs = &cpi->dv_costs; @@ -5345,7 +6574,7 @@ (int *)&dv_costs->mv_component[1][MV_MAX] }; #endif -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT int rate_mv = 0; if (!mbmi->intrabc_mode) #if CONFIG_FLEX_MVRES @@ -5383,7 +6612,7 @@ #else const int rate_mode = x->mode_costs.intrabc_cost[1]; #endif // CONFIG_NEW_CONTEXT_MODELING -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT RD_STATS rd_stats_yuv, rd_stats_y, rd_stats_uv; if (!av1_txfm_search(cpi, x, bsize, &rd_stats_yuv, &rd_stats_y, @@ -5405,13 +6634,13 @@ } } *mbmi = best_mbmi; -#if 
CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT if (mbmi->use_intrabc[xd->tree_type == CHROMA_PART]) { mbmi_ext->ref_mv_stack[INTRA_FRAME][0].this_mv = mbmi->ref_bv; } else { mbmi_ext->ref_mv_stack[INTRA_FRAME][0].this_mv.as_int = 0; } -#endif // CONFIG_BVP_IMPROVEMENT +#endif // CONFIG_IBC_BV_IMPROVEMENT *rd_stats = best_rdstats; memcpy(txfm_info->blk_skip, best_blk_skip, @@ -5483,9 +6712,13 @@ } // Intra block is always coded as non-skip +#if CONFIG_SKIP_TXFM_OPT + rd_cost->rate = rate_y + rate_uv; +#else rd_cost->rate = rate_y + rate_uv + x->mode_costs.skip_txfm_cost[av1_get_skip_txfm_context(xd)][0]; +#endif // CONFIG_SKIP_TXFM_OPT rd_cost->dist = dist_y + dist_uv; rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist); rd_cost->skip_txfm = 0; @@ -5507,9 +6740,12 @@ if (xd->tree_type != CHROMA_PART) av1_copy_mbmi_ext_to_mbmi_ext_frame( &ctx->mbmi_ext_best, x->mbmi_ext, -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SEP_COMP_DRL + xd->mi[0], +#endif +#if CONFIG_SKIP_MODE_ENHANCEMENT mbmi->skip_mode, -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT av1_ref_frame_type(xd->mi[0]->ref_frame)); av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk); } @@ -5551,8 +6787,12 @@ const MV_REFERENCE_FRAME second_ref_frame = skip_mode_info->ref_frame_idx_1; #if CONFIG_OPTFLOW_REFINEMENT - const PREDICTION_MODE this_mode = - cm->features.opfl_refine_type ? NEAR_NEARMV_OPTFLOW : NEAR_NEARMV; + const PREDICTION_MODE this_mode = cm->features.opfl_refine_type +#if CONFIG_CWP + && !cm->features.enable_cwp +#endif // CONFIG_CWP + ? 
NEAR_NEARMV_OPTFLOW + : NEAR_NEARMV; #else const PREDICTION_MODE this_mode = NEAR_NEARMV; #endif // CONFIG_OPTFLOW_REFINEMENT @@ -5564,19 +6804,34 @@ } mbmi->mode = this_mode; +#if CONFIG_SEP_COMP_DRL + mbmi->ref_mv_idx[0] = 0; + mbmi->ref_mv_idx[1] = 0; +#else mbmi->ref_mv_idx = 0; +#endif mbmi->uv_mode = UV_DC_PRED; mbmi->ref_frame[0] = ref_frame; mbmi->ref_frame[1] = second_ref_frame; +#if CONFIG_CWP + mbmi->cwp_idx = CWP_EQUAL; +#endif // CONFIG_CWP #if CONFIG_IBC_SR_EXT mbmi->use_intrabc[xd->tree_type == CHROMA_PART] = 0; #endif // CONFIG_IBC_SR_EXT #if CONFIG_WARP_REF_LIST mbmi->warp_ref_idx = 0; mbmi->max_num_warp_candidates = 0; +#if CONFIG_CWG_D067_IMPROVED_WARP + mbmi->warpmv_with_mvd_flag = 0; +#endif // CONFIG_CWG_D067_IMPROVED_WARP #endif // CONFIG_WARP_REF_LIST -#if !CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_REFINEMV + mbmi->refinemv_flag = 0; +#endif // CONFIG_REFINEMV + +#if !CONFIG_SKIP_MODE_ENHANCEMENT const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame); if (x->mbmi_ext->ref_mv_count[ref_frame_type] == UINT8_MAX) { if (x->mbmi_ext->ref_mv_count[ref_frame] == UINT8_MAX || @@ -5600,25 +6855,35 @@ // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs. av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame_type); } -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT #if CONFIG_OPTFLOW_REFINEMENT +#if CONFIG_CWP + assert(this_mode == (cm->features.opfl_refine_type && !cm->features.enable_cwp + ? NEAR_NEARMV_OPTFLOW + : NEAR_NEARMV)); + assert(mbmi->mode == + (cm->features.opfl_refine_type && !cm->features.enable_cwp + ? NEAR_NEARMV_OPTFLOW + : NEAR_NEARMV)); +#else // CONFIG_CWP assert(this_mode == (cm->features.opfl_refine_type ? NEAR_NEARMV_OPTFLOW : NEAR_NEARMV)); assert(mbmi->mode == (cm->features.opfl_refine_type ? 
NEAR_NEARMV_OPTFLOW : NEAR_NEARMV)); +#endif // CONFIG_CWP #else assert(this_mode == NEAR_NEARMV); assert(mbmi->mode == NEAR_NEARMV); #endif - assert(mbmi->ref_mv_idx == 0); -#if !CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if !CONFIG_SKIP_MODE_ENHANCEMENT if (!build_cur_mv(mbmi->mv, this_mode, cm, x, 0)) { assert(av1_check_newmv_joint_nonzero(cm, x)); return; } -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT + mbmi->fsc_mode[xd->tree_type == CHROMA_PART] = 0; #if CONFIG_BAWP mbmi->bawp_flag = 0; @@ -5629,7 +6894,12 @@ mbmi->comp_group_idx = 0; mbmi->interinter_comp.type = COMPOUND_AVERAGE; mbmi->motion_mode = SIMPLE_TRANSLATION; +#if CONFIG_SEP_COMP_DRL + mbmi->ref_mv_idx[0] = 0; + mbmi->ref_mv_idx[1] = 0; +#else mbmi->ref_mv_idx = 0; +#endif // CONFIG_SEP_COMP_DRL mbmi->skip_mode = mbmi->skip_txfm[xd->tree_type == CHROMA_PART] = 1; #if CONFIG_FLEX_MVRES @@ -5642,6 +6912,9 @@ #if CONFIG_WARP_REF_LIST mbmi->warp_ref_idx = 0; mbmi->max_num_warp_candidates = 0; +#if CONFIG_CWG_D067_IMPROVED_WARP + mbmi->warpmv_with_mvd_flag = 0; +#endif // CONFIG_CWG_D067_IMPROVED_WARP #endif // CONFIG_WARP_REF_LIST set_default_interp_filters(mbmi, @@ -5650,7 +6923,7 @@ #endif // CONFIG_OPTFLOW_REFINEMENT cm->features.interp_filter); -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT const int mi_row = xd->mi_row; const int mi_col = xd->mi_col; @@ -5690,17 +6963,54 @@ // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs. 
av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame_type); +#if CONFIG_REFINEMV + mbmi->mode = this_mode; +#endif // CONFIG_REFINEMV // loop of ref_mv_idx +#if CONFIG_SEP_COMP_DRL + assert(!has_second_drl(mbmi)); + int ref_set = get_drl_refmv_count(cm->features.max_drl_bits, x, + mbmi->ref_frame, this_mode, 0); +#else int ref_set = get_drl_refmv_count(cm->features.max_drl_bits, x, mbmi->ref_frame, this_mode); +#endif for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ref_mv_idx++) { +#if CONFIG_SEP_COMP_DRL + mbmi->ref_mv_idx[0] = ref_mv_idx; + mbmi->ref_frame[0] = + xd->skip_mvp_candidate_list.ref_frame0[mbmi->ref_mv_idx[0]]; + mbmi->ref_frame[1] = + xd->skip_mvp_candidate_list.ref_frame1[mbmi->ref_mv_idx[0]]; +#else mbmi->ref_mv_idx = ref_mv_idx; mbmi->ref_frame[0] = xd->skip_mvp_candidate_list.ref_frame0[mbmi->ref_mv_idx]; mbmi->ref_frame[1] = xd->skip_mvp_candidate_list.ref_frame1[mbmi->ref_mv_idx]; +#endif + +#if CONFIG_CWP + // Infer the index of compound weighted prediction from DRL list + mbmi->cwp_idx = +#if CONFIG_SEP_COMP_DRL + xd->skip_mvp_candidate_list.ref_mv_stack[mbmi->ref_mv_idx[0]].cwp_idx; +#else + xd->skip_mvp_candidate_list.ref_mv_stack[mbmi->ref_mv_idx].cwp_idx; +#endif +#endif // CONFIG_CWP + +#if CONFIG_REFINEMV + mbmi->refinemv_flag = ( +#if CONFIG_CWP + mbmi->cwp_idx == CWP_EQUAL && +#endif + is_refinemv_allowed_skip_mode(cm, mbmi)) + ? 
1 + : 0; +#endif // CONFIG_REFINEMV if (!build_cur_mv(mbmi->mv, this_mode, cm, x, 0)) { assert(av1_check_newmv_joint_nonzero(cm, x)); @@ -5723,7 +7033,7 @@ orig_dst.plane[i] = xd->plane[i].dst.buf; orig_dst.stride[i] = xd->plane[i].dst.stride; } -#else // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#else // CONFIG_SKIP_MODE_ENHANCEMENT set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); for (int i = 0; i < num_planes; i++) { xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i]; @@ -5748,6 +7058,7 @@ av1_rd_cost_update(x->rdmult, best_rd_cost); search_state->best_rd = best_rd_cost->rdcost; } + // loop of ref_mv_idx const int ref_set = get_drl_refmv_count(cm->features.max_drl_bits, x, mbmi->ref_frame, this_mode); @@ -5763,7 +7074,7 @@ assert(av1_check_newmv_joint_nonzero(cm, x)); continue; } -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize, 0, av1_num_planes(cm) - 1); @@ -5776,12 +7087,14 @@ skip_mode_rd_stats.rate = mode_costs->skip_mode_cost[skip_mode_ctx][1]; // add ref_mv_idx rate + // MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; + // add ref_mv_idx rate const int drl_cost = -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT get_skip_drl_cost(cpi->common.features.max_drl_bits, mbmi, x); #else get_drl_cost(cpi->common.features.max_drl_bits, mbmi, mbmi_ext, x); -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT skip_mode_rd_stats.rate += drl_cost; // Do transform search @@ -5807,17 +7120,22 @@ search_state->best_mbmode.fsc_mode[xd->tree_type == CHROMA_PART] = 0; -#if CONFIG_OPTFLOW_REFINEMENT - search_state->best_mbmode.mode = - (cm->features.opfl_refine_type ? 
NEAR_NEARMV_OPTFLOW : NEAR_NEARMV); -#else - search_state->best_mbmode.mode = NEAR_NEARMV; -#endif // CONFIG_OPTFLOW_REFINEMENT + search_state->best_mbmode.mode = (cm->features.opfl_refine_type +#if CONFIG_CWP + && !cm->features.enable_cwp +#endif // CONFIG_CWP + ? NEAR_NEARMV_OPTFLOW + : NEAR_NEARMV); search_state->best_mbmode.ref_frame[0] = mbmi->ref_frame[0]; search_state->best_mbmode.ref_frame[1] = mbmi->ref_frame[1]; search_state->best_mbmode.mv[0].as_int = mbmi->mv[0].as_int; search_state->best_mbmode.mv[1].as_int = mbmi->mv[1].as_int; +#if CONFIG_SEP_COMP_DRL + search_state->best_mbmode.ref_mv_idx[0] = mbmi->ref_mv_idx[0]; + search_state->best_mbmode.ref_mv_idx[1] = mbmi->ref_mv_idx[1]; +#else search_state->best_mbmode.ref_mv_idx = mbmi->ref_mv_idx; +#endif // Set up tx_size related variables for skip-specific loop filtering. if (search_state->best_mbmode.skip_txfm[xd->tree_type == CHROMA_PART]) { @@ -5837,6 +7155,10 @@ x->txfm_search_info.skip_txfm = 1; search_state->best_mode_skippable = 1; search_state->best_skip2 = 1; +#if CONFIG_SKIP_TXFM_OPT + search_state->best_rate_y = + x->mode_costs.skip_txfm_cost[av1_get_skip_txfm_context(xd)][1]; +#endif // CONFIG_SKIP_TXFM_OPT restore_dst_buf(xd, orig_dst, num_planes); } else { @@ -5873,6 +7195,9 @@ cm, #endif // CONFIG_OPTFLOW_REFINEMENT cm->features.interp_filter); +#if CONFIG_REFINEMV + search_state->best_mbmode.refinemv_flag = mbmi->refinemv_flag; +#endif // CONFIG_REFINEMV // Update rd_cost best_rd_cost->rate = skip_mode_rd_stats.rate; @@ -5916,7 +7241,12 @@ } mbmi->mode = this_mode; +#if CONFIG_SEP_COMP_DRL + mbmi->ref_mv_idx[0] = 0; + mbmi->ref_mv_idx[1] = 0; +#else mbmi->ref_mv_idx = 0; +#endif // CONFIG_SEP_COMP_DRL mbmi->uv_mode = UV_DC_PRED; mbmi->ref_frame[0] = ref_frame; mbmi->ref_frame[1] = second_ref_frame; @@ -5960,7 +7290,12 @@ mbmi->comp_group_idx = 0; mbmi->interinter_comp.type = COMPOUND_AVERAGE; mbmi->motion_mode = SIMPLE_TRANSLATION; +#if CONFIG_SEP_COMP_DRL + mbmi->ref_mv_idx[0] = 0; + 
mbmi->ref_mv_idx[1] = 0; +#else mbmi->ref_mv_idx = 0; +#endif // CONFIG_SEP_COMP_DRL mbmi->skip_mode = mbmi->skip_txfm[xd->tree_type == CHROMA_PART] = 1; set_default_interp_filters(mbmi, @@ -5972,6 +7307,10 @@ set_mv_precision(mbmi, mbmi->max_mv_precision); #endif +#if CONFIG_REFINEMV + mbmi->refinemv_flag = 0; +#endif // CONFIG_REFINEMV + set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); for (int i = 0; i < num_planes; i++) { xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i]; @@ -6011,7 +7350,12 @@ search_state->best_mbmode.ref_frame[1] = mbmi->ref_frame[1]; search_state->best_mbmode.mv[0].as_int = mbmi->mv[0].as_int; search_state->best_mbmode.mv[1].as_int = mbmi->mv[1].as_int; +#if CONFIG_SEP_COMP_DRL + search_state->best_mbmode.ref_mv_idx[0] = 0; + search_state->best_mbmode.ref_mv_idx[1] = 0; +#else search_state->best_mbmode.ref_mv_idx = 0; +#endif #if CONFIG_FLEX_MVRES search_state->best_mbmode.pb_mv_precision = mbmi->max_mv_precision; @@ -6179,6 +7523,11 @@ av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX); } +#if CONFIG_ATC_DCTX_ALIGNED + // Occasionally TX search will be unable to find a best mode decision. + // This case needs to be skipped to avoid integer overflows. + if (rd_stats_y.rate == INT_MAX) continue; +#endif // CONFIG_ATC_DCTX_ALIGNED if (num_planes > 1) { av1_txfm_uvrd(cpi, x, &rd_stats_uv, INT64_MAX); @@ -6201,7 +7550,13 @@ rd_stats_uv.dist = rd_stats_uv.sse; } else { skip_blk = 0; +#if CONFIG_SKIP_TXFM_OPT + rd_stats_y.rate += is_inter_block(mbmi, xd->tree_type) + ? 
mode_costs->skip_txfm_cost[skip_ctx][0] + : 0; +#else rd_stats_y.rate += mode_costs->skip_txfm_cost[skip_ctx][0]; +#endif // CONFIG_SKIP_TXFM_OPT } int this_rate = rd_stats.rate + rd_stats_y.rate + rd_stats_uv.rate - winner_rate_y - winner_rate_uv; @@ -6676,6 +8031,7 @@ av1_zero(search_state->single_newmv); av1_zero(search_state->single_newmv_rate); av1_zero(search_state->single_newmv_valid); + for (int i = 0; i < MB_MODE_COUNT; ++i) { for (int j = 0; j < MAX_REF_MV_SEARCH; ++j) { for (int ref_frame = 0; ref_frame < SINGLE_REF_FRAMES; ++ref_frame) { @@ -6991,7 +8347,12 @@ ) { PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; +#if CONFIG_SEP_COMP_DRL + mbmi->ref_mv_idx[0] = 0; + mbmi->ref_mv_idx[1] = 0; +#else mbmi->ref_mv_idx = 0; +#endif mbmi->mode = curr_mode; mbmi->uv_mode = UV_DC_PRED; mbmi->ref_frame[0] = ref_frames[0]; @@ -7000,8 +8361,14 @@ pmi->palette_size[1] = 0; mbmi->filter_intra_mode_info.use_filter_intra = 0; mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0; +#if CONFIG_CWP + mbmi->cwp_idx = CWP_EQUAL; +#endif // CONFIG_CWP mbmi->motion_mode = SIMPLE_TRANSLATION; mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1); +#if CONFIG_REFINEMV + mbmi->refinemv_flag = 0; +#endif // CONFIG_REFINEMV set_default_interp_filters(mbmi, #if CONFIG_OPTFLOW_REFINEMENT cm, @@ -7020,10 +8387,16 @@ #if CONFIG_WARP_REF_LIST mbmi->warp_ref_idx = 0; mbmi->max_num_warp_candidates = 0; +#if CONFIG_CWG_D067_IMPROVED_WARP + mbmi->warpmv_with_mvd_flag = 0; +#endif // CONFIG_CWG_D067_IMPROVED_WARP #endif // CONFIG_WARP_REF_LIST #if CONFIG_BAWP mbmi->bawp_flag = 0; #endif +#if CONFIG_IMPROVED_JMVD && CONFIG_JOINT_MVD + mbmi->jmvd_scale_mode = 0; +#endif // CONFIG_IMPROVED_JMVD && CONFIG_JOINT_MVD } #if CONFIG_C071_SUBBLK_WARPMV @@ -7045,8 +8418,14 @@ const MV_REFERENCE_FRAME ref_frame = COMPACT_INDEX0_NRS(mbmi->ref_frame[0]); const int dir = get_dir_rank(cm, mbmi->ref_frame[0], NULL); const int mode_offset = INTER_OFFSET(this_mode); +#if CONFIG_SEP_COMP_DRL + const int 
ref_set = get_drl_refmv_count(features->max_drl_bits, x, + mbmi->ref_frame, this_mode, 0); + assert(!has_second_drl(mbmi)); +#else const int ref_set = get_drl_refmv_count(features->max_drl_bits, x, mbmi->ref_frame, this_mode); +#endif // Simple rd int64_t simple_rd = search_state->simple_rd[this_mode][0][ref_frame]; @@ -7233,13 +8612,19 @@ } } } - +#if !CONFIG_SEP_COMP_DRL const int ref_set = get_drl_refmv_count(cpi->common.features.max_drl_bits, x, refs, this_mode); +#endif for (i = 0; i < 2; ++i) { if (!ref_searched[i] || (mode[i] != NEARMV)) { continue; } +#if CONFIG_SEP_COMP_DRL + const int ref_set = get_drl_refmv_count(cpi->common.features.max_drl_bits, + x, refs, this_mode, i); +#endif + const MV_REFERENCE_FRAME single_refs[2] = { refs[i], NONE_FRAME }; for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ref_mv_idx++) { int_mv single_mv; @@ -7404,8 +8789,16 @@ if (txfm_search_done) { search_state->best_rate_y = new_best_rd_stats_y->rate + +#if CONFIG_SKIP_TXFM_OPT + (mode_is_intra + ? 0 + : (x->mode_costs + .skip_txfm_cost[skip_ctx][new_best_rd_stats->skip_txfm || + skip_txfm])); +#else x->mode_costs.skip_txfm_cost[skip_ctx] [new_best_rd_stats->skip_txfm || skip_txfm]; +#endif // CONFIG_SKIP_TXFM_OPT search_state->best_rate_uv = new_best_rd_stats_uv->rate; } memcpy(ctx->blk_skip, txfm_info->blk_skip, @@ -7565,14 +8958,14 @@ } #endif // CONFIG_TIP -#if CONFIG_WARPMV - if (this_mode == WARPMV) return 0; -#endif // CONFIG_WARPMV - // Check if this mode should be skipped because it is incompatible with the // current frame if (inter_mode_compatible_skip(cpi, x, bsize, this_mode, ref_frames)) return 1; + +#if CONFIG_WARPMV + if (this_mode == WARPMV) return 0; +#endif const int ret = inter_mode_search_order_independent_skip( cpi, x, args->mode_skip_mask, args->search_state, args->skip_ref_frame_mask, this_mode, ref_frames); @@ -7888,22 +9281,34 @@ INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES }; - HandleInterModeArgs args = { { NULL }, - { 
MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }, - { NULL }, - { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1, - MAX_SB_SIZE >> 1 }, - NULL, - NULL, - NULL, - search_state.modelled_rd, - INT_MAX, - INT_MAX, - search_state.simple_rd, - 0, - interintra_modes, - { { 0, { { 0 } }, { 0 }, 0, 0, 0 } }, - 0 }; +#if CONFIG_SKIP_ME_FOR_OPFL_MODES + int_mv comp_newmv[MODE_CTX_REF_FRAMES][4][NUM_MV_PRECISIONS][2]; + int comp_newmv_valid[MODE_CTX_REF_FRAMES][4][NUM_MV_PRECISIONS]; + av1_zero(comp_newmv_valid); +#endif // CONFIG_SKIP_ME_FOR_OPFL_MODES + + HandleInterModeArgs args = { + { NULL }, + { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }, + { NULL }, + { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1 }, + NULL, + NULL, + NULL, + search_state.modelled_rd, + INT_MAX, + INT_MAX, + search_state.simple_rd, + 0, + interintra_modes, + { { 0, { { 0 } }, { 0 }, 0, 0, 0 } }, + 0 +#if CONFIG_SKIP_ME_FOR_OPFL_MODES + , + comp_newmv, + comp_newmv_valid +#endif // CONFIG_SKIP_ME_FOR_OPFL_MODES + }; // Indicates the appropriate number of simple translation winner modes for // exhaustive motion mode evaluation @@ -8013,6 +9418,13 @@ mbmi->bawp_flag = 0; #endif +#if CONFIG_REFINEMV + mbmi->refinemv_flag = 0; +#endif // CONFIG_REFINEMV + +#if CONFIG_SEP_COMP_DRL + mbmi->mode = NEARMV; +#endif // init params, set frame modes, speed features set_params_rd_pick_inter_mode(cpi, x, &args, bsize, &mode_skip_mask, skip_ref_frame_mask, ref_costs_single, @@ -8289,10 +9701,18 @@ mbmi->angle_delta[PLANE_TYPE_Y] = 0; mbmi->angle_delta[PLANE_TYPE_UV] = 0; mbmi->filter_intra_mode_info.use_filter_intra = 0; +#if CONFIG_SEP_COMP_DRL + mbmi->ref_mv_idx[0] = 0; + mbmi->ref_mv_idx[1] = 0; +#else mbmi->ref_mv_idx = 0; +#endif #if CONFIG_WARP_REF_LIST mbmi->warp_ref_idx = 0; mbmi->max_num_warp_candidates = 0; +#if CONFIG_CWG_D067_IMPROVED_WARP + mbmi->warpmv_with_mvd_flag = 0; +#endif // CONFIG_CWG_D067_IMPROVED_WARP #endif // CONFIG_WARP_REF_LIST const int64_t ref_best_rd = search_state.best_rd; RD_STATS rd_stats, rd_stats_y, 
rd_stats_uv; @@ -8601,23 +10021,24 @@ set_mode_eval_params(cpi, x, DEFAULT_EVAL); // Only try palette mode when the best mode so far is an intra mode. - int try_palette = cpi->oxcf.tool_cfg.enable_palette && - av1_allow_palette(features->allow_screen_content_tools, - mbmi->sb_type[PLANE_TYPE_Y]) && - !is_inter_mode(search_state.best_mbmode.mode) && - rd_cost->rate < INT_MAX; + const int try_palette = + cpi->oxcf.tool_cfg.enable_palette && + av1_allow_palette(features->allow_screen_content_tools, + mbmi->sb_type[PLANE_TYPE_Y]) && + !is_inter_mode(search_state.best_mbmode.mode) && rd_cost->rate < INT_MAX; + int search_palette_mode = try_palette; #if CONFIG_EXT_RECUR_PARTITIONS const MB_MODE_INFO *cached_mode = x->inter_mode_cache; if (should_reuse_mode(x, REUSE_INTRA_MODE_IN_INTERFRAME_FLAG) && cached_mode && !(cached_mode->mode == DC_PRED && cached_mode->palette_mode_info.palette_size[0] > 0)) { - try_palette = 0; + search_palette_mode = 0; } #endif // CONFIG_EXT_RECUR_PARTITIONS RD_STATS this_rd_cost; int this_skippable = 0; - if (try_palette) { + if (search_palette_mode) { this_skippable = av1_search_palette_mode( &search_state.intra_search_state, cpi, x, bsize, intra_ref_frame_cost, ctx, &this_rd_cost, search_state.best_rd); @@ -8670,6 +10091,9 @@ #if CONFIG_WARP_REF_LIST mbmi->warp_ref_idx = 0; mbmi->max_num_warp_candidates = 0; +#if CONFIG_CWG_D067_IMPROVED_WARP + mbmi->warpmv_with_mvd_flag = 0; +#endif // CONFIG_CWG_D067_IMPROVED_WARP #endif // CONFIG_WARP_REF_LIST rd_pick_intrabc_mode_sb(cpi, x, ctx, &this_rd_cost, bsize, INT64_MAX); @@ -8697,13 +10121,23 @@ // Make sure that the ref_mv_idx is only nonzero when we're // using a mode which can support ref_mv_idx +#if CONFIG_SEP_COMP_DRL + if ((search_state.best_mbmode.ref_mv_idx[0] != 0 || + search_state.best_mbmode.ref_mv_idx[1] != 0) && +#else if (search_state.best_mbmode.ref_mv_idx != 0 && +#endif !(have_newmv_in_each_reference(search_state.best_mbmode.mode) || #if CONFIG_JOINT_MVD 
is_joint_mvd_coding_mode(search_state.best_mbmode.mode) || #endif // CONFIG_JOINT_MVD have_nearmv_in_inter_mode(search_state.best_mbmode.mode))) { +#if CONFIG_SEP_COMP_DRL + search_state.best_mbmode.ref_mv_idx[0] = 0; + search_state.best_mbmode.ref_mv_idx[1] = 0; +#else search_state.best_mbmode.ref_mv_idx = 0; +#endif } if (search_state.best_mbmode.mode == MODE_INVALID || @@ -8883,7 +10317,15 @@ mbmi->tx_size = max_txsize_lookup[bsize]; x->txfm_search_info.skip_txfm = 1; +#if CONFIG_SEP_COMP_DRL + mbmi->ref_mv_idx[0] = 0; + mbmi->ref_mv_idx[1] = 0; +#else mbmi->ref_mv_idx = 0; +#endif // CONFIG_SEP_COMP_DRL +#if CONFIG_CWP + mbmi->cwp_idx = CWP_EQUAL; +#endif // CONFIG_CWP mbmi->motion_mode = SIMPLE_TRANSLATION; #if CONFIG_FLEX_MVRES @@ -8896,6 +10338,9 @@ #if CONFIG_BAWP mbmi->bawp_flag = 0; #endif +#if CONFIG_REFINEMV + mbmi->refinemv_flag = 0; +#endif // CONFIG_REFINEMV av1_count_overlappable_neighbors(cm, xd); if (is_motion_variation_allowed_bsize(bsize, mi_row, mi_col) && @@ -9119,9 +10564,9 @@ AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1; struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, above, above_stride, overlap }; - foreach_overlappable_nb_above(cm, (MACROBLOCKD *)xd, - max_neighbor_obmc[mi_size_wide_log2[bsize]], - calc_target_weighted_pred_above, &ctxt); + foreach_overlappable_nb_above( + cm, (MACROBLOCKD *)xd, max_neighbor_obmc[mi_size_wide_log2[bsize]], + calc_target_weighted_pred_above, &ctxt, false); } for (int i = 0; i < bw * bh; ++i) {
diff --git a/av1/encoder/rdopt.h b/av1/encoder/rdopt.h index 17b043f..de2167f 100644 --- a/av1/encoder/rdopt.h +++ b/av1/encoder/rdopt.h
@@ -177,17 +177,38 @@ static INLINE void av1_copy_usable_ref_mv_stack_and_weight( const MACROBLOCKD *xd, MB_MODE_INFO_EXT *const mbmi_ext, MV_REFERENCE_FRAME ref_frame) { -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT if (xd->mi[0]->skip_mode) { memcpy(&(mbmi_ext->skip_mvp_candidate_list), &(xd->skip_mvp_candidate_list), sizeof(xd->skip_mvp_candidate_list)); return; } -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT +#if CONFIG_SEP_COMP_DRL + if (has_second_drl(xd->mi[0])) { + MV_REFERENCE_FRAME rf[2]; + av1_set_ref_frame(rf, ref_frame); + if (rf[1] < 0) rf[1] = 0; + memcpy(mbmi_ext->weight[rf[0]], xd->weight[rf[0]], + USABLE_REF_MV_STACK_SIZE * sizeof(xd->weight[0][0])); + memcpy(mbmi_ext->ref_mv_stack[rf[0]], xd->ref_mv_stack[rf[0]], + USABLE_REF_MV_STACK_SIZE * sizeof(xd->ref_mv_stack[0][0])); + memcpy(mbmi_ext->weight[rf[1]], xd->weight[rf[1]], + USABLE_REF_MV_STACK_SIZE * sizeof(xd->weight[0][0])); + memcpy(mbmi_ext->ref_mv_stack[rf[1]], xd->ref_mv_stack[rf[1]], + USABLE_REF_MV_STACK_SIZE * sizeof(xd->ref_mv_stack[0][0])); + } else { + memcpy(mbmi_ext->weight[ref_frame], xd->weight[ref_frame], + USABLE_REF_MV_STACK_SIZE * sizeof(xd->weight[0][0])); + memcpy(mbmi_ext->ref_mv_stack[ref_frame], xd->ref_mv_stack[ref_frame], + USABLE_REF_MV_STACK_SIZE * sizeof(xd->ref_mv_stack[0][0])); + } +#else memcpy(mbmi_ext->weight[ref_frame], xd->weight[ref_frame], USABLE_REF_MV_STACK_SIZE * sizeof(xd->weight[0][0])); memcpy(mbmi_ext->ref_mv_stack[ref_frame], xd->ref_mv_stack[ref_frame], USABLE_REF_MV_STACK_SIZE * sizeof(xd->ref_mv_stack[0][0])); +#endif // CONFIG_SEP_COMP_DRL } #define PRUNE_SINGLE_REFS 0 @@ -301,26 +322,52 @@ static INLINE void av1_copy_mbmi_ext_to_mbmi_ext_frame( MB_MODE_INFO_EXT_FRAME *mbmi_ext_best, const MB_MODE_INFO_EXT *const mbmi_ext, -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SEP_COMP_DRL + MB_MODE_INFO *mbmi, +#endif // CONFIG_SEP_COMP_DRL +#if CONFIG_SKIP_MODE_ENHANCEMENT 
uint8_t skip_mode, -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT uint8_t ref_frame_type) { -#if CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#if CONFIG_SKIP_MODE_ENHANCEMENT if (skip_mode) { memcpy(&(mbmi_ext_best->skip_mvp_candidate_list), &(mbmi_ext->skip_mvp_candidate_list), sizeof(mbmi_ext->skip_mvp_candidate_list)); return; } -#endif // CONFIG_SKIP_MODE_DRL_WITH_REF_IDX +#endif // CONFIG_SKIP_MODE_ENHANCEMENT +#if CONFIG_SEP_COMP_DRL + MV_REFERENCE_FRAME rf[2]; + av1_set_ref_frame(rf, ref_frame_type); + if (!has_second_drl(mbmi)) + rf[0] = ref_frame_type; //????????????? need to know how encoder work, + // whether the mode has been set + memcpy(mbmi_ext_best->ref_mv_stack[0], mbmi_ext->ref_mv_stack[rf[0]], + sizeof(mbmi_ext->ref_mv_stack[USABLE_REF_MV_STACK_SIZE])); + memcpy(mbmi_ext_best->weight[0], mbmi_ext->weight[rf[0]], + sizeof(mbmi_ext->weight[USABLE_REF_MV_STACK_SIZE])); + mbmi_ext_best->ref_mv_count[0] = mbmi_ext->ref_mv_count[rf[0]]; + + if (has_second_drl(mbmi)) { + assert(rf[0] == mbmi->ref_frame[0]); + assert(rf[1] == mbmi->ref_frame[1]); + memcpy(mbmi_ext_best->ref_mv_stack[1], mbmi_ext->ref_mv_stack[rf[1]], + sizeof(mbmi_ext->ref_mv_stack[USABLE_REF_MV_STACK_SIZE])); + memcpy(mbmi_ext_best->weight[1], mbmi_ext->weight[rf[1]], + sizeof(mbmi_ext->weight[USABLE_REF_MV_STACK_SIZE])); + mbmi_ext_best->ref_mv_count[1] = mbmi_ext->ref_mv_count[rf[1]]; + } +#else memcpy(mbmi_ext_best->ref_mv_stack, mbmi_ext->ref_mv_stack[ref_frame_type], sizeof(mbmi_ext->ref_mv_stack[USABLE_REF_MV_STACK_SIZE])); memcpy(mbmi_ext_best->weight, mbmi_ext->weight[ref_frame_type], sizeof(mbmi_ext->weight[USABLE_REF_MV_STACK_SIZE])); - mbmi_ext_best->mode_context = mbmi_ext->mode_context[ref_frame_type]; mbmi_ext_best->ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type]; +#endif // CONFIG_SEP_COMP_DRL + mbmi_ext_best->mode_context = mbmi_ext->mode_context[ref_frame_type]; memcpy(mbmi_ext_best->global_mvs, mbmi_ext->global_mvs, 
sizeof(mbmi_ext->global_mvs));
diff --git a/av1/encoder/rdopt_utils.h b/av1/encoder/rdopt_utils.h index de13e91..b3861c8 100644 --- a/av1/encoder/rdopt_utils.h +++ b/av1/encoder/rdopt_utils.h
@@ -459,10 +459,19 @@ winner_mode_stats[mode_idx].rd_cost = *rd_cost; if (txfm_search_done) { +#if CONFIG_SKIP_TXFM_OPT + winner_mode_stats[mode_idx].rate_y = + rd_cost_y->rate + + (!is_intra_mode + ? x->mode_costs + .skip_txfm_cost[skip_ctx][rd_cost->skip_txfm || skip_txfm] + : 0); +#else winner_mode_stats[mode_idx].rate_y = rd_cost_y->rate + x->mode_costs .skip_txfm_cost[skip_ctx][rd_cost->skip_txfm || skip_txfm]; +#endif // CONFIG_SKIP_TXFM_OPT winner_mode_stats[mode_idx].rate_uv = rd_cost_uv->rate; } }
diff --git a/av1/encoder/reconinter_enc.c b/av1/encoder/reconinter_enc.c index 32bb328..655fcec 100644 --- a/av1/encoder/reconinter_enc.c +++ b/av1/encoder/reconinter_enc.c
@@ -38,6 +38,19 @@ uint16_t **mc_buf, uint16_t **pre, SubpelParams *subpel_params, int *src_stride) { + +#if CONFIG_REFINEMV + if (inter_pred_params->use_ref_padding) { + common_calc_subpel_params_and_extend( + src_mv, inter_pred_params, xd, mi_x, mi_y, ref, +#if CONFIG_OPTFLOW_REFINEMENT + use_optflow_refinement, +#endif // CONFIG_OPTFLOW_REFINEMENT + mc_buf, pre, subpel_params, src_stride); + return; + } +#endif // CONFIG_REFINEMV + // These are part of the function signature to use this function through a // function pointer. See typedef of 'CalcSubpelParamsFunc'. (void)xd; @@ -87,12 +100,47 @@ subpel_params->subpel_y = pos_y & SCALE_SUBPEL_MASK; subpel_params->xs = sf->x_step_q4; subpel_params->ys = sf->y_step_q4; + +#if CONFIG_D071_IMP_MSK_BLD + if (inter_pred_params->border_data.enable_bacp) { + // Get reference block top left coordinate. + subpel_params->x0 = pos_x >> SCALE_SUBPEL_BITS; + subpel_params->y0 = pos_y >> SCALE_SUBPEL_BITS; + // Get reference block bottom right coordinate. 
+ subpel_params->x1 = + ((pos_x + (inter_pred_params->block_width - 1) * subpel_params->xs) >> + SCALE_SUBPEL_BITS) + + 1; + subpel_params->y1 = ((pos_y + (inter_pred_params->block_height - 1) * + subpel_params->ys) >> + SCALE_SUBPEL_BITS) + + 1; + } +#endif // CONFIG_D071_IMP_MSK_BLD + *pre = pre_buf->buf0 + (pos_y >> SCALE_SUBPEL_BITS) * pre_buf->stride + (pos_x >> SCALE_SUBPEL_BITS); #if CONFIG_OPTFLOW_REFINEMENT || CONFIG_EXT_RECUR_PARTITIONS } else { int pos_x = inter_pred_params->pix_col << SUBPEL_BITS; int pos_y = inter_pred_params->pix_row << SUBPEL_BITS; + +#if CONFIG_REFINEMV +#if CONFIG_OPTFLOW_REFINEMENT + const int bw = inter_pred_params->original_pu_width; + const int bh = inter_pred_params->original_pu_height; + const MV mv_q4 = clamp_mv_to_umv_border_sb( + xd, src_mv, bw, bh, use_optflow_refinement, + inter_pred_params->subsampling_x, inter_pred_params->subsampling_y); +#else + const int bw = inter_pred_params->original_pu_width; + const int bh = inter_pred_params->original_pu_height; + const MV mv_q4 = clamp_mv_to_umv_border_sb( + xd, src_mv, bw, bh, inter_pred_params->subsampling_x, + inter_pred_params->subsampling_y); +#endif // CONFIG_OPTFLOW_REFINEMENT + +#else #if CONFIG_OPTFLOW_REFINEMENT const int bw = use_optflow_refinement ? inter_pred_params->orig_block_width : inter_pred_params->block_width; @@ -108,11 +156,25 @@ xd, src_mv, bw, bh, inter_pred_params->subsampling_x, inter_pred_params->subsampling_y); #endif // CONFIG_OPTFLOW_REFINEMENT +#endif // CONFIG_REFINEMV + subpel_params->xs = subpel_params->ys = SCALE_SUBPEL_SHIFTS; subpel_params->subpel_x = (mv_q4.col & SUBPEL_MASK) << SCALE_EXTRA_BITS; subpel_params->subpel_y = (mv_q4.row & SUBPEL_MASK) << SCALE_EXTRA_BITS; pos_x += mv_q4.col; pos_y += mv_q4.row; +#if CONFIG_D071_IMP_MSK_BLD + if (inter_pred_params->border_data.enable_bacp) { + subpel_params->x0 = pos_x >> SUBPEL_BITS; + subpel_params->y0 = pos_y >> SUBPEL_BITS; + + // Get reference block bottom right coordinate. 
+ subpel_params->x1 = + (pos_x >> SUBPEL_BITS) + (inter_pred_params->block_width - 1) + 1; + subpel_params->y1 = + (pos_y >> SUBPEL_BITS) + (inter_pred_params->block_height - 1) + 1; + } +#endif // CONFIG_D071_IMP_MSK_BLD *pre = pre_buf->buf0 + (pos_y >> SUBPEL_BITS) * pre_buf->stride + (pos_x >> SUBPEL_BITS); } @@ -128,18 +190,26 @@ 0 /* mi_y */, 0 /* ref */, NULL /* mc_buf */, enc_calc_subpel_params); } -static void enc_build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd, - int plane, MB_MODE_INFO *mi, +void enc_build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd, + int plane, MB_MODE_INFO *mi, #if CONFIG_BAWP - const BUFFER_SET *ctx, + const BUFFER_SET *ctx, #endif - int bw, int bh, int mi_x, int mi_y) { +#if CONFIG_REFINEMV + int build_for_refine_mv_only, +#endif // CONFIG_REFINEMV + int bw, int bh, int mi_x, int mi_y) { av1_build_inter_predictors(cm, xd, plane, mi, #if CONFIG_BAWP ctx, #endif +#if CONFIG_REFINEMV + build_for_refine_mv_only, +#endif // CONFIG_REFINEMV 0 /* build_for_obmc */, bw, bh, mi_x, mi_y, - NULL /* mc_buf */, enc_calc_subpel_params); + NULL /* mc_buf */, + + enc_calc_subpel_params); } void av1_enc_build_inter_predictor_y(MACROBLOCKD *xd, int mi_row, int mi_col) { @@ -168,17 +238,60 @@ int mi_row, int mi_col, const BUFFER_SET *ctx, BLOCK_SIZE bsize, int plane_from, int plane_to) { +#if CONFIG_REFINEMV + MB_MODE_INFO *mbmi = xd->mi[0]; + + int is_refinemv_supported = + mbmi->refinemv_flag && !is_intrabc_block(mbmi, xd->tree_type); + + int need_chroma_dmvr = xd->is_chroma_ref && + (plane_from != 0 || plane_to != 0) && + is_refinemv_supported; + assert(IMPLIES(need_chroma_dmvr, !is_interintra_pred(mbmi))); + + if (need_chroma_dmvr && default_refinemv_modes(mbmi)) + need_chroma_dmvr &= (mbmi->comp_group_idx == 0 && + mbmi->interinter_comp.type == COMPOUND_AVERAGE); + + if (need_chroma_dmvr) { + fill_subblock_refine_mv(xd->refinemv_subinfo, xd->plane[0].width, + xd->plane[0].height, mbmi->mv[0].as_mv, + mbmi->mv[1].as_mv); + + 
// if luma build is not available, we need to get refinemv based on luma + // need to search DMVR here based on luma plane + if (plane_from != 0) { +#if CONFIG_BAWP + enc_build_inter_predictors(cm, xd, 0, xd->mi[0], ctx, 1, + xd->plane[0].width, xd->plane[0].height, + mi_col * MI_SIZE, mi_row * MI_SIZE); +#else + enc_build_inter_predictors(cm, xd, 0, xd->mi[0], 1, xd->plane[0].width, + xd->plane[0].height, mi_col * MI_SIZE, + mi_row * MI_SIZE); +#endif + } + } +#endif // CONFIG_REFINEMV + for (int plane = plane_from; plane <= plane_to; ++plane) { if (plane && !xd->is_chroma_ref) break; const int mi_x = mi_col * MI_SIZE; const int mi_y = mi_row * MI_SIZE; #if CONFIG_BAWP enc_build_inter_predictors(cm, xd, plane, xd->mi[0], ctx, +#if CONFIG_REFINEMV + 0, +#endif // CONFIG_REFINEMV xd->plane[plane].width, xd->plane[plane].height, mi_x, mi_y); #else - enc_build_inter_predictors(cm, xd, plane, xd->mi[0], xd->plane[plane].width, - xd->plane[plane].height, mi_x, mi_y); + enc_build_inter_predictors(cm, xd, plane, xd->mi[0], +#if CONFIG_REFINEMV + 0, +#endif // CONFIG_REFINEMV + xd->plane[plane].width, xd->plane[plane].height, + mi_x, mi_y); #endif if (is_interintra_pred(xd->mi[0])) { @@ -285,7 +398,7 @@ BLOCK_SIZE bsize = xd->mi[0]->sb_type[PLANE_TYPE_Y]; foreach_overlappable_nb_above(cm, xd, max_neighbor_obmc[mi_size_wide_log2[bsize]], - build_obmc_prediction, &ctxt); + build_obmc_prediction, &ctxt, false); } void av1_build_prediction_by_left_preds(const AV1_COMMON *cm, MACROBLOCKD *xd,
diff --git a/av1/encoder/reconinter_enc.h b/av1/encoder/reconinter_enc.h index 2309f16..5f3de5f 100644 --- a/av1/encoder/reconinter_enc.h +++ b/av1/encoder/reconinter_enc.h
@@ -34,6 +34,16 @@ void av1_enc_build_inter_predictor_y(MACROBLOCKD *xd, int mi_row, int mi_col); +void enc_build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd, + int plane, MB_MODE_INFO *mi, +#if CONFIG_BAWP + const BUFFER_SET *ctx, +#endif +#if CONFIG_REFINEMV + int build_for_refine_mv_only, +#endif // CONFIG_REFINEMV + int bw, int bh, int mi_x, int mi_y); + // Build one inter predictor. It is called for building predictor for single // reference case, or just the 1st or 2nd reference in compound reference case. // Can build both regular and masked predictors.
diff --git a/av1/encoder/segmentation.c b/av1/encoder/segmentation.c index cabf66e..7da61f4 100644 --- a/av1/encoder/segmentation.c +++ b/av1/encoder/segmentation.c
@@ -100,6 +100,9 @@ const int bw = mi_size_wide[bsize], bh = mi_size_high[bsize]; const int hbw = bw / 2, hbh = bh / 2; const int qbw = bw / 4, qbh = bh / 4; +#if CONFIG_UNEVEN_4WAY + const int ebw = bw / 8, ebh = bh / 8; +#endif // CONFIG_UNEVEN_4WAY #else const int bs = mi_size_wide[bsize], hbs = bs / 2; const int qbs = bs / 4; @@ -142,21 +145,55 @@ case PARTITION_HORZ_3: CSEGS_RECURSIVE(0, 0, ptree->sub_tree[tree_idx++]); CSEGS_RECURSIVE(qbh, 0, ptree->sub_tree[tree_idx++]); -#if CONFIG_H_PARTITION CSEGS_RECURSIVE(qbh, hbw, ptree->sub_tree[tree_idx++]); -#endif // CONFIG_H_PARTITION if (mi_row + 3 * qbh < mi_params->mi_rows) CSEGS_RECURSIVE(3 * qbh, 0, ptree->sub_tree[tree_idx++]); break; case PARTITION_VERT_3: CSEGS_RECURSIVE(0, 0, ptree->sub_tree[tree_idx++]); CSEGS_RECURSIVE(0, qbw, ptree->sub_tree[tree_idx++]); -#if CONFIG_H_PARTITION CSEGS_RECURSIVE(hbh, qbw, ptree->sub_tree[tree_idx++]); -#endif // CONFIG_H_PARTITION if (mi_col + 3 * qbw < mi_params->mi_cols) CSEGS_RECURSIVE(0, 3 * qbw, ptree->sub_tree[tree_idx++]); break; +#if CONFIG_UNEVEN_4WAY + case PARTITION_HORZ_4A: + CSEGS_RECURSIVE(0, 0, ptree->sub_tree[tree_idx++]); + if (mi_row + ebh < mi_params->mi_rows) + CSEGS_RECURSIVE(ebh, 0, ptree->sub_tree[tree_idx++]); + if (mi_row + 3 * ebh < mi_params->mi_rows) + CSEGS_RECURSIVE(3 * ebh, 0, ptree->sub_tree[tree_idx++]); + if (mi_row + 7 * ebh < mi_params->mi_rows) + CSEGS_RECURSIVE(7 * ebh, 0, ptree->sub_tree[tree_idx++]); + break; + case PARTITION_HORZ_4B: + CSEGS_RECURSIVE(0, 0, ptree->sub_tree[tree_idx++]); + if (mi_row + ebh < mi_params->mi_rows) + CSEGS_RECURSIVE(ebh, 0, ptree->sub_tree[tree_idx++]); + if (mi_row + 5 * ebh < mi_params->mi_rows) + CSEGS_RECURSIVE(5 * ebh, 0, ptree->sub_tree[tree_idx++]); + if (mi_row + 7 * ebh < mi_params->mi_rows) + CSEGS_RECURSIVE(7 * ebh, 0, ptree->sub_tree[tree_idx++]); + break; + case PARTITION_VERT_4A: + CSEGS_RECURSIVE(0, 0, ptree->sub_tree[tree_idx++]); + if (mi_col + ebw < mi_params->mi_cols) + 
CSEGS_RECURSIVE(0, ebw, ptree->sub_tree[tree_idx++]); + if (mi_col + 3 * ebw < mi_params->mi_cols) + CSEGS_RECURSIVE(0, 3 * ebw, ptree->sub_tree[tree_idx++]); + if (mi_col + 7 * ebw < mi_params->mi_cols) + CSEGS_RECURSIVE(0, 7 * ebw, ptree->sub_tree[tree_idx++]); + break; + case PARTITION_VERT_4B: + CSEGS_RECURSIVE(0, 0, ptree->sub_tree[tree_idx++]); + if (mi_col + ebw < mi_params->mi_cols) + CSEGS_RECURSIVE(0, ebw, ptree->sub_tree[tree_idx++]); + if (mi_col + 5 * ebw < mi_params->mi_cols) + CSEGS_RECURSIVE(0, 5 * ebw, ptree->sub_tree[tree_idx++]); + if (mi_col + 7 * ebw < mi_params->mi_cols) + CSEGS_RECURSIVE(0, 7 * ebw, ptree->sub_tree[tree_idx++]); + break; +#endif // CONFIG_UNEVEN_4WAY #else // CONFIG_EXT_RECUR_PARTITIONS case PARTITION_NONE: CSEGS(bs, bs, 0, 0); break; case PARTITION_HORZ:
diff --git a/av1/encoder/speed_features.c b/av1/encoder/speed_features.c index 93c839a..fa88d44 100644 --- a/av1/encoder/speed_features.c +++ b/av1/encoder/speed_features.c
@@ -123,16 +123,6 @@ { 1, 1, 0 }, { 1, 1, 1 } }; -// This table holds the maximum number of reference frames for global motion. -// The table is indexed as per the speed feature 'gm_search_type'. -// 0 : All reference frames are allowed. -// 1 : All reference frames except L2 and L3 are allowed. -// 2 : All reference frames except L2, L3 and ARF2 are allowed. -// 3 : No reference frame is allowed. -static int gm_available_reference_frames[GM_DISABLE_SEARCH + 1] = { - INTER_REFS_PER_FRAME, INTER_REFS_PER_FRAME - 2, INTER_REFS_PER_FRAME - 3, 0 -}; - // Intra only frames, golden frames (except alt ref overlays) and // alt ref frames tend to be coded at a higher than ambient quality static int frame_is_boosted(const AV1_COMP *cpi) { @@ -353,8 +343,9 @@ #endif // Speed 0 for all speed features that give neutral coding performance change. - sf->gm_sf.gm_disable_recode = 1; - sf->gm_sf.gm_search_type = GM_REDUCED_REF_SEARCH_SKIP_LEV2; + sf->gm_sf.max_ref_frames = boosted ? 4 : 2; + sf->gm_sf.prune_ref_frame_for_gm_search = boosted ? 0 : 1; + sf->gm_sf.disable_gm_search_based_on_stats = 1; sf->part_sf.less_rectangular_check_level = 1; #if CONFIG_EXT_RECUR_PARTITIONS @@ -410,9 +401,6 @@ sf->hl_sf.superres_auto_search_type = SUPERRES_AUTO_DUAL; if (speed >= 1) { - sf->gm_sf.gm_search_type = GM_REDUCED_REF_SEARCH_SKIP_LEV3; - sf->gm_sf.prune_ref_frame_for_gm_search = boosted ? 
0 : 1; - #if CONFIG_EXT_RECUR_PARTITIONS sf->part_sf.intra_cnn_split = 0; #else // CONFIG_EXT_RECUR_PARTITIONS @@ -514,7 +502,7 @@ sf->hl_sf.high_precision_mv_usage = CURRENT_Q; sf->hl_sf.recode_loop = ALLOW_RECODE_KFARFGF; - sf->gm_sf.gm_search_type = GM_DISABLE_SEARCH; + sf->gm_sf.max_ref_frames = 0; sf->part_sf.less_rectangular_check_level = 2; sf->part_sf.simple_motion_search_prune_agg = 1; @@ -715,10 +703,9 @@ } static AOM_INLINE void init_gm_sf(GLOBAL_MOTION_SPEED_FEATURES *gm_sf) { - gm_sf->selective_ref_gm = 1; - gm_sf->gm_search_type = GM_FULL_SEARCH; - gm_sf->gm_disable_recode = 0; + gm_sf->max_ref_frames = INTER_REFS_PER_FRAME; gm_sf->prune_ref_frame_for_gm_search = 0; + gm_sf->disable_gm_search_based_on_stats = 0; } static AOM_INLINE void init_part_sf(PARTITION_SPEED_FEATURES *part_sf) { @@ -759,13 +746,19 @@ part_sf->early_term_after_none_split = 0; #if CONFIG_EXT_RECUR_PARTITIONS part_sf->prune_rect_with_none_rd = 0; - part_sf->prune_part_3_with_part_none = 0; - part_sf->prune_part_3_with_part_rect = 0; + part_sf->prune_ext_part_with_part_none = 0; + part_sf->prune_ext_part_with_part_rect = 0; +#if CONFIG_UNEVEN_4WAY + part_sf->prune_part_4_with_partition_boundary = 0; + part_sf->prune_part_4_horz_or_vert = 0; + part_sf->prune_part_4_with_part_3 = 0; +#endif // CONFIG_UNEVEN_4WAY part_sf->two_pass_partition_search = 0; part_sf->prune_rect_with_ml = 0; part_sf->end_part_search_after_consec_failures = 0; part_sf->ext_recur_depth = INT_MAX; part_sf->prune_rect_with_split_depth = 0; + part_sf->prune_part_h_with_partition_boundary = 0; #endif // CONFIG_EXT_RECUR_PARTITIONS } @@ -853,6 +846,10 @@ #if CONFIG_EXT_RECUR_PARTITIONS inter_sf->reuse_erp_mode_flag = 0; #endif // CONFIG_EXT_RECUR_PARTITIONS + +#if CONFIG_CWG_D067_IMPROVED_WARP + inter_sf->prune_warpmv_prob_thresh = 32; +#endif // CONFIG_CWG_D067_IMPROVED_WARP } static AOM_INLINE void init_interp_sf(INTERP_FILTER_SPEED_FEATURES *interp_sf) { @@ -1053,17 +1050,22 @@ 
sf->part_sf.simple_motion_search_early_term_none = 1; AOM_FALLTHROUGH_INTENDED; case 5: + sf->part_sf.prune_part_h_with_partition_boundary = true; + sf->part_sf.adaptive_partition_search_order = true; sf->tx_sf.use_largest_tx_size_for_small_bsize = true; // TODO(chiyotsai@google.com): This speed feature causes large regression // on b2 testset. Disable this for now until we figure out how to avoid // the loss. // sf->part_sf.end_part_search_after_consec_failures = 1; AOM_FALLTHROUGH_INTENDED; - case 4: - sf->part_sf.prune_part_3_with_part_rect = 1; + case 4: sf->part_sf.prune_ext_part_with_part_rect = 1; +#if CONFIG_UNEVEN_4WAY + sf->part_sf.prune_part_4_horz_or_vert = 1; + sf->part_sf.prune_part_4_with_part_3 = 1; +#endif // CONFIG_UNEVEN_4WAY AOM_FALLTHROUGH_INTENDED; case 3: - sf->part_sf.prune_part_3_with_part_none = 1; + sf->part_sf.prune_ext_part_with_part_none = 1; AOM_FALLTHROUGH_INTENDED; case 2: sf->inter_sf.prune_ref_frame_for_rect_partitions = @@ -1249,9 +1251,8 @@ // Disable the speed feature 'prune_ref_frame_for_gm_search' to achieve // better parallelism when number of threads available are greater than or // equal to maximum number of reference frames allowed for global motion. - if (sf->gm_sf.gm_search_type != GM_DISABLE_SEARCH && - (cpi->oxcf.max_threads >= - gm_available_reference_frames[sf->gm_sf.gm_search_type])) + if (sf->gm_sf.max_ref_frames > 0 && + cpi->oxcf.max_threads >= sf->gm_sf.max_ref_frames) sf->gm_sf.prune_ref_frame_for_gm_search = 0; } }
diff --git a/av1/encoder/speed_features.h b/av1/encoder/speed_features.h index 61e21d1..e3a6391 100644 --- a/av1/encoder/speed_features.h +++ b/av1/encoder/speed_features.h
@@ -33,13 +33,6 @@ } MESH_PATTERN; enum { - GM_FULL_SEARCH, - GM_REDUCED_REF_SEARCH_SKIP_LEV2, - GM_REDUCED_REF_SEARCH_SKIP_LEV3, - GM_DISABLE_SEARCH -} UENUM1BYTE(GM_SEARCH_TYPE); - -enum { INTRA_ALL = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED) | (1 << D45_PRED) | (1 << D135_PRED) | (1 << D113_PRED) | (1 << D157_PRED) | (1 << D203_PRED) | (1 << D67_PRED) | (1 << SMOOTH_PRED) | @@ -369,20 +362,16 @@ } TPL_SPEED_FEATURES; typedef struct GLOBAL_MOTION_SPEED_FEATURES { - // Do not compute the global motion parameters for a LAST2_FRAME or - // LAST3_FRAME if the GOLDEN_FRAME is closer and it has a non identity - // global model. - int selective_ref_gm; - - GM_SEARCH_TYPE gm_search_type; - - // whether to disable the global motion recode loop - int gm_disable_recode; + int max_ref_frames; // During global motion estimation, prune remaining reference frames in a // given direction(past/future), if the evaluated ref_frame in that direction // yields gm_type as INVALID/TRANSLATION/IDENTITY int prune_ref_frame_for_gm_search; + + // Disable global motion estimation based on stats of previous frames in the + // GF group + int disable_gm_search_based_on_stats; } GLOBAL_MOTION_SPEED_FEATURES; typedef struct PARTITION_SPEED_FEATURES { @@ -496,11 +485,23 @@ // Prunes PARTITION_3 if PARTITION_NONE is used instead of PARTITION_HORZ|VERT int prune_rect_with_none_rd; - // Prunes PARTITION_3 if PARTITION_NONE is used instead of PARTITION_HORZ|VERT - int prune_part_3_with_part_none; + // Prunes extended partitions if PARTITION_NONE is used instead of + // PARTITION_HORZ|VERT. + int prune_ext_part_with_part_none; - // Prunes PARTITION_3 partition 3 doesn't split in the same direction - int prune_part_3_with_part_rect; + // Prunes extended partitions if rect sub-partitions don't further split in + // the same direction. 
+ int prune_ext_part_with_part_rect; + +#if CONFIG_UNEVEN_4WAY + // Prunes PARTITION_HORZ_4A/4B if vertical is the best partition, and + // Prunes PARTITION_VERT_4A/4B if horizontal is the best partition. + int prune_part_4_horz_or_vert; + + // Prunes PARTITION_HORZ_4A/4B based on PARTITION_HORZ_3 search result, and + // Prunes PARTITION_VERT_4A/4B based on PARTITION_VERT_3 search result. + int prune_part_4_with_part_3; +#endif // CONFIG_UNEVEN_4WAY int two_pass_partition_search; @@ -516,6 +517,21 @@ // Prune rect partitions if PARTITION_SPLIT goes deep. int prune_rect_with_split_depth; + + // Search horizontal and vertical split before PARTITION_NONE if the neighbor + // blocks are much smaller than the current block size. + int adaptive_partition_search_order; + + // Prune h partition types if their resulting boundary does not agree with + // the current best partition's boundary after searching NONE, HORZ, and VERT. + int prune_part_h_with_partition_boundary; + +#if CONFIG_UNEVEN_4WAY + // Prune r-way partition types if their resulting boundary does not agree with + // the current best partition's boundary after searching NONE, HORZ, VERT, and + // H-parts. + int prune_part_4_with_partition_boundary; +#endif // CONFIG_UNEVEN_4WAY #endif // CONFIG_EXT_RECUR_PARTITIONS } PARTITION_SPEED_FEATURES; @@ -725,6 +741,11 @@ // Prune warped motion search using previous frame stats. int prune_warped_prob_thresh; +#if CONFIG_CWG_D067_IMPROVED_WARP + // Prune warpmv with mvd search using previous frame stats. + int prune_warpmv_prob_thresh; +#endif // CONFIG_CWG_D067_IMPROVED_WARP + // Enable/disable interintra wedge search. int disable_wedge_interintra_search;
diff --git a/av1/encoder/subgop.c b/av1/encoder/subgop.c index 025fb82..9240d6c 100644 --- a/av1/encoder/subgop.c +++ b/av1/encoder/subgop.c
@@ -195,9 +195,15 @@ switch (code) { case FRAME_TYPE_INO_VISIBLE: case FRAME_TYPE_INO_REPEAT: +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + case FRAME_TYPE_OOO_UNFILTERED: return 1; + case FRAME_TYPE_INO_SHOWEXISTING: + case FRAME_TYPE_OOO_FILTERED: return 0; +#else // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT case FRAME_TYPE_INO_SHOWEXISTING: return 1; case FRAME_TYPE_OOO_FILTERED: case FRAME_TYPE_OOO_UNFILTERED: return 0; +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT default: assert(0 && "Invalid frame type code"); return 0; } } @@ -211,9 +217,24 @@ // Each disp frame index must be shown exactly once and in ascending order int last_visible = 0; for (int s = 0; s < config->num_steps; ++s) { +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + if (config->step[s].type_code == FRAME_TYPE_INO_VISIBLE || + config->step[s].type_code == FRAME_TYPE_INO_REPEAT) { + int updated_last_visible = config->step[s].disp_frame_idx; + do { + last_visible = updated_last_visible; + for (int k = 0; k < s; ++k) { + if (is_visible(config->step[k].type_code) && + config->step[k].disp_frame_idx == last_visible + 1) { + updated_last_visible = config->step[k].disp_frame_idx; + } + } + } while (last_visible != updated_last_visible); +#else // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT if (is_visible(config->step[s].type_code)) { if (config->step[s].disp_frame_idx != last_visible + 1) return 0; last_visible = config->step[s].disp_frame_idx; +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT } } if (last_visible != config->num_frames) return 0;
diff --git a/av1/encoder/temporal_filter.c b/av1/encoder/temporal_filter.c index f00c3e2..0e7fdce 100644 --- a/av1/encoder/temporal_filter.c +++ b/av1/encoder/temporal_filter.c
@@ -98,7 +98,7 @@ // Save input state. #if CONFIG_FLEX_MVRES const AV1_COMMON *cm = &cpi->common; -#if CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT const int is_ibc_cost = 0; #endif #endif @@ -153,7 +153,7 @@ &baseline_mv, #if CONFIG_FLEX_MVRES pb_mv_precision, -#if CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT is_ibc_cost, #endif #endif @@ -226,7 +226,7 @@ subblock_size, &baseline_mv, #if CONFIG_FLEX_MVRES pb_mv_precision, -#if CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT is_ibc_cost, #endif #endif @@ -1111,9 +1111,19 @@ // Set showable frame. if (filter_frame_lookahead_idx >= 0) { +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + // When enable_frame_output_order == 1, it is intended to set showable_frame + // to one only for the coded frames to be outputted. When enable_overlay == + // 1, showable_frame of the filtered frame is set to zero by default. + cpi->common.showable_frame = + (!cpi->oxcf.ref_frm_cfg.enable_frame_output_order && + (num_frames_for_filtering == 1 || is_second_arf)) || + cpi->oxcf.ref_frm_cfg.enable_frame_output_order || +#else // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT cpi->common.showable_frame = num_frames_for_filtering == 1 || is_second_arf || - (cpi->oxcf.algo_cfg.enable_overlay == 0); +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + (cpi->oxcf.algo_cfg.enable_overlay == 0); } // Do filtering.
diff --git a/av1/encoder/tokenize.c b/av1/encoder/tokenize.c index 8f95c72..221054e 100644 --- a/av1/encoder/tokenize.c +++ b/av1/encoder/tokenize.c
@@ -28,7 +28,7 @@ #include "av1/encoder/rdopt.h" #include "av1/encoder/tokenize.h" -#if CONFIG_NEW_COLOR_MAP_CODING +#if CONFIG_PALETTE_IMPROVEMENTS static int cost_and_tokenize_map(Av1ColorMapParam *param, TokenExtra **t, int plane, int calc_rate, int allow_update_cdf, FRAME_COUNTS *counts, MapCdf map_pb_cdf, @@ -157,7 +157,7 @@ if (calc_rate) return this_rate; return 0; } -#endif // CONFIG_NEW_COLOR_MAP_CODING +#endif // CONFIG_PALETTE_IMPROVEMENTS static void get_palette_params(const MACROBLOCK *const x, int plane, BLOCK_SIZE bsize, Av1ColorMapParam *params) { @@ -167,12 +167,12 @@ params->color_map = xd->plane[plane].color_index_map; params->map_cdf = plane ? xd->tile_ctx->palette_uv_color_index_cdf : xd->tile_ctx->palette_y_color_index_cdf; -#if CONFIG_NEW_COLOR_MAP_CODING +#if CONFIG_PALETTE_IMPROVEMENTS params->identity_row_cdf = plane ? xd->tile_ctx->identity_row_cdf_uv : xd->tile_ctx->identity_row_cdf_y; params->identity_row_cost = plane ? &x->mode_costs.palette_uv_row_flag_cost : &x->mode_costs.palette_y_row_flag_cost; -#endif // CONFIG_NEW_COLOR_MAP_CODING +#endif // CONFIG_PALETTE_IMPROVEMENTS params->color_cost = plane ? &x->mode_costs.palette_uv_color_cost : &x->mode_costs.palette_y_color_cost; params->n_colors = pmi->palette_size[plane]; @@ -200,7 +200,7 @@ get_color_map_params(x, plane, bsize, tx_size, type, &color_map_params); MapCdf map_pb_cdf = plane ? x->tile_pb_ctx->palette_uv_color_index_cdf : x->tile_pb_ctx->palette_y_color_index_cdf; -#if CONFIG_NEW_COLOR_MAP_CODING +#if CONFIG_PALETTE_IMPROVEMENTS IdentityRowCdf eq_row_pb_cdf = plane ? 
x->tile_pb_ctx->identity_row_cdf_uv : x->tile_pb_ctx->identity_row_cdf_y; return cost_and_tokenize_map(&color_map_params, NULL, plane, 1, 0, NULL, @@ -208,7 +208,7 @@ #else return cost_and_tokenize_map(&color_map_params, NULL, plane, 1, 0, NULL, map_pb_cdf); -#endif // CONFIG_NEW_COLOR_MAP_CODING +#endif // CONFIG_PALETTE_IMPROVEMENTS } void av1_tokenize_color_map(const MACROBLOCK *const x, int plane, @@ -218,7 +218,7 @@ assert(plane == 0 || plane == 1); Av1ColorMapParam color_map_params; get_color_map_params(x, plane, bsize, tx_size, type, &color_map_params); -#if CONFIG_NEW_COLOR_MAP_CODING +#if CONFIG_PALETTE_IMPROVEMENTS MapCdf map_pb_cdf = plane ? x->tile_pb_ctx->palette_uv_color_index_cdf : x->tile_pb_ctx->palette_y_color_index_cdf; IdentityRowCdf eq_row_pb_cdf = plane ? x->tile_pb_ctx->identity_row_cdf_uv @@ -234,7 +234,7 @@ : x->tile_pb_ctx->palette_y_color_index_cdf; cost_and_tokenize_map(&color_map_params, t, plane, 0, allow_update_cdf, counts, map_pb_cdf); -#endif // CONFIG_NEW_COLOR_MAP_CODING +#endif // CONFIG_PALETTE_IMPROVEMENTS } static void tokenize_vartx(ThreadData *td, TX_SIZE tx_size,
diff --git a/av1/encoder/tokenize.h b/av1/encoder/tokenize.h index 675b1f1..b886ea6 100644 --- a/av1/encoder/tokenize.h +++ b/av1/encoder/tokenize.h
@@ -24,10 +24,10 @@ typedef struct { aom_cdf_prob *color_map_cdf; uint8_t token; -#if CONFIG_NEW_COLOR_MAP_CODING +#if CONFIG_PALETTE_IMPROVEMENTS aom_cdf_prob *identity_row_cdf; uint8_t identity_row_flag; -#endif // CONFIG_NEW_COLOR_MAP_CODING +#endif // CONFIG_PALETTE_IMPROVEMENTS } TokenExtra; typedef struct {
diff --git a/av1/encoder/tpl_model.c b/av1/encoder/tpl_model.c index f91ed5b..a09040c 100644 --- a/av1/encoder/tpl_model.c +++ b/av1/encoder/tpl_model.c
@@ -156,7 +156,7 @@ #if CONFIG_FLEX_MVRES const MvSubpelPrecision pb_mv_precision = cm->features.fr_mv_precision; full_pel_lower_mv_precision(&start_mv, pb_mv_precision); -#if CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT const int is_ibc_cost = 0; #endif #endif @@ -165,7 +165,7 @@ av1_make_default_fullpel_ms_params(&full_ms_params, cpi, x, bsize, ¢er_mv, #if CONFIG_FLEX_MVRES pb_mv_precision, -#if CONFIG_BVCOST_UPDATE +#if CONFIG_IBC_BV_IMPROVEMENT is_ibc_cost, #endif #endif
diff --git a/av1/encoder/tx_search.c b/av1/encoder/tx_search.c index fe75770..0f8f819 100644 --- a/av1/encoder/tx_search.c +++ b/av1/encoder/tx_search.c
@@ -1192,6 +1192,12 @@ best_tx_type != DCT_DCT) { update_txk_array(xd, blk_row, blk_col, tx_size, DCT_DCT); } +#if CONFIG_ATC_DCTX_ALIGNED + if (plane == 0 && x->plane[plane].eobs[block] == 1 && + best_tx_type != DCT_DCT && !is_inter) { + update_txk_array(xd, blk_row, blk_col, tx_size, DCT_DCT); + } +#endif // CONFIG_ATC_DCTX_ALIGNED } } @@ -1357,10 +1363,10 @@ static uint32_t get_intra_txb_hash(MACROBLOCK *x, int plane, int blk_row, int blk_col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size -#if CONFIG_ATC_NEWTXSETS +#if CONFIG_ATC , PREDICTION_MODE intra_dir -#endif // CONFIG_ATC_NEWTXSETS +#endif // CONFIG_ATC ) { int16_t tmp_data[64 * 64]; const int diff_stride = block_size_wide[plane_bsize]; @@ -1380,11 +1386,11 @@ } CRC32C *crc = &x->txfm_search_info.mb_rd_record.crc_calculator; const uint32_t hash = av1_get_crc32c_value(crc, hash_data, 2 * txb_w * txb_h); -#if CONFIG_ATC_NEWTXSETS +#if CONFIG_ATC return (hash << 9) + (tx_size << 4) + (intra_dir); #else return (hash << 5) + tx_size; -#endif // CONFIG_ATC_NEWTXSETS +#endif // CONFIG_ATC } // pruning thresholds for prune_txk_type and prune_txk_type_separ @@ -1404,7 +1410,7 @@ frame_is_intra_only(&cpi->common) && !is_inter_block(xd->mi[0], xd->tree_type) && plane == 0 && tx_size_wide[tx_size] == tx_size_high[tx_size]); -#if CONFIG_ATC_NEWTXSETS +#if CONFIG_ATC MB_MODE_INFO *mbmi = xd->mi[0]; PREDICTION_MODE intra_dir; if (mbmi->filter_intra_mode_info.use_filter_intra) @@ -1417,7 +1423,7 @@ #else const uint32_t intra_hash = get_intra_txb_hash(x, plane, blk_row, blk_col, plane_bsize, tx_size); -#endif // CONFIG_ATC_NEWTXSETS +#endif // CONFIG_ATC const int intra_hash_idx = find_tx_size_rd_info(&txfm_info->txb_rd_record_intra, intra_hash); *intra_txb_rd_info = @@ -2087,14 +2093,14 @@ const AV1_COMMON *cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = xd->mi[0]; -#if CONFIG_ATC_NEWTXSETS +#if CONFIG_ATC PREDICTION_MODE intra_dir; if (mbmi->filter_intra_mode_info.use_filter_intra) intra_dir = 
fimode_to_intradir[mbmi->filter_intra_mode_info.filter_intra_mode]; else intra_dir = mbmi->mode; -#endif // CONFIG_ATC_NEWTXSETS +#endif // CONFIG_ATC const TxfmSearchParams *txfm_params = &x->txfm_search_params; const int is_inter = is_inter_block(mbmi, xd->tree_type); const int fast_tx_search = ftxs_mode & FTXS_DCT_AND_1D_DCT_ONLY; @@ -2120,12 +2126,12 @@ av1_get_tx_type(xd, get_plane_type(plane), blk_row, blk_col, tx_size, cm->features.reduced_tx_set_used); } -#if !CONFIG_ATC_NEWTXSETS +#if !CONFIG_ATC PREDICTION_MODE intra_dir = mbmi->filter_intra_mode_info.use_filter_intra ? fimode_to_intradir[mbmi->filter_intra_mode_info.filter_intra_mode] : mbmi->mode; -#endif // !CONFIG_ATC_NEWTXSETS +#endif // !CONFIG_ATC uint16_t ext_tx_used_flag = cpi->sf.tx_sf.tx_type_search.use_reduced_intra_txset && tx_set_type == EXT_TX_SET_DTT4_IDTX_1DDCT @@ -2138,7 +2144,7 @@ txk_allowed = DCT_DCT; } -#if CONFIG_ATC_NEWTXSETS +#if CONFIG_ATC if (!is_inter) { uint16_t mdtx_mask = av1_md_trfm_used_flag[av1_size_class[tx_size]] @@ -2150,6 +2156,9 @@ (1 << DCT_DCT) | (1 << ADST_ADST); // DCT_DCT, ADST_ADST } #endif // CONFIG_ATC_REDUCED_TXSET +#if CONFIG_ATC_DCTX_ALIGNED + if (txsize_sqr_up_map[tx_size] == TX_32X32) ext_tx_used_flag |= (1 << IDTX); +#endif // CONFIG_ATC_DCTX_ALIGNED } #if CONFIG_ATC_REDUCED_TXSET else { @@ -2158,7 +2167,7 @@ } } #endif // CONFIG_ATC_REDUCED_TXSET -#endif // CONFIG_ATC_NEWTXSETS +#endif // CONFIG_ATC if (cpi->oxcf.txfm_cfg.enable_flip_idtx == 0) ext_tx_used_flag &= DCT_ADST_TX_MASK; @@ -2207,6 +2216,7 @@ if (plane) { const CctxType cctx_type = av1_get_cctx_type(xd, blk_row, blk_col); assert(cctx_type == CCTX_NONE); + (void)cctx_type; } #endif // CONFIG_DEBUG && CONFIG_CROSS_CHROMA_TX @@ -2242,7 +2252,12 @@ } if (mbmi->fsc_mode[xd->tree_type == CHROMA_PART] && - txsize_sqr_up_map[tx_size] < TX_32X32 && plane == PLANE_TYPE_Y) { +#if CONFIG_ATC_DCTX_ALIGNED + txsize_sqr_up_map[tx_size] <= TX_32X32 +#else + txsize_sqr_up_map[tx_size] < TX_32X32 +#endif // 
CONFIG_ATC_DCTX_ALIGNED + && plane == PLANE_TYPE_Y) { txk_allowed = IDTX; allowed_tx_mask = (1 << txk_allowed); } @@ -2381,6 +2396,9 @@ best_rd_stats->skip_txfm = 1; x->plane[plane].eobs[block] = 0; +#if CONFIG_ATC_DCTX_ALIGNED + x->plane[plane].bobs[block] = 0; +#endif // CONFIG_ATC_DCTX_ALIGNED *block_sse = ROUND_POWER_OF_TWO((*block_sse), (xd->bd - 8) * 2); @@ -2485,6 +2503,9 @@ best_rd_stats->sse = intra_txb_rd_info->sse; best_rd_stats->skip_txfm = intra_txb_rd_info->eob == 0; x->plane[plane].eobs[block] = intra_txb_rd_info->eob; +#if CONFIG_ATC_DCTX_ALIGNED + x->plane[plane].bobs[block] = intra_txb_rd_info->bob; +#endif // CONFIG_ATC_DCTX_ALIGNED x->plane[plane].txb_entropy_ctx[block] = intra_txb_rd_info->txb_entropy_ctx; best_eob = intra_txb_rd_info->eob; @@ -2597,6 +2618,9 @@ av1_setup_quant(tx_size, !skip_trellis, skip_trellis ? xform_quant_b : AV1_XFORM_QUANT_FP, cpi->oxcf.q_cfg.quant_b_adapt, &quant_param); +#if CONFIG_ATC_DCTX_ALIGNED + int eob_found = 0; +#endif // CONFIG_ATC_DCTX_ALIGNED // Iterate through all transform type candidates. for (int idx = 0; idx < TX_TYPES; ++idx) { @@ -2622,8 +2646,13 @@ bool skip_idx = false; xd->enable_ist = cm->seq_params.enable_ist && !cpi->sf.tx_sf.tx_type_search.skip_stx_search && - !mbmi->fsc_mode[xd->tree_type == CHROMA_PART]; + !mbmi->fsc_mode[xd->tree_type == CHROMA_PART] && + !xd->lossless[mbmi->segment_id]; +#if CONFIG_ATC_DCTX_ALIGNED + const int max_stx = xd->enable_ist && !(eob_found) ? 4 : 1; +#else const int max_stx = xd->enable_ist ? 
4 : 1; +#endif // CONFIG_ATC_DCTX_ALIGNED for (int stx = 0; stx < max_stx; ++stx) { TX_TYPE tx_type = (TX_TYPE)txk_map[idx]; if (!(allowed_tx_mask & (1 << tx_type))) continue; @@ -2635,6 +2664,9 @@ ((tx_type != DCT_DCT && tx_type != ADST_ADST) || plane != 0 || is_inter_block(mbmi, xd->tree_type) || dc_only_blk || intra_mode >= PAETH_PRED || filter || !is_depth0 || +#if CONFIG_ATC_DCTX_ALIGNED + (eob_found) || +#endif // CONFIG_ATC_DCTX_ALIGNED mbmi->fsc_mode[xd->tree_type == CHROMA_PART] || xd->lossless[mbmi->segment_id]); if (skip_stx && stx) continue; @@ -2675,6 +2707,17 @@ if (*eob != 0) *eob = av1_get_max_eob(txfm_param.tx_size); } } +#if CONFIG_ATC_DCTX_ALIGNED + // pre-skip DC only case to make things faster + uint16_t *const eob = &p->eobs[block]; + if (*eob == 1 && plane == PLANE_TYPE_Y && !is_inter) { + if (tx_type == DCT_DCT) eob_found = 1; + if (tx_type != DCT_DCT || (stx && get_primary_tx_type(tx_type))) { + update_txk_array(xd, blk_row, blk_col, tx_size, DCT_DCT); + continue; + } + } +#endif // CONFIG_ATC_DCTX_ALIGNED // Calculate rate cost of quantized coefficients. if (quant_param.use_optimize_b) { av1_optimize_b(cpi, x, plane, block, tx_size, tx_type, @@ -2699,6 +2742,19 @@ txb_ctx, cm->features.reduced_tx_set_used); } +#if CONFIG_ATC_DCTX_ALIGNED + if (*eob == 1 && plane == PLANE_TYPE_Y && !is_inter) { + // post quant-skip DC only case + if (tx_type == DCT_DCT) eob_found = 1; + if (tx_type != DCT_DCT || (stx && get_primary_tx_type(tx_type))) { + if (plane == 0) + update_txk_array(xd, blk_row, blk_col, tx_size, DCT_DCT); + continue; + } + if (get_secondary_tx_type(tx_type) > 0) continue; + if (txfm_param.sec_tx_type > 0) continue; + } +#endif // CONFIG_ATC_DCTX_ALIGNED // If rd cost based on coeff rate alone is already more than best_rd, // terminate early. 
if (RDCOST(x->rdmult, rate_cost, 0) > best_rd) continue; @@ -2831,6 +2887,14 @@ if (skip_idx) break; } +#if CONFIG_ATC_DCTX_ALIGNED + if (((best_eob == 1 && best_tx_type != DCT_DCT && plane == 0) || + best_rd == INT64_MAX) && + !is_inter) { + best_tx_type = DCT_DCT; + if (plane == 0) update_txk_array(xd, blk_row, blk_col, tx_size, DCT_DCT); + } +#endif // CONFIG_ATC_DCTX_ALIGNED best_rd_stats->skip_txfm = best_eob == 0; if (plane == 0) update_txk_array(xd, blk_row, blk_col, tx_size, best_tx_type); x->plane[plane].txb_entropy_ctx[block] = best_txb_ctx; @@ -2851,6 +2915,13 @@ best_rd_stats->sse = block_sse; } +#if CONFIG_ATC_DCTX_ALIGNED + if (plane == 0 && x->plane[plane].eobs[block] == 1 && + best_tx_type != DCT_DCT && !is_inter) { + av1_invalid_rd_stats(best_rd_stats); + } +#endif // CONFIG_ATC_DCTX_ALIGNED + if (intra_txb_rd_info != NULL) { intra_txb_rd_info->valid = 1; intra_txb_rd_info->entropy_context = cur_joint_ctx; @@ -2878,6 +2949,12 @@ // can use them for prediction. recon_intra(cpi, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, txb_ctx, skip_trellis, best_tx_type, 0, &rate_cost, best_eob); +#if CONFIG_ATC_DCTX_ALIGNED + if (plane == 0 && x->plane[plane].eobs[block] == 1 && + best_tx_type != DCT_DCT && !is_inter) { + av1_invalid_rd_stats(best_rd_stats); + } +#endif // CONFIG_ATC_DCTX_ALIGNED p->dqcoeff = orig_dqcoeff; #endif // CONFIG_CROSS_CHROMA_TX } @@ -2894,6 +2971,9 @@ MB_MODE_INFO *mbmi = xd->mi[0]; struct macroblock_plane *const p_c1 = &x->plane[AOM_PLANE_U]; struct macroblock_plane *const p_c2 = &x->plane[AOM_PLANE_V]; +#if CONFIG_ATC_DCTX_ALIGNED + const int is_inter = is_inter_block(mbmi, xd->tree_type); +#endif // CONFIG_ATC_DCTX_ALIGNED const int max_eob = av1_get_max_eob(tx_size); int64_t best_rd = RDCOST(x->rdmult, best_rd_stats->rate, best_rd_stats->dist); @@ -3020,6 +3100,11 @@ if (eobs_ptr_c1[block] == 0 || sse_dqcoeff_c2 > sse_dqcoeff_c1) { continue; } +#if CONFIG_ATC_DCTX_ALIGNED + if (eobs_ptr_c1[block] == 1 && !is_inter && 
cctx_type != CCTX_NONE) { + continue; + } +#endif // CONFIG_ATC_DCTX_ALIGNED // If rd cost based on coeff rate alone is already more than best_rd, // terminate early. @@ -3061,7 +3146,16 @@ assert(best_rd != INT64_MAX); best_rd_stats->skip_txfm = (best_eob_c1 == 0 && best_eob_c2 == 0); +#if CONFIG_ATC_DCTX_ALIGNED + if (best_eob_c1 == 1 && !is_inter && best_cctx_type != CCTX_NONE) { + best_cctx_type = CCTX_NONE; + update_cctx_array(xd, blk_row, blk_col, 0, 0, TX_4X4, CCTX_NONE); + } else { + update_cctx_array(xd, blk_row, blk_col, 0, 0, TX_4X4, best_cctx_type); + } +#else update_cctx_array(xd, blk_row, blk_col, 0, 0, TX_4X4, best_cctx_type); +#endif // CONFIG_ATC_DCTX_ALIGNED p_c1->txb_entropy_ctx[block] = best_txb_ctx_c1; p_c2->txb_entropy_ctx[block] = best_txb_ctx_c2; p_c1->eobs[block] = best_eob_c1; @@ -3144,6 +3238,11 @@ #endif // CONFIG_CROSS_CHROMA_TX txb_ctx, ftxs_mode, skip_trellis, ref_rdcost, &this_rd_stats); +#if CONFIG_ATC_DCTX_ALIGNED + if (this_rd_stats.dist == INT64_MAX || this_rd_stats.rate == INT_MAX) { + return; + } +#endif // CONFIG_ATC_DCTX_ALIGNED av1_merge_rd_stats(rd_stats, &this_rd_stats); #if !CONFIG_NEW_TX_PARTITION @@ -3201,6 +3300,9 @@ rd_stats->rate = zero_blk_rate; rd_stats->dist = rd_stats->sse; p->eobs[block] = 0; +#if CONFIG_ATC_DCTX_ALIGNED + p->bobs[block] = 0; +#endif // CONFIG_ATC_DCTX_ALIGNED update_txk_array(xd, blk_row, blk_col, tx_size, DCT_DCT); } rd_stats->skip_txfm = pick_skip_txfm; @@ -3648,7 +3750,13 @@ const int64_t skip_txfm_rd = is_inter_block(mbmi, xd->tree_type) ? RDCOST(x->rdmult, skip_txfm_rate, 0) : INT64_MAX; +#if CONFIG_SKIP_TXFM_OPT + const int64_t no_skip_txfm_rd = is_inter_block(mbmi, xd->tree_type) + ? 
RDCOST(x->rdmult, no_skip_txfm_rate, 0) + : 0; +#else const int64_t no_skip_txfm_rd = RDCOST(x->rdmult, no_skip_txfm_rate, 0); +#endif // CONFIG_SKIP_TXFM_OPT const int skip_trellis = 0; av1_txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, AOMMIN(no_skip_txfm_rd, skip_txfm_rd), AOM_PLANE_Y, bs, @@ -3873,7 +3981,11 @@ &txb_ctx, args->ftxs_mode, args->skip_trellis, args->best_rd - args->current_rd, &this_rd_stats); - if (this_rd_stats.dist == INT64_MAX) { + if (this_rd_stats.dist == INT64_MAX +#if CONFIG_ATC_DCTX_ALIGNED + || this_rd_stats.rate == INT_MAX +#endif // CONFIG_ATC_DCTX_ALIGNED + ) { args->exit_early = 1; args->incomplete_exit = 1; return; @@ -3883,11 +3995,7 @@ assert(!is_inter || plane_bsize < BLOCK_8X8); #if CONFIG_ADAPTIVE_DS_FILTER cfl_store_tx(xd, blk_row, blk_col, tx_size, -#if DS_FRAME_LEVEL - cm->features.ds_filter_type); -#else cm->seq_params.enable_cfl_ds_filter); -#endif // DS_FRAME_LEVEL #else cfl_store_tx(xd, blk_row, blk_col, tx_size); #endif // CONFIG_ADAPTIVE_DS_FILTER @@ -3963,8 +4071,13 @@ const int skip_txfm_rate = mode_costs->skip_txfm_cost[skip_ctx][1]; const int64_t skip_txfm_rd = is_inter ? RDCOST(x->rdmult, skip_txfm_rate, 0) : INT64_MAX; +#if CONFIG_SKIP_TXFM_OPT + const int64_t no_this_rd = + is_inter ? RDCOST(x->rdmult, no_skip_txfm_rate + tx_size_rate, 0) : 0; +#else const int64_t no_this_rd = RDCOST(x->rdmult, no_skip_txfm_rate + tx_size_rate, 0); +#endif // CONFIG_SKIP_TXFM_OPT mbmi->tx_size = tx_size; av1_txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, @@ -4221,7 +4334,11 @@ #endif // CONFIG_CROSS_CHROMA_TX txb_ctx, args->ftxs_mode, args->skip_trellis, args->best_rd - args->current_rd, this_rd_stats); - if (this_rd_stats->dist == INT64_MAX) { + if (this_rd_stats->dist == INT64_MAX +#if CONFIG_ATC_DCTX_ALIGNED + || this_rd_stats->rate == INT_MAX +#endif // CONFIG_ATC_DCTX_ALIGNED + ) { args->exit_early = 1; args->incomplete_exit = 1; }
diff --git a/av1/encoder/x86/encodetxb_sse2.c b/av1/encoder/x86/encodetxb_sse2.c index 93f4032..35c664b 100644 --- a/av1/encoder/x86/encodetxb_sse2.c +++ b/av1/encoder/x86/encodetxb_sse2.c
@@ -544,11 +544,17 @@ // Note: levels[] must be in the range [0, 127], inclusive. void av1_get_nz_map_contexts_skip_sse2(const uint8_t *const levels, const int16_t *const scan, +#if CONFIG_ATC_DCTX_ALIGNED + const uint16_t bob, +#endif // CONFIG_ATC_DCTX_ALIGNED const uint16_t eob, const TX_SIZE tx_size, int8_t *const coeff_contexts) { (void)scan; (void)eob; +#if CONFIG_ATC_DCTX_ALIGNED + (void)bob; +#endif // CONFIG_ATC_DCTX_ALIGNED const int width = get_txb_wide(tx_size); const int height = get_txb_high(tx_size); // coeff_contexts must be 16 byte aligned.
diff --git a/build/cmake/aom_config_defaults.cmake b/build/cmake/aom_config_defaults.cmake index 4fc3b51..4ad1bf5 100644 --- a/build/cmake/aom_config_defaults.cmake +++ b/build/cmake/aom_config_defaults.cmake
@@ -137,25 +137,22 @@ "Use zero offset for non-normative bit upshift") # AV2 experiment flags. -set_aom_config_var( - CONFIG_ATC_COEFCODING 1 - "AV2 enable adaptive transform coefficient coding improvement.") -set_aom_config_var(CONFIG_ATC_NEWTXSETS 1 - "AV2 enable adaptive transform coding and new TX sets.") +set_aom_config_var(CONFIG_ATC 1 "AV2 enable adaptive transform coding.") set_aom_config_var(CONFIG_ATC_REDUCED_TXSET 1 "AV2 enable reduced transform set.") +set_aom_config_var( + CONFIG_ATC_DCTX_ALIGNED 1 + "AV2 TX signaling restriction for DC blocks with EOB alignment.") set_aom_config_var(CONFIG_BYPASS_IMPROVEMENT 1 "AV2 enable entropy bypass improvement.") -set_aom_config_var(CONFIG_C043_MVP_IMPROVEMENTS 1 - "AV2 enable MVP list improvements.") -set_aom_config_var(CONFIG_C063_TMVP_IMPROVEMENT 1 - "AV2 experiment flag for improved TMVP derivation.") -set_aom_config_var(CONFIG_EXT_RECUR_PARTITIONS 1 NUMBER - "AV2 Fully recursive partitions experiment flag") -set_aom_config_var(CONFIG_H_PARTITION 1 NUMBER - "AV2 H partition mode experiment flag") +set_aom_config_var(CONFIG_EXT_DIR 1 "AV2 extended intra prediction angles.") +set_aom_config_var( + CONFIG_EXT_RECUR_PARTITIONS 1 NUMBER + "AV2 Fully recursive partitions including H partitions experiment flag") set_aom_config_var(CONFIG_BLOCK_256 1 NUMBER "AV2 BLOCK_256 experiment flag") set_aom_config_var(CONFIG_ERP_TFLITE 0 NUMBER "Build ERP with TFLite") +set_aom_config_var(CONFIG_UNEVEN_4WAY 1 NUMBER + "AV2 uneven 4-way partition experiment flag") set_aom_config_var(CONFIG_COMPOUND_WARP_SAMPLES 1 NUMBER "AV2 compound warped motion samples experiment flag") set_aom_config_var(CONFIG_NEW_TX_PARTITION 1 @@ -163,12 +160,14 @@ set_aom_config_var( CONFIG_ORIP 1 "AV2 experiment flag to enable offset based refinement of intra prediction.") +set_aom_config_var( + CONFIG_IDIF 1 + "AV2 experiment flag to enable Intra Directional Interpolation Filter.") set_aom_config_var(CONFIG_ORIP_DC_DISABLED 0 "AV2 experiment flag to 
disable ORIP for DC mode.") set_aom_config_var(CONFIG_ORIP_NONDC_DISABLED 0 "AV2 experiment flag to disable ORIP for non-DC modes.") -set_aom_config_var(CONFIG_SMVP_IMPROVEMENT 1 "Enable SMVP improvement") -set_aom_config_var(CONFIG_TMVP_IMPROVEMENT 1 "Enable TMVP improvement") +set_aom_config_var(CONFIG_MVP_IMPROVEMENT 1 "Enable MVP improvement") set_aom_config_var(CONFIG_REF_MV_BANK 1 "AV2 ref mv bank experiment flag") set_aom_config_var( CONFIG_CCSO 1 "AV2 experiment flag to enable cross component sample offset.") @@ -178,8 +177,6 @@ CONFIG_IBP_DC 1 "AV2 experiment flag to enable intra bi-prediction for DC mode.") set_aom_config_var(CONFIG_AIMC 1 "AV2 adaptive intra mode coding flag.") -set_aom_config_var(CONFIG_COMPLEXITY_SCALABLE_MVP 1 - "Enable complexity scalable mvp") set_aom_config_var( CONFIG_CONTEXT_DERIVATION 1 "AV2 experiment flag to enable modified context derivation : CWG-B065.") @@ -199,31 +196,40 @@ set_aom_config_var(CONFIG_PC_WIENER 1 NUMBER "AV2 pixel-classified Wiener filter experiment flag") # End: CWG-C016 +set_aom_config_var(CONFIG_HIGH_PASS_CROSS_WIENER_FILTER 1 + "AV2 high pass cross non-sep wiener filter experiment flag") +set_aom_config_var( + CONFIG_FLEXIBLE_RU_SIZE 1 + "AV2 experiment flag to choose RU size between 128x128, 256x256 and 512x512") # Source of throughput analysis : CWG-B065 set_aom_config_var(CONFIG_THROUGHPUT_ANALYSIS 0 "AV2 experiment flag to measure throughput.") set_aom_config_var(CONFIG_IBC_SR_EXT 1 "Enables IntraBC search range extension") -set_aom_config_var(CONFIG_BVP_IMPROVEMENT 1 "Enables BVP improvements") -set_aom_config_var(CONFIG_BVCOST_UPDATE 1 "Enables sb-level update for bv cost") +set_aom_config_var(CONFIG_IBC_BV_IMPROVEMENT 1 + "Enables BV improvements for IBC") set_aom_config_var(CONFIG_CCSO_EXT 1 "AV2 experiment flag to enable extended CCSO.") +set_aom_config_var(CONFIG_CFL_IMPROVEMENTS 1 + "AV2 Cfl improvements from CWG-D029.") +set_aom_config_var(CONFIG_ADPTIVE_DS_422 1 + "AV2 adaptive downsampling 
in CfL for 422 from CWG-D028.") set_aom_config_var(CONFIG_ADAPTIVE_MVD 1 "Enable adaptive MVD resolution") set_aom_config_var(CONFIG_JOINT_MVD 1 "Enable joint MVD coding") set_aom_config_var(CONFIG_IMPROVED_JMVD 1 "Enable joint MVD coding with multiple scaling factors") -set_aom_config_var(CONFIG_INDEP_PALETTE_PARSING 1 - "AV2 experiment flag for palette parsing independency.") -set_aom_config_var(CONFIG_NEW_COLOR_MAP_CODING 1 - "AV2 experiment flag to enable improved palette coding.") +set_aom_config_var( + CONFIG_PALETTE_IMPROVEMENTS + 1 + "AV2 experiment flag for palette parsing independency and improved palette color map coding." +) set_aom_config_var(CONFIG_SKIP_MODE_SSE_BUG_FIX 1 "AV2 experiment flag to fix the SSE calc bug for skip mode.") set_aom_config_var(CONFIG_SKIP_MODE_ENHANCEMENT 1 - "AV2 experiment flag to enable skip mode enhancement: C019.") -set_aom_config_var( - CONFIG_SKIP_MODE_DRL_WITH_REF_IDX 1 - "AV2 experiment flag to enable DRL with ref_MV_idx for skip mode.") + "AV2 experiment flag to enable skip mode enhancement.") set_aom_config_var(CONFIG_TIP 1 "Enable temporal interpolated prediction (TIP)") +set_aom_config_var(CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT 0 + "Enable frame output order derivation from order hint") set_aom_config_var(CONFIG_OPTFLOW_ON_TIP 1 "Enable optical flow refinement on top of TIP") set_aom_config_var(CONFIG_FLEX_MVRES 1 @@ -236,6 +242,28 @@ "Enable parity hiding for coefficients coding. 
(PH)") set_aom_config_var(CONFIG_BAWP 1 "Enable block adaptive weighted prediction") set_aom_config_var(CONFIG_WARPMV 1 "Enable warpmv modes") +set_aom_config_var(CONFIG_IMPROVED_ANGULAR_INTRA 1 + "Improved angular intra prediction mode") +set_aom_config_var(CONFIG_D071_IMP_MSK_BLD 1 + "Enable single reference mode for frame boundary") + +set_aom_config_var(CONFIG_SKIP_TXFM_OPT 1 + "Enable to optimize the signaling of skip_txfm") +set_aom_config_var(CONFIG_CWP 1 "Enables compound weighted prediction.") +set_aom_config_var(CONFIG_REFINEMV 1 "Enable refinemv modes") + +set_aom_config_var(CONFIG_EXPLICIT_TEMPORAL_DIST_CALC 1 + "Enable to explicit temporal distance calculation") + +set_aom_config_var(CONFIG_IMPROVED_GLOBAL_MOTION 1 + "New global motion syntax for AV2") +set_aom_config_var(CONFIG_SEP_COMP_DRL 1 + "Use separate drl list for compound modes") +set_aom_config_var(CONFIG_SKIP_ME_FOR_OPFL_MODES 1 + "Reuse the mvs of compound mode from non-opfl path") + +set_aom_config_var(CONFIG_CWG_D067_IMPROVED_WARP 1 + "Improvement of warp motions") # This is an encode-only change. set_aom_config_var(CONFIG_MV_SEARCH_RANGE 1 @@ -244,6 +272,9 @@ "AV2 experiment flag to fix CDEF syntax.") set_aom_config_var(CONFIG_IMPROVED_CFL 1 "Enable improved CfL mode from CWG-C044") +set_aom_config_var(CONFIG_BLEND_MODE 1 + "Enable improved intra blend mode from CWG-D046") + set_aom_config_var( CONFIG_PEF 1 "AV2 experiment flag to enable prediction enhancement filter") @@ -264,6 +295,10 @@ set_aom_config_var(CONFIG_CROSS_CHROMA_TX 1 "AV2 cross chroma component transform experiment flag.") set_aom_config_var(CONFIG_WEDGE_MOD_EXT 1 "AV2 wedge modes extensions.") + +set_aom_config_var(CONFIG_MF_IMPROVEMENT 1 + "Enable to improve temporal motion projection") + # # Variables in this section control optional features of the build system. #
diff --git a/build/cmake/aom_experiment_deps.cmake b/build/cmake/aom_experiment_deps.cmake index 760ed4b..e9460f2 100644 --- a/build/cmake/aom_experiment_deps.cmake +++ b/build/cmake/aom_experiment_deps.cmake
@@ -37,11 +37,10 @@ change_config_and_warn(CONFIG_CCSO_EXT 0 !CONFIG_CCSO) endif() - # CONFIG_ATC_REDUCED_TXSET depends on CONFIG_ATC_NEWTXSETS. If - # CONFIG_ATC_NEWTXSETS is off, then CONFIG_ATC_REDUCED_TXSET needs to be - # disabled. - if(NOT CONFIG_ATC_NEWTXSETS AND CONFIG_ATC_REDUCED_TXSET) - change_config_and_warn(CONFIG_ATC_REDUCED_TXSET 0 !CONFIG_ATC_NEWTXSETS) + # CONFIG_ATC_REDUCED_TXSET depends on CONFIG_ATC. If CONFIG_ATC is off, then + # CONFIG_ATC_REDUCED_TXSET needs to be disabled. + if(NOT CONFIG_ATC AND CONFIG_ATC_REDUCED_TXSET) + change_config_and_warn(CONFIG_ATC_REDUCED_TXSET 0 !CONFIG_ATC) endif() # CONFIG_OPTFLOW_ON_TIP is dependent on CONFIG_OPTFLOW_REFINEMENT and @@ -71,6 +70,17 @@ change_config_and_warn(CONFIG_WARPMV 0 !CONFIG_WARP_REF_LIST) endif() + # CONFIG_CWG_D067_IMPROVED_WARP depends on CONFIG_WARP_REF_LIST + if(NOT CONFIG_WARP_REF_LIST AND CONFIG_CWG_D067_IMPROVED_WARP) + change_config_and_warn(CONFIG_CWG_D067_IMPROVED_WARP 0 + !CONFIG_WARP_REF_LIST) + endif() + + # CONFIG_CWG_D067_IMPROVED_WARP depends on CONFIG_WARPMV + if(NOT CONFIG_WARPMV AND CONFIG_CWG_D067_IMPROVED_WARP) + change_config_and_warn(CONFIG_CWG_D067_IMPROVED_WARP 0 !CONFIG_WARPMV) + endif() + # Begin: CWG-C016. if(CONFIG_WIENER_NONSEP_CROSS_FILT) change_config_and_warn(CONFIG_WIENER_NONSEP 1 @@ -78,11 +88,11 @@ endif() # End: CWG-C016. - # CONFIG_H_PARTITION is dependent on CONFIG_EXT_RECUR_PARTITIONS. If - # CONFIG_EXT_RECUR_PARTITIONS is off, CONFIG_H_PARTITION needs to be turned + # CONFIG_UNEVEN_4WAY is dependent on CONFIG_EXT_RECUR_PARTITIONS. If + # CONFIG_EXT_RECUR_PARTITIONS is off, CONFIG_UNEVEN_4WAY needs to be turned # off. - if(NOT CONFIG_EXT_RECUR_PARTITIONS AND CONFIG_H_PARTITION) - change_config_and_warn(CONFIG_H_PARTITION 0 !CONFIG_EXT_RECUR_PARTITIONS) + if(NOT CONFIG_EXT_RECUR_PARTITIONS AND CONFIG_UNEVEN_4WAY) + change_config_and_warn(CONFIG_UNEVEN_4WAY 0 !CONFIG_EXT_RECUR_PARTITIONS) endif() endmacro()
diff --git a/common/args.c b/common/args.c index ffec73b..82a7e2a 100644 --- a/common/args.c +++ b/common/args.c
@@ -96,10 +96,19 @@ #if CONFIG_BAWP GET_PARAMS(enable_bawp); #endif // CONFIG_BAWP +#if CONFIG_CWP + GET_PARAMS(enable_cwp); +#endif // CONFIG_CWP +#if CONFIG_D071_IMP_MSK_BLD + GET_PARAMS(enable_imp_msk_bld); +#endif // CONFIG_D071_IMP_MSK_BLD GET_PARAMS(enable_fsc); #if CONFIG_ORIP GET_PARAMS(enable_orip); #endif +#if CONFIG_IDIF + GET_PARAMS(enable_idif); +#endif // CONFIG_IDIF GET_PARAMS(enable_ist); #if CONFIG_CROSS_CHROMA_TX GET_PARAMS(enable_cctx); @@ -117,6 +126,9 @@ #if CONFIG_JOINT_MVD GET_PARAMS(enable_joint_mvd); #endif // CONFIG_JOINT_MVD +#if CONFIG_REFINEMV + GET_PARAMS(enable_refinemv); +#endif // CONFIG_REFINEMV GET_PARAMS(enable_flip_idtx); GET_PARAMS(enable_deblocking); GET_PARAMS(enable_cdef);
diff --git a/common/av1_config.c b/common/av1_config.c index e8a9215..3d00147 100644 --- a/common/av1_config.c +++ b/common/av1_config.c
@@ -9,6 +9,7 @@ * source code in the PATENTS file, you can obtain it at * aomedia.org/license/patent-license/. */ +#include <stdbool.h> #include <stdio.h> #include <string.h> @@ -234,7 +235,10 @@ } // Parse Sequence Header OBU for coding tools beyond AV1 -int parse_sequence_header_beyond_av1(struct aom_read_bit_buffer *reader) { +int parse_sequence_header_beyond_av1(struct aom_read_bit_buffer *reader, + bool reduced_still_picture_header) { + (void)reduced_still_picture_header; + int result = 0; #if CONFIG_REF_MV_BANK AV1C_READ_BIT_OR_RETURN_ERROR(enable_refmvbank); @@ -244,6 +248,10 @@ AV1C_READ_BITS_OR_RETURN_ERROR(max_reference_frames, 2); } AV1C_READ_BIT_OR_RETURN_ERROR(explicit_ref_frame_map); +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + // 0: show_existing_frame, 1: implicit derivation + AV1C_READ_BIT_OR_RETURN_ERROR(enable_frame_output_order); +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT AV1C_READ_BIT_OR_RETURN_ERROR(enable_sdp); AV1C_READ_BIT_OR_RETURN_ERROR(enable_ist); #if CONFIG_CROSS_CHROMA_TX @@ -259,6 +267,12 @@ #if CONFIG_BAWP AV1C_READ_BIT_OR_RETURN_ERROR(enable_bawp); #endif // CONFIG_BAWP +#if CONFIG_CWP + AV1C_READ_BIT_OR_RETURN_ERROR(enable_cwp); +#endif // CONFIG_CWP +#if CONFIG_D071_IMP_MSK_BLD + AV1C_READ_BIT_OR_RETURN_ERROR(enable_imp_msk_bld); +#endif // CONFIG_D071_IMP_MSK_BLD AV1C_READ_BIT_OR_RETURN_ERROR(enable_fsc); #if CONFIG_CCSO AV1C_READ_BIT_OR_RETURN_ERROR(enable_ccso); @@ -269,10 +283,16 @@ #if CONFIG_ORIP AV1C_READ_BIT_OR_RETURN_ERROR(enable_orip); #endif +#if CONFIG_IDIF + AV1C_READ_BIT_OR_RETURN_ERROR(enable_idif); +#endif // CONFIG_IDIF AV1C_READ_BIT_OR_RETURN_ERROR(enable_ibp); #if CONFIG_ADAPTIVE_MVD AV1C_READ_BIT_OR_RETURN_ERROR(enable_adaptive_mvd); #endif // CONFIG_ADAPTIVE_MVD +#if CONFIG_REFINEMV + AV1C_READ_BIT_OR_RETURN_ERROR(enable_refinemv); +#endif // CONFIG_REFINEMV #if CONFIG_FLEX_MVRES AV1C_READ_BIT_OR_RETURN_ERROR(enable_flex_mvres); #endif // CONFIG_FLEX_MVRES @@ -282,6 +302,11 @@ #if CONFIG_PAR_HIDING 
AV1C_READ_BIT_OR_RETURN_ERROR(enable_parity_hiding); #endif // CONFIG_PAR_HIDING +#if CONFIG_IMPROVED_GLOBAL_MOTION + if (!reduced_still_picture_header) { + AV1C_READ_BIT_OR_RETURN_ERROR(enable_global_motion); + } +#endif // CONFIG_IMPROVED_GLOBAL_MOTION return 0; } @@ -447,7 +472,7 @@ AV1C_READ_BIT_OR_RETURN_ERROR(film_grain_params_present); // Sequence header for coding tools beyond AV1 - parse_sequence_header_beyond_av1(reader); + parse_sequence_header_beyond_av1(reader, reduced_still_picture_header); return 0; }
diff --git a/common/y4minput.c b/common/y4minput.c index d24bf56..aec02e5 100644 --- a/common/y4minput.c +++ b/common/y4minput.c
@@ -24,12 +24,13 @@ // Reads 'size' bytes from 'file' into 'buf' with some fault tolerance. // Returns true on success. static int file_read(void *buf, size_t size, FILE *file) { - const int kMaxRetries = 5; - int retry_count = 0; - int file_error; + const int kMaxTries = 5; + int try_count = 0; + int file_error = 0; size_t len = 0; - do { + while (!feof(file) && len < size && try_count < kMaxTries) { const size_t n = fread((uint8_t *)buf + len, 1, size - len, file); + ++try_count; len += n; file_error = ferror(file); if (file_error) { @@ -42,13 +43,13 @@ return 0; } } - } while (!feof(file) && len < size && ++retry_count < kMaxRetries); + } if (!feof(file) && len != size) { fprintf(stderr, "Error reading file: %u of %u bytes read," - " error: %d, retries: %d, %d: %s\n", - (uint32_t)len, (uint32_t)size, file_error, retry_count, errno, + " error: %d, tries: %d, %d: %s\n", + (uint32_t)len, (uint32_t)size, file_error, try_count, errno, strerror(errno)); } return len == size; @@ -1142,9 +1143,15 @@ y4m_ctx->dst_buf = (unsigned char *)malloc(y4m_ctx->dst_buf_sz); else y4m_ctx->dst_buf = (unsigned char *)malloc(2 * y4m_ctx->dst_buf_sz); + if (!y4m_ctx->dst_buf) return -1; - if (y4m_ctx->aux_buf_sz > 0) + if (y4m_ctx->aux_buf_sz > 0) { y4m_ctx->aux_buf = (unsigned char *)malloc(y4m_ctx->aux_buf_sz); + if (!y4m_ctx->aux_buf) { + free(y4m_ctx->dst_buf); + return -1; + } + } return 0; }
diff --git a/examples/inspect.c b/examples/inspect.c index 4e98dca..1cc05e8 100644 --- a/examples/inspect.c +++ b/examples/inspect.c
@@ -601,13 +601,39 @@ } const int num_syms = accounting->syms.num_syms; const int num_strs = accounting->syms.dictionary.num_strs; - buf += put_str(buf, " \"symbolsMap\": ["); + buf += put_str(buf, " \"symbolsFileMap\": ["); for (i = 0; i < num_strs; i++) { - buf += snprintf(buf, MAX_BUFFER, "\"%s\"", - accounting->syms.dictionary.strs[i]); + buf += snprintf(buf, MAX_BUFFER, "\"%s:%d\"", + accounting->syms.dictionary.acct_infos[i].c_file, + accounting->syms.dictionary.acct_infos[i].c_line); if (i < num_strs - 1) *(buf++) = ','; } buf += put_str(buf, "],\n"); + + buf += put_str(buf, " \"symbolsMap\": ["); + for (i = 0; i < num_strs; i++) { + buf += snprintf(buf, MAX_BUFFER, "\"%s\"", + accounting->syms.dictionary.acct_infos[i].c_func); + if (i < num_strs - 1) *(buf++) = ','; + } + buf += put_str(buf, "],\n"); + + buf += put_str(buf, " \"symbolsTagsMap\": ["); + for (i = 0; i < num_strs; i++) { + buf += put_str(buf, "["); + for (int j = 0; j < AOM_ACCOUNTING_MAX_TAGS; j++) { + if (accounting->syms.dictionary.acct_infos[i].tags[j] == NULL) break; + if (j > 0) { + *(buf++) = ','; + } + buf += snprintf(buf, MAX_BUFFER, "\"%s\"", + accounting->syms.dictionary.acct_infos[i].tags[j]); + } + buf += put_str(buf, "]"); + if (i < num_strs - 1) *(buf++) = ','; + } + buf += put_str(buf, "],\n"); + buf += put_str(buf, " \"symbols\": [\n "); AccountingSymbolContext context; context.x = -2; @@ -618,11 +644,12 @@ if (memcmp(&context, &sym->context, sizeof(AccountingSymbolContext)) != 0) { buf += put_num(buf, '[', sym->context.x, 0); buf += put_num(buf, ',', sym->context.y, ']'); - } else { - buf += put_num(buf, '[', sym->id, 0); - buf += put_num(buf, ',', sym->bits, 0); - buf += put_num(buf, ',', sym->samples, ']'); + *(buf++) = ','; } + buf += put_num(buf, '[', sym->id, 0); + buf += put_num(buf, ',', sym->bits, 0); + buf += put_num(buf, ',', sym->value, 0); + buf += put_num(buf, ',', sym->coding_mode, ']'); context = sym->context; if (i < num_syms - 1) *(buf++) = ','; } @@ -748,6 
+775,8 @@ frame_data.delta_q_present_flag); buf += snprintf(buf, MAX_BUFFER, " \"deltaQRes\": %d,\n", frame_data.delta_q_res); + buf += snprintf(buf, MAX_BUFFER, " \"superblockSize\": %d,\n", + frame_data.superblock_size); buf += put_str(buf, " \"config\": {"); buf += put_map(buf, config_map); buf += put_str(buf, "},\n"); @@ -764,6 +793,7 @@ void ifd_init_cb() { aom_inspect_init ii; ii.inspect_cb = inspect; + ii.inspect_sb_cb = NULL; ii.inspect_ctx = NULL; aom_codec_control(&codec, AV1_SET_INSPECTION_CALLBACK, &ii); }
diff --git a/test/accounting_test.cc b/test/accounting_test.cc index 8d8d964..1e640fa 100644 --- a/test/accounting_test.cc +++ b/test/accounting_test.cc
@@ -42,11 +42,9 @@ aom_accounting_init(&accounting); br.accounting = &accounting; for (int i = 0; i < kSymbols; i++) { - aom_read(&br, 32, "A"); + aom_read(&br, 32, ACCT_INFO("A")); } - // Consecutive symbols that are the same are coalesced. - GTEST_ASSERT_EQ(accounting.syms.num_syms, 1); - GTEST_ASSERT_EQ(accounting.syms.syms[0].samples, (unsigned int)kSymbols); + GTEST_ASSERT_EQ(accounting.syms.num_syms, kSymbols); aom_accounting_reset(&accounting); GTEST_ASSERT_EQ(accounting.syms.num_syms, 0); @@ -55,9 +53,9 @@ aom_reader_init(&br, bw_buffer, bw.pos); br.accounting = &accounting; for (int i = 0; i < kSymbols; i++) { - aom_read(&br, 32, "A"); - aom_read(&br, 32, "B"); - aom_read(&br, 32, "B"); + aom_read(&br, 32, ACCT_INFO("A")); + aom_read(&br, 32, ACCT_INFO("B")); + aom_read(&br, 32, ACCT_INFO("B")); } GTEST_ASSERT_EQ(accounting.syms.num_syms, kSymbols * 2); uint32_t tell_frac = aom_reader_tell_frac(&br); @@ -66,11 +64,15 @@ } GTEST_ASSERT_EQ(tell_frac, 0U); - GTEST_ASSERT_EQ(aom_accounting_dictionary_lookup(&accounting, "A"), - aom_accounting_dictionary_lookup(&accounting, "A")); + AccountingSymbolInfo a1 = ACCT_INFO("A"); + AccountingSymbolInfo a2 = ACCT_INFO("A"); + GTEST_ASSERT_EQ(aom_accounting_dictionary_lookup(&accounting, &a1), + aom_accounting_dictionary_lookup(&accounting, &a2)); // Check for collisions. The current aom_accounting_hash function returns // the same hash code for AB and BA. - GTEST_ASSERT_NE(aom_accounting_dictionary_lookup(&accounting, "AB"), - aom_accounting_dictionary_lookup(&accounting, "BA")); + AccountingSymbolInfo ab = ACCT_INFO("AB"); + AccountingSymbolInfo ba = ACCT_INFO("BA"); + GTEST_ASSERT_NE(aom_accounting_dictionary_lookup(&accounting, &ab), + aom_accounting_dictionary_lookup(&accounting, &ba)); }
diff --git a/test/altref_test.cc b/test/altref_test.cc index cf7ccd3..f5dfb4a 100644 --- a/test/altref_test.cc +++ b/test/altref_test.cc
@@ -185,7 +185,11 @@ virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) { (void)pkt; +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + frame_num_ += pkt->data.frame.frame_count; +#else // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT ++frame_num_; +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT } const gfIntervalParam gf_interval_param_;
diff --git a/test/av1_encoder_parms_get_to_decoder.cc b/test/av1_encoder_parms_get_to_decoder.cc index 29043b1..47b4448 100644 --- a/test/av1_encoder_parms_get_to_decoder.cc +++ b/test/av1_encoder_parms_get_to_decoder.cc
@@ -24,8 +24,6 @@ namespace { -const int kMaxPsnr = 100; - struct ParamPassingTestVideo { const char *name; uint32_t width; @@ -99,6 +97,7 @@ virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video, ::libaom_test::Encoder *encoder) { if (video->frame() == 0) { + encoder->Control(AOME_SET_CPUUSED, 5); encoder->Control(AV1E_SET_COLOR_PRIMARIES, encode_parms.color_primaries); encoder->Control(AV1E_SET_TRANSFER_CHARACTERISTICS, encode_parms.transfer_characteristics); @@ -130,7 +129,9 @@ virtual void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) { if (encode_parms.lossless) { - EXPECT_EQ(kMaxPsnr, pkt->data.psnr.psnr[0]); + const double lossless_psnr = + get_lossless_psnr(test_video_.width, test_video_.height, 8, false); + EXPECT_EQ(lossless_psnr, pkt->data.psnr.psnr[0]); } }
diff --git a/test/binary_codes_test.cc b/test/binary_codes_test.cc index 68cb79c..1f99f20 100644 --- a/test/binary_codes_test.cc +++ b/test/binary_codes_test.cc
@@ -25,8 +25,6 @@ #include "aom_dsp/binary_codes_reader.h" #include "aom_dsp/binary_codes_writer.h" -#define ACCT_STR __func__ - using libaom_test::ACMRandom; namespace { @@ -73,7 +71,7 @@ assert(k == enc_values[n][k][r][v][1]); const uint16_t ref = enc_values[n][k][r][v][2]; const uint16_t value = - aom_read_primitive_refsubexpfin(&br, range, k, ref, ACCT_STR); + aom_read_primitive_refsubexpfin(&br, range, k, ref, ACCT_INFO()); GTEST_ASSERT_EQ(value, enc_values[n][k][r][v][3]); } }
diff --git a/test/boolcoder_test.cc b/test/boolcoder_test.cc index 9c9eac8..983afd9 100644 --- a/test/boolcoder_test.cc +++ b/test/boolcoder_test.cc
@@ -78,7 +78,7 @@ } else if (bit_method == 3) { bit = bit_rnd(2); } - GTEST_ASSERT_EQ(aom_read(&br, probas[i], NULL), bit) + GTEST_ASSERT_EQ(aom_read(&br, probas[i], {}), bit) << "pos: " << i << " / " << kBitsToTest << " bit_method: " << bit_method << " method: " << method; } @@ -105,25 +105,26 @@ aom_reader br; aom_reader_init(&br, bw_buffer, bw.pos); uint32_t last_tell = aom_reader_tell(&br); - uint32_t last_tell_frac = aom_reader_tell_frac(&br); + uint64_t last_tell_frac = aom_reader_tell_frac(&br); double frac_diff_total = 0; GTEST_ASSERT_GE(aom_reader_tell(&br), 0u); GTEST_ASSERT_LE(aom_reader_tell(&br), 1u); ASSERT_FALSE(aom_reader_has_overflowed(&br)); for (int i = 0; i < kSymbols; i++) { - aom_read(&br, p, NULL); + aom_read(&br, p, {}); uint32_t tell = aom_reader_tell(&br); - uint32_t tell_frac = aom_reader_tell_frac(&br); + uint64_t tell_frac = aom_reader_tell_frac(&br); GTEST_ASSERT_GE(tell, last_tell) << "tell: " << tell << ", last_tell: " << last_tell; GTEST_ASSERT_GE(tell_frac, last_tell_frac) << "tell_frac: " << tell_frac << ", last_tell_frac: " << last_tell_frac; // Frac tell should round up to tell. 
- GTEST_ASSERT_EQ(tell, (tell_frac + 7) >> 3); + GTEST_ASSERT_EQ(tell, (tell_frac + (1 << OD_BITRES) - 1) >> OD_BITRES); last_tell = tell; frac_diff_total += - fabs(((tell_frac - last_tell_frac) / 8.0) + log2(probability)); + fabs(((tell_frac - last_tell_frac) / (double)(1 << OD_BITRES)) + + log2(probability)); last_tell_frac = tell_frac; } const uint32_t expected = (uint32_t)(-kSymbols * log2(probability)); @@ -152,7 +153,7 @@ aom_reader_init(&br, bw_buffer, bw.pos); ASSERT_FALSE(aom_reader_has_overflowed(&br)); for (int i = 0; i < kSymbols; i++) { - GTEST_ASSERT_EQ(aom_read(&br, p, NULL), 1); + GTEST_ASSERT_EQ(aom_read(&br, p, {}), 1); ASSERT_FALSE(aom_reader_has_overflowed(&br)); } // In the worst case, the encoder uses just a tiny fraction of the last @@ -171,11 +172,11 @@ // additional bits; therefore the number of reads should be increased; // 174 * 8 will be enough to consume more than this number of bits. for (int i = 0; i < 174 * 8; i++) { - aom_read(&br, p, NULL); + aom_read(&br, p, {}); } #else for (int i = 0; i < 174; i++) { - aom_read(&br, p, NULL); + aom_read(&br, p, {}); } #endif ASSERT_TRUE(aom_reader_has_overflowed(&br));
diff --git a/test/cpu_speed_test.cc b/test/cpu_speed_test.cc index bddd8ab..e9984ad 100644 --- a/test/cpu_speed_test.cc +++ b/test/cpu_speed_test.cc
@@ -19,15 +19,13 @@ namespace { -const int kMaxPSNR = 100; - class CpuSpeedTest : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int>, public ::libaom_test::EncoderTest { protected: CpuSpeedTest() : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)), - set_cpu_used_(GET_PARAM(2)), min_psnr_(kMaxPSNR), + set_cpu_used_(GET_PARAM(2)), min_psnr_(DBL_MAX), tune_content_(AOM_CONTENT_DEFAULT) {} virtual ~CpuSpeedTest() {} @@ -38,7 +36,7 @@ cfg_.rc_end_usage = AOM_VBR; } - virtual void BeginPassHook(unsigned int /*pass*/) { min_psnr_ = kMaxPSNR; } + virtual void BeginPassHook(unsigned int /*pass*/) { min_psnr_ = DBL_MAX; } virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video, ::libaom_test::Encoder *encoder) { @@ -75,14 +73,19 @@ cfg_.rc_target_bitrate = 400; cfg_.rc_max_quantizer = 0; cfg_.rc_min_quantizer = 0; + const unsigned int width = 208; + const unsigned int height = 144; + const unsigned int bit_depth = 8; - ::libaom_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0, - 10); + ::libaom_test::I420VideoSource video("hantro_odd.yuv", width, height, 30, 1, + 0, 10); init_flags_ = AOM_CODEC_USE_PSNR; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - EXPECT_GE(min_psnr_, kMaxPSNR); + const double lossless_psnr = + get_lossless_psnr(width, height, bit_depth, false); + EXPECT_EQ(min_psnr_, lossless_psnr); } void CpuSpeedTest::TestScreencastQ0() { @@ -91,11 +94,17 @@ cfg_.rc_target_bitrate = 400; cfg_.rc_max_quantizer = 0; cfg_.rc_min_quantizer = 0; + const unsigned int width = 640; + const unsigned int height = 480; + const unsigned int bit_depth = 8; init_flags_ = AOM_CODEC_USE_PSNR; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - EXPECT_GE(min_psnr_, kMaxPSNR); + + const double lossless_psnr = + get_lossless_psnr(width, height, bit_depth, false); + EXPECT_EQ(min_psnr_, lossless_psnr); } void CpuSpeedTest::TestTuneScreen() {
diff --git a/test/decode_perf_test.cc b/test/decode_perf_test.cc index 5e5380e..4a85357 100644 --- a/test/decode_perf_test.cc +++ b/test/decode_perf_test.cc
@@ -148,7 +148,11 @@ } virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) { +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + out_frames_ += pkt->data.frame.frame_count; +#else // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT ++out_frames_; +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT // Write initial file header if first frame. if (pkt->data.frame.pts == 0)
diff --git a/test/ec_test.cc b/test/ec_test.cc index d89c991..6b30c66 100644 --- a/test/ec_test.cc +++ b/test/ec_test.cc
@@ -44,7 +44,7 @@ #if CONFIG_BYPASS_IMPROVEMENT unsigned *mode; #endif // CONFIG_BYPASS_IMPROVEMENT - unsigned *tell; + unsigned long *tell; unsigned *enc_method; int j; sz = rand() / ((RAND_MAX >> (rand() % 9U)) + 1U); @@ -54,7 +54,7 @@ #if CONFIG_BYPASS_IMPROVEMENT mode = (unsigned *)malloc(sz * sizeof(*mode)); #endif // CONFIG_BYPASS_IMPROVEMENT - tell = (unsigned *)malloc((sz + 1) * sizeof(*tell)); + tell = (unsigned long *)malloc((sz + 1) * sizeof(*tell)); enc_method = (unsigned *)malloc(sz * sizeof(*enc_method)); od_ec_enc_reset(&enc); tell[0] = od_ec_enc_tell_frac(&enc); @@ -139,7 +139,7 @@ EXPECT_EQ(od_ec_dec_tell_frac(&dec), tell[0]) << "od_ec_dec_tell() mismatch between encoder and decoder " "at symbol 0: " - << (unsigned)od_ec_dec_tell_frac(&dec) << " instead of " << tell[0] + << (unsigned long)od_ec_dec_tell_frac(&dec) << " instead of " << tell[0] << " (Random seed: " << seed << ").\n"; for (j = 0; j < sz; j++) { int dec_method; @@ -208,7 +208,7 @@ EXPECT_EQ(od_ec_dec_tell_frac(&dec), tell[j + 1]) << "od_ec_dec_tell() mismatch between encoder and " "decoder at symbol " - << j + 1 << ": " << (unsigned)od_ec_dec_tell_frac(&dec) + << j + 1 << ": " << (unsigned long)od_ec_dec_tell_frac(&dec) << " instead of " << tell[j + 1] << " (Random seed: " << seed << ").\n"; }
diff --git a/test/encode_test_driver.cc b/test/encode_test_driver.cc index 2d191cd..74045a4 100644 --- a/test/encode_test_driver.cc +++ b/test/encode_test_driver.cc
@@ -202,6 +202,10 @@ number_spatial_layers_ = GetNumSpatialLayers(); bool again; +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + unsigned int rec_frame_cnt = 0; + unsigned int failed_frame_cnt = 0; +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT for (again = true; again; video->Next()) { again = (video->img() != NULL); @@ -236,6 +240,9 @@ if (!HandleDecodeResult(res_dec, decoder.get())) break; has_dxdata = true; +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + rec_frame_cnt += pkt->data.frame.frame_count; +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT } ASSERT_GE(pkt->data.frame.pts, last_pts_); if (sl == number_spatial_layers_) last_pts_ = pkt->data.frame.pts; @@ -260,7 +267,22 @@ } } if (img_dec) DecompressedFrameHook(*img_dec, video->pts()); +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + failed_frame_cnt = 0; +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT } +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + // Continue the encoding process, when an empty packet is received + // by skipping OBU with show_existing_frame == 1) and + // no longer input frames are remained due to lag_in frames. + // However the consecutive(10) packets are empty/failed, stop the + // encoding. + else if (rec_frame_cnt < video->limit() && !again && + failed_frame_cnt < 10) { + again = true; + failed_frame_cnt++; + } +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT if (!Continue()) break; } // Loop over spatial layers }
diff --git a/test/encodetxb_test.cc b/test/encodetxb_test.cc index 58f1711..84ebbad 100644 --- a/test/encodetxb_test.cc +++ b/test/encodetxb_test.cc
@@ -61,7 +61,7 @@ libaom_test::ClearSystemState(); } -#if !CONFIG_ATC_COEFCODING +#if !CONFIG_ATC void GetNzMapContextsRun() { const int kNumTests = 10; int result = 0; @@ -149,7 +149,7 @@ (elapsed_time_ref * 1.0) / (elapsed_time * 1.0)); } } -#endif // !CONFIG_ATC_COEFCODING +#endif // !CONFIG_ATC private: void InitDataWithEob(const int16_t *const scan, const int bwl, @@ -193,7 +193,7 @@ }; GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(EncodeTxbTest); -#if !CONFIG_ATC_COEFCODING +#if !CONFIG_ATC TEST_P(EncodeTxbTest, GetNzMapContexts) { GetNzMapContextsRun(); } TEST_P(EncodeTxbTest, DISABLED_SpeedTestGetNzMapContexts) { @@ -209,7 +209,7 @@ INSTANTIATE_TEST_SUITE_P(NEON, EncodeTxbTest, ::testing::Values(av1_get_nz_map_contexts_neon)); #endif -#endif // !CONFIG_ATC_COEFCODING +#endif // !CONFIG_ATC typedef void (*av1_txb_init_levels_func)(const tran_low_t *const coeff, const int width, const int height,
diff --git a/test/error_resilience_test.cc b/test/error_resilience_test.cc index cd9ca58..dc63465 100644 --- a/test/error_resilience_test.cc +++ b/test/error_resilience_test.cc
@@ -82,6 +82,9 @@ if (video->frame() == 0) { encoder->Control(AOME_SET_CPUUSED, kCpuUsed); encoder->Control(AOME_SET_ENABLEAUTOALTREF, enable_altref_); +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + encoder->Control(AV1E_SET_FRAME_OUTPUT_ORDER_DERIVATION, 0); +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT } frame_flags_ &= ~(AOM_EFLAG_NO_REF_FRAME_MVS | AOM_EFLAG_ERROR_RESILIENT | AOM_EFLAG_NO_UPD_ALL | AOM_EFLAG_SET_S_FRAME | @@ -502,6 +505,9 @@ ::libaom_test::Encoder *encoder) { if (video->frame() == 0) { encoder->Control(AOME_SET_CPUUSED, 5); +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + encoder->Control(AV1E_SET_FRAME_OUTPUT_ORDER_DERIVATION, 0); +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT if (rc_end_usage_ == AOM_Q) { encoder->Control(AOME_SET_QP, 210); }
diff --git a/test/fwd_kf_test.cc b/test/fwd_kf_test.cc index db429da..6c0050b 100644 --- a/test/fwd_kf_test.cc +++ b/test/fwd_kf_test.cc
@@ -26,7 +26,7 @@ } FwdKfTestParam; const FwdKfTestParam kTestParams[] = { - { 4, 31.1 }, { 6, 31.1 }, { 8, 32.6 }, + { 4, 31.1 }, { 6, 31.1 }, { 8, 32.3 }, { 12, 31.7 }, { 16, 32.3 }, { 18, 32.1 } }; @@ -183,7 +183,7 @@ is_fwd_kf_present_ = 0; libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, cfg_.g_timebase.den, cfg_.g_timebase.num, - 0, 150); + 0, 60); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ASSERT_EQ(is_fwd_kf_present_, 1); }
diff --git a/test/gf_pyr_height_test.cc b/test/gf_pyr_height_test.cc index 7b8c8e3..8cfd996 100644 --- a/test/gf_pyr_height_test.cc +++ b/test/gf_pyr_height_test.cc
@@ -68,19 +68,19 @@ } // Params: encoding mode, rate control mode and GFPyrHeightTestParam object. -class GFPyrHeightTest +class GFPyrHeightTestLarge : public ::libaom_test::CodecTestWith3Params< libaom_test::TestMode, aom_rc_mode, GFPyrHeightTestParam>, public ::libaom_test::EncoderTest { protected: - GFPyrHeightTest() + GFPyrHeightTestLarge() : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)), rc_mode_(GET_PARAM(2)) { gf_min_pyr_height_ = GET_PARAM(3).gf_min_pyr_height; gf_max_pyr_height_ = GET_PARAM(3).gf_max_pyr_height; psnr_threshold_ = GET_PARAM(3).psnr_thresh; } - virtual ~GFPyrHeightTest() {} + virtual ~GFPyrHeightTestLarge() {} virtual void SetUp() { InitializeConfig(); @@ -139,7 +139,7 @@ double psnr_; }; -TEST_P(GFPyrHeightTest, EncodeAndVerifyPSNR) { +TEST_P(GFPyrHeightTestLarge, EncodeAndVerifyPSNR) { libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, cfg_.g_timebase.den, cfg_.g_timebase.num, 0, 32); @@ -149,7 +149,7 @@ << "GF Max Pyramid Height = " << gf_max_pyr_height_; } -AV1_INSTANTIATE_TEST_SUITE(GFPyrHeightTest, GOODQUALITY_TEST_MODES, +AV1_INSTANTIATE_TEST_SUITE(GFPyrHeightTestLarge, GOODQUALITY_TEST_MODES, ::testing::Values(AOM_Q, AOM_VBR), ::testing::ValuesIn(kTestParams)); } // namespace
diff --git a/test/horz_superres_test.cc b/test/horz_superres_test.cc index c7f1e54..be61740 100644 --- a/test/horz_superres_test.cc +++ b/test/horz_superres_test.cc
@@ -54,9 +54,9 @@ const TestVideoParam kTestVideoVectors[] = { { "park_joy_90p_8_420.y4m", AOM_IMG_FMT_I420, AOM_BITS_8, 0, 5, 0, 25.0, - 44.7 }, + 43.0 }, { "park_joy_90p_10_444.y4m", AOM_IMG_FMT_I44416, AOM_BITS_10, 1, 5, 0, 28.0, - 47.7 }, + 47.5 }, { "screendata.y4m", AOM_IMG_FMT_I420, AOM_BITS_8, 0, 4, 1, 16.0, 56.0 }, // Image coding (single frame). { "niklas_1280_720_30.y4m", AOM_IMG_FMT_I420, AOM_BITS_8, 0, 1, 0, 25.0,
diff --git a/test/kf_test.cc b/test/kf_test.cc index 4e290e3..4a2910c 100644 --- a/test/kf_test.cc +++ b/test/kf_test.cc
@@ -73,6 +73,20 @@ } } + virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) { + if (kf_dist_ != -1) { +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + kf_dist_ += pkt->data.frame.frame_count; +#else // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + (void)pkt; + ++kf_dist_; +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + if (kf_dist_ > (int)kf_dist_param_.max_kf_dist) { + is_kf_interval_violated_ = true; + } + } + } + virtual bool HandleDecodeResult(const aom_codec_err_t res_dec, libaom_test::Decoder *decoder) { EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError(); @@ -81,12 +95,6 @@ int frame_flags = 0; AOM_CODEC_CONTROL_TYPECHECKED(ctx_dec, AOMD_GET_FRAME_FLAGS, &frame_flags); - if (kf_dist_ != -1) { - kf_dist_++; - if (kf_dist_ > (int)kf_dist_param_.max_kf_dist) { - is_kf_interval_violated_ = true; - } - } if ((frame_flags & AOM_FRAME_IS_KEY) == static_cast<aom_codec_frame_flags_t>(AOM_FRAME_IS_KEY)) { if (kf_dist_ != -1 && kf_dist_ < (int)kf_dist_param_.min_kf_dist) { @@ -172,7 +180,9 @@ is_kf_placement_violated_ = true; } } +#if !CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT ++frame_num_; +#endif // !CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT } return AOM_CODEC_OK == res_dec; }
diff --git a/test/level_test.cc b/test/level_test.cc index 7c053d1..b5e0869 100644 --- a/test/level_test.cc +++ b/test/level_test.cc
@@ -26,21 +26,21 @@ const int kLevelKeepStats = 24; // Speed settings tested static const int kCpuUsedVectors[] = { - 1, 2, 3, 4, + 5, }; -class LevelTest +class LevelTestLarge : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, int>, public ::libaom_test::EncoderTest { protected: - LevelTest() + LevelTestLarge() : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)), cpu_used_(GET_PARAM(2)), target_level_(31) {} - virtual ~LevelTest() {} + virtual ~LevelTestLarge() {} virtual void SetUp() { InitializeConfig(); @@ -70,7 +70,7 @@ int level_[32]; }; -TEST_P(LevelTest, TestTargetLevelApi) { +TEST_P(LevelTestLarge, TestTargetLevelApi) { static aom_codec_iface_t *codec = &aom_codec_av1_cx_algo; aom_codec_ctx_t enc; aom_codec_enc_cfg_t cfg; @@ -96,7 +96,7 @@ EXPECT_EQ(AOM_CODEC_OK, aom_codec_destroy(&enc)); } -TEST_P(LevelTest, TestTargetLevel19) { +TEST_P(LevelTestLarge, TestTargetLevel19) { std::unique_ptr<libaom_test::VideoSource> video; video.reset(new libaom_test::Y4mVideoSource("park_joy_90p_8_420.y4m", 0, 10)); ASSERT_TRUE(video.get() != NULL); @@ -105,9 +105,9 @@ ASSERT_NO_FATAL_FAILURE(RunLoop(video.get())); } -TEST_P(LevelTest, TestLevelMonitoringLowBitrate) { +TEST_P(LevelTestLarge, TestLevelMonitoringLowBitrate) { // To save run time, we only test speed 4. - if (cpu_used_ == 4) { + if (cpu_used_ == 5) { libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, 30, 1, 0, 40); target_level_ = kLevelKeepStats; @@ -118,22 +118,23 @@ } } -TEST_P(LevelTest, TestLevelMonitoringHighBitrate) { +TEST_P(LevelTestLarge, TestLevelMonitoringHighBitrate) { // To save run time, we only test speed 4. 
- if (cpu_used_ == 4) { + if (cpu_used_ == 5) { + const int num_frames = 17; libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, - 30, 1, 0, 40); + 30, 1, 0, num_frames); target_level_ = kLevelKeepStats; cfg_.rc_target_bitrate = 4000; - cfg_.g_limit = 40; + cfg_.g_limit = num_frames; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ASSERT_EQ(level_[0], 4); } } -TEST_P(LevelTest, TestTargetLevel0) { +TEST_P(LevelTestLarge, TestTargetLevel0) { // To save run time, we only test speed 4. - if (cpu_used_ == 4) { + if (cpu_used_ == 5) { libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, 30, 1, 0, 50); const int target_level = 0; @@ -144,7 +145,7 @@ } } -AV1_INSTANTIATE_TEST_SUITE(LevelTest, +AV1_INSTANTIATE_TEST_SUITE(LevelTestLarge, ::testing::Values(::libaom_test::kOnePassGood), ::testing::ValuesIn(kCpuUsedVectors)); } // namespace
diff --git a/test/lossless_test.cc b/test/lossless_test.cc index a5cf3d7..69f3ff4 100644 --- a/test/lossless_test.cc +++ b/test/lossless_test.cc
@@ -22,15 +22,13 @@ namespace { -const int kMaxPsnr = 100; - class LosslessTestLarge : public ::libaom_test::CodecTestWith2Params<libaom_test::TestMode, aom_rc_mode>, public ::libaom_test::EncoderTest { protected: LosslessTestLarge() - : EncoderTest(GET_PARAM(0)), psnr_(kMaxPsnr), nframes_(0), + : EncoderTest(GET_PARAM(0)), psnr_(DBL_MAX), nframes_(0), encoding_mode_(GET_PARAM(1)), rc_end_usage_(GET_PARAM(2)) {} virtual ~LosslessTestLarge() {} @@ -53,7 +51,7 @@ } virtual void BeginPassHook(unsigned int /*pass*/) { - psnr_ = kMaxPsnr; + psnr_ = DBL_MAX; nframes_ = 0; } @@ -95,11 +93,17 @@ init_flags_ = AOM_CODEC_USE_PSNR; // intentionally changed the dimension for better testing coverage - libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, - timebase.den, timebase.num, 0, 5); + const unsigned int width = 352; + const unsigned int height = 288; + const unsigned int bit_depth = 8; + libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", width, + height, timebase.den, timebase.num, 0, 5); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - const double psnr_lossless = GetMinPsnr(); - EXPECT_GE(psnr_lossless, kMaxPsnr); + + const double min_psnr = GetMinPsnr(); + const double lossless_psnr = + get_lossless_psnr(width, height, bit_depth, false); + EXPECT_EQ(min_psnr, lossless_psnr); } TEST_P(LosslessTestLarge, TestLossLessEncoding444) { @@ -115,8 +119,10 @@ init_flags_ = AOM_CODEC_USE_PSNR; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - const double psnr_lossless = GetMinPsnr(); - EXPECT_GE(psnr_lossless, kMaxPsnr); + + const double min_psnr = GetMinPsnr(); + const double lossless_psnr = get_lossless_psnr(352, 288, 8, true); + EXPECT_EQ(min_psnr, lossless_psnr); } TEST_P(LosslessTestLarge, TestLossLessEncodingCtrl) { @@ -131,11 +137,17 @@ init_flags_ = AOM_CODEC_USE_PSNR; - libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, - timebase.den, timebase.num, 0, 5); + const unsigned int width = 352; + const unsigned int height 
= 288; + const unsigned int bit_depth = 8; + libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", width, + height, timebase.den, timebase.num, 0, 5); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - const double psnr_lossless = GetMinPsnr(); - EXPECT_GE(psnr_lossless, kMaxPsnr); + + const double min_psnr = GetMinPsnr(); + const double lossless_psnr = + get_lossless_psnr(width, height, bit_depth, false); + EXPECT_EQ(min_psnr, lossless_psnr); } AV1_INSTANTIATE_TEST_SUITE(LosslessTestLarge, GOODQUALITY_TEST_MODES,
diff --git a/test/resize_test.cc b/test/resize_test.cc index f4fc0ee..46a17a1 100644 --- a/test/resize_test.cc +++ b/test/resize_test.cc
@@ -279,8 +279,11 @@ #if WRITE_COMPRESSED_STREAM virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) { +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + out_frames_ += pkt->data.frame.frame_count; +#else // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT ++out_frames_; - +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT // Write initial file header if first frame. if (pkt->data.frame.pts == 0) write_ivf_file_header(&cfg_, 0, outfile_); @@ -372,6 +375,9 @@ if (video->frame() == 0) { encoder->Control(AOME_SET_CPUUSED, cpu_used_); encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1); +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + encoder->Control(AV1E_SET_FRAME_OUTPUT_ORDER_DERIVATION, 0); +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT } }
diff --git a/test/sad_test.cc b/test/sad_test.cc index 340e1e8..fd21490 100644 --- a/test/sad_test.cc +++ b/test/sad_test.cc
@@ -1495,8 +1495,10 @@ make_tuple(32, 32, &aom_highbd_sad32x32_sse2, 8), make_tuple(32, 16, &aom_highbd_sad32x16_sse2, 8), make_tuple(16, 32, &aom_highbd_sad16x32_sse2, 8), +#if !CONFIG_UNEVEN_4WAY make_tuple(16, 16, &aom_highbd_sad16x16_sse2, 8), make_tuple(16, 8, &aom_highbd_sad16x8_sse2, 8), +#endif // !CONFIG_UNEVEN_4WAY make_tuple(8, 16, &aom_highbd_sad8x16_sse2, 8), make_tuple(8, 8, &aom_highbd_sad8x8_sse2, 8), make_tuple(8, 4, &aom_highbd_sad8x4_sse2, 8), @@ -1508,8 +1510,10 @@ make_tuple(32, 32, &aom_highbd_sad32x32_sse2, 10), make_tuple(32, 16, &aom_highbd_sad32x16_sse2, 10), make_tuple(16, 32, &aom_highbd_sad16x32_sse2, 10), +#if !CONFIG_UNEVEN_4WAY make_tuple(16, 16, &aom_highbd_sad16x16_sse2, 10), make_tuple(16, 8, &aom_highbd_sad16x8_sse2, 10), +#endif // !CONFIG_UNEVEN_4WAY make_tuple(8, 16, &aom_highbd_sad8x16_sse2, 10), make_tuple(8, 8, &aom_highbd_sad8x8_sse2, 10), make_tuple(8, 4, &aom_highbd_sad8x4_sse2, 10), @@ -1521,8 +1525,10 @@ make_tuple(32, 32, &aom_highbd_sad32x32_sse2, 12), make_tuple(32, 16, &aom_highbd_sad32x16_sse2, 12), make_tuple(16, 32, &aom_highbd_sad16x32_sse2, 12), +#if !CONFIG_UNEVEN_4WAY make_tuple(16, 16, &aom_highbd_sad16x16_sse2, 12), make_tuple(16, 8, &aom_highbd_sad16x8_sse2, 12), +#endif // !CONFIG_UNEVEN_4WAY make_tuple(8, 16, &aom_highbd_sad8x16_sse2, 12), make_tuple(8, 8, &aom_highbd_sad8x8_sse2, 12), make_tuple(8, 4, &aom_highbd_sad8x4_sse2, 12), @@ -1543,11 +1549,13 @@ make_tuple(32, 8, &aom_highbd_sad32x8_sse2, 12), make_tuple(8, 32, &aom_highbd_sad8x32_sse2, 12), +#if !CONFIG_UNEVEN_4WAY make_tuple(16, 4, &aom_highbd_sad16x4_sse2, 8), - make_tuple(4, 16, &aom_highbd_sad4x16_sse2, 8), make_tuple(16, 4, &aom_highbd_sad16x4_sse2, 10), - make_tuple(4, 16, &aom_highbd_sad4x16_sse2, 10), make_tuple(16, 4, &aom_highbd_sad16x4_sse2, 12), +#endif // !CONFIG_UNEVEN_4WAY + make_tuple(4, 16, &aom_highbd_sad4x16_sse2, 8), + make_tuple(4, 16, &aom_highbd_sad4x16_sse2, 10), make_tuple(4, 16, &aom_highbd_sad4x16_sse2, 12), }; 
INSTANTIATE_TEST_SUITE_P(SSE2, SADTest, ::testing::ValuesIn(sse2_tests)); @@ -1559,8 +1567,10 @@ make_tuple(32, 32, &aom_highbd_sad_skip_32x32_sse2, 8), make_tuple(32, 16, &aom_highbd_sad_skip_32x16_sse2, 8), make_tuple(16, 32, &aom_highbd_sad_skip_16x32_sse2, 8), +#if !CONFIG_UNEVEN_4WAY make_tuple(16, 16, &aom_highbd_sad_skip_16x16_sse2, 8), make_tuple(16, 8, &aom_highbd_sad_skip_16x8_sse2, 8), +#endif // !CONFIG_UNEVEN_4WAY make_tuple(8, 16, &aom_highbd_sad_skip_8x16_sse2, 8), make_tuple(8, 8, &aom_highbd_sad_skip_8x8_sse2, 8), make_tuple(4, 8, &aom_highbd_sad_skip_4x8_sse2, 8), @@ -1576,8 +1586,10 @@ make_tuple(32, 32, &aom_highbd_sad_skip_32x32_sse2, 10), make_tuple(32, 16, &aom_highbd_sad_skip_32x16_sse2, 10), make_tuple(16, 32, &aom_highbd_sad_skip_16x32_sse2, 10), +#if !CONFIG_UNEVEN_4WAY make_tuple(16, 16, &aom_highbd_sad_skip_16x16_sse2, 10), make_tuple(16, 8, &aom_highbd_sad_skip_16x8_sse2, 10), +#endif // !CONFIG_UNEVEN_4WAY make_tuple(8, 16, &aom_highbd_sad_skip_8x16_sse2, 10), make_tuple(8, 8, &aom_highbd_sad_skip_8x8_sse2, 10), make_tuple(4, 8, &aom_highbd_sad_skip_4x8_sse2, 10), @@ -1593,8 +1605,10 @@ make_tuple(32, 32, &aom_highbd_sad_skip_32x32_sse2, 12), make_tuple(32, 16, &aom_highbd_sad_skip_32x16_sse2, 12), make_tuple(16, 32, &aom_highbd_sad_skip_16x32_sse2, 12), +#if !CONFIG_UNEVEN_4WAY make_tuple(16, 16, &aom_highbd_sad_skip_16x16_sse2, 12), make_tuple(16, 8, &aom_highbd_sad_skip_16x8_sse2, 12), +#endif // !CONFIG_UNEVEN_4WAY make_tuple(8, 16, &aom_highbd_sad_skip_8x16_sse2, 12), make_tuple(8, 8, &aom_highbd_sad_skip_8x8_sse2, 12), make_tuple(4, 8, &aom_highbd_sad_skip_4x8_sse2, 12), @@ -1614,8 +1628,10 @@ make_tuple(32, 32, &aom_highbd_sad32x32_avg_sse2, 8), make_tuple(32, 16, &aom_highbd_sad32x16_avg_sse2, 8), make_tuple(16, 32, &aom_highbd_sad16x32_avg_sse2, 8), +#if !CONFIG_UNEVEN_4WAY make_tuple(16, 16, &aom_highbd_sad16x16_avg_sse2, 8), make_tuple(16, 8, &aom_highbd_sad16x8_avg_sse2, 8), +#endif // !CONFIG_UNEVEN_4WAY make_tuple(8, 16, 
&aom_highbd_sad8x16_avg_sse2, 8), make_tuple(8, 8, &aom_highbd_sad8x8_avg_sse2, 8), make_tuple(8, 4, &aom_highbd_sad8x4_avg_sse2, 8), @@ -1627,8 +1643,10 @@ make_tuple(32, 32, &aom_highbd_sad32x32_avg_sse2, 10), make_tuple(32, 16, &aom_highbd_sad32x16_avg_sse2, 10), make_tuple(16, 32, &aom_highbd_sad16x32_avg_sse2, 10), +#if !CONFIG_UNEVEN_4WAY make_tuple(16, 16, &aom_highbd_sad16x16_avg_sse2, 10), make_tuple(16, 8, &aom_highbd_sad16x8_avg_sse2, 10), +#endif // !CONFIG_UNEVEN_4WAY make_tuple(8, 16, &aom_highbd_sad8x16_avg_sse2, 10), make_tuple(8, 8, &aom_highbd_sad8x8_avg_sse2, 10), make_tuple(8, 4, &aom_highbd_sad8x4_avg_sse2, 10), @@ -1640,8 +1658,10 @@ make_tuple(32, 32, &aom_highbd_sad32x32_avg_sse2, 12), make_tuple(32, 16, &aom_highbd_sad32x16_avg_sse2, 12), make_tuple(16, 32, &aom_highbd_sad16x32_avg_sse2, 12), +#if !CONFIG_UNEVEN_4WAY make_tuple(16, 16, &aom_highbd_sad16x16_avg_sse2, 12), make_tuple(16, 8, &aom_highbd_sad16x8_avg_sse2, 12), +#endif // !CONFIG_UNEVEN_4WAY make_tuple(8, 16, &aom_highbd_sad8x16_avg_sse2, 12), make_tuple(8, 8, &aom_highbd_sad8x8_avg_sse2, 12), make_tuple(8, 4, &aom_highbd_sad8x4_avg_sse2, 12),
diff --git a/test/scan_test.cc b/test/scan_test.cc index cf3c317..e356c50 100644 --- a/test/scan_test.cc +++ b/test/scan_test.cc
@@ -110,7 +110,7 @@ SCAN_MODE scan_mode; TX_CLASS tx_class = tx_type_to_class[(TX_TYPE)tx_type]; if (tx_class == TX_CLASS_2D) { -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC scan_mode = SCAN_MODE_COL_DIAG; #else if (rows == cols) { @@ -120,7 +120,7 @@ } else { scan_mode = SCAN_MODE_COL_DIAG; } -#endif // CONFIG_ATC_COEFCODING +#endif // CONFIG_ATC } else if (tx_class == TX_CLASS_VERT) { scan_mode = SCAN_MODE_ROW_1D; } else {
diff --git a/test/sef_test.cc b/test/sef_test.cc new file mode 100644 index 0000000..e1bfa72 --- /dev/null +++ b/test/sef_test.cc
@@ -0,0 +1,129 @@ +/* + * Copyright (c) 2023, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 3-Clause Clear License + * and the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear + * License was not distributed with this source code in the LICENSE file, you + * can obtain it at aomedia.org/license/software-license/bsd-3-c-c/. If the + * Alliance for Open Media Patent License 1.0 was not distributed with this + * source code in the PATENTS file, you can obtain it at + * aomedia.org/license/patent-license/. + */ + +#include <ostream> + +#include "third_party/googletest/src/googletest/include/gtest/gtest.h" +#include "test/codec_factory.h" +#include "test/encode_test_driver.h" +#include "test/i420_video_source.h" +#include "test/util.h" + +namespace { + +static const struct SEFTestParam { + int enable_frame_output_order_derivation; + double psnr_thresh; +} sefTestParams[] = { + // enable_frame_output_order_derivation = 0 + { 0, 30.0 }, + // enable_frame_output_order_derivation = 1 + { 1, 30.0 }, +}; + +// Compiler may decide to add some padding to the struct above for alignment, +// which the gtest may try to print (on error for example). This would cause +// valgrind to complain that the padding is uninitialized. To avoid that, we +// provide our own function to print the struct. +// This also makes '--gtest_list_tests' output more understandable. +std::ostream &operator<<(std::ostream &os, const SEFTestParam &p) { + os << "SEFTestParam { " + << "frame_output_order_derivation = " + << p.enable_frame_output_order_derivation << ", " + << "psnr_thresh = " << p.psnr_thresh << " }"; + return os; +} + +// Params: encoding mode, rate control mode and SEFTestParam object. 
+class SEFTest + : public ::libaom_test::CodecTestWith3Params<libaom_test::TestMode, + aom_rc_mode, SEFTestParam>, + public ::libaom_test::EncoderTest { + protected: + SEFTest() + : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)), + rc_mode_(GET_PARAM(2)) { + enable_frame_output_order_derivation_ = + GET_PARAM(3).enable_frame_output_order_derivation; + psnr_threshold_ = GET_PARAM(3).psnr_thresh; + } + virtual ~SEFTest() {} + + virtual void SetUp() { + InitializeConfig(); + SetMode(encoding_mode_); + const aom_rational timebase = { 1, 30 }; + cfg_.g_timebase = timebase; + cpu_used_ = 4; + cfg_.rc_end_usage = rc_mode_; + cfg_.g_lag_in_frames = 19; + cfg_.g_threads = 0; + init_flags_ = AOM_CODEC_USE_PSNR; + } + + virtual void BeginPassHook(unsigned int) { + psnr_ = 0.0; + nframes_ = 0; + } + + virtual void PSNRPktHook(const aom_codec_cx_pkt_t *pkt) { + psnr_ += pkt->data.psnr.psnr[0]; + nframes_++; + } + + virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video, + ::libaom_test::Encoder *encoder) { + if (video->frame() == 0) { + encoder->Control(AOME_SET_CPUUSED, cpu_used_); + if (rc_mode_ == AOM_Q) { + encoder->Control(AOME_SET_QP, 210); + } + encoder->Control(AOME_SET_ENABLEAUTOALTREF, 1); + encoder->Control(AOME_SET_ARNR_MAXFRAMES, 7); + encoder->Control(AOME_SET_ARNR_STRENGTH, 5); +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + encoder->Control(AV1E_SET_FRAME_OUTPUT_ORDER_DERIVATION, + enable_frame_output_order_derivation_); +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + } + } + + double GetAveragePsnr() const { + if (nframes_) return psnr_ / nframes_; + return 0.0; + } + + double GetPsnrThreshold() { return psnr_threshold_; } + + ::libaom_test::TestMode encoding_mode_; + aom_rc_mode rc_mode_; + int enable_frame_output_order_derivation_; + double psnr_threshold_; + int cpu_used_; + int nframes_; + double psnr_; +}; + +TEST_P(SEFTest, TestShowExistingFrame) { + libaom_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 
cfg_.g_timebase.den, cfg_.g_timebase.num, + 0, 32); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + EXPECT_GT(GetAveragePsnr(), GetPsnrThreshold()) + << "Frame output order derivation = " + << enable_frame_output_order_derivation_ << ", "; +} + +AV1_INSTANTIATE_TEST_SUITE(SEFTest, GOODQUALITY_TEST_MODES, + ::testing::Values(AOM_Q), + ::testing::ValuesIn(sefTestParams)); +} // namespace
diff --git a/test/subgop_test.cc b/test/subgop_test.cc index c77b5c0..be62bd3 100644 --- a/test/subgop_test.cc +++ b/test/subgop_test.cc
@@ -62,6 +62,9 @@ // low delay config without references extern "C" const char subgop_config_str_ld[]; +const int kCpuUsed = 5; +const unsigned int kFrames = 70; + typedef enum { DEFAULT, ENHANCE, @@ -88,7 +91,6 @@ int max_gf_interval; int frame_w; int frame_h; - int cpu_used; int lag_in_frames; int use_fixed_qp_offsets; } SubgopTestParams; @@ -104,69 +106,69 @@ static const SubgopTestParams SubGopTestVectors[] = { // Default subgop config { subgop_config_str_preset_map[DEFAULT].preset_tag, - "hantro_collage_w352h288.yuv", 0, 16, 352, 288, 5, 35, 0 }, + "hantro_collage_w352h288.yuv", 0, 16, 352, 288, 35, 0 }, { subgop_config_str_preset_map[DEFAULT].preset_tag, "desktop1.320_180.yuv", 0, - 16, 320, 180, 5, 35, 0 }, + 16, 320, 180, 35, 0 }, { subgop_config_str_preset_map[DEFAULT].preset_tag, - "pixel_capture_w320h240.yuv", 16, 16, 320, 240, 5, 35, 1 }, + "pixel_capture_w320h240.yuv", 16, 16, 320, 240, 35, 1 }, { subgop_config_str_preset_map[DEFAULT].preset_tag, - "hantro_collage_w352h288.yuv", 0, 32, 352, 288, 5, 35, 0 }, + "hantro_collage_w352h288.yuv", 0, 32, 352, 288, 35, 0 }, { subgop_config_str_preset_map[DEFAULT].preset_tag, - "pixel_capture_w320h240.yuv", 32, 32, 320, 240, 5, 35, 1 }, + "pixel_capture_w320h240.yuv", 32, 32, 320, 240, 35, 1 }, // Enhanced subgop config { subgop_config_str_preset_map[ENHANCE].preset_tag, "niklas_640_480_30.yuv", - 0, 15, 640, 480, 5, 35, 0 }, + 0, 15, 640, 480, 35, 0 }, { subgop_config_str_preset_map[ENHANCE].preset_tag, "paris_352_288_30.y4m", 0, - 6, 352, 288, 5, 35, 0 }, + 6, 352, 288, 35, 0 }, { subgop_config_str_preset_map[ENHANCE].preset_tag, - "hantro_collage_w352h288.yuv", 0, 16, 352, 288, 5, 35, 0 }, + "hantro_collage_w352h288.yuv", 0, 16, 352, 288, 35, 0 }, { subgop_config_str_preset_map[ENHANCE].preset_tag, - "pixel_capture_w320h240.yuv", 0, 12, 320, 240, 5, 35, 0 }, + "pixel_capture_w320h240.yuv", 0, 12, 320, 240, 35, 0 }, { subgop_config_str_preset_map[ENHANCE].preset_tag, "niklas_1280_720_30.y4m", - 0, 11, 1280, 
720, 5, 35, 0 }, + 0, 11, 1280, 720, 35, 0 }, { subgop_config_str_preset_map[ENHANCE].preset_tag, "screendata.y4m", 0, 16, - 640, 480, 5, 35, 0 }, + 640, 480, 35, 0 }, { subgop_config_str_preset_map[ENHANCE].preset_tag, - "pixel_capture_w320h240.yuv", 0, 14, 320, 240, 5, 35, 0 }, + "pixel_capture_w320h240.yuv", 0, 14, 320, 240, 35, 0 }, { subgop_config_str_preset_map[ENHANCE].preset_tag, "desktop1.320_180.yuv", 0, - 10, 320, 180, 5, 35, 0 }, + 10, 320, 180, 35, 0 }, { subgop_config_str_preset_map[ENHANCE].preset_tag, "paris_352_288_30.y4m", 0, - 13, 352, 288, 5, 35, 0 }, + 13, 352, 288, 35, 0 }, { subgop_config_str_preset_map[ENHANCE].preset_tag, - "pixel_capture_w320h240.yuv", 0, 8, 320, 240, 5, 35, 0 }, + "pixel_capture_w320h240.yuv", 0, 8, 320, 240, 35, 0 }, // Asymmetric subgop config { subgop_config_str_preset_map[ASYMMETRIC].preset_tag, - "pixel_capture_w320h240.yuv", 0, 16, 320, 240, 5, 35, 0 }, + "pixel_capture_w320h240.yuv", 0, 16, 320, 240, 35, 0 }, { subgop_config_str_preset_map[ASYMMETRIC].preset_tag, "desktop1.320_180.yuv", - 0, 16, 320, 180, 5, 35, 0 }, + 0, 16, 320, 180, 35, 0 }, // Temporal scalable subgop config { subgop_config_str_preset_map[TEMPORAL_SCALABLE].preset_tag, - "pixel_capture_w320h240.yuv", 0, 16, 320, 240, 5, 35, 0 }, + "pixel_capture_w320h240.yuv", 0, 16, 320, 240, 35, 0 }, { subgop_config_str_preset_map[TEMPORAL_SCALABLE].preset_tag, - "hantro_collage_w352h288.yuv", 0, 16, 352, 288, 5, 35, 0 }, + "hantro_collage_w352h288.yuv", 0, 16, 352, 288, 35, 0 }, // Low delay subgop config { subgop_config_str_preset_map[LOW_DELAY].preset_tag, "paris_352_288_30.y4m", - 0, 16, 352, 288, 5, 0, 0 }, + 0, 16, 352, 288, 0, 0 }, { subgop_config_str_preset_map[LOW_DELAY].preset_tag, "desktop1.320_180.yuv", - 16, 16, 320, 180, 5, 0, 1 }, + 16, 16, 320, 180, 0, 1 }, { subgop_config_str_preset_map[LOW_DELAY].preset_tag, "paris_352_288_30.y4m", - 0, 32, 352, 288, 5, 0, 0 }, + 0, 32, 352, 288, 0, 0 }, { subgop_config_str_preset_map[LOW_DELAY].preset_tag, 
"desktop1.320_180.yuv", - 32, 32, 320, 180, 5, 0, 1 }, + 32, 32, 320, 180, 0, 1 }, // Non-default subgop config { subgop_config_str_nondef[0], "pixel_capture_w320h240.yuv", 0, 4, 320, 240, - 5, 35, 0 }, - { subgop_config_str_nondef[0], "desktop1.320_180.yuv", 0, 5, 320, 180, 5, 35, + 35, 0 }, + { subgop_config_str_nondef[0], "desktop1.320_180.yuv", 0, 5, 320, 180, 35, 0 }, { subgop_config_str_nondef[0], "pixel_capture_w320h240.yuv", 0, 7, 320, 240, - 5, 35, 0 }, + 35, 0 }, { subgop_config_str_nondef[0], "hantro_collage_w352h288.yuv", 0, 9, 352, 288, - 5, 35, 0 }, + 35, 0 }, }; std::ostream &operator<<(std::ostream &os, const SubgopTestParams &test_arg) { @@ -175,8 +177,7 @@ << " min_gf_interval:" << test_arg.min_gf_interval << " max_gf_interval:" << test_arg.max_gf_interval << " frame_width:" << test_arg.frame_w - << " frame_height:" << test_arg.frame_h - << " cpu_used:" << test_arg.cpu_used + << " frame_height:" << test_arg.frame_h << " cpu_used:" << kCpuUsed << " lag_in_frames:" << test_arg.lag_in_frames << " use_fixed_qp_offsets:" << test_arg.use_fixed_qp_offsets << " }"; @@ -229,7 +230,7 @@ virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video, ::libaom_test::Encoder *encoder) { if (video->frame() == 0) { - encoder->Control(AOME_SET_CPUUSED, subgop_test_params_.cpu_used); + encoder->Control(AOME_SET_CPUUSED, kCpuUsed); if (rc_end_usage_ == AOM_Q) { encoder->Control(AOME_SET_QP, 210); } @@ -262,7 +263,6 @@ ResetSubgop(); is_first_frame_in_subgop_key_ = 0; frames_from_key_ = 0; - frame_num_ = 0; enable_subgop_stats_ = 1; memset(&subgop_last_step_, 0, sizeof(subgop_last_step_)); } @@ -420,12 +420,18 @@ // Validates frametype(along with temporal filtering), frame coding order bool ValidateSubgopFrametype() { for (int idx = 0; idx < subgop_cfg_ref_->num_steps; idx++) { - EXPECT_EQ(subgop_cfg_ref_->step[idx].disp_frame_idx, - subgop_cfg_test_.step[idx].disp_frame_idx) - << "Error:display_index doesn't match"; - 
EXPECT_EQ(subgop_cfg_ref_->step[idx].type_code, - subgop_cfg_test_.step[idx].type_code) - << "Error:frame type doesn't match"; +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + if (subgop_cfg_ref_->step[idx].type_code != FRAME_TYPE_INO_SHOWEXISTING) { +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + EXPECT_EQ(subgop_cfg_ref_->step[idx].disp_frame_idx, + subgop_cfg_test_.step[idx].disp_frame_idx) + << "Error:display_index doesn't match"; + EXPECT_EQ(subgop_cfg_ref_->step[idx].type_code, + subgop_cfg_test_.step[idx].type_code) + << "Error:frame type doesn't match"; +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + } +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT } return 1; } @@ -438,12 +444,18 @@ max_pyramid_level = subgop_cfg_ref_->step[idx].pyr_level; } for (int idx = 0; idx < subgop_cfg_ref_->num_steps; idx++) { - int8_t ref_pyramid_level = - (subgop_cfg_ref_->step[idx].pyr_level == max_pyramid_level) - ? MAX_ARF_LAYERS - : subgop_cfg_ref_->step[idx].pyr_level; - EXPECT_EQ(subgop_cfg_test_.step[idx].pyr_level, ref_pyramid_level) - << "Error:pyramid level doesn't match"; +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + if (subgop_cfg_ref_->step[idx].type_code != FRAME_TYPE_INO_SHOWEXISTING) { +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + int8_t ref_pyramid_level = + (subgop_cfg_ref_->step[idx].pyr_level == max_pyramid_level) + ? 
MAX_ARF_LAYERS + : subgop_cfg_ref_->step[idx].pyr_level; + EXPECT_EQ(subgop_cfg_test_.step[idx].pyr_level, ref_pyramid_level) + << "Error:pyramid level doesn't match"; +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + } +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT } } @@ -454,13 +466,19 @@ int pyramid_level; for (int idx = 0; idx < subgop_cfg_ref_->num_steps; idx++) { pyramid_level = subgop_cfg_test_.step[idx].pyr_level; - if (level_qindex[pyramid_level] < 0) { - level_qindex[pyramid_level] = subgop_data_.step[idx].qindex; - } else if (!subgop_data_.step[idx].show_existing_frame && - !subgop_data_.step[idx].is_filtered) { - EXPECT_EQ(level_qindex[pyramid_level], subgop_data_.step[idx].qindex) - << "Error:qindex in a pyramid level doesn't match"; +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + if (subgop_cfg_ref_->step[idx].type_code != FRAME_TYPE_INO_SHOWEXISTING) { +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + if (level_qindex[pyramid_level] < 0) { + level_qindex[pyramid_level] = subgop_data_.step[idx].qindex; + } else if (!subgop_data_.step[idx].show_existing_frame && + !subgop_data_.step[idx].is_filtered) { + EXPECT_EQ(level_qindex[pyramid_level], subgop_data_.step[idx].qindex) + << "Error:qindex in a pyramid level doesn't match"; + } +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT } +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT } for (pyramid_level = 1; pyramid_level <= MAX_ARF_LAYERS; pyramid_level++) { if (level_qindex[pyramid_level] >= 0) { @@ -486,13 +504,22 @@ int refresh_frame_flags = curr_step_data->refresh_frame_flags; // Validates user-defined refresh_flag with decoder if (subgop_cfg_ref_->step[idx].refresh != -1 && +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + subgop_cfg_ref_->step[idx].type_code != FRAME_TYPE_INO_SHOWEXISTING) { +#else // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT !curr_step_data->show_existing_frame) { +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT EXPECT_EQ(subgop_cfg_ref_->step[idx].refresh, 
(int8_t)refresh_frame_flags) << "Error: refresh flag mismatch"; } // Validates reference picture management w.r.t refresh_flags +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + if (refresh_frame_flags && + subgop_cfg_ref_->step[idx].type_code != FRAME_TYPE_INO_SHOWEXISTING) { +#else // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT if (refresh_frame_flags && !curr_step_data->show_existing_frame) { +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT for (int mask = refresh_frame_flags; mask; mask >>= 1) { if (mask & 1) EXPECT_EQ(curr_step_data->disp_frame_idx, @@ -521,7 +548,11 @@ unsigned int *ref_frame_map = (idx > 0) ? subgop_data_.step[idx - 1].ref_frame_map : subgop_last_step_.ref_frame_map; +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + if (subgop_cfg_ref_->step[idx].type_code != FRAME_TYPE_INO_SHOWEXISTING) { +#else // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT if (!subgop_data_.step[idx].show_existing_frame) { +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT EXPECT_EQ(subgop_cfg_ref_->step[idx].num_references, subgop_cfg_test_.step[idx].num_references) << "Error:Reference frames count doesn't match"; @@ -530,7 +561,13 @@ // config. 
for (int ref = 0; ref < subgop_cfg_test_.step[idx].num_references; ref++) { +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + if (subgop_cfg_ref_->step[idx].type_code != + FRAME_TYPE_INO_SHOWEXISTING && + subgop_data_.step[idx].is_valid_ref_frame[ref]) { +#else // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT if (subgop_data_.step[idx].is_valid_ref_frame[ref]) { +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT EXPECT_EQ(subgop_cfg_ref_->step[idx].references[ref], subgop_cfg_test_.step[idx].references[ref]) << "Error:Reference frame level doesn't match"; @@ -574,12 +611,21 @@ } } + virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) { +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + frame_num_in_subgop_ += pkt->data.frame.frame_count; +#else // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + (void)pkt; + ++frame_num_in_subgop_; +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + } + virtual bool HandleDecodeResult(const aom_codec_err_t res_dec, libaom_test::Decoder *decoder) { EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError(); if (AOM_CODEC_OK != res_dec) return 0; aom_codec_ctx_t *ctx_dec = decoder->GetDecoder(); - frame_num_in_subgop_++; + int is_last_frame_in_subgop = (frame_num_in_subgop_ == subgop_info_.size); if (subgop_info_.is_user_specified || @@ -614,7 +660,6 @@ } ResetSubgop(); } - frame_num_++; return AOM_CODEC_OK == res_dec; } @@ -642,59 +687,62 @@ libaom_test::I420VideoSource video( subgop_test_params_.input_file, subgop_test_params_.frame_w, subgop_test_params_.frame_h, cfg_.g_timebase.den, cfg_.g_timebase.num, - 0, 200); + 0, kFrames); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); } else { - ::libaom_test::Y4mVideoSource video(subgop_test_params_.input_file, 0, 200); + ::libaom_test::Y4mVideoSource video(subgop_test_params_.input_file, 0, + kFrames); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); } } AV1_INSTANTIATE_TEST_SUITE(SubGopTestLarge, ::testing::ValuesIn(SubGopTestVectors), - ::testing::Values(AOM_Q, AOM_VBR, AOM_CQ, AOM_CBR)); + 
::testing::Values(AOM_Q, AOM_VBR + // Disabled to reduce combinations. + //, AOM_CQ, AOM_CBR + )); typedef struct { const char *subgop_str; const char *input_file; int frame_w; int frame_h; - int cpu_used; int lag_in_frames; } SubgopPsnrTestParams; static const SubgopPsnrTestParams SubGopPsnrTestVectors[] = { { subgop_config_str_preset_map[DEFAULT].preset_tag, - "hantro_collage_w352h288.yuv", 352, 288, 3, 35 }, + "hantro_collage_w352h288.yuv", 352, 288, 35 }, { subgop_config_str_preset_map[DEFAULT].preset_tag, "desktop1.320_180.yuv", - 320, 180, 5, 35 }, + 320, 180, 35 }, { subgop_config_str_preset_map[ENHANCE].preset_tag, - "hantro_collage_w352h288.yuv", 352, 288, 3, 35 }, + "hantro_collage_w352h288.yuv", 352, 288, 35 }, { subgop_config_str_preset_map[ENHANCE].preset_tag, - "pixel_capture_w320h240.yuv", 320, 240, 5, 35 }, + "pixel_capture_w320h240.yuv", 320, 240, 35 }, // TODO(any): Enable after fix /* { subgop_config_str_preset_map[ENHANCE].preset_tag, "paris_352_288_30.y4m", - 352, 288, 3, 35 }, + 352, 288, 35 }, { subgop_config_str_preset_map[ENHANCE].preset_tag, "screendata.y4m", 640, - 480, 5, 35 }, + 480, 35 }, { subgop_config_str_preset_map[ENHANCE].preset_tag, "paris_352_288_30.y4m", - 352, 288, 5, 35 }, */ + 352, 288, 35 }, */ { subgop_config_str_preset_map[ASYMMETRIC].preset_tag, - "pixel_capture_w320h240.yuv", 320, 240, 5, 35 }, + "pixel_capture_w320h240.yuv", 320, 240, 35 }, // TODO(any): Enable after fix /* { subgop_config_str_preset_map[ASYMMETRIC].preset_tag, - "desktop1.320_180.yuv", 320, 180, 3, 35 }, */ + "desktop1.320_180.yuv", 320, 180, 35 }, */ { subgop_config_str_preset_map[TEMPORAL_SCALABLE].preset_tag, - "hantro_collage_w352h288.yuv", 352, 288, 5, 35 }, + "hantro_collage_w352h288.yuv", 352, 288, 35 }, // TODO(any): Enable after fix /* { subgop_config_str_preset_map[LOW_DELAY].preset_tag, - "paris_352_288_30.y4m", 352, 288, 5, 0 }, + "paris_352_288_30.y4m", 352, 288, 0 }, { subgop_config_str_preset_map[LOW_DELAY].preset_tag, - 
"desktop1.320_180.yuv", 320, 180, 3, 0 }, */ + "desktop1.320_180.yuv", 320, 180, 0 }, */ }; std::ostream &operator<<(std::ostream &os, @@ -702,8 +750,7 @@ return os << "SubgopPsnrTestParams { sub_gop_config:" << test_arg.subgop_str << " source_file:" << test_arg.input_file << " frame_width:" << test_arg.frame_w - << " frame_height:" << test_arg.frame_h - << " cpu_used:" << test_arg.cpu_used + << " frame_height:" << test_arg.frame_h << " cpu_used:" << kCpuUsed << " lag_in_frames:" << test_arg.lag_in_frames << " }"; } @@ -748,7 +795,10 @@ virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video, ::libaom_test::Encoder *encoder) { if (video->frame() == 0) { - encoder->Control(AOME_SET_CPUUSED, test_params_.cpu_used); + encoder->Control(AOME_SET_CPUUSED, kCpuUsed); + if (rc_end_usage_ == AOM_Q) { + encoder->Control(AOME_SET_QP, 210); + } if (enable_subgop_) encoder->Control(AV1E_SET_SUBGOP_CONFIG_STR, test_params_.subgop_str); } @@ -764,8 +814,7 @@ TEST_P(SubGopPSNRCheckTestLarge, SubGopPSNRCheck) { std::unique_ptr<libaom_test::VideoSource> video; - const unsigned int kFrames = 100; - const double psnr_diff_thresh = 0.3; + const double psnr_diff_thresh = 0.5; if (is_extension_y4m(test_params_.input_file)) { video.reset( new libaom_test::Y4mVideoSource(test_params_.input_file, 0, kFrames)); @@ -800,7 +849,6 @@ const char *input_file; int frame_w; int frame_h; - int cpu_used; int lag_in_frames; int max_gf_interval; } SubGopSwitchTestParams; @@ -810,30 +858,29 @@ return os << "SubGopSwitchTestParams { sub_gop_config:" << test_arg.subgop_str << " source_file:" << test_arg.input_file << " frame_width:" << test_arg.frame_w - << " frame_height:" << test_arg.frame_h - << " cpu_used:" << test_arg.cpu_used + << " frame_height:" << test_arg.frame_h << " cpu_used:" << kCpuUsed << " lag_in_frames:" << test_arg.lag_in_frames << " max_gf_interval:" << test_arg.max_gf_interval << " }"; } static const SubGopSwitchTestParams SubgopSwitchTestVectors[] = { { 
subgop_config_str_preset_map[DEFAULT].preset_tag, "niklas_640_480_30.yuv", - 640, 480, 5, 35, 16 }, + 640, 480, 35, 16 }, /* TODO(sarahparker/debargha): Enable after adding default 32 subgop config. { subgop_config_str_preset_map[DEFAULT].preset_tag, "niklas_640_480_30.yuv", - 640, 480, 5, 35, 32 },*/ + 640, 480, 35, 32 },*/ { subgop_config_str_preset_map[ENHANCE].preset_tag, "desktop1.320_180.yuv", - 320, 180, 3, 35, 16 }, + 320, 180, 35, 16 }, { subgop_config_str_preset_map[ENHANCE].preset_tag, - "hantro_collage_w352h288.yuv", 352, 288, 5, 35, 16 }, + "hantro_collage_w352h288.yuv", 352, 288, 35, 16 }, { subgop_config_str_preset_map[ASYMMETRIC].preset_tag, - "pixel_capture_w320h240.yuv", 320, 240, 3, 35, 16 }, + "pixel_capture_w320h240.yuv", 320, 240, 35, 16 }, { subgop_config_str_preset_map[TEMPORAL_SCALABLE].preset_tag, - "paris_352_288_30.y4m", 352, 288, 3, 35, 16 }, + "paris_352_288_30.y4m", 352, 288, 35, 16 }, { subgop_config_str_preset_map[LOW_DELAY].preset_tag, "screendata.y4m", 640, - 480, 5, 0, 16 }, + 480, 0, 16 }, { subgop_config_str_preset_map[LOW_DELAY].preset_tag, "screendata.y4m", 640, - 480, 5, 0, 32 }, + 480, 0, 32 }, }; using libaom_test::ACMRandom; @@ -897,18 +944,26 @@ // Set max gf interval if (subgop_str) encoder->Control(AV1E_SET_MAX_GF_INTERVAL, max_gf_interval); + // Keep min gf interval same as max gf interval in most cases, to ensure + // that user-provided subgop config is used. + int min_gf_interval = max_gf_interval; + // In case of no subgop config / enhanced subgop config, test arbitrary gf + // intervals by setting a lower min gf interval. 
+ if (!subgop_str || !strcmp(subgop_str, "enh")) min_gf_interval = 6; + + // Set min gf interval + encoder->Control(AV1E_SET_MIN_GF_INTERVAL, min_gf_interval); + last_subgop_str_ = subgop_str; } virtual void PreEncodeFrameHook(::libaom_test::VideoSource *video, ::libaom_test::Encoder *encoder) { if (video->frame() == 0) { - encoder->Control(AOME_SET_CPUUSED, test_params_.cpu_used); + encoder->Control(AOME_SET_CPUUSED, kCpuUsed); if (rc_end_usage_ == AOM_Q) { encoder->Control(AOME_SET_QP, 210); } - // Set min gf interval - encoder->Control(AV1E_SET_MIN_GF_INTERVAL, 6); set_subgop_config(encoder); } @@ -943,13 +998,20 @@ return 1; } + virtual void FramePktHook(const aom_codec_cx_pkt_t *pkt) { +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + frame_num_in_subgop_ += pkt->data.frame.frame_count; +#else // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + (void)pkt; + ++frame_num_in_subgop_; +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + } + virtual bool HandleDecodeResult(const aom_codec_err_t res_dec, libaom_test::Decoder *decoder) { EXPECT_EQ(AOM_CODEC_OK, res_dec) << decoder->DecodeError(); if (AOM_CODEC_OK != res_dec) return 0; - frame_num_in_subgop_++; - return AOM_CODEC_OK == res_dec; } SubGopSwitchTestParams test_params_; @@ -966,8 +1028,6 @@ TEST_P(SubGopSwitchingTestLarge, SubGopSwitching) { std::unique_ptr<libaom_test::VideoSource> video; - const unsigned int kFrames = 175; - if (is_extension_y4m(test_params_.input_file)) { video.reset( new libaom_test::Y4mVideoSource(test_params_.input_file, 0, kFrames));
diff --git a/test/test.cmake b/test/test.cmake index a402306..7642949 100644 --- a/test/test.cmake +++ b/test/test.cmake
@@ -132,6 +132,7 @@ "${AOM_ROOT}/test/segment_binarization_sync.cc" "${AOM_ROOT}/test/still_picture_test.cc" "${AOM_ROOT}/test/subgop_test.cc" + "${AOM_ROOT}/test/sef_test.cc" "${AOM_ROOT}/test/superframe_test.cc" "${AOM_ROOT}/test/tile_config_test.cc" "${AOM_ROOT}/test/tile_independence_test.cc"
diff --git a/test/tile_config_test.cc b/test/tile_config_test.cc index 2bed6cb..8588c53 100644 --- a/test/tile_config_test.cc +++ b/test/tile_config_test.cc
@@ -324,6 +324,9 @@ tile_group_config_params_.num_tile_cols); encoder->Control(AV1E_SET_TILE_ROWS, tile_group_config_params_.num_tile_rows); +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + encoder->Control(AV1E_SET_FRAME_OUTPUT_ORDER_DERIVATION, 0); +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT } }
diff --git a/test/tile_independence_test.cc b/test/tile_independence_test.cc index aca8c8d..9a6689d 100644 --- a/test/tile_independence_test.cc +++ b/test/tile_independence_test.cc
@@ -62,6 +62,9 @@ if (video->frame() == 0) { encoder->Control(AV1E_SET_TILE_COLUMNS, n_tile_cols_); encoder->Control(AV1E_SET_TILE_ROWS, n_tile_rows_); +#if CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT + encoder->Control(AV1E_SET_FRAME_OUTPUT_ORDER_DERIVATION, 0); +#endif // CONFIG_OUTPUT_FRAME_BASED_ON_ORDER_HINT SetCpuUsed(encoder); } else if (video->frame() == 3) { encoder->Control(AV1E_SET_NUM_TG, n_tile_groups_);
diff --git a/test/util.h b/test/util.h index e311207..e0ffe04 100644 --- a/test/util.h +++ b/test/util.h
@@ -22,27 +22,49 @@ // Macros #define GET_PARAM(k) std::get<k>(GetParam()) +// Same as 'aom_sse_to_psnr'. +inline double sse_to_psnr(double samples, double peak, double sse) { + static const double kMinSSE = 0.5; + const bool zero_sse = (sse < kMinSSE); + if (zero_sse) sse = kMinSSE; + assert(sse > 0.0); + double psnr = 10.0 * log10(samples * peak * peak / sse); + if (zero_sse) psnr = ceil(psnr); + return psnr; +} + inline double compute_psnr(const aom_image_t *img1, const aom_image_t *img2) { assert((img1->fmt == img2->fmt) && (img1->d_w == img2->d_w) && (img1->d_h == img2->d_h)); const unsigned int width_y = img1->d_w; const unsigned int height_y = img1->d_h; - unsigned int i, j; - int64_t sqrerr = 0; - for (i = 0; i < height_y; ++i) - for (j = 0; j < width_y; ++j) { - int64_t d = img1->planes[AOM_PLANE_Y][i * img1->stride[AOM_PLANE_Y] + j] - - img2->planes[AOM_PLANE_Y][i * img2->stride[AOM_PLANE_Y] + j]; - sqrerr += d * d; + double sse = 0; + for (unsigned int i = 0; i < height_y; ++i) { + for (unsigned int j = 0; j < width_y; ++j) { + const double d = + img1->planes[AOM_PLANE_Y][i * img1->stride[AOM_PLANE_Y] + j] - + img2->planes[AOM_PLANE_Y][i * img2->stride[AOM_PLANE_Y] + j]; + sse += d * d; } - double mse = static_cast<double>(sqrerr) / (width_y * height_y); - double psnr = 100.0; - if (mse > 0.0) { - psnr = 10 * log10(255.0 * 255.0 / mse); } - return psnr; + return sse_to_psnr(width_y * height_y, 255.0, sse); +} + +// Returns the expected total PSNR for the zero distortion case, based on frame +// dimensions. +// If `is_yuv444` is true: assumes YUV4:4:4 format, otherwise assumes YUV4:2:0. 
+inline double get_lossless_psnr(unsigned int width, unsigned int height, + unsigned int bit_depth, bool is_yuv444) { +#if CONFIG_AV2CTC_PSNR_PEAK + const double peak = (double)(255 << (bit_depth - 8)); +#else + const double peak = (double)((1 << in_bit_depth) - 1); +#endif // CONFIG_AV2CTC_PSNR_PEAK + const double y_samples = width * height; + const double uv_samples = is_yuv444 ? 2 * y_samples : 2 * y_samples / 4; + return sse_to_psnr(y_samples + uv_samples, peak, 0); } static INLINE double get_time_mark(aom_usec_timer *t) {
diff --git a/test/variance_test.cc b/test/variance_test.cc index df15fcd..4e1942a 100644 --- a/test/variance_test.cc +++ b/test/variance_test.cc
@@ -1436,9 +1436,11 @@ const SubpelVarianceParams kArrayHBDSubpelVariance_avx2[] = { #if CONFIG_BLOCK_256X256 // SubpelVarianceParams(8, 8, &aom_highbd_12_sub_pixel_variance256x256_avx2, -// 12), SubpelVarianceParams(8, 7, -// &aom_highbd_12_sub_pixel_variance256x128_avx2, 12), SubpelVarianceParams(7, -// 8, &aom_highbd_12_sub_pixel_variance128x256_avx2, 12), +// 12), +// SubpelVarianceParams(8, 7, &aom_highbd_12_sub_pixel_variance256x128_avx2, +// 12), +// SubpelVarianceParams(7, 8, &aom_highbd_12_sub_pixel_variance128x256_avx2, +// 12), #endif // CONFIG_BLOCK_256X256 // SubpelVarianceParams(7, 7, &aom_highbd_12_sub_pixel_variance128x128_avx2, // 12), @@ -1528,8 +1530,10 @@ SubpelVarianceParams(5, 5, &aom_highbd_12_sub_pixel_variance32x32_sse2, 12), SubpelVarianceParams(5, 4, &aom_highbd_12_sub_pixel_variance32x16_sse2, 12), SubpelVarianceParams(4, 5, &aom_highbd_12_sub_pixel_variance16x32_sse2, 12), +#if !CONFIG_UNEVEN_4WAY SubpelVarianceParams(4, 4, &aom_highbd_12_sub_pixel_variance16x16_sse2, 12), SubpelVarianceParams(4, 3, &aom_highbd_12_sub_pixel_variance16x8_sse2, 12), +#endif // !CONFIG_UNEVEN_4WAY SubpelVarianceParams(3, 4, &aom_highbd_12_sub_pixel_variance8x16_sse2, 12), SubpelVarianceParams(3, 3, &aom_highbd_12_sub_pixel_variance8x8_sse2, 12), SubpelVarianceParams(3, 2, &aom_highbd_12_sub_pixel_variance8x4_sse2, 12), @@ -1542,8 +1546,10 @@ SubpelVarianceParams(5, 5, &aom_highbd_10_sub_pixel_variance32x32_sse2, 10), SubpelVarianceParams(5, 4, &aom_highbd_10_sub_pixel_variance32x16_sse2, 10), SubpelVarianceParams(4, 5, &aom_highbd_10_sub_pixel_variance16x32_sse2, 10), +#if !CONFIG_UNEVEN_4WAY SubpelVarianceParams(4, 4, &aom_highbd_10_sub_pixel_variance16x16_sse2, 10), SubpelVarianceParams(4, 3, &aom_highbd_10_sub_pixel_variance16x8_sse2, 10), +#endif // !CONFIG_UNEVEN_4WAY SubpelVarianceParams(3, 4, &aom_highbd_10_sub_pixel_variance8x16_sse2, 10), SubpelVarianceParams(3, 3, &aom_highbd_10_sub_pixel_variance8x8_sse2, 10), SubpelVarianceParams(3, 2, 
&aom_highbd_10_sub_pixel_variance8x4_sse2, 10), @@ -1556,8 +1562,10 @@ SubpelVarianceParams(5, 5, &aom_highbd_8_sub_pixel_variance32x32_sse2, 8), SubpelVarianceParams(5, 4, &aom_highbd_8_sub_pixel_variance32x16_sse2, 8), SubpelVarianceParams(4, 5, &aom_highbd_8_sub_pixel_variance16x32_sse2, 8), +#if !CONFIG_UNEVEN_4WAY SubpelVarianceParams(4, 4, &aom_highbd_8_sub_pixel_variance16x16_sse2, 8), SubpelVarianceParams(4, 3, &aom_highbd_8_sub_pixel_variance16x8_sse2, 8), +#endif // !CONFIG_UNEVEN_4WAY SubpelVarianceParams(3, 4, &aom_highbd_8_sub_pixel_variance8x16_sse2, 8), SubpelVarianceParams(3, 3, &aom_highbd_8_sub_pixel_variance8x8_sse2, 8), SubpelVarianceParams(3, 2, &aom_highbd_8_sub_pixel_variance8x4_sse2, 8), @@ -1566,19 +1574,25 @@ SubpelVarianceParams(4, 6, &aom_highbd_12_sub_pixel_variance16x64_sse2, 12), SubpelVarianceParams(5, 3, &aom_highbd_12_sub_pixel_variance32x8_sse2, 12), SubpelVarianceParams(3, 5, &aom_highbd_12_sub_pixel_variance8x32_sse2, 12), +#if !CONFIG_UNEVEN_4WAY SubpelVarianceParams(4, 2, &aom_highbd_12_sub_pixel_variance16x4_sse2, 12), +#endif // !CONFIG_UNEVEN_4WAY // SubpelVarianceParams(2, 4, &aom_highbd_12_sub_pixel_variance4x16_sse2, 12), SubpelVarianceParams(6, 4, &aom_highbd_10_sub_pixel_variance64x16_sse2, 10), SubpelVarianceParams(4, 6, &aom_highbd_10_sub_pixel_variance16x64_sse2, 10), SubpelVarianceParams(5, 3, &aom_highbd_10_sub_pixel_variance32x8_sse2, 10), SubpelVarianceParams(3, 5, &aom_highbd_10_sub_pixel_variance8x32_sse2, 10), +#if !CONFIG_UNEVEN_4WAY SubpelVarianceParams(4, 2, &aom_highbd_10_sub_pixel_variance16x4_sse2, 10), +#endif // !CONFIG_UNEVEN_4WAY // SubpelVarianceParams(2, 4, &aom_highbd_10_sub_pixel_variance4x16_sse2, 10), SubpelVarianceParams(6, 4, &aom_highbd_8_sub_pixel_variance64x16_sse2, 8), SubpelVarianceParams(4, 6, &aom_highbd_8_sub_pixel_variance16x64_sse2, 8), SubpelVarianceParams(5, 3, &aom_highbd_8_sub_pixel_variance32x8_sse2, 8), SubpelVarianceParams(3, 5, &aom_highbd_8_sub_pixel_variance8x32_sse2, 8), 
+#if !CONFIG_UNEVEN_4WAY SubpelVarianceParams(4, 2, &aom_highbd_8_sub_pixel_variance16x4_sse2, 8), +#endif // !CONFIG_UNEVEN_4WAY // SubpelVarianceParams(2, 4, &aom_highbd_8_sub_pixel_variance4x16_sse2, 8), }; INSTANTIATE_TEST_SUITE_P(SSE2, AvxHBDSubpelVarianceTest, @@ -1597,10 +1611,12 @@ 12), SubpelAvgVarianceParams(4, 5, &aom_highbd_12_sub_pixel_avg_variance16x32_sse2, 12), +#if !CONFIG_UNEVEN_4WAY SubpelAvgVarianceParams(4, 4, &aom_highbd_12_sub_pixel_avg_variance16x16_sse2, 12), SubpelAvgVarianceParams(4, 3, &aom_highbd_12_sub_pixel_avg_variance16x8_sse2, 12), +#endif // !CONFIG_UNEVEN_4WAY SubpelAvgVarianceParams(3, 4, &aom_highbd_12_sub_pixel_avg_variance8x16_sse2, 12), SubpelAvgVarianceParams(3, 3, &aom_highbd_12_sub_pixel_avg_variance8x8_sse2, @@ -1619,10 +1635,12 @@ 10), SubpelAvgVarianceParams(4, 5, &aom_highbd_10_sub_pixel_avg_variance16x32_sse2, 10), +#if !CONFIG_UNEVEN_4WAY SubpelAvgVarianceParams(4, 4, &aom_highbd_10_sub_pixel_avg_variance16x16_sse2, 10), SubpelAvgVarianceParams(4, 3, &aom_highbd_10_sub_pixel_avg_variance16x8_sse2, 10), +#endif // !CONFIG_UNEVEN_4WAY SubpelAvgVarianceParams(3, 4, &aom_highbd_10_sub_pixel_avg_variance8x16_sse2, 10), SubpelAvgVarianceParams(3, 3, &aom_highbd_10_sub_pixel_avg_variance8x8_sse2, @@ -1641,10 +1659,12 @@ 8), SubpelAvgVarianceParams(4, 5, &aom_highbd_8_sub_pixel_avg_variance16x32_sse2, 8), +#if !CONFIG_UNEVEN_4WAY SubpelAvgVarianceParams(4, 4, &aom_highbd_8_sub_pixel_avg_variance16x16_sse2, 8), SubpelAvgVarianceParams(4, 3, &aom_highbd_8_sub_pixel_avg_variance16x8_sse2, 8), +#endif // !CONFIG_UNEVEN_4WAY SubpelAvgVarianceParams(3, 4, &aom_highbd_8_sub_pixel_avg_variance8x16_sse2, 8), SubpelAvgVarianceParams(3, 3, &aom_highbd_8_sub_pixel_avg_variance8x8_sse2, @@ -1660,8 +1680,10 @@ 12), SubpelAvgVarianceParams(3, 5, &aom_highbd_12_sub_pixel_avg_variance8x32_sse2, 12), +#if !CONFIG_UNEVEN_4WAY SubpelAvgVarianceParams(4, 2, &aom_highbd_12_sub_pixel_avg_variance16x4_sse2, 12), +#endif // !CONFIG_UNEVEN_4WAY // 
SubpelAvgVarianceParams(2, 4, // &aom_highbd_12_sub_pixel_avg_variance4x16_sse2, 12), SubpelAvgVarianceParams(6, 4, &aom_highbd_10_sub_pixel_avg_variance64x16_sse2, @@ -1672,8 +1694,10 @@ 10), SubpelAvgVarianceParams(3, 5, &aom_highbd_10_sub_pixel_avg_variance8x32_sse2, 10), +#if !CONFIG_UNEVEN_4WAY SubpelAvgVarianceParams(4, 2, &aom_highbd_10_sub_pixel_avg_variance16x4_sse2, 10), +#endif // !CONFIG_UNEVEN_4WAY // SubpelAvgVarianceParams(2, 4, // &aom_highbd_10_sub_pixel_avg_variance4x16_sse2, 10), SubpelAvgVarianceParams(6, 4, &aom_highbd_8_sub_pixel_avg_variance64x16_sse2, @@ -1684,8 +1708,10 @@ 8), SubpelAvgVarianceParams(3, 5, &aom_highbd_8_sub_pixel_avg_variance8x32_sse2, 8), +#if !CONFIG_UNEVEN_4WAY SubpelAvgVarianceParams(4, 2, &aom_highbd_8_sub_pixel_avg_variance16x4_sse2, 8), +#endif // !CONFIG_UNEVEN_4WAY // SubpelAvgVarianceParams(2, 4, // &aom_highbd_8_sub_pixel_avg_variance4x16_sse2, 8), };
diff --git a/tools/aom_entropy_optimizer.c b/tools/aom_entropy_optimizer.c index 11b187c..c71fb75 100644 --- a/tools/aom_entropy_optimizer.c +++ b/tools/aom_entropy_optimizer.c
@@ -420,6 +420,15 @@ "default_uv_mode_cdf[CFL_ALLOWED_TYPES][INTRA_MODES]" "[CDF_SIZE(UV_INTRA_MODES)]"); +#if CONFIG_EXT_DIR + /* MRL index */ + cts_each_dim[0] = MRL_INDEX_CONTEXTS; + cts_each_dim[1] = MRL_LINE_NUMBER; + optimize_cdf_table(&fc.mrl_index[0][0], probsfile, 2, cts_each_dim, + "static const aom_cdf_prob default_mrl_index_cdf" + "[MRL_INDEX_CONTEXTS][CDF_SIZE(MRL_LINE_NUMBER)]"); +#endif // CONFIG_EXT_DIR + #if CONFIG_CROSS_CHROMA_TX /* cctx type */ cts_each_dim[0] = EXT_TX_SIZES; @@ -461,6 +470,26 @@ "static aom_cdf_prob default_do_ext_partition_cdf" "[PARTITION_STRUCTURE_NUM][NUM_RECT_PARTS][PARTITION_" "CONTEXTS][CDF_SIZE(2)]"); +#if CONFIG_UNEVEN_4WAY + cts_each_dim[0] = PARTITION_STRUCTURE_NUM; + cts_each_dim[1] = NUM_RECT_PARTS; + cts_each_dim[2] = PARTITION_CONTEXTS; + cts_each_dim[3] = 2; + optimize_cdf_table(&fc.do_uneven_4way_partition[0][0][0][0], probsfile, 4, + cts_each_dim, + "static aom_cdf_prob default_do_uneven_4way_partition_cdf" + "[PARTITION_STRUCTURE_NUM][NUM_RECT_PARTS][PARTITION_" + "CONTEXTS][CDF_SIZE(2)]"); + cts_each_dim[0] = PARTITION_STRUCTURE_NUM; + cts_each_dim[1] = NUM_RECT_PARTS; + cts_each_dim[2] = PARTITION_CONTEXTS; + cts_each_dim[3] = NUM_UNEVEN_4WAY_PARTS; + optimize_cdf_table( + &fc.uneven_4way_partition_type[0][0][0][0], probsfile, 4, cts_each_dim, + "static aom_cdf_prob default_uneven_4way_partition_type_cdf" + "[PARTITION_STRUCTURE_NUM][NUM_RECT_PARTS][PARTITION_" + "CONTEXTS][CDF_SIZE(NUM_UNEVEN_4WAY_PARTS)]"); +#endif // CONFIG_UNEVEN_4WAY #else /* block partition */ cts_each_dim[0] = PARTITION_STRUCTURE_NUM; @@ -481,7 +510,7 @@ cts_each_dim[1] = EXT_TX_SIZES; cts_each_dim[2] = INTRA_MODES; cts_each_dim[3] = TX_TYPES; -#if CONFIG_ATC_NEWTXSETS +#if CONFIG_ATC int intra_ext_tx_types_each_ctx[EXT_TX_SETS_INTRA] = { 0, INTRA_TX_SET1 }; optimize_cdf_table_var_modes_4d( &fc.intra_ext_tx[0][0][0][0], probsfile, 4, cts_each_dim, @@ -498,6 +527,19 @@ "[EXT_TX_SIZES][INTRA_MODES][CDF_SIZE(TX_TYPES)]"); #endif +#if 
CONFIG_ATC_DCTX_ALIGNED + cts_each_dim[0] = EXT_TX_SETS_INTER; + cts_each_dim[1] = EOB_TX_CTXS; + cts_each_dim[2] = EXT_TX_SIZES; + cts_each_dim[3] = TX_TYPES; + int inter_ext_tx_types_each_ctx[EXT_TX_SETS_INTER] = { 0, 16, 12, 2 }; + optimize_cdf_table_var_modes_4d( + &fc.inter_ext_tx[0][0][0][0], probsfile, 4, cts_each_dim, + inter_ext_tx_types_each_ctx, + "static const aom_cdf_prob " + "default_inter_ext_tx_cdf[EXT_TX_SETS_INTER][EOB_TX_CTXS]" + "[EXT_TX_SIZES][CDF_SIZE(TX_TYPES)]"); +#else cts_each_dim[0] = EXT_TX_SETS_INTER; cts_each_dim[1] = EXT_TX_SIZES; cts_each_dim[2] = TX_TYPES; @@ -507,6 +549,7 @@ inter_ext_tx_types_each_ctx, "static const aom_cdf_prob default_inter_ext_tx_cdf[EXT_TX_SETS_INTER]" "[EXT_TX_SIZES][CDF_SIZE(TX_TYPES)]"); +#endif // CONFIG_ATC_DCTX_ALIGNED /* Chroma from Luma */ #if CONFIG_IMPROVED_CFL @@ -699,7 +742,7 @@ "default_bawp_cdf[CDF_SIZE(2)]"); #endif /* Intra/inter flag */ -#if CONFIG_CONTEXT_DERIVATION +#if CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT cts_each_dim[0] = INTRA_INTER_SKIP_TXFM_CONTEXTS; cts_each_dim[1] = INTRA_INTER_CONTEXTS; cts_each_dim[2] = 2; @@ -714,7 +757,7 @@ &fc.intra_inter[0][0], probsfile, 2, cts_each_dim, "static const aom_cdf_prob\n" "default_intra_inter_cdf[INTRA_INTER_CONTEXTS][CDF_SIZE(2)]"); -#endif // CONFIG_CONTEXT_DERIVATION +#endif // CONFIG_CONTEXT_DERIVATION && !CONFIG_SKIP_TXFM_OPT /* Single/comp ref flag */ cts_each_dim[0] = COMP_INTER_CONTEXTS; cts_each_dim[1] = 2; @@ -877,7 +920,7 @@ "static const aom_cdf_prob default_intrabc_cdf[CDF_SIZE(2)]"); #endif // CONFIG_NEW_CONTEXT_MODELING -#if CONFIG_BVP_IMPROVEMENT +#if CONFIG_IBC_BV_IMPROVEMENT /* intrabc mode flag*/ cts_each_dim[0] = 2; optimize_cdf_table(&fc.intrabc_mode[0], probsfile, 1, cts_each_dim, @@ -962,6 +1005,17 @@ "av1_default_idtx_sign_cdfs[TOKEN_CDF_Q_CTXS]" "[IDTX_SIGN_CONTEXTS][CDF_SIZE(2)]"); +#if CONFIG_ATC_DCTX_ALIGNED + cts_each_dim[0] = TOKEN_CDF_Q_CTXS; + cts_each_dim[1] = SIG_COEF_CONTEXTS_BOB; + cts_each_dim[2] = 
NUM_BASE_LEVELS + 1; + optimize_cdf_table( + &fc.coeff_base_bob_multi[0][0][0], probsfile, 3, cts_each_dim, + "static const aom_cdf_prob av1_default_coeff_base_bob_multi_cdfs" + "[TOKEN_CDF_Q_CTXS][SIG_COEF_CONTEXTS_BOB]" + "[CDF_SIZE(NUM_BASE_LEVELS + 1)]"); +#endif // CONFIG_ATC_DCTX_ALIGNED + #if CONFIG_CONTEXT_DERIVATION cts_each_dim[0] = TOKEN_CDF_Q_CTXS; cts_each_dim[1] = V_TXB_SKIP_CONTEXTS; @@ -983,6 +1037,62 @@ "[TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES][EOB_COEF_CONTEXTS]" "[CDF_SIZE(2)]"); +#if CONFIG_ATC_DCTX_ALIGNED + cts_each_dim[0] = TOKEN_CDF_Q_CTXS; + cts_each_dim[1] = PLANE_TYPES; + cts_each_dim[2] = EOB_MAX_SYMS - 6; + optimize_cdf_table( + &fc.eob_multi16[0][0][0], probsfile, 3, cts_each_dim, + "static const aom_cdf_prob av1_default_eob_multi16_cdfs" + "[TOKEN_CDF_Q_CTXS][PLANE_TYPES][CDF_SIZE(EOB_MAX_SYMS - 6)]"); + + cts_each_dim[0] = TOKEN_CDF_Q_CTXS; + cts_each_dim[1] = PLANE_TYPES; + cts_each_dim[2] = EOB_MAX_SYMS - 5; + optimize_cdf_table( + &fc.eob_multi32[0][0][0], probsfile, 3, cts_each_dim, + "static const aom_cdf_prob av1_default_eob_multi32_cdfs" + "[TOKEN_CDF_Q_CTXS][PLANE_TYPES][CDF_SIZE(EOB_MAX_SYMS - 5)]"); + + cts_each_dim[0] = TOKEN_CDF_Q_CTXS; + cts_each_dim[1] = PLANE_TYPES; + cts_each_dim[2] = EOB_MAX_SYMS - 4; + optimize_cdf_table( + &fc.eob_multi64[0][0][0], probsfile, 3, cts_each_dim, + "static const aom_cdf_prob av1_default_eob_multi64_cdfs" + "[TOKEN_CDF_Q_CTXS][PLANE_TYPES][CDF_SIZE(EOB_MAX_SYMS - 4)]"); + + cts_each_dim[0] = TOKEN_CDF_Q_CTXS; + cts_each_dim[1] = PLANE_TYPES; + cts_each_dim[2] = EOB_MAX_SYMS - 3; + optimize_cdf_table( + &fc.eob_multi128[0][0][0], probsfile, 3, cts_each_dim, + "static const aom_cdf_prob av1_default_eob_multi128_cdfs" + "[TOKEN_CDF_Q_CTXS][PLANE_TYPES][CDF_SIZE(EOB_MAX_SYMS - 3)]"); + + cts_each_dim[0] = TOKEN_CDF_Q_CTXS; + cts_each_dim[1] = PLANE_TYPES; + cts_each_dim[2] = EOB_MAX_SYMS - 2; + optimize_cdf_table( + &fc.eob_multi256[0][0][0], probsfile, 3, cts_each_dim, + "static const 
aom_cdf_prob av1_default_eob_multi256_cdfs" + "[TOKEN_CDF_Q_CTXS][PLANE_TYPES][CDF_SIZE(EOB_MAX_SYMS - 2)]"); + + cts_each_dim[0] = TOKEN_CDF_Q_CTXS; + cts_each_dim[1] = PLANE_TYPES; + cts_each_dim[2] = EOB_MAX_SYMS - 1; + optimize_cdf_table( + &fc.eob_multi512[0][0][0], probsfile, 3, cts_each_dim, + "static const aom_cdf_prob av1_default_eob_multi512_cdfs" + "[TOKEN_CDF_Q_CTXS][PLANE_TYPES][CDF_SIZE(EOB_MAX_SYMS - 1)]"); + + cts_each_dim[0] = TOKEN_CDF_Q_CTXS; + cts_each_dim[1] = PLANE_TYPES; + cts_each_dim[2] = EOB_MAX_SYMS; + optimize_cdf_table(&fc.eob_multi1024[0][0][0], probsfile, 3, cts_each_dim, + "static const aom_cdf_prob av1_default_eob_multi1024_cdfs" + "[TOKEN_CDF_Q_CTXS][PLANE_TYPES][CDF_SIZE(EOB_MAX_SYMS)]"); +#else cts_each_dim[0] = TOKEN_CDF_Q_CTXS; cts_each_dim[1] = PLANE_TYPES; cts_each_dim[2] = 2; @@ -1038,8 +1148,9 @@ optimize_cdf_table(&fc.eob_multi1024[0][0][0][0], probsfile, 4, cts_each_dim, "static const aom_cdf_prob av1_default_eob_multi1024_cdfs" "[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][CDF_SIZE(11)]"); +#endif // CONFIG_ATC_DCTX_ALIGNED -#if CONFIG_ATC_COEFCODING +#if CONFIG_ATC cts_each_dim[0] = TOKEN_CDF_Q_CTXS; cts_each_dim[1] = TX_SIZES; cts_each_dim[2] = PLANE_TYPES;
diff --git a/tools/convexhull_framework/bin/AOM_CWG_AS_CTC_v9.7.1.xlsm b/tools/convexhull_framework/bin/AOM_CWG_AS_CTC_v9.7.1.xlsm new file mode 100644 index 0000000..ce27081 --- /dev/null +++ b/tools/convexhull_framework/bin/AOM_CWG_AS_CTC_v9.7.1.xlsm Binary files differ
diff --git a/tools/convexhull_framework/bin/AOM_CWG_Regular_CTCv4_v7.3.2.xlsm b/tools/convexhull_framework/bin/AOM_CWG_Regular_CTCv4_v7.3.2.xlsm new file mode 100644 index 0000000..6311b9b --- /dev/null +++ b/tools/convexhull_framework/bin/AOM_CWG_Regular_CTCv4_v7.3.2.xlsm Binary files differ
diff --git a/tools/convexhull_framework/src/AV2CTCProgress.py b/tools/convexhull_framework/src/AV2CTCProgress.py index 97ef091..f422cb5 100644 --- a/tools/convexhull_framework/src/AV2CTCProgress.py +++ b/tools/convexhull_framework/src/AV2CTCProgress.py
@@ -22,6 +22,7 @@ import matplotlib.pyplot as plt from matplotlib.backends.backend_pdf import PdfPages from CalcBDRate import BD_RATE +from itertools import cycle qtys = ["psnr_y", "psnr_u", "psnr_v", "overall_psnr", "ssim_y", "ms_ssim_y", "vmaf", "vmaf_neg", "psnr_hvs","ciede2k", "apsnr_y", "apsnr_u", @@ -50,51 +51,19 @@ csv_files = { "v1.0.0": { - "AI": "D:\\AV2-CTC\\AV2-CTC-v1.0.0-Final\\analysis\\rdresult\\RDResults_aom_av2_AI_Preset_0.csv", - "LD": "D:\\AV2-CTC\\AV2-CTC-v1.0.0-Final\\analysis\\rdresult\\RDResults_aom_av2_LD_Preset_0.csv", - "RA": "D:\\AV2-CTC\\AV2-CTC-v1.0.0-Final\\analysis\\rdresult\\RDResults_aom_av2_RA_Preset_0.csv", - "Still": "D:\\AV2-CTC\\AV2-CTC-v1.0.0-Final\\analysis\\rdresult\\RDResults_aom_av2_STILL_Preset_0.csv", - "AS": "D:\\AV2-CTC\\AV2-CTC-v1.0.0-Final\\analysis\\rdresult\\RDResults_aom_av2_AS_Preset_0.csv", + "AI": "F:\\Av2-CTC-v4-ToolOffTest\\v1.0-alt\\analysis\\rdresult\\RDResults_aom_av2_AI_Preset_0.csv", + "LD": "F:\\Av2-CTC-v4-ToolOffTest\\v1.0-alt\\analysis\\rdresult\\RDResults_aom_av2_LD_Preset_0.csv", + "RA": "F:\\Av2-CTC-v4-ToolOffTest\\v1.0-alt\\analysis\\rdresult\\RDResults_aom_av2_RA_Preset_0.csv", + "Still": "F:\\Av2-CTC-v4-ToolOffTest\\v1.0-alt\\analysis\\rdresult\\RDResults_aom_av2_STILL_Preset_0.csv", + "AS": "F:\\Av2-CTC-v4-ToolOffTest\\v1.0-alt\\analysis\\rdresult\\RDResults_aom_av2_AS_Preset_0.csv", }, - "v1.0.1": + "v4.0.0": { - "AI": "D:\\AV2-CTC\\AV2-CTC-v1.0.1\\analysis\\rdresult\\RDResults_aom_av2_AI_Preset_0.csv", - "LD": "D:\\AV2-CTC\\AV2-CTC-v1.0.1\\analysis\\rdresult\\RDResults_aom_av2_LD_Preset_0.csv", - "RA": "D:\\AV2-CTC\\AV2-CTC-v1.0.1\\analysis\\rdresult\\RDResults_aom_av2_RA_Preset_0.csv", - "Still": "D:\\AV2-CTC\\AV2-CTC-v1.0.1\\analysis\\rdresult\\RDResults_aom_av2_STILL_Preset_0.csv", - "AS": "D:\\AV2-CTC\\AV2-CTC-v1.0.1\\analysis\\rdresult\\RDResults_aom_av2_AS_Preset_0.csv", - }, - "B034": - { - "AI": "D:\\AV2-CTC\\AV2-CTC-B034\\analysis\\rdresult\\RDResults_aom_av2_AI_Preset_0.csv", - 
"LD": "D:\\AV2-CTC\\AV2-CTC-B034\\analysis\\rdresult\\RDResults_aom_av2_LD_Preset_0.csv", - "RA": "D:\\AV2-CTC\\AV2-CTC-B034\\analysis\\rdresult\\RDResults_aom_av2_RA_Preset_0.csv", - "Still": "D:\\AV2-CTC\\AV2-CTC-B034\\analysis\\rdresult\\RDResults_aom_av2_STILL_Preset_0.csv", - "AS": "D:\\AV2-CTC\\AV2-CTC-B034\\analysis\\rdresult\\RDResults_aom_av2_AS_Preset_0.csv", - }, - "ext-quant": - { - "AI": "D:\\AV2-CTC\\AV2-CTC-ExtQuant\\analysis\\rdresult\\RDResults_aom_av2_AI_Preset_0.csv", - "LD": "D:\\AV2-CTC\\AV2-CTC-ExtQuant\\analysis\\rdresult\\RDResults_aom_av2_LD_Preset_0.csv", - "RA": "D:\\AV2-CTC\\AV2-CTC-ExtQuant\\analysis\\rdresult\\RDResults_aom_av2_RA_Preset_0.csv", - "Still": "D:\\AV2-CTC\\AV2-CTC-ExtQuant\\analysis\\rdresult\\RDResults_aom_av2_STILL_Preset_0.csv", - "AS": "D:\\AV2-CTC\\AV2-CTC-ExtQuant\\analysis\\rdresult\\RDResults_aom_av2_AS_Preset_0.csv", - }, - "sdp-off": - { - "AI": "D:\\AV2-CTC\\AV2-CTC-SDP-OFF\\analysis\\rdresult\\RDResults_aom_av2_AI_Preset_0.csv", - "LD": "D:\\AV2-CTC\\AV2-CTC-SDP-OFF\\analysis\\rdresult\\RDResults_aom_av2_LD_Preset_0.csv", - "RA": "D:\\AV2-CTC\\AV2-CTC-SDP-OFF\\analysis\\rdresult\\RDResults_aom_av2_RA_Preset_0.csv", - "Still": "D:\\AV2-CTC\\AV2-CTC-SDP-OFF\\analysis\\rdresult\\RDResults_aom_av2_STILL_Preset_0.csv", - "AS": "D:\\AV2-CTC\\AV2-CTC-SDP-OFF\\analysis\\rdresult\\RDResults_aom_av2_AS_Preset_0.csv", - }, - "sdp-on": - { - "AI": "D:\\AV2-CTC\\AV2-CTC-SDP-ON\\analysis\\rdresult\\RDResults_aom_av2_AI_Preset_0.csv", - "LD": "D:\\AV2-CTC\\AV2-CTC-SDP-ON\\analysis\\rdresult\\RDResults_aom_av2_LD_Preset_0.csv", - "RA": "D:\\AV2-CTC\\AV2-CTC-SDP-ON\\analysis\\rdresult\\RDResults_aom_av2_RA_Preset_0.csv", - "Still": "D:\\AV2-CTC\\AV2-CTC-SDP-ON\\analysis\\rdresult\\RDResults_aom_av2_STILL_Preset_0.csv", - "AS": "D:\\AV2-CTC\\AV2-CTC-SDP-ON\\analysis\\rdresult\\RDResults_aom_av2_AS_Preset_0.csv", + "AI": "F:\\Av2-CTC-v4-ToolOffTest\\v4.0\\analysis\\rdresult\\RDResults_aom_av2_AI_Preset_0.csv", + "LD": 
"F:\\Av2-CTC-v4-ToolOffTest\\v4.0\\analysis\\rdresult\\RDResults_aom_av2_LD_Preset_0.csv", + "RA": "F:\\Av2-CTC-v4-ToolOffTest\\v4.0\\analysis\\rdresult\\RDResults_aom_av2_RA_Preset_0.csv", + "Still": "F:\\Av2-CTC-v4-ToolOffTest\\v4.0\\analysis\\rdresult\\RDResults_aom_av2_STILL_Preset_0.csv", + "AS": "F:\\Av2-CTC-v4-ToolOffTest\\v4.0\\analysis\\rdresult\\RDResults_aom_av2_AS_Preset_0.csv", }, } @@ -108,15 +77,7 @@ formats = { "v1.0.0": ['r', '-', 'o'], - "v1.0.1": ['g', '-', '*'], - "B034": ['k', '-', '^'], - "ext-quant": ['r', '-', '*'], - "sdp-off": ['b', '-', '+'], - "sdp-on": ['r', '-', '<'], - "HM_CloseGOP": ['r', '-', 'o'], - "HM_OpenGOP": ['b', '-', '+'], - "AV1_CloseGOP": ['g', '-', '>'], - "AV1_OpenGOP": ['k', '-', '*'], + "v4.0.0": ['g', '-', '*'], } AS_formats = { @@ -130,6 +91,8 @@ anchor = "v1.0.0" rd_curve_pdf = "rdcurve.pdf" +colors = cycle('bgrycmk') +markers = cycle('o*^+<x') def WriteSheet(csv_file, sht, start_row): csv = open(csv_file, 'rt') @@ -179,7 +142,7 @@ wb.save(xls_file) -def DrawRDCurve(records, anchor, pdf): +def DrawIndividualRDCurve(records, anchor, pdf): with PdfPages(pdf) as export_pdf: for cfg in records[anchor].keys(): videos = records[anchor][cfg].keys() @@ -212,7 +175,7 @@ Int_RDPoints[tag] += int_rdpnts plot_rd_curve(br[res], apsnr[res], "overall_apsnr", res, "bitrate(Kbps)", AS_formats[res][0], AS_formats[res][1], AS_formats[res][2]) - plt.legend() + plt.legend(loc='lower right') plt.grid(True) export_pdf.savefig() plt.close() @@ -224,9 +187,9 @@ lower, upper = convex_hull(Int_RDPoints[tag]) br = [h[0] for h in upper] apsnr = [h[1] for h in upper] - plot_rd_curve(br, apsnr, "overall_apsnr", tag, "bitrate(Kbps)", + plot_rd_curve(br, apsnr, "overall_apsnr(dB)", tag, "bitrate(kbps)", formats[tag][0], formats[tag][1], formats[tag][2]) - plt.legend() + plt.legend(loc='lower right') plt.grid(True) export_pdf.savefig() plt.close() @@ -238,13 +201,102 @@ record = records[tag][cfg][video] br = [record[key].bitrate for key in 
record.keys()] apsnr = [record[key].overall_apsnr for key in record.keys()] - plot_rd_curve(br, apsnr, "overall_apsnr", tag, "bitrate(Kbps)", + plot_rd_curve(br, apsnr, "overall_apsnr(dB)", tag, "bitrate(kbps)", formats[tag][0], formats[tag][1], formats[tag][2]) - plt.legend() + plt.legend(loc='lower right') plt.grid(True) export_pdf.savefig() plt.close() + +def DrawCombinedRDCurve(records): + pdf = "combined_rdcurve.pdf" + with PdfPages(pdf) as export_pdf: + for tag in csv_files.keys(): + for cfg in csv_files[tag].keys(): + videos = records[tag][cfg].keys() + plt.figure(figsize=(30, 30)) + plt.suptitle("%s : %s" % (tag, cfg)) + + for video in videos: + short_name = video.split('_')[0] + if cfg == "AS": + Int_RDPoints = [] + record = records[tag][cfg][video] + br = {}; + apsnr = {} + for key in record.keys(): + res = re.split('_', key)[0] + if res not in br.keys(): + br[res] = [] + apsnr[res] = [] + br[res].append(record[key].bitrate) + apsnr[res].append(record[key].overall_apsnr) + + for res in br.keys(): + rdpnts = [(brt, qty) for brt, qty in zip(br[res], apsnr[res])] + if UsePCHIPInterpolation: + int_rdpnts = Interpolate_PCHIP(rdpnts, QPs['AS'][:], InterpolatePieces, True) + else: + int_rdpnts = Interpolate_Bilinear(rdpnts, QPs['AS'][:], InterpolatePieces, True) + Int_RDPoints += int_rdpnts + + # draw convex hull + lower, upper = convex_hull(Int_RDPoints) + br = [h[0] for h in upper] + apsnr = [h[1] for h in upper] + plot_rd_curve(br, apsnr, "overall_apsnr(dB)", short_name, "bitrate(kbps)", + next(colors), '-', next(markers)) + else: + record = records[tag][cfg][video] + br = [record[key].bitrate for key in record.keys()] + apsnr = [record[key].overall_apsnr for key in record.keys()] + plot_rd_curve(br, apsnr, "overall_apsnr(dB)", short_name, "bitrate(kbps)", + next(colors), '-', next(markers)) + + plt.legend(loc='lower right') + plt.grid(True) + export_pdf.savefig() + plt.close() + +def DrawCombinedRuntime(records): + pdf = "combined_runtime.pdf" + with 
PdfPages(pdf) as export_pdf: + for tag in csv_files.keys(): + for cfg in csv_files[tag].keys(): + videos = records[tag][cfg].keys() + plt.figure(figsize=(30, 30)) + plt.suptitle("%s : %s" % (tag, cfg)) + + for video in videos: + short_name = video.split('_')[0] + if cfg == "AS": + record = records[tag][cfg][video] + br = {}; + enc_time = {} + for key in record.keys(): + res = re.split('_', key)[0] + if res not in br.keys(): + br[res] = [] + enc_time[res] = [] + br[res].append(record[key].bitrate) + enc_time[res].append(record[key].enc_time) + + for res in br.keys(): + plot_rd_curve(br[res], enc_time[res], "enc_time(s)", short_name+'_'+res, "bitrate(kbps)", + next(colors), '-', next(markers)) + else: + record = records[tag][cfg][video] + br = [record[key].bitrate for key in record.keys()] + enc_time = [record[key].enc_time for key in record.keys()] + plot_rd_curve(br, enc_time, "enc_time(s)", short_name, "bitrate(kbps)", + next(colors), '-', next(markers)) + + plt.legend(loc='lower right') + plt.grid(True) + export_pdf.savefig() + plt.close() + def GetQty(record, qty): qtys = [] for key in record.keys(): @@ -399,11 +451,13 @@ records[tag][test_cfg] = ParseCSVFile(csv_files[tag][test_cfg]) FillXlsFile() - DrawRDCurve(records, anchor, rd_curve_pdf) + DrawCombinedRDCurve(records) + DrawCombinedRuntime(records) + DrawIndividualRDCurve(records, anchor, rd_curve_pdf) #Calculate BDRate and collect total time for test_cfg in csv_files[anchor].keys(): (bdrate, seq_time, seq_instr) = CalcFullBDRate(test_cfg) #Write output summary xls file - filename = "Summary-HEVC-AV1-%s"%test_cfg + filename = "Summary-AV1-vs-AV2_v4.0_%s"%test_cfg WriteSummaryXlsFile(bdrate, seq_time, seq_instr, filename)
diff --git a/tools/convexhull_framework/src/AV2CTCTest.py b/tools/convexhull_framework/src/AV2CTCTest.py index f8fcc50..42ad0f7 100644 --- a/tools/convexhull_framework/src/AV2CTCTest.py +++ b/tools/convexhull_framework/src/AV2CTCTest.py
@@ -119,13 +119,10 @@ csv_file, perframe_csvfile = GetRDResultCsvFile(EncodeMethod, CodecName, EncodePreset, test_cfg) csv = open(csv_file, 'wt') - # "TestCfg,EncodeMethod,CodecName,EncodePreset,Class,OrigRes,Name,FPS,Bit Depth,CodedRes,QP,Bitrate(kbps)") + # "TestCfg,EncodeMethod,CodecName,EncodePreset,Class,OrigRes,Name,FPS,BitDepth,CodedRes,QP,Bitrate(kbps)") csv.write("TestCfg,EncodeMethod,CodecName,EncodePreset,Class,Name,OrigRes,FPS,"\ - "Bit Depth,CodecRes,QP,") - if (test_cfg == "STILL"): - csv.write("FileSize(bytes)") - else: - csv.write("Bitrate(kbps)") + "BitDepth,CodedRes,QP,") + csv.write("Bitrate(kbps)") for qty in QualityList: csv.write(',' + qty) csv.write(",EncT[s],DecT[s]") @@ -138,7 +135,7 @@ perframe_csv = open(perframe_csvfile, 'wt') perframe_csv.write("TestCfg,EncodeMethod,CodecName,EncodePreset,Class,Name,Res,FPS," \ - "Bit Depth,QP,POC,FrameType,Level,qindex,FrameSize") + "BitDepth,QP,POC,FrameType,Level,qindex,FrameSize") for qty in QualityList: if (qty != "Overall_PSNR" and qty != "Overall_APSNR" and not qty.startswith("APSNR")): perframe_csv.write(',' + qty)
diff --git a/tools/convexhull_framework/src/AV2CTCVideo.py b/tools/convexhull_framework/src/AV2CTCVideo.py index 400dfdf..36cb305 100644 --- a/tools/convexhull_framework/src/AV2CTCVideo.py +++ b/tools/convexhull_framework/src/AV2CTCVideo.py
@@ -255,7 +255,7 @@ "/A1_4k_720p/NocturneDance_1280x720p_10bit_60fps.y4m", "/A1_4k_540p/NocturneDance_960x540p_10bit_60fps.y4m", "/A1_4k_360p/NocturneDance_640x360p_10bit_60fps.y4m"], -"PierSeaSide_3840x2160_2997fps_10bit_420" :["/A1_4k_1440p/PierSeaSide_2560x1440_2997fps_10bit_420_v2.y4m", +"PierSeaSide_3840x2160_2997fps_10bit_420_v2" :["/A1_4k_1440p/PierSeaSide_2560x1440_2997fps_10bit_420_v2.y4m", "/A1_4k_1080p/PierSeaSide_1920x1080_2997fps_10bit_420_v2.y4m", "/A1_4k_720p/PierSeaSide_1280x720_2997fps_10bit_420_v2.y4m", "/A1_4k_540p/PierSeaSide_960x540_2997fps_10bit_420_v2.y4m",
diff --git a/tools/convexhull_framework/src/Config.py b/tools/convexhull_framework/src/Config.py index 47b6ba0..5482c2c 100644 --- a/tools/convexhull_framework/src/Config.py +++ b/tools/convexhull_framework/src/Config.py
@@ -46,8 +46,8 @@ APSNR_V_WEIGHT = 1.0 if CTC_VERSION == '4.0': - CTC_RegularXLSTemplate = os.path.join(BinPath, 'AOM_CWG_Regular_CTCv4_v7.3.xlsm') - CTC_ASXLSTemplate = os.path.join(BinPath, 'AOM_CWG_AS_CTC_v9.8.xlsm') + CTC_RegularXLSTemplate = os.path.join(BinPath, 'AOM_CWG_Regular_CTCv4_v7.3.2.xlsm') + CTC_ASXLSTemplate = os.path.join(BinPath, 'AOM_CWG_AS_CTC_v9.7.1.xlsm') elif CTC_VERSION == '3.0': CTC_RegularXLSTemplate = os.path.join(BinPath, 'AOM_CWG_Regular_CTC_v7.2.xlsm') CTC_ASXLSTemplate = os.path.join(BinPath, 'AOM_CWG_AS_CTC_v9.7.xlsm')
diff --git a/tools/convexhull_framework/src/ConvexHullTest.py b/tools/convexhull_framework/src/ConvexHullTest.py index 72bcf94..abbc931 100755 --- a/tools/convexhull_framework/src/ConvexHullTest.py +++ b/tools/convexhull_framework/src/ConvexHullTest.py
@@ -284,7 +284,7 @@ quality, perframe_vmaf_log = GatherQualityMetrics(reconyuv, Path_QualityLog) qualities.append(quality) - #"TestCfg,EncodeMethod,CodecName,EncodePreset,Class,OrigRes,Name,FPS,Bit Depth,CodedRes,QP,Bitrate(kbps)") + #"TestCfg,EncodeMethod,CodecName,EncodePreset,Class,OrigRes,Name,FPS,BitDepth,CodedRes,QP,Bitrate(kbps)") csv.write("%s,%s,%s,%s,%s,%s,%s,%.4f,%d,%s,%d,%f"% ("AS", EncodeMethod, CodecName, EncodePreset, clip.file_class,contentname, str(clip.width)+"x"+str(clip.height), clip.fps,clip.bit_depth, @@ -460,7 +460,7 @@ csv_file, perframe_csvfile = GetRDResultCsvFile(EncodeMethod, CodecName, EncodePreset, "AS") csv = open(csv_file, "wt") csv.write("TestCfg,EncodeMethod,CodecName,EncodePreset,Class,Name,OrigRes,FPS," \ - "Bit Depth,CodedRes,QP,Bitrate(kbps)") + "BitDepth,CodedRes,QP,Bitrate(kbps)") for qty in QualityList: csv.write(',' + qty) csv.write(",EncT[s],DecT[s]") @@ -473,7 +473,7 @@ perframe_csv = open(perframe_csvfile, 'wt') perframe_csv.write("TestCfg,EncodeMethod,CodecName,EncodePreset,Class,Name,Res,FPS," \ - "Bit Depth,QP,POC,FrameType,Level,qindex,FrameSize") + "BitDepth,QP,POC,FrameType,Level,qindex,FrameSize") for qty in QualityList: if not qty.startswith("APSNR"): perframe_csv.write(',' + qty)
diff --git a/tools/convexhull_framework/src/Utils.py b/tools/convexhull_framework/src/Utils.py index 7d19e34..418ff0f 100755 --- a/tools/convexhull_framework/src/Utils.py +++ b/tools/convexhull_framework/src/Utils.py
@@ -13,6 +13,7 @@ import os import re import sys +from csv import DictReader import subprocess import time import logging @@ -81,6 +82,7 @@ apsnr_u = 0.0 apsnr_v = 0.0 overall_apsnr = 0.0 + cambi = 0.0 enc_time = 0.0 dec_time = 0.0 enc_instr = 0.0 @@ -91,7 +93,7 @@ def __init__(self, test_cfg, encode_mode , codec_name, encode_preset, file_class, file_name, orig_res, fps, bit_depth, coded_res, qp, bitrate, psnr_y, psnr_u, psnr_v, ssim_y, ms_ssim_y, vmaf_y, vmaf_y_neg, psnr_hvs, ciede2k, apsnr_y, apsnr_u, - apsnr_v, enc_time, dec_time, enc_instr, dec_instr, enc_cycle, dec_cycle): + apsnr_v, cambi, enc_time, dec_time, enc_instr, dec_instr, enc_cycle, dec_cycle): self.test_cfg = test_cfg self.encode_mode = encode_mode @@ -124,6 +126,7 @@ APSNR_U_WEIGHT/pow(10, (self.apsnr_u / 10)) + APSNR_V_WEIGHT/pow(10, (self.apsnr_v / 10))) / (APSNR_Y_WEIGHT + APSNR_U_WEIGHT + APSNR_V_WEIGHT))) + self.cambi = float(cambi) self.enc_time = float(enc_time) self.dec_time = float(dec_time) self.enc_instr = float(enc_instr) @@ -133,20 +136,22 @@ def ParseCSVFile(csv_file): records = {} - csv = open(csv_file, 'rt') - for line in csv: - if not line.startswith('TestCfg'): - words = re.split(',', line.strip()) - record = Record(words[0], words[1], words[2], words[3], words[4], words[5], words[6], words[7], words[8], - words[9], words[10], words[11], words[12], words[13], words[14], words[15], words[16], - words[17],words[18], words[19], words[20], words[21], words[22], words[23], words[24], - words[25], words[26],words[27], words[28],words[29]) - key = record.coded_res + "_" + record.qp - if record.file_name not in records.keys(): - records[record.file_name] = {} - records[record.file_name][key] = record + with open(csv_file, 'r') as f: + list_of_data = list(DictReader(f)) + for data in list_of_data: + key = data['CodedRes'] + "_" + data['QP'] + name = data['Name'] + record = Record(data['TestCfg'], data['EncodeMethod'], data['CodecName'], data['EncodePreset'], + data['Class'], data['Name'], 
data['OrigRes'], data['FPS'], data['BitDepth'], + data['CodedRes'], data['QP'], data['Bitrate(kbps)'], data['PSNR_Y'], data['PSNR_U'], + data['PSNR_V'], data['SSIM_Y(dB)'], data['MS-SSIM_Y(dB)'], data['VMAF_Y'], + data['VMAF_Y-NEG'], data['PSNR-HVS'], data['CIEDE2000'], data['APSNR_Y'], + data['APSNR_U'], data['APSNR_V'], data['CAMBI'], data['EncT[s]'], data['DecT[s]'], + data['EncInstr'], data['DecInstr'], data['EncCycles'], data['DecCycles']) - csv.close() + if name not in records.keys(): + records[name] = {} + records[name][key] = record return records def Cleanfolder(folder):