| /* |
| * Copyright (c) 2016, Alliance for Open Media. All rights reserved |
| * |
| * This source code is subject to the terms of the BSD 2 Clause License and |
| * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
| * was not distributed with this source code in the LICENSE file, you can |
| * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
| * Media Patent License 1.0 was not distributed with this source code in the |
| * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
| */ |
| |
| #include <assert.h> |
| #include <math.h> |
| |
| #include "./aom_dsp_rtcd.h" |
| #include "./av1_rtcd.h" |
| |
| #include "aom_dsp/aom_dsp_common.h" |
| #include "aom_dsp/blend.h" |
| #include "aom_mem/aom_mem.h" |
| #include "aom_ports/mem.h" |
| #include "aom_ports/system_state.h" |
| |
| #if CONFIG_CFL |
| #include "av1/common/cfl.h" |
| #endif |
| #include "av1/common/common.h" |
| #include "av1/common/common_data.h" |
| #include "av1/common/entropy.h" |
| #include "av1/common/entropymode.h" |
| #include "av1/common/idct.h" |
| #include "av1/common/mvref_common.h" |
| #include "av1/common/obmc.h" |
| #include "av1/common/pred_common.h" |
| #include "av1/common/quant_common.h" |
| #include "av1/common/reconinter.h" |
| #include "av1/common/reconintra.h" |
| #include "av1/common/scan.h" |
| #include "av1/common/seg_common.h" |
| #if CONFIG_LV_MAP |
| #include "av1/common/txb_common.h" |
| #endif |
| #include "av1/common/warped_motion.h" |
| |
| #include "av1/encoder/aq_variance.h" |
| #include "av1/encoder/av1_quantize.h" |
| #include "av1/encoder/cost.h" |
| #include "av1/encoder/encodemb.h" |
| #include "av1/encoder/encodemv.h" |
| #include "av1/encoder/encoder.h" |
| #if CONFIG_LV_MAP |
| #include "av1/encoder/encodetxb.h" |
| #endif |
| #include "av1/encoder/hybrid_fwd_txfm.h" |
| #include "av1/encoder/mcomp.h" |
| #include "av1/encoder/palette.h" |
| #include "av1/encoder/ratectrl.h" |
| #include "av1/encoder/rd.h" |
| #include "av1/encoder/rdopt.h" |
| #include "av1/encoder/tokenize.h" |
| #if CONFIG_EXT_TX |
| #include "av1/encoder/tx_prune_model_weights.h" |
| #endif // CONFIG_EXT_TX |
| |
| #if CONFIG_DUAL_FILTER |
| #define DUAL_FILTER_SET_SIZE (SWITCHABLE_FILTERS * SWITCHABLE_FILTERS) |
| #if USE_EXTRA_FILTER |
| static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = { |
| { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 }, { 1, 0 }, { 1, 1 }, |
| { 1, 2 }, { 1, 3 }, { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 }, |
| { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 }, |
| }; |
| #else // USE_EXTRA_FILTER |
| static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = { |
| { 0, 0 }, { 0, 1 }, { 0, 2 }, { 1, 0 }, { 1, 1 }, |
| { 1, 2 }, { 2, 0 }, { 2, 1 }, { 2, 2 }, |
| }; |
| #endif // USE_EXTRA_FILTER |
| #endif // CONFIG_DUAL_FILTER |
| |
| #define LAST_FRAME_MODE_MASK \ |
| ((1 << INTRA_FRAME) | (1 << LAST2_FRAME) | (1 << LAST3_FRAME) | \ |
| (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF2_FRAME) | \ |
| (1 << ALTREF_FRAME)) |
| #define LAST2_FRAME_MODE_MASK \ |
| ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST3_FRAME) | \ |
| (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF2_FRAME) | \ |
| (1 << ALTREF_FRAME)) |
| #define LAST3_FRAME_MODE_MASK \ |
| ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \ |
| (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF2_FRAME) | \ |
| (1 << ALTREF_FRAME)) |
| #define GOLDEN_FRAME_MODE_MASK \ |
| ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \ |
| (1 << LAST3_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF2_FRAME) | \ |
| (1 << ALTREF_FRAME)) |
| #define BWDREF_FRAME_MODE_MASK \ |
| ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \ |
| (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << ALTREF2_FRAME) | \ |
| (1 << ALTREF_FRAME)) |
| #define ALTREF2_FRAME_MODE_MASK \ |
| ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \ |
| (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | \ |
| (1 << ALTREF_FRAME)) |
| #define ALTREF_FRAME_MODE_MASK \ |
| ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \ |
| (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | \ |
| (1 << ALTREF2_FRAME)) |
| |
| #if CONFIG_EXT_COMP_REFS |
| #define SECOND_REF_FRAME_MASK \ |
| ((1 << ALTREF_FRAME) | (1 << ALTREF2_FRAME) | (1 << BWDREF_FRAME) | \ |
| (1 << GOLDEN_FRAME) | (1 << LAST2_FRAME) | 0x01) |
| #else // !CONFIG_EXT_COMP_REFS |
| #define SECOND_REF_FRAME_MASK \ |
| ((1 << ALTREF_FRAME) | (1 << ALTREF2_FRAME) | (1 << BWDREF_FRAME) | 0x01) |
| #endif // CONFIG_EXT_COMP_REFS |
| |
| #define MIN_EARLY_TERM_INDEX 3 |
| #define NEW_MV_DISCOUNT_FACTOR 8 |
| |
| #if CONFIG_EXT_INTRA |
| #define ANGLE_SKIP_THRESH 10 |
| #define FILTER_FAST_SEARCH 1 |
| #endif // CONFIG_EXT_INTRA |
| |
| // Setting this to 1 will disable trellis optimization within the |
| // transform search. Trellis optimization will still be applied |
| // in the final encode. |
| #ifndef DISABLE_TRELLISQ_SEARCH |
| #define DISABLE_TRELLISQ_SEARCH 0 |
| #endif |
| |
| static const double ADST_FLIP_SVM[8] = { |
| /* vertical */ |
| -6.6623, -2.8062, -3.2531, 3.1671, |
| /* horizontal */ |
| -7.7051, -3.2234, -3.6193, 3.4533 |
| }; |
| |
| typedef struct { |
| PREDICTION_MODE mode; |
| MV_REFERENCE_FRAME ref_frame[2]; |
| } MODE_DEFINITION; |
| |
| typedef struct { MV_REFERENCE_FRAME ref_frame[2]; } REF_DEFINITION; |
| |
| struct rdcost_block_args { |
| const AV1_COMP *cpi; |
| MACROBLOCK *x; |
| ENTROPY_CONTEXT t_above[2 * MAX_MIB_SIZE]; |
| ENTROPY_CONTEXT t_left[2 * MAX_MIB_SIZE]; |
| RD_STATS rd_stats; |
| int64_t this_rd; |
| int64_t best_rd; |
| int exit_early; |
| int use_fast_coef_costing; |
| }; |
| |
| #define LAST_NEW_MV_INDEX 6 |
| static const MODE_DEFINITION av1_mode_order[MAX_MODES] = { |
| { NEARESTMV, { LAST_FRAME, NONE_FRAME } }, |
| { NEARESTMV, { LAST2_FRAME, NONE_FRAME } }, |
| { NEARESTMV, { LAST3_FRAME, NONE_FRAME } }, |
| { NEARESTMV, { BWDREF_FRAME, NONE_FRAME } }, |
| { NEARESTMV, { ALTREF2_FRAME, NONE_FRAME } }, |
| { NEARESTMV, { ALTREF_FRAME, NONE_FRAME } }, |
| { NEARESTMV, { GOLDEN_FRAME, NONE_FRAME } }, |
| |
| { DC_PRED, { INTRA_FRAME, NONE_FRAME } }, |
| |
| { NEWMV, { LAST_FRAME, NONE_FRAME } }, |
| { NEWMV, { LAST2_FRAME, NONE_FRAME } }, |
| { NEWMV, { LAST3_FRAME, NONE_FRAME } }, |
| { NEWMV, { BWDREF_FRAME, NONE_FRAME } }, |
| { NEWMV, { ALTREF2_FRAME, NONE_FRAME } }, |
| { NEWMV, { ALTREF_FRAME, NONE_FRAME } }, |
| { NEWMV, { GOLDEN_FRAME, NONE_FRAME } }, |
| |
| { NEARMV, { LAST_FRAME, NONE_FRAME } }, |
| { NEARMV, { LAST2_FRAME, NONE_FRAME } }, |
| { NEARMV, { LAST3_FRAME, NONE_FRAME } }, |
| { NEARMV, { BWDREF_FRAME, NONE_FRAME } }, |
| { NEARMV, { ALTREF2_FRAME, NONE_FRAME } }, |
| { NEARMV, { ALTREF_FRAME, NONE_FRAME } }, |
| { NEARMV, { GOLDEN_FRAME, NONE_FRAME } }, |
| |
| { ZEROMV, { LAST_FRAME, NONE_FRAME } }, |
| { ZEROMV, { LAST2_FRAME, NONE_FRAME } }, |
| { ZEROMV, { LAST3_FRAME, NONE_FRAME } }, |
| { ZEROMV, { BWDREF_FRAME, NONE_FRAME } }, |
| { ZEROMV, { ALTREF2_FRAME, NONE_FRAME } }, |
| { ZEROMV, { GOLDEN_FRAME, NONE_FRAME } }, |
| { ZEROMV, { ALTREF_FRAME, NONE_FRAME } }, |
| |
// TODO(zoeliu): May need to reconsider the order of the modes to check
| |
| #if CONFIG_COMPOUND_SINGLEREF |
| // Single ref comp mode |
| { SR_NEAREST_NEARMV, { LAST_FRAME, NONE_FRAME } }, |
| { SR_NEAREST_NEARMV, { LAST2_FRAME, NONE_FRAME } }, |
| { SR_NEAREST_NEARMV, { LAST3_FRAME, NONE_FRAME } }, |
| { SR_NEAREST_NEARMV, { BWDREF_FRAME, NONE_FRAME } }, |
| { SR_NEAREST_NEARMV, { GOLDEN_FRAME, NONE_FRAME } }, |
| { SR_NEAREST_NEARMV, { ALTREF_FRAME, NONE_FRAME } }, |
| |
| /* |
| { SR_NEAREST_NEWMV, { LAST_FRAME, NONE_FRAME } }, |
| { SR_NEAREST_NEWMV, { LAST2_FRAME, NONE_FRAME } }, |
| { SR_NEAREST_NEWMV, { LAST3_FRAME, NONE_FRAME } }, |
| { SR_NEAREST_NEWMV, { BWDREF_FRAME, NONE_FRAME } }, |
| { SR_NEAREST_NEWMV, { GOLDEN_FRAME, NONE_FRAME } }, |
| { SR_NEAREST_NEWMV, { ALTREF_FRAME, NONE_FRAME } },*/ |
| |
| { SR_NEAR_NEWMV, { LAST_FRAME, NONE_FRAME } }, |
| { SR_NEAR_NEWMV, { LAST2_FRAME, NONE_FRAME } }, |
| { SR_NEAR_NEWMV, { LAST3_FRAME, NONE_FRAME } }, |
| { SR_NEAR_NEWMV, { BWDREF_FRAME, NONE_FRAME } }, |
| { SR_NEAR_NEWMV, { GOLDEN_FRAME, NONE_FRAME } }, |
| { SR_NEAR_NEWMV, { ALTREF_FRAME, NONE_FRAME } }, |
| |
| { SR_ZERO_NEWMV, { LAST_FRAME, NONE_FRAME } }, |
| { SR_ZERO_NEWMV, { LAST2_FRAME, NONE_FRAME } }, |
| { SR_ZERO_NEWMV, { LAST3_FRAME, NONE_FRAME } }, |
| { SR_ZERO_NEWMV, { BWDREF_FRAME, NONE_FRAME } }, |
| { SR_ZERO_NEWMV, { GOLDEN_FRAME, NONE_FRAME } }, |
| { SR_ZERO_NEWMV, { ALTREF_FRAME, NONE_FRAME } }, |
| |
| { SR_NEW_NEWMV, { LAST_FRAME, NONE_FRAME } }, |
| { SR_NEW_NEWMV, { LAST2_FRAME, NONE_FRAME } }, |
| { SR_NEW_NEWMV, { LAST3_FRAME, NONE_FRAME } }, |
| { SR_NEW_NEWMV, { BWDREF_FRAME, NONE_FRAME } }, |
| { SR_NEW_NEWMV, { GOLDEN_FRAME, NONE_FRAME } }, |
| { SR_NEW_NEWMV, { ALTREF_FRAME, NONE_FRAME } }, |
| #endif // CONFIG_COMPOUND_SINGLEREF |
| |
| { NEAREST_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } }, |
| { NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } }, |
| { NEAREST_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } }, |
| { NEAREST_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } }, |
| { NEAREST_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } }, |
| { NEAREST_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } }, |
| { NEAREST_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } }, |
| { NEAREST_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } }, |
| { NEAREST_NEARESTMV, { LAST_FRAME, ALTREF2_FRAME } }, |
| { NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF2_FRAME } }, |
| { NEAREST_NEARESTMV, { LAST3_FRAME, ALTREF2_FRAME } }, |
| { NEAREST_NEARESTMV, { GOLDEN_FRAME, ALTREF2_FRAME } }, |
| |
| #if CONFIG_EXT_COMP_REFS |
| { NEAREST_NEARESTMV, { LAST_FRAME, LAST2_FRAME } }, |
| { NEAREST_NEARESTMV, { LAST_FRAME, LAST3_FRAME } }, |
| { NEAREST_NEARESTMV, { LAST_FRAME, GOLDEN_FRAME } }, |
| { NEAREST_NEARESTMV, { BWDREF_FRAME, ALTREF_FRAME } }, |
| #endif // CONFIG_EXT_COMP_REFS |
| |
| { PAETH_PRED, { INTRA_FRAME, NONE_FRAME } }, |
| |
| { SMOOTH_PRED, { INTRA_FRAME, NONE_FRAME } }, |
| #if CONFIG_SMOOTH_HV |
| { SMOOTH_V_PRED, { INTRA_FRAME, NONE_FRAME } }, |
| { SMOOTH_H_PRED, { INTRA_FRAME, NONE_FRAME } }, |
| #endif // CONFIG_SMOOTH_HV |
| |
| { NEAR_NEARMV, { LAST_FRAME, ALTREF_FRAME } }, |
| { NEW_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } }, |
| { NEAREST_NEWMV, { LAST_FRAME, ALTREF_FRAME } }, |
| { NEW_NEARMV, { LAST_FRAME, ALTREF_FRAME } }, |
| { NEAR_NEWMV, { LAST_FRAME, ALTREF_FRAME } }, |
| { NEW_NEWMV, { LAST_FRAME, ALTREF_FRAME } }, |
| { ZERO_ZEROMV, { LAST_FRAME, ALTREF_FRAME } }, |
| |
| { NEAR_NEARMV, { LAST2_FRAME, ALTREF_FRAME } }, |
| { NEW_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } }, |
| { NEAREST_NEWMV, { LAST2_FRAME, ALTREF_FRAME } }, |
| { NEW_NEARMV, { LAST2_FRAME, ALTREF_FRAME } }, |
| { NEAR_NEWMV, { LAST2_FRAME, ALTREF_FRAME } }, |
| { NEW_NEWMV, { LAST2_FRAME, ALTREF_FRAME } }, |
| { ZERO_ZEROMV, { LAST2_FRAME, ALTREF_FRAME } }, |
| |
| { NEAR_NEARMV, { LAST3_FRAME, ALTREF_FRAME } }, |
| { NEW_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } }, |
| { NEAREST_NEWMV, { LAST3_FRAME, ALTREF_FRAME } }, |
| { NEW_NEARMV, { LAST3_FRAME, ALTREF_FRAME } }, |
| { NEAR_NEWMV, { LAST3_FRAME, ALTREF_FRAME } }, |
| { NEW_NEWMV, { LAST3_FRAME, ALTREF_FRAME } }, |
| { ZERO_ZEROMV, { LAST3_FRAME, ALTREF_FRAME } }, |
| |
| { NEAR_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } }, |
| { NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } }, |
| { NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } }, |
| { NEW_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } }, |
| { NEAR_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } }, |
| { NEW_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } }, |
| { ZERO_ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } }, |
| |
| { NEAR_NEARMV, { LAST_FRAME, BWDREF_FRAME } }, |
| { NEW_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } }, |
| { NEAREST_NEWMV, { LAST_FRAME, BWDREF_FRAME } }, |
| { NEW_NEARMV, { LAST_FRAME, BWDREF_FRAME } }, |
| { NEAR_NEWMV, { LAST_FRAME, BWDREF_FRAME } }, |
| { NEW_NEWMV, { LAST_FRAME, BWDREF_FRAME } }, |
| { ZERO_ZEROMV, { LAST_FRAME, BWDREF_FRAME } }, |
| |
| { NEAR_NEARMV, { LAST2_FRAME, BWDREF_FRAME } }, |
| { NEW_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } }, |
| { NEAREST_NEWMV, { LAST2_FRAME, BWDREF_FRAME } }, |
| { NEW_NEARMV, { LAST2_FRAME, BWDREF_FRAME } }, |
| { NEAR_NEWMV, { LAST2_FRAME, BWDREF_FRAME } }, |
| { NEW_NEWMV, { LAST2_FRAME, BWDREF_FRAME } }, |
| { ZERO_ZEROMV, { LAST2_FRAME, BWDREF_FRAME } }, |
| |
| { NEAR_NEARMV, { LAST3_FRAME, BWDREF_FRAME } }, |
| { NEW_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } }, |
| { NEAREST_NEWMV, { LAST3_FRAME, BWDREF_FRAME } }, |
| { NEW_NEARMV, { LAST3_FRAME, BWDREF_FRAME } }, |
| { NEAR_NEWMV, { LAST3_FRAME, BWDREF_FRAME } }, |
| { NEW_NEWMV, { LAST3_FRAME, BWDREF_FRAME } }, |
| { ZERO_ZEROMV, { LAST3_FRAME, BWDREF_FRAME } }, |
| |
| { NEAR_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } }, |
| { NEW_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } }, |
| { NEAREST_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } }, |
| { NEW_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } }, |
| { NEAR_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } }, |
| { NEW_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } }, |
| { ZERO_ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } }, |
| |
| { NEAR_NEARMV, { LAST_FRAME, ALTREF2_FRAME } }, |
| { NEW_NEARESTMV, { LAST_FRAME, ALTREF2_FRAME } }, |
| { NEAREST_NEWMV, { LAST_FRAME, ALTREF2_FRAME } }, |
| { NEW_NEARMV, { LAST_FRAME, ALTREF2_FRAME } }, |
| { NEAR_NEWMV, { LAST_FRAME, ALTREF2_FRAME } }, |
| { NEW_NEWMV, { LAST_FRAME, ALTREF2_FRAME } }, |
| { ZERO_ZEROMV, { LAST_FRAME, ALTREF2_FRAME } }, |
| |
| { NEAR_NEARMV, { LAST2_FRAME, ALTREF2_FRAME } }, |
| { NEW_NEARESTMV, { LAST2_FRAME, ALTREF2_FRAME } }, |
| { NEAREST_NEWMV, { LAST2_FRAME, ALTREF2_FRAME } }, |
| { NEW_NEARMV, { LAST2_FRAME, ALTREF2_FRAME } }, |
| { NEAR_NEWMV, { LAST2_FRAME, ALTREF2_FRAME } }, |
| { NEW_NEWMV, { LAST2_FRAME, ALTREF2_FRAME } }, |
| { ZERO_ZEROMV, { LAST2_FRAME, ALTREF2_FRAME } }, |
| |
| { NEAR_NEARMV, { LAST3_FRAME, ALTREF2_FRAME } }, |
| { NEW_NEARESTMV, { LAST3_FRAME, ALTREF2_FRAME } }, |
| { NEAREST_NEWMV, { LAST3_FRAME, ALTREF2_FRAME } }, |
| { NEW_NEARMV, { LAST3_FRAME, ALTREF2_FRAME } }, |
| { NEAR_NEWMV, { LAST3_FRAME, ALTREF2_FRAME } }, |
| { NEW_NEWMV, { LAST3_FRAME, ALTREF2_FRAME } }, |
| { ZERO_ZEROMV, { LAST3_FRAME, ALTREF2_FRAME } }, |
| |
| { NEAR_NEARMV, { GOLDEN_FRAME, ALTREF2_FRAME } }, |
| { NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF2_FRAME } }, |
| { NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF2_FRAME } }, |
| { NEW_NEARMV, { GOLDEN_FRAME, ALTREF2_FRAME } }, |
| { NEAR_NEWMV, { GOLDEN_FRAME, ALTREF2_FRAME } }, |
| { NEW_NEWMV, { GOLDEN_FRAME, ALTREF2_FRAME } }, |
| { ZERO_ZEROMV, { GOLDEN_FRAME, ALTREF2_FRAME } }, |
| |
| #if CONFIG_EXT_COMP_REFS |
| { NEAR_NEARMV, { LAST_FRAME, LAST2_FRAME } }, |
| { NEW_NEARESTMV, { LAST_FRAME, LAST2_FRAME } }, |
| { NEAREST_NEWMV, { LAST_FRAME, LAST2_FRAME } }, |
| { NEW_NEARMV, { LAST_FRAME, LAST2_FRAME } }, |
| { NEAR_NEWMV, { LAST_FRAME, LAST2_FRAME } }, |
| { NEW_NEWMV, { LAST_FRAME, LAST2_FRAME } }, |
| { ZERO_ZEROMV, { LAST_FRAME, LAST2_FRAME } }, |
| |
| { NEAR_NEARMV, { LAST_FRAME, LAST3_FRAME } }, |
| { NEW_NEARESTMV, { LAST_FRAME, LAST3_FRAME } }, |
| { NEAREST_NEWMV, { LAST_FRAME, LAST3_FRAME } }, |
| { NEW_NEARMV, { LAST_FRAME, LAST3_FRAME } }, |
| { NEAR_NEWMV, { LAST_FRAME, LAST3_FRAME } }, |
| { NEW_NEWMV, { LAST_FRAME, LAST3_FRAME } }, |
| { ZERO_ZEROMV, { LAST_FRAME, LAST3_FRAME } }, |
| |
| { NEAR_NEARMV, { LAST_FRAME, GOLDEN_FRAME } }, |
| { NEW_NEARESTMV, { LAST_FRAME, GOLDEN_FRAME } }, |
| { NEAREST_NEWMV, { LAST_FRAME, GOLDEN_FRAME } }, |
| { NEW_NEARMV, { LAST_FRAME, GOLDEN_FRAME } }, |
| { NEAR_NEWMV, { LAST_FRAME, GOLDEN_FRAME } }, |
| { NEW_NEWMV, { LAST_FRAME, GOLDEN_FRAME } }, |
| { ZERO_ZEROMV, { LAST_FRAME, GOLDEN_FRAME } }, |
| |
| { NEAR_NEARMV, { BWDREF_FRAME, ALTREF_FRAME } }, |
| { NEW_NEARESTMV, { BWDREF_FRAME, ALTREF_FRAME } }, |
| { NEAREST_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } }, |
| { NEW_NEARMV, { BWDREF_FRAME, ALTREF_FRAME } }, |
| { NEAR_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } }, |
| { NEW_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } }, |
| { ZERO_ZEROMV, { BWDREF_FRAME, ALTREF_FRAME } }, |
| #endif // CONFIG_EXT_COMP_REFS |
| |
| { H_PRED, { INTRA_FRAME, NONE_FRAME } }, |
| { V_PRED, { INTRA_FRAME, NONE_FRAME } }, |
| { D135_PRED, { INTRA_FRAME, NONE_FRAME } }, |
| { D207_PRED, { INTRA_FRAME, NONE_FRAME } }, |
| { D153_PRED, { INTRA_FRAME, NONE_FRAME } }, |
| { D63_PRED, { INTRA_FRAME, NONE_FRAME } }, |
| { D117_PRED, { INTRA_FRAME, NONE_FRAME } }, |
| { D45_PRED, { INTRA_FRAME, NONE_FRAME } }, |
| |
| { ZEROMV, { LAST_FRAME, INTRA_FRAME } }, |
| { NEARESTMV, { LAST_FRAME, INTRA_FRAME } }, |
| { NEARMV, { LAST_FRAME, INTRA_FRAME } }, |
| { NEWMV, { LAST_FRAME, INTRA_FRAME } }, |
| |
| { ZEROMV, { LAST2_FRAME, INTRA_FRAME } }, |
| { NEARESTMV, { LAST2_FRAME, INTRA_FRAME } }, |
| { NEARMV, { LAST2_FRAME, INTRA_FRAME } }, |
| { NEWMV, { LAST2_FRAME, INTRA_FRAME } }, |
| |
| { ZEROMV, { LAST3_FRAME, INTRA_FRAME } }, |
| { NEARESTMV, { LAST3_FRAME, INTRA_FRAME } }, |
| { NEARMV, { LAST3_FRAME, INTRA_FRAME } }, |
| { NEWMV, { LAST3_FRAME, INTRA_FRAME } }, |
| |
| { ZEROMV, { GOLDEN_FRAME, INTRA_FRAME } }, |
| { NEARESTMV, { GOLDEN_FRAME, INTRA_FRAME } }, |
| { NEARMV, { GOLDEN_FRAME, INTRA_FRAME } }, |
| { NEWMV, { GOLDEN_FRAME, INTRA_FRAME } }, |
| |
| { ZEROMV, { BWDREF_FRAME, INTRA_FRAME } }, |
| { NEARESTMV, { BWDREF_FRAME, INTRA_FRAME } }, |
| { NEARMV, { BWDREF_FRAME, INTRA_FRAME } }, |
| { NEWMV, { BWDREF_FRAME, INTRA_FRAME } }, |
| |
| { ZEROMV, { ALTREF2_FRAME, INTRA_FRAME } }, |
| { NEARESTMV, { ALTREF2_FRAME, INTRA_FRAME } }, |
| { NEARMV, { ALTREF2_FRAME, INTRA_FRAME } }, |
| { NEWMV, { ALTREF2_FRAME, INTRA_FRAME } }, |
| |
| { ZEROMV, { ALTREF_FRAME, INTRA_FRAME } }, |
| { NEARESTMV, { ALTREF_FRAME, INTRA_FRAME } }, |
| { NEARMV, { ALTREF_FRAME, INTRA_FRAME } }, |
| { NEWMV, { ALTREF_FRAME, INTRA_FRAME } }, |
| }; |
| |
| static const PREDICTION_MODE intra_rd_search_mode_order[INTRA_MODES] = { |
| DC_PRED, H_PRED, V_PRED, SMOOTH_PRED, PAETH_PRED, |
| #if CONFIG_SMOOTH_HV |
| SMOOTH_V_PRED, SMOOTH_H_PRED, |
| #endif // CONFIG_SMOOTH_HV |
| D135_PRED, D207_PRED, D153_PRED, D63_PRED, D117_PRED, D45_PRED, |
| }; |
| |
| #if CONFIG_CFL |
| static const UV_PREDICTION_MODE uv_rd_search_mode_order[UV_INTRA_MODES] = { |
| UV_DC_PRED, UV_CFL_PRED, UV_H_PRED, |
| UV_V_PRED, UV_SMOOTH_PRED, UV_PAETH_PRED, |
| #if CONFIG_SMOOTH_HV |
| UV_SMOOTH_V_PRED, UV_SMOOTH_H_PRED, |
| #endif // CONFIG_SMOOTH_HV |
| UV_D135_PRED, UV_D207_PRED, UV_D153_PRED, |
| UV_D63_PRED, UV_D117_PRED, UV_D45_PRED, |
| }; |
| #else |
| #define uv_rd_search_mode_order intra_rd_search_mode_order |
| #endif // CONFIG_CFL |
| |
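// Returns the cost, in units of av1_cost_bit(128, 0), of coding the value v
// with a near-uniform (truncated binary) code over n symbols: the first
// m = 2^l - n symbols take l - 1 bits and the rest take l bits. For example,
// with n = 10: l = 4, m = 6, so values 0..5 cost 3 bits and 6..9 cost 4 bits.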
| static INLINE int write_uniform_cost(int n, int v) { |
| const int l = get_unsigned_bits(n); |
| const int m = (1 << l) - n; |
| if (l == 0) return 0; |
| if (v < m) |
| return (l - 1) * av1_cost_bit(128, 0); |
| else |
| return l * av1_cost_bit(128, 0); |
| } |
| |
| // constants for prune 1 and prune 2 decision boundaries |
| #define FAST_EXT_TX_CORR_MID 0.0 |
| #define FAST_EXT_TX_EDST_MID 0.1 |
| #define FAST_EXT_TX_CORR_MARGIN 0.5 |
| #define FAST_EXT_TX_EDST_MARGIN 0.3 |
| |
| int inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_stats, |
| BLOCK_SIZE bsize, int64_t ref_best_rd, int fast); |
| int inter_block_uvrd(const AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_stats, |
| BLOCK_SIZE bsize, int64_t ref_best_rd, int fast); |
| |
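// Computes the SSE between src and dst over the visible part of a transform
// block (i.e. excluding the portion that lies outside the visible frame).
// When the whole block is visible, the block-size-specific variance function
// is used to obtain the SSE directly.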
| static unsigned pixel_dist_visible_only( |
| const AV1_COMP *const cpi, const MACROBLOCK *x, const uint8_t *src, |
| const int src_stride, const uint8_t *dst, const int dst_stride, |
| const BLOCK_SIZE tx_bsize, int txb_rows, int txb_cols, int visible_rows, |
| int visible_cols) { |
| unsigned sse; |
| |
| if (txb_rows == visible_rows && txb_cols == visible_cols |
| #if CONFIG_RECT_TX_EXT |
| && tx_bsize < BLOCK_SIZES |
| #endif |
| ) { |
| cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse); |
| return sse; |
| } |
| #if CONFIG_HIGHBITDEPTH |
| const MACROBLOCKD *xd = &x->e_mbd; |
| |
| if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
| uint64_t sse64 = aom_highbd_sse_odd_size(src, src_stride, dst, dst_stride, |
| visible_cols, visible_rows); |
| return (unsigned int)ROUND_POWER_OF_TWO(sse64, (xd->bd - 8) * 2); |
| } |
| #else |
| (void)x; |
| #endif // CONFIG_HIGHBITDEPTH |
| sse = aom_sse_odd_size(src, src_stride, dst, dst_stride, visible_cols, |
| visible_rows); |
| return sse; |
| } |
| |
| #if CONFIG_DIST_8X8 |
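// Distortion between an 8x8 source and reconstruction block, derived from the
// dering distortion metric used in the CDEF tool: the SSE is weighted by a
// factor based on the source and reconstruction variances.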
| static uint64_t cdef_dist_8x8_16bit(uint16_t *dst, int dstride, uint16_t *src, |
| int sstride, int coeff_shift) { |
| uint64_t svar = 0; |
| uint64_t dvar = 0; |
| uint64_t sum_s = 0; |
| uint64_t sum_d = 0; |
| uint64_t sum_s2 = 0; |
| uint64_t sum_d2 = 0; |
| uint64_t sum_sd = 0; |
| uint64_t dist = 0; |
| |
| int i, j; |
| for (i = 0; i < 8; i++) { |
| for (j = 0; j < 8; j++) { |
| sum_s += src[i * sstride + j]; |
| sum_d += dst[i * dstride + j]; |
| sum_s2 += src[i * sstride + j] * src[i * sstride + j]; |
| sum_d2 += dst[i * dstride + j] * dst[i * dstride + j]; |
| sum_sd += src[i * sstride + j] * dst[i * dstride + j]; |
| } |
| } |
| /* Compute the variance -- the calculation cannot go negative. */ |
| svar = sum_s2 - ((sum_s * sum_s + 32) >> 6); |
| dvar = sum_d2 - ((sum_d * sum_d + 32) >> 6); |
| |
  // Tuning of jm's original dering distortion metric used in the CDEF tool,
  // as suggested by jm.
| const uint64_t a = 4; |
| const uint64_t b = 2; |
| const uint64_t c1 = (400 * a << 2 * coeff_shift); |
| const uint64_t c2 = (b * 20000 * a * a << 4 * coeff_shift); |
| |
| dist = |
| (uint64_t)floor(.5 + |
| (sum_d2 + sum_s2 - 2 * sum_sd) * .5 * (svar + dvar + c1) / |
| (sqrt(svar * (double)dvar + c2))); |
| |
  // Calibrate dist to have a similar rate for the same QP as MSE-only
  // distortion (as in the master branch).
| dist = (uint64_t)((float)dist * 0.75); |
| |
| return dist; |
| } |
| |
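// Returns the per-pixel variance of a 4x4 block of pixels.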
| static int od_compute_var_4x4(uint16_t *x, int stride) { |
| int sum; |
| int s2; |
| int i; |
| sum = 0; |
| s2 = 0; |
| for (i = 0; i < 4; i++) { |
| int j; |
| for (j = 0; j < 4; j++) { |
| int t; |
| |
| t = x[i * stride + j]; |
| sum += t; |
| s2 += t * t; |
| } |
| } |
| |
| return (s2 - (sum * sum >> 4)) >> 4; |
| } |
| |
| /* OD_DIST_LP_MID controls the frequency weighting filter used for computing |
| the distortion. For a value X, the filter is [1 X 1]/(X + 2) and |
| is applied both horizontally and vertically. For X=5, the filter is |
| a good approximation for the OD_QM8_Q4_HVS quantization matrix. */ |
| #define OD_DIST_LP_MID (5) |
| #define OD_DIST_LP_NORM (OD_DIST_LP_MID + 2) |
| |
| static double od_compute_dist_8x8(int use_activity_masking, uint16_t *x, |
| uint16_t *y, od_coeff *e_lp, int stride) { |
| double sum; |
| int min_var; |
| double mean_var; |
| double var_stat; |
| double activity; |
| double calibration; |
| int i; |
| int j; |
| double vardist; |
| |
| vardist = 0; |
| |
| #if 1 |
| min_var = INT_MAX; |
| mean_var = 0; |
| for (i = 0; i < 3; i++) { |
| for (j = 0; j < 3; j++) { |
| int varx; |
| int vary; |
| varx = od_compute_var_4x4(x + 2 * i * stride + 2 * j, stride); |
| vary = od_compute_var_4x4(y + 2 * i * stride + 2 * j, stride); |
| min_var = OD_MINI(min_var, varx); |
| mean_var += 1. / (1 + varx); |
| /* The cast to (double) is to avoid an overflow before the sqrt.*/ |
| vardist += varx - 2 * sqrt(varx * (double)vary) + vary; |
| } |
| } |
| /* We use a different variance statistic depending on whether activity |
| masking is used, since the harmonic mean appeared slightly worse with |
| masking off. The calibration constant just ensures that we preserve the |
| rate compared to activity=1. */ |
| if (use_activity_masking) { |
| calibration = 1.95; |
| var_stat = 9. / mean_var; |
| } else { |
| calibration = 1.62; |
| var_stat = min_var; |
| } |
| /* 1.62 is a calibration constant, 0.25 is a noise floor and 1/6 is the |
| activity masking constant. */ |
| activity = calibration * pow(.25 + var_stat, -1. / 6); |
| #else |
| activity = 1; |
| #endif // 1 |
| sum = 0; |
| for (i = 0; i < 8; i++) { |
| for (j = 0; j < 8; j++) |
| sum += e_lp[i * stride + j] * (double)e_lp[i * stride + j]; |
| } |
| /* Normalize the filter to unit DC response. */ |
| sum *= 1. / (OD_DIST_LP_NORM * OD_DIST_LP_NORM * OD_DIST_LP_NORM * |
| OD_DIST_LP_NORM); |
| return activity * activity * (sum + vardist); |
| } |
| |
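// Applies the vertical part of the [1 OD_DIST_LP_MID 1] low-pass filter to
// the row-filtered error in 'tmp', accumulates the 8x8 Daala distortion over
// the block, and applies a qindex-dependent scaling.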
// Note: Inputs x and y are in the pixel domain.
| static double od_compute_dist_common(int activity_masking, uint16_t *x, |
| uint16_t *y, int bsize_w, int bsize_h, |
| int qindex, od_coeff *tmp, |
| od_coeff *e_lp) { |
| int i, j; |
| double sum = 0; |
| const int mid = OD_DIST_LP_MID; |
| |
| for (j = 0; j < bsize_w; j++) { |
| e_lp[j] = mid * tmp[j] + 2 * tmp[bsize_w + j]; |
| e_lp[(bsize_h - 1) * bsize_w + j] = mid * tmp[(bsize_h - 1) * bsize_w + j] + |
| 2 * tmp[(bsize_h - 2) * bsize_w + j]; |
| } |
| for (i = 1; i < bsize_h - 1; i++) { |
| for (j = 0; j < bsize_w; j++) { |
| e_lp[i * bsize_w + j] = mid * tmp[i * bsize_w + j] + |
| tmp[(i - 1) * bsize_w + j] + |
| tmp[(i + 1) * bsize_w + j]; |
| } |
| } |
| for (i = 0; i < bsize_h; i += 8) { |
| for (j = 0; j < bsize_w; j += 8) { |
| sum += od_compute_dist_8x8(activity_masking, &x[i * bsize_w + j], |
| &y[i * bsize_w + j], &e_lp[i * bsize_w + j], |
| bsize_w); |
| } |
| } |
| /* Scale according to linear regression against SSE, for 8x8 blocks. */ |
| if (activity_masking) { |
| sum *= 2.2 + (1.7 - 2.2) * (qindex - 99) / (210 - 99) + |
| (qindex < 99 ? 2.5 * (qindex - 99) / 99 * (qindex - 99) / 99 : 0); |
| } else { |
| sum *= qindex >= 128 |
| ? 1.4 + (0.9 - 1.4) * (qindex - 128) / (209 - 128) |
| : qindex <= 43 ? 1.5 + (2.0 - 1.5) * (qindex - 43) / (16 - 43) |
| : 1.5 + (1.4 - 1.5) * (qindex - 43) / (128 - 43); |
| } |
| |
| return sum; |
| } |
| |
| static double od_compute_dist(uint16_t *x, uint16_t *y, int bsize_w, |
| int bsize_h, int qindex) { |
| assert(bsize_w >= 8 && bsize_h >= 8); |
| |
| int activity_masking = 0; |
| |
| int i, j; |
| DECLARE_ALIGNED(16, od_coeff, e[MAX_TX_SQUARE]); |
| DECLARE_ALIGNED(16, od_coeff, tmp[MAX_TX_SQUARE]); |
| DECLARE_ALIGNED(16, od_coeff, e_lp[MAX_TX_SQUARE]); |
| for (i = 0; i < bsize_h; i++) { |
| for (j = 0; j < bsize_w; j++) { |
| e[i * bsize_w + j] = x[i * bsize_w + j] - y[i * bsize_w + j]; |
| } |
| } |
| int mid = OD_DIST_LP_MID; |
| for (i = 0; i < bsize_h; i++) { |
| tmp[i * bsize_w] = mid * e[i * bsize_w] + 2 * e[i * bsize_w + 1]; |
| tmp[i * bsize_w + bsize_w - 1] = |
| mid * e[i * bsize_w + bsize_w - 1] + 2 * e[i * bsize_w + bsize_w - 2]; |
| for (j = 1; j < bsize_w - 1; j++) { |
| tmp[i * bsize_w + j] = mid * e[i * bsize_w + j] + e[i * bsize_w + j - 1] + |
| e[i * bsize_w + j + 1]; |
| } |
| } |
| return od_compute_dist_common(activity_masking, x, y, bsize_w, bsize_h, |
| qindex, tmp, e_lp); |
| } |
| |
| static double od_compute_dist_diff(uint16_t *x, int16_t *e, int bsize_w, |
| int bsize_h, int qindex) { |
| assert(bsize_w >= 8 && bsize_h >= 8); |
| |
| int activity_masking = 0; |
| |
| DECLARE_ALIGNED(16, uint16_t, y[MAX_TX_SQUARE]); |
| DECLARE_ALIGNED(16, od_coeff, tmp[MAX_TX_SQUARE]); |
| DECLARE_ALIGNED(16, od_coeff, e_lp[MAX_TX_SQUARE]); |
| int i, j; |
| for (i = 0; i < bsize_h; i++) { |
| for (j = 0; j < bsize_w; j++) { |
| y[i * bsize_w + j] = x[i * bsize_w + j] - e[i * bsize_w + j]; |
| } |
| } |
| int mid = OD_DIST_LP_MID; |
| for (i = 0; i < bsize_h; i++) { |
| tmp[i * bsize_w] = mid * e[i * bsize_w] + 2 * e[i * bsize_w + 1]; |
| tmp[i * bsize_w + bsize_w - 1] = |
| mid * e[i * bsize_w + bsize_w - 1] + 2 * e[i * bsize_w + bsize_w - 2]; |
| for (j = 1; j < bsize_w - 1; j++) { |
| tmp[i * bsize_w + j] = mid * e[i * bsize_w + j] + e[i * bsize_w + j - 1] + |
| e[i * bsize_w + j + 1]; |
| } |
| } |
| return od_compute_dist_common(activity_masking, x, y, bsize_w, bsize_h, |
| qindex, tmp, e_lp); |
| } |
| |
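// Computes the 8x8-based distortion for the selected tune metric (Daala
// distortion, CDEF distortion, or plain MSE). The source and reconstruction
// are copied into 16-bit buffers, with the non-visible region of the
// reconstruction padded with source pixels so that it contributes no error.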
| int64_t av1_dist_8x8(const AV1_COMP *const cpi, const MACROBLOCK *x, |
| const uint8_t *src, int src_stride, const uint8_t *dst, |
| int dst_stride, const BLOCK_SIZE tx_bsize, int bsw, |
| int bsh, int visible_w, int visible_h, int qindex) { |
| int64_t d = 0; |
| int i, j; |
| const MACROBLOCKD *xd = &x->e_mbd; |
| |
| DECLARE_ALIGNED(16, uint16_t, orig[MAX_TX_SQUARE]); |
| DECLARE_ALIGNED(16, uint16_t, rec[MAX_TX_SQUARE]); |
| |
| assert(bsw >= 8); |
| assert(bsh >= 8); |
| assert((bsw & 0x07) == 0); |
| assert((bsh & 0x07) == 0); |
| |
| if (x->tune_metric == AOM_TUNE_CDEF_DIST || |
| x->tune_metric == AOM_TUNE_DAALA_DIST) { |
| #if CONFIG_HIGHBITDEPTH |
| if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
| for (j = 0; j < bsh; j++) |
| for (i = 0; i < bsw; i++) |
| orig[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i]; |
| |
| if ((bsw == visible_w) && (bsh == visible_h)) { |
| for (j = 0; j < bsh; j++) |
| for (i = 0; i < bsw; i++) |
| rec[j * bsw + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i]; |
| } else { |
| for (j = 0; j < visible_h; j++) |
| for (i = 0; i < visible_w; i++) |
| rec[j * bsw + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i]; |
| |
| if (visible_w < bsw) { |
| for (j = 0; j < bsh; j++) |
| for (i = visible_w; i < bsw; i++) |
| rec[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i]; |
| } |
| |
| if (visible_h < bsh) { |
| for (j = visible_h; j < bsh; j++) |
| for (i = 0; i < bsw; i++) |
| rec[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i]; |
| } |
| } |
| } else { |
| #endif |
| for (j = 0; j < bsh; j++) |
| for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i]; |
| |
| if ((bsw == visible_w) && (bsh == visible_h)) { |
| for (j = 0; j < bsh; j++) |
| for (i = 0; i < bsw; i++) rec[j * bsw + i] = dst[j * dst_stride + i]; |
| } else { |
| for (j = 0; j < visible_h; j++) |
| for (i = 0; i < visible_w; i++) |
| rec[j * bsw + i] = dst[j * dst_stride + i]; |
| |
| if (visible_w < bsw) { |
| for (j = 0; j < bsh; j++) |
| for (i = visible_w; i < bsw; i++) |
| rec[j * bsw + i] = src[j * src_stride + i]; |
| } |
| |
| if (visible_h < bsh) { |
| for (j = visible_h; j < bsh; j++) |
| for (i = 0; i < bsw; i++) |
| rec[j * bsw + i] = src[j * src_stride + i]; |
| } |
| } |
| #if CONFIG_HIGHBITDEPTH |
| } |
| #endif // CONFIG_HIGHBITDEPTH |
| } |
| |
| if (x->tune_metric == AOM_TUNE_DAALA_DIST) { |
| d = (int64_t)od_compute_dist(orig, rec, bsw, bsh, qindex); |
| } else if (x->tune_metric == AOM_TUNE_CDEF_DIST) { |
| int coeff_shift = AOMMAX(xd->bd - 8, 0); |
| |
| for (i = 0; i < bsh; i += 8) { |
| for (j = 0; j < bsw; j += 8) { |
| d += cdef_dist_8x8_16bit(&rec[i * bsw + j], bsw, &orig[i * bsw + j], |
| bsw, coeff_shift); |
| } |
| } |
| #if CONFIG_HIGHBITDEPTH |
| if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) |
| d = ((uint64_t)d) >> 2 * coeff_shift; |
| #endif |
| } else { |
| // Otherwise, MSE by default |
| d = pixel_dist_visible_only(cpi, x, src, src_stride, dst, dst_stride, |
| tx_bsize, bsh, bsw, visible_h, visible_w); |
| } |
| |
| return d; |
| } |
| |
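// Variant of av1_dist_8x8() that takes the residual ('diff') instead of a
// reconstructed block; the non-visible region of the residual is zeroed out.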
| static int64_t av1_dist_8x8_diff(const MACROBLOCK *x, const uint8_t *src, |
| int src_stride, const int16_t *diff, |
| int diff_stride, int bsw, int bsh, |
| int visible_w, int visible_h, int qindex) { |
| int64_t d = 0; |
| int i, j; |
| const MACROBLOCKD *xd = &x->e_mbd; |
| |
| DECLARE_ALIGNED(16, uint16_t, orig[MAX_TX_SQUARE]); |
| DECLARE_ALIGNED(16, int16_t, diff16[MAX_TX_SQUARE]); |
| |
| assert(bsw >= 8); |
| assert(bsh >= 8); |
| assert((bsw & 0x07) == 0); |
| assert((bsh & 0x07) == 0); |
| |
| if (x->tune_metric == AOM_TUNE_CDEF_DIST || |
| x->tune_metric == AOM_TUNE_DAALA_DIST) { |
| #if CONFIG_HIGHBITDEPTH |
| if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
| for (j = 0; j < bsh; j++) |
| for (i = 0; i < bsw; i++) |
| orig[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i]; |
| } else { |
| #endif |
| for (j = 0; j < bsh; j++) |
| for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i]; |
| #if CONFIG_HIGHBITDEPTH |
| } |
| #endif // CONFIG_HIGHBITDEPTH |
| |
| if ((bsw == visible_w) && (bsh == visible_h)) { |
| for (j = 0; j < bsh; j++) |
| for (i = 0; i < bsw; i++) |
| diff16[j * bsw + i] = diff[j * diff_stride + i]; |
| } else { |
| for (j = 0; j < visible_h; j++) |
| for (i = 0; i < visible_w; i++) |
| diff16[j * bsw + i] = diff[j * diff_stride + i]; |
| |
| if (visible_w < bsw) { |
| for (j = 0; j < bsh; j++) |
| for (i = visible_w; i < bsw; i++) diff16[j * bsw + i] = 0; |
| } |
| |
| if (visible_h < bsh) { |
| for (j = visible_h; j < bsh; j++) |
| for (i = 0; i < bsw; i++) diff16[j * bsw + i] = 0; |
| } |
| } |
| } |
| |
| if (x->tune_metric == AOM_TUNE_DAALA_DIST) { |
| d = (int64_t)od_compute_dist_diff(orig, diff16, bsw, bsh, qindex); |
| } else if (x->tune_metric == AOM_TUNE_CDEF_DIST) { |
| int coeff_shift = AOMMAX(xd->bd - 8, 0); |
| DECLARE_ALIGNED(16, uint16_t, dst16[MAX_TX_SQUARE]); |
| |
| for (i = 0; i < bsh; i++) { |
| for (j = 0; j < bsw; j++) { |
| dst16[i * bsw + j] = orig[i * bsw + j] - diff16[i * bsw + j]; |
| } |
| } |
| |
| for (i = 0; i < bsh; i += 8) { |
| for (j = 0; j < bsw; j += 8) { |
| d += cdef_dist_8x8_16bit(&dst16[i * bsw + j], bsw, &orig[i * bsw + j], |
| bsw, coeff_shift); |
| } |
| } |
    // Don't scale 'd' for HBD since it will be done on the caller side for
    // the diff input.
| } else { |
| // Otherwise, MSE by default |
| d = aom_sum_squares_2d_i16(diff, diff_stride, visible_w, visible_h); |
| } |
| |
| return d; |
| } |
| #endif // CONFIG_DIST_8X8 |
| |
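// Splits the block into a 4x4 grid of sub-blocks, measures the prediction
// error energy of each, and outputs the normalized horizontal and vertical
// marginal energy distributions (first three entries of each; the fourth is
// implied).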
| static void get_energy_distribution_fine(const AV1_COMP *cpi, BLOCK_SIZE bsize, |
| const uint8_t *src, int src_stride, |
| const uint8_t *dst, int dst_stride, |
| double *hordist, double *verdist) { |
| const int bw = block_size_wide[bsize]; |
| const int bh = block_size_high[bsize]; |
| unsigned int esq[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; |
| |
| const int f_index = bsize - BLOCK_16X16; |
| if (f_index < 0) { |
| const int w_shift = bw == 8 ? 1 : 2; |
| const int h_shift = bh == 8 ? 1 : 2; |
| #if CONFIG_HIGHBITDEPTH |
| if (cpi->common.use_highbitdepth) { |
| const uint16_t *src16 = CONVERT_TO_SHORTPTR(src); |
| const uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst); |
| for (int i = 0; i < bh; ++i) |
| for (int j = 0; j < bw; ++j) { |
| const int index = (j >> w_shift) + ((i >> h_shift) << 2); |
| esq[index] += |
| (src16[j + i * src_stride] - dst16[j + i * dst_stride]) * |
| (src16[j + i * src_stride] - dst16[j + i * dst_stride]); |
| } |
| } else { |
| #endif // CONFIG_HIGHBITDEPTH |
| |
| for (int i = 0; i < bh; ++i) |
| for (int j = 0; j < bw; ++j) { |
| const int index = (j >> w_shift) + ((i >> h_shift) << 2); |
| esq[index] += (src[j + i * src_stride] - dst[j + i * dst_stride]) * |
| (src[j + i * src_stride] - dst[j + i * dst_stride]); |
| } |
| #if CONFIG_HIGHBITDEPTH |
| } |
| #endif // CONFIG_HIGHBITDEPTH |
| } else { |
| cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[0]); |
| cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride, |
| &esq[1]); |
| cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride, |
| &esq[2]); |
| cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4, |
| dst_stride, &esq[3]); |
| src += bh / 4 * src_stride; |
| dst += bh / 4 * dst_stride; |
| |
| cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[4]); |
| cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride, |
| &esq[5]); |
| cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride, |
| &esq[6]); |
| cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4, |
| dst_stride, &esq[7]); |
| src += bh / 4 * src_stride; |
| dst += bh / 4 * dst_stride; |
| |
| cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[8]); |
| cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride, |
| &esq[9]); |
| cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride, |
| &esq[10]); |
| cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4, |
| dst_stride, &esq[11]); |
| src += bh / 4 * src_stride; |
| dst += bh / 4 * dst_stride; |
| |
| cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[12]); |
| cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride, |
| &esq[13]); |
| cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride, |
| &esq[14]); |
| cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4, |
| dst_stride, &esq[15]); |
| } |
| |
| double total = (double)esq[0] + esq[1] + esq[2] + esq[3] + esq[4] + esq[5] + |
| esq[6] + esq[7] + esq[8] + esq[9] + esq[10] + esq[11] + |
| esq[12] + esq[13] + esq[14] + esq[15]; |
| if (total > 0) { |
| const double e_recip = 1.0 / total; |
| hordist[0] = ((double)esq[0] + esq[4] + esq[8] + esq[12]) * e_recip; |
| hordist[1] = ((double)esq[1] + esq[5] + esq[9] + esq[13]) * e_recip; |
| hordist[2] = ((double)esq[2] + esq[6] + esq[10] + esq[14]) * e_recip; |
| verdist[0] = ((double)esq[0] + esq[1] + esq[2] + esq[3]) * e_recip; |
| verdist[1] = ((double)esq[4] + esq[5] + esq[6] + esq[7]) * e_recip; |
| verdist[2] = ((double)esq[8] + esq[9] + esq[10] + esq[11]) * e_recip; |
| } else { |
| hordist[0] = verdist[0] = 0.25; |
| hordist[1] = verdist[1] = 0.25; |
| hordist[2] = verdist[2] = 0.25; |
| } |
| } |
| |
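// Uses a linear classifier on the energy distribution of the prediction error
// to decide whether ADST or FLIPADST can be pruned in each direction. Returns
// a bitmask of prunable 1D transforms (vertical in the low byte, horizontal
// shifted up by 8).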
| static int adst_vs_flipadst(const AV1_COMP *cpi, BLOCK_SIZE bsize, |
| const uint8_t *src, int src_stride, |
| const uint8_t *dst, int dst_stride) { |
| int prune_bitmask = 0; |
| double svm_proj_h = 0, svm_proj_v = 0; |
| double hdist[3] = { 0, 0, 0 }, vdist[3] = { 0, 0, 0 }; |
| get_energy_distribution_fine(cpi, bsize, src, src_stride, dst, dst_stride, |
| hdist, vdist); |
| |
| svm_proj_v = vdist[0] * ADST_FLIP_SVM[0] + vdist[1] * ADST_FLIP_SVM[1] + |
| vdist[2] * ADST_FLIP_SVM[2] + ADST_FLIP_SVM[3]; |
| svm_proj_h = hdist[0] * ADST_FLIP_SVM[4] + hdist[1] * ADST_FLIP_SVM[5] + |
| hdist[2] * ADST_FLIP_SVM[6] + ADST_FLIP_SVM[7]; |
| if (svm_proj_v > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN) |
| prune_bitmask |= 1 << FLIPADST_1D; |
| else if (svm_proj_v < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN) |
| prune_bitmask |= 1 << ADST_1D; |
| |
| if (svm_proj_h > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN) |
| prune_bitmask |= 1 << (FLIPADST_1D + 8); |
| else if (svm_proj_h < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN) |
| prune_bitmask |= 1 << (ADST_1D + 8); |
| |
| return prune_bitmask; |
| } |
| |
| #if CONFIG_EXT_TX |
| static void get_horver_correlation(const int16_t *diff, int stride, int w, |
| int h, double *hcorr, double *vcorr) { |
  // Computes the horizontal/vertical correlation coefficients of the residual
  // and returns them via hcorr and vcorr.
| const int num = (h - 1) * (w - 1); |
| double num_r; |
| int i, j; |
| int64_t xy_sum = 0, xz_sum = 0; |
| int64_t x_sum = 0, y_sum = 0, z_sum = 0; |
| int64_t x2_sum = 0, y2_sum = 0, z2_sum = 0; |
| double x_var_n, y_var_n, z_var_n, xy_var_n, xz_var_n; |
| *hcorr = *vcorr = 1; |
| |
| assert(num > 0); |
| num_r = 1.0 / num; |
| for (i = 1; i < h; ++i) { |
| for (j = 1; j < w; ++j) { |
| const int16_t x = diff[i * stride + j]; |
| const int16_t y = diff[i * stride + j - 1]; |
| const int16_t z = diff[(i - 1) * stride + j]; |
| xy_sum += x * y; |
| xz_sum += x * z; |
| x_sum += x; |
| y_sum += y; |
| z_sum += z; |
| x2_sum += x * x; |
| y2_sum += y * y; |
| z2_sum += z * z; |
| } |
| } |
| x_var_n = x2_sum - (x_sum * x_sum) * num_r; |
| y_var_n = y2_sum - (y_sum * y_sum) * num_r; |
| z_var_n = z2_sum - (z_sum * z_sum) * num_r; |
| xy_var_n = xy_sum - (x_sum * y_sum) * num_r; |
| xz_var_n = xz_sum - (x_sum * z_sum) * num_r; |
| if (x_var_n > 0 && y_var_n > 0) { |
| *hcorr = xy_var_n / sqrt(x_var_n * y_var_n); |
| *hcorr = *hcorr < 0 ? 0 : *hcorr; |
| } |
| if (x_var_n > 0 && z_var_n > 0) { |
| *vcorr = xz_var_n / sqrt(x_var_n * z_var_n); |
| *vcorr = *vcorr < 0 ? 0 : *vcorr; |
| } |
| } |
| |
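// Uses the horizontal/vertical correlation of the residual to decide whether
// DCT or the identity transform can be pruned in each direction. The returned
// bitmask has the same layout as that of adst_vs_flipadst().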
| int dct_vs_idtx(const int16_t *diff, int stride, int w, int h) { |
| double hcorr, vcorr; |
| int prune_bitmask = 0; |
| get_horver_correlation(diff, stride, w, h, &hcorr, &vcorr); |
| |
| if (vcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN) |
| prune_bitmask |= 1 << IDTX_1D; |
| else if (vcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN) |
| prune_bitmask |= 1 << DCT_1D; |
| |
| if (hcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN) |
| prune_bitmask |= 1 << (IDTX_1D + 8); |
| else if (hcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN) |
| prune_bitmask |= 1 << (DCT_1D + 8); |
| return prune_bitmask; |
| } |
| |
| // Performance drop: 0.5%, Speed improvement: 24% |
| static int prune_two_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize, |
| MACROBLOCK *x, const MACROBLOCKD *xd, |
| int adst_flipadst, int dct_idtx) { |
| int prune = 0; |
| |
| if (adst_flipadst) { |
| const struct macroblock_plane *const p = &x->plane[0]; |
| const struct macroblockd_plane *const pd = &xd->plane[0]; |
| prune |= adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride, |
| pd->dst.buf, pd->dst.stride); |
| } |
| if (dct_idtx) { |
| av1_subtract_plane(x, bsize, 0); |
| const struct macroblock_plane *const p = &x->plane[0]; |
| const int bw = 4 << (b_width_log2_lookup[bsize]); |
| const int bh = 4 << (b_height_log2_lookup[bsize]); |
| prune |= dct_vs_idtx(p->src_diff, bw, bw, bh); |
| } |
| |
| return prune; |
| } |
| #endif // CONFIG_EXT_TX |
| |
| // Performance drop: 0.3%, Speed improvement: 5% |
| static int prune_one_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize, |
| const MACROBLOCK *x, const MACROBLOCKD *xd) { |
| const struct macroblock_plane *const p = &x->plane[0]; |
| const struct macroblockd_plane *const pd = &xd->plane[0]; |
| return adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride, pd->dst.buf, |
| pd->dst.stride); |
| } |
| |
| #if CONFIG_EXT_TX |
// 1D transforms used in the inter set. This needs to be updated whenever
// ext_tx_used_inter is changed.
| static const int ext_tx_used_inter_1D[EXT_TX_SETS_INTER][TX_TYPES_1D] = { |
| { 1, 0, 0, 0 }, { 1, 1, 1, 1 }, { 1, 1, 1, 1 }, { 1, 0, 0, 1 }, |
| #if CONFIG_MRC_TX |
| { 1, 0, 0, 1 }, |
| #endif // CONFIG_MRC_TX |
| }; |
| |
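// Finer-grained counterpart of get_energy_distribution_fine(): accumulates
// the residual energy on a downscaled grid (cells of 1x1 or 2x2 pixels) and
// outputs its normalized horizontal and vertical projections, which are used
// as features for the transform-type pruning network.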
| static void get_energy_distribution_finer(const int16_t *diff, int stride, |
| int bw, int bh, float *hordist, |
| float *verdist) { |
| // First compute downscaled block energy values (esq); downscale factors |
| // are defined by w_shift and h_shift. |
| unsigned int esq[256]; |
| const int w_shift = bw <= 8 ? 0 : 1; |
| const int h_shift = bh <= 8 ? 0 : 1; |
| const int esq_w = bw <= 8 ? bw : bw / 2; |
| const int esq_h = bh <= 8 ? bh : bh / 2; |
| const int esq_sz = esq_w * esq_h; |
| int i, j; |
| memset(esq, 0, esq_sz * sizeof(esq[0])); |
| for (i = 0; i < bh; i++) { |
| unsigned int *cur_esq_row = esq + (i >> h_shift) * esq_w; |
| const int16_t *cur_diff_row = diff + i * stride; |
| for (j = 0; j < bw; j++) { |
| cur_esq_row[j >> w_shift] += cur_diff_row[j] * cur_diff_row[j]; |
| } |
| } |
| |
| uint64_t total = 0; |
| for (i = 0; i < esq_sz; i++) total += esq[i]; |
| |
| // Output hordist and verdist arrays are normalized 1D projections of esq |
| if (total == 0) { |
| float hor_val = 1.0f / esq_w; |
| for (j = 0; j < esq_w - 1; j++) hordist[j] = hor_val; |
| float ver_val = 1.0f / esq_h; |
| for (i = 0; i < esq_h - 1; i++) verdist[i] = ver_val; |
| return; |
| } |
| |
| const float e_recip = 1.0f / (float)total; |
| memset(hordist, 0, (esq_w - 1) * sizeof(hordist[0])); |
| memset(verdist, 0, (esq_h - 1) * sizeof(verdist[0])); |
| const unsigned int *cur_esq_row; |
| for (i = 0; i < esq_h - 1; i++) { |
| cur_esq_row = esq + i * esq_w; |
| for (j = 0; j < esq_w - 1; j++) { |
| hordist[j] += (float)cur_esq_row[j]; |
| verdist[i] += (float)cur_esq_row[j]; |
| } |
| verdist[i] += (float)cur_esq_row[j]; |
| } |
| cur_esq_row = esq + i * esq_w; |
| for (j = 0; j < esq_w - 1; j++) hordist[j] += (float)cur_esq_row[j]; |
| |
| for (j = 0; j < esq_w - 1; j++) hordist[j] *= e_recip; |
| for (i = 0; i < esq_h - 1; i++) verdist[i] *= e_recip; |
| } |
| |
// Similar to get_horver_correlation, but also takes into account the first
// row/column when computing horizontal/vertical correlation.
| static void get_horver_correlation_full(const int16_t *diff, int stride, int w, |
| int h, float *hcorr, float *vcorr) { |
| const float num_hor = (float)(h * (w - 1)); |
| const float num_ver = (float)((h - 1) * w); |
| int i, j; |
| |
| // The following notation is used: |
| // x - current pixel |
| // y - left neighbor pixel |
| // z - top neighbor pixel |
| int64_t xy_sum = 0, xz_sum = 0; |
| int64_t xhor_sum = 0, xver_sum = 0, y_sum = 0, z_sum = 0; |
| int64_t x2hor_sum = 0, x2ver_sum = 0, y2_sum = 0, z2_sum = 0; |
| |
| int16_t x, y, z; |
| for (j = 1; j < w; ++j) { |
| x = diff[j]; |
| y = diff[j - 1]; |
| xy_sum += x * y; |
| xhor_sum += x; |
| y_sum += y; |
| x2hor_sum += x * x; |
| y2_sum += y * y; |
| } |
| for (i = 1; i < h; ++i) { |
| x = diff[i * stride]; |
| z = diff[(i - 1) * stride]; |
| xz_sum += x * z; |
| xver_sum += x; |
| z_sum += z; |
| x2ver_sum += x * x; |
| z2_sum += z * z; |
| for (j = 1; j < w; ++j) { |
| x = diff[i * stride + j]; |
| y = diff[i * stride + j - 1]; |
| z = diff[(i - 1) * stride + j]; |
| xy_sum += x * y; |
| xz_sum += x * z; |
| xhor_sum += x; |
| xver_sum += x; |
| y_sum += y; |
| z_sum += z; |
| x2hor_sum += x * x; |
| x2ver_sum += x * x; |
| y2_sum += y * y; |
| z2_sum += z * z; |
| } |
| } |
| const float xhor_var_n = x2hor_sum - (xhor_sum * xhor_sum) / num_hor; |
| const float y_var_n = y2_sum - (y_sum * y_sum) / num_hor; |
| const float xy_var_n = xy_sum - (xhor_sum * y_sum) / num_hor; |
| const float xver_var_n = x2ver_sum - (xver_sum * xver_sum) / num_ver; |
| const float z_var_n = z2_sum - (z_sum * z_sum) / num_ver; |
| const float xz_var_n = xz_sum - (xver_sum * z_sum) / num_ver; |
| |
| *hcorr = *vcorr = 1; |
| if (xhor_var_n > 0 && y_var_n > 0) { |
| *hcorr = xy_var_n / sqrtf(xhor_var_n * y_var_n); |
| *hcorr = *hcorr < 0 ? 0 : *hcorr; |
| } |
| if (xver_var_n > 0 && z_var_n > 0) { |
| *vcorr = xz_var_n / sqrtf(xver_var_n * z_var_n); |
| *vcorr = *vcorr < 0 ? 0 : *vcorr; |
| } |
| } |
| |
// Performs a forward pass through a neural network with 2 fully-connected
// layers, using ReLU as the activation function. The number of output
// neurons is always 4.
| // fc1, fc2 - weight matrices of the respective layers. |
| // b1, b2 - bias vectors of the respective layers. |
| static void compute_1D_scores(float *features, int num_features, |
| const float *fc1, const float *b1, |
| const float *fc2, const float *b2, |
| int num_hidden_units, float *dst_scores) { |
| assert(num_hidden_units <= 32); |
| float hidden_layer[32]; |
| for (int i = 0; i < num_hidden_units; i++) { |
| const float *cur_coef = fc1 + i * num_features; |
| hidden_layer[i] = 0.0f; |
| for (int j = 0; j < num_features; j++) |
| hidden_layer[i] += cur_coef[j] * features[j]; |
| hidden_layer[i] = AOMMAX(hidden_layer[i] + b1[i], 0.0f); |
| } |
| for (int i = 0; i < 4; i++) { |
| const float *cur_coef = fc2 + i * num_hidden_units; |
| dst_scores[i] = 0.0f; |
| for (int j = 0; j < num_hidden_units; j++) |
| dst_scores[i] += cur_coef[j] * hidden_layer[j]; |
| dst_scores[i] += b2[i]; |
| } |
| } |
| |
// Transforms raw scores into a probability distribution across the 16 TX
// types by raising the shifted (and clamped) scores to the 8th power and
// normalizing.
| static void score_2D_transform_pow8(float *scores_2D, float shift) { |
| float sum = 0.0f; |
| int i; |
| |
| for (i = 0; i < 16; i++) { |
| float v, v2, v4; |
| v = AOMMAX(scores_2D[i] + shift, 0.0f); |
| v2 = v * v; |
| v4 = v2 * v2; |
| scores_2D[i] = v4 * v4; |
| sum += scores_2D[i]; |
| } |
| for (i = 0; i < 16; i++) scores_2D[i] /= sum; |
| } |
| |
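// Neural-network-based pruning over the 16 2D transform types: 1D score
// vectors are computed for the horizontal and vertical directions from
// residual statistics, combined into 16 2D scores, and every type whose score
// falls below score_thresh (except the best-scoring one) is pruned. Returns a
// bitmask of pruned TX types.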
| static int prune_tx_types_2D(BLOCK_SIZE bsize, const MACROBLOCK *x, |
| int tx_set_type, int pruning_aggressiveness) { |
| if (bsize >= BLOCK_32X32) return 0; |
| const struct macroblock_plane *const p = &x->plane[0]; |
| const int bidx = AOMMAX(bsize - BLOCK_4X4, 0); |
| const float score_thresh = |
| av1_prune_2D_adaptive_thresholds[bidx][pruning_aggressiveness - 1]; |
| |
| float hfeatures[16], vfeatures[16]; |
| float hscores[4], vscores[4]; |
| float scores_2D[16]; |
| int tx_type_table_2D[16] = { |
| DCT_DCT, DCT_ADST, DCT_FLIPADST, V_DCT, |
| ADST_DCT, ADST_ADST, ADST_FLIPADST, V_ADST, |
| FLIPADST_DCT, FLIPADST_ADST, FLIPADST_FLIPADST, V_FLIPADST, |
| H_DCT, H_ADST, H_FLIPADST, IDTX |
| }; |
| const int bw = block_size_wide[bsize], bh = block_size_high[bsize]; |
| const int hfeatures_num = bw <= 8 ? bw : bw / 2; |
| const int vfeatures_num = bh <= 8 ? bh : bh / 2; |
| assert(hfeatures_num <= 16); |
| assert(vfeatures_num <= 16); |
| |
| get_energy_distribution_finer(p->src_diff, bw, bw, bh, hfeatures, vfeatures); |
| get_horver_correlation_full(p->src_diff, bw, bw, bh, |
| &hfeatures[hfeatures_num - 1], |
| &vfeatures[vfeatures_num - 1]); |
| |
| const float *fc1_hor = av1_prune_2D_learned_weights_hor[bidx]; |
| const float *b1_hor = |
| fc1_hor + av1_prune_2D_num_hidden_units_hor[bidx] * hfeatures_num; |
| const float *fc2_hor = b1_hor + av1_prune_2D_num_hidden_units_hor[bidx]; |
| const float *b2_hor = fc2_hor + av1_prune_2D_num_hidden_units_hor[bidx] * 4; |
| compute_1D_scores(hfeatures, hfeatures_num, fc1_hor, b1_hor, fc2_hor, b2_hor, |
| av1_prune_2D_num_hidden_units_hor[bidx], hscores); |
| |
| const float *fc1_ver = av1_prune_2D_learned_weights_ver[bidx]; |
| const float *b1_ver = |
| fc1_ver + av1_prune_2D_num_hidden_units_ver[bidx] * vfeatures_num; |
| const float *fc2_ver = b1_ver + av1_prune_2D_num_hidden_units_ver[bidx]; |
| const float *b2_ver = fc2_ver + av1_prune_2D_num_hidden_units_ver[bidx] * 4; |
| compute_1D_scores(vfeatures, vfeatures_num, fc1_ver, b1_ver, fc2_ver, b2_ver, |
| av1_prune_2D_num_hidden_units_ver[bidx], vscores); |
| |
| float score_2D_average = 0.0f; |
| for (int i = 0; i < 4; i++) { |
| float *cur_scores_2D = scores_2D + i * 4; |
| cur_scores_2D[0] = vscores[i] * hscores[0]; |
| cur_scores_2D[1] = vscores[i] * hscores[1]; |
| cur_scores_2D[2] = vscores[i] * hscores[2]; |
| cur_scores_2D[3] = vscores[i] * hscores[3]; |
| score_2D_average += cur_scores_2D[0] + cur_scores_2D[1] + cur_scores_2D[2] + |
| cur_scores_2D[3]; |
| } |
| score_2D_average /= 16; |
| score_2D_transform_pow8(scores_2D, (20 - score_2D_average)); |
| |
  // Always keep the TX type with the highest score; prune all others whose
  // score is below score_thresh.
| int max_score_i = 0; |
| float max_score = 0.0f; |
| for (int i = 0; i < 16; i++) { |
| if (scores_2D[i] > max_score && |
| av1_ext_tx_used[tx_set_type][tx_type_table_2D[i]]) { |
| max_score = scores_2D[i]; |
| max_score_i = i; |
| } |
| } |
| |
| int prune_bitmask = 0; |
| for (int i = 0; i < 16; i++) { |
| if (scores_2D[i] < score_thresh && i != max_score_i) |
| prune_bitmask |= (1 << tx_type_table_2D[i]); |
| } |
| |
| return prune_bitmask; |
| } |
| #endif // CONFIG_EXT_TX |
| |
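// Top-level transform-type pruning dispatch, driven by the
// tx_type_search.prune_mode speed feature; returns a bitmask of pruned 1D or
// 2D transform types.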
| static int prune_tx_types(const AV1_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x, |
| const MACROBLOCKD *const xd, int tx_set_type) { |
| #if CONFIG_EXT_TX |
| int tx_set = ext_tx_set_index[1][tx_set_type]; |
| assert(tx_set >= 0); |
| const int *tx_set_1D = ext_tx_used_inter_1D[tx_set]; |
| #else |
| const int tx_set_1D[TX_TYPES_1D] = { 0 }; |
| (void)tx_set_type; |
| #endif // CONFIG_EXT_TX |
| |
| switch (cpi->sf.tx_type_search.prune_mode) { |
| case NO_PRUNE: return 0; break; |
| case PRUNE_ONE: |
| if (!(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D])) return 0; |
| return prune_one_for_sby(cpi, bsize, x, xd); |
| break; |
| #if CONFIG_EXT_TX |
| case PRUNE_TWO: |
| if (!(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D])) { |
| if (!(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D])) return 0; |
| return prune_two_for_sby(cpi, bsize, x, xd, 0, 1); |
| } |
| if (!(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D])) |
| return prune_two_for_sby(cpi, bsize, x, xd, 1, 0); |
| return prune_two_for_sby(cpi, bsize, x, xd, 1, 1); |
| break; |
| case PRUNE_2D_ACCURATE: |
| if (tx_set_type == EXT_TX_SET_ALL16) |
| return prune_tx_types_2D(bsize, x, tx_set_type, 6); |
| else if (tx_set_type == EXT_TX_SET_DTT9_IDTX_1DDCT) |
| return prune_tx_types_2D(bsize, x, tx_set_type, 4); |
| else |
| return 0; |
| break; |
| case PRUNE_2D_FAST: |
| if (tx_set_type == EXT_TX_SET_ALL16) |
| return prune_tx_types_2D(bsize, x, tx_set_type, 10); |
| else if (tx_set_type == EXT_TX_SET_DTT9_IDTX_1DDCT) |
| return prune_tx_types_2D(bsize, x, tx_set_type, 7); |
| else |
| return 0; |
| break; |
| #endif // CONFIG_EXT_TX |
| } |
| assert(0); |
| return 0; |
| } |
| |
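// Returns 1 if tx_type survives the pruning decision encoded in 'prune', and
// 0 if it should be skipped.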
| static int do_tx_type_search(TX_TYPE tx_type, int prune, |
| TX_TYPE_PRUNE_MODE mode) { |
| // TODO(sarahparker) implement for non ext tx |
| #if CONFIG_EXT_TX |
| if (mode >= PRUNE_2D_ACCURATE) { |
| return !((prune >> tx_type) & 1); |
| } else { |
| return !(((prune >> vtx_tab[tx_type]) & 1) | |
| ((prune >> (htx_tab[tx_type] + 8)) & 1)); |
| } |
| #else |
| // temporary to avoid compiler warnings |
| (void)vtx_tab; |
| (void)htx_tab; |
| (void)tx_type; |
| (void)prune; |
| (void)mode; |
| return 1; |
| #endif // CONFIG_EXT_TX |
| } |
| |
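// Estimates the rate and distortion of a plane directly from its prediction
// SSE and quantizer, using either a simple linear model or the Laplacian
// model in av1_model_rd_from_var_lapndz().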
| static void model_rd_from_sse(const AV1_COMP *const cpi, |
| const MACROBLOCKD *const xd, BLOCK_SIZE bsize, |
| int plane, int64_t sse, int *rate, |
| int64_t *dist) { |
| const struct macroblockd_plane *const pd = &xd->plane[plane]; |
| const int dequant_shift = |
| #if CONFIG_HIGHBITDEPTH |
| (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 : |
| #endif // CONFIG_HIGHBITDEPTH |
| 3; |
| |
  // Fast approximation of the modelling function.
| if (cpi->sf.simple_model_rd_from_var) { |
| const int64_t square_error = sse; |
| int quantizer = (pd->dequant[1] >> dequant_shift); |
| |
| if (quantizer < 120) |
| *rate = (int)((square_error * (280 - quantizer)) >> |
| (16 - AV1_PROB_COST_SHIFT)); |
| else |
| *rate = 0; |
| *dist = (square_error * quantizer) >> 8; |
| } else { |
| av1_model_rd_from_var_lapndz(sse, num_pels_log2_lookup[bsize], |
| pd->dequant[1] >> dequant_shift, rate, dist); |
| } |
| |
| *dist <<= 4; |
| } |
| |
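// Accumulates the modelled rate and distortion over the requested planes of
// the block without performing an actual transform/quantization pass.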
| static void model_rd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bsize, |
| MACROBLOCK *x, MACROBLOCKD *xd, int plane_from, |
| int plane_to, int *out_rate_sum, |
| int64_t *out_dist_sum, int *skip_txfm_sb, |
| int64_t *skip_sse_sb) { |
  // Note that our transform coefficients are 8 times those of an orthogonal
  // transform; hence the quantizer step is also scaled by 8. To get the
  // effective quantizer we need to divide by 8 before sending it to the
  // modeling function.
| int plane; |
| const int ref = xd->mi[0]->mbmi.ref_frame[0]; |
| |
| int64_t rate_sum = 0; |
| int64_t dist_sum = 0; |
| int64_t total_sse = 0; |
| |
| x->pred_sse[ref] = 0; |
| |
| for (plane = plane_from; plane <= plane_to; ++plane) { |
| struct macroblock_plane *const p = &x->plane[plane]; |
| struct macroblockd_plane *const pd = &xd->plane[plane]; |
| const BLOCK_SIZE bs = AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd)); |
| unsigned int sse; |
| int rate; |
| int64_t dist; |
| |
| if (x->skip_chroma_rd && plane) continue; |
| |
| // TODO(geza): Write direct sse functions that do not compute |
| // variance as well. |
| cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, |
| &sse); |
| |
| if (plane == 0) x->pred_sse[ref] = sse; |
| |
| total_sse += sse; |
| |
| model_rd_from_sse(cpi, xd, bs, plane, sse, &rate, &dist); |
| |
| rate_sum += rate; |
| dist_sum += dist; |
| } |
| |
| *skip_txfm_sb = total_sse == 0; |
| *skip_sse_sb = total_sse << 4; |
| *out_rate_sum = (int)rate_sum; |
| *out_dist_sum = dist_sum; |
| } |
| |
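// Returns the sum of squared differences between the original and dequantized
// transform coefficients; the sum of squared original coefficients is stored
// in *ssz.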
| int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, |
| intptr_t block_size, int64_t *ssz) { |
| int i; |
| int64_t error = 0, sqcoeff = 0; |
| |
| for (i = 0; i < block_size; i++) { |
| const int diff = coeff[i] - dqcoeff[i]; |
| error += diff * diff; |
| sqcoeff += coeff[i] * coeff[i]; |
| } |
| |
| *ssz = sqcoeff; |
| return error; |
| } |
| |
| int64_t av1_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff, |
| int block_size) { |
| int i; |
| int64_t error = 0; |
| |
| for (i = 0; i < block_size; i++) { |
| const int diff = coeff[i] - dqcoeff[i]; |
| error += diff * diff; |
| } |
| |
| return error; |
| } |
| |
| #if CONFIG_HIGHBITDEPTH |
| int64_t av1_highbd_block_error_c(const tran_low_t *coeff, |
| const tran_low_t *dqcoeff, intptr_t block_size, |
| int64_t *ssz, int bd) { |
| int i; |
| int64_t error = 0, sqcoeff = 0; |
| int shift = 2 * (bd - 8); |
| int rounding = shift > 0 ? 1 << (shift - 1) : 0; |
| |
| for (i = 0; i < block_size; i++) { |
| const int64_t diff = coeff[i] - dqcoeff[i]; |
| error += diff * diff; |
| sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i]; |
| } |
| assert(error >= 0 && sqcoeff >= 0); |
| error = (error + rounding) >> shift; |
| sqcoeff = (sqcoeff + rounding) >> shift; |
| |
| *ssz = sqcoeff; |
| return error; |
| } |
| #endif // CONFIG_HIGHBITDEPTH |
| |
| #if !CONFIG_LV_MAP |
| static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane, |
| int block, TX_SIZE tx_size, const SCAN_ORDER *scan_order, |
| const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l, |
| int use_fast_coef_costing) { |
| MACROBLOCKD *const xd = &x->e_mbd; |
| MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; |
| const struct macroblock_plane *p = &x->plane[plane]; |
| const struct macroblockd_plane *pd = &xd->plane[plane]; |
| const PLANE_TYPE type = pd->plane_type; |
| const uint16_t *band_count = &band_count_table[tx_size][1]; |
| const int eob = p->eobs[block]; |
| const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); |
| const TX_SIZE tx_size_ctx = txsize_sqr_map[tx_size]; |
| uint8_t token_cache[MAX_TX_SQUARE]; |
| int pt = combine_entropy_contexts(*a, *l); |
| int c, cost; |
| const int16_t *scan = scan_order->scan; |
| const int16_t *nb = scan_order->neighbors; |
| const int ref = is_inter_block(mbmi); |
| int(*head_token_costs)[COEFF_CONTEXTS][TAIL_TOKENS] = |
| x->token_head_costs[tx_size_ctx][type][ref]; |
| int(*tail_token_costs)[COEFF_CONTEXTS][TAIL_TOKENS] = |
| x->token_tail_costs[tx_size_ctx][type][ref]; |
| const int seg_eob = av1_get_tx_eob(&cm->seg, mbmi->segment_id, tx_size); |
| int8_t eob_val; |
| |
| #if CONFIG_HIGHBITDEPTH |
| const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, xd->bd); |
| #else |
| const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, 8); |
| #endif // CONFIG_HIGHBITDEPTH |
| |
| (void)cm; |
| |
| if (eob == 0) { |
| // block zero |
| cost = (*head_token_costs)[pt][0]; |
| } else { |
| if (use_fast_coef_costing) { |
| int band_left = *band_count++; |
| |
| // dc token |
| int v = qcoeff[0]; |
| int16_t prev_t; |
| cost = av1_get_token_cost(v, &prev_t, cat6_bits); |
| eob_val = (eob == 1) ? EARLY_EOB : NO_EOB; |
| cost += av1_get_coeff_token_cost( |
| prev_t, eob_val, 1, (*head_token_costs)[pt], (*tail_token_costs)[pt]); |
| |
| token_cache[0] = av1_pt_energy_class[prev_t]; |
| ++head_token_costs; |
| ++tail_token_costs; |
| |
| // ac tokens |
| for (c = 1; c < eob; c++) { |
| const int rc = scan[c]; |
| int16_t t; |
| |
| v = qcoeff[rc]; |
| cost += av1_get_token_cost(v, &t, cat6_bits); |
| eob_val = |
| (c + 1 == eob) ? (c + 1 == seg_eob ? LAST_EOB : EARLY_EOB) : NO_EOB; |
| cost += av1_get_coeff_token_cost(t, eob_val, 0, |
| (*head_token_costs)[!prev_t], |
| (*tail_token_costs)[!prev_t]); |
| prev_t = t; |
| if (!--band_left) { |
| band_left = *band_count++; |
| ++head_token_costs; |
| ++tail_token_costs; |
| } |
| } |
| } else { // !use_fast_coef_costing |
| int band_left = *band_count++; |
| |
| // dc token |
| int v = qcoeff[0]; |
| int16_t tok; |
| cost = av1_get_token_cost(v, &tok, cat6_bits); |
| eob_val = (eob == 1) ? EARLY_EOB : NO_EOB; |
| cost += av1_get_coeff_token_cost(tok, eob_val, 1, (*head_token_costs)[pt], |
| (*tail_token_costs)[pt]); |
| |
| token_cache[0] = av1_pt_energy_class[tok]; |
| ++head_token_costs; |
| ++tail_token_costs; |
| |
| // ac tokens |
| for (c = 1; c < eob; c++) { |
| const int rc = scan[c]; |
| |
| v = qcoeff[rc]; |
| cost += av1_get_token_cost(v, &tok, cat6_bits); |
| pt = get_coef_context(nb, token_cache, c); |
| eob_val = |
| (c + 1 == eob) ? (c + 1 == seg_eob ? LAST_EOB : EARLY_EOB) : NO_EOB; |
| cost += av1_get_coeff_token_cost( |
| tok, eob_val, 0, (*head_token_costs)[pt], (*tail_token_costs)[pt]); |
| token_cache[rc] = av1_pt_energy_class[tok]; |
| if (!--band_left) { |
| band_left = *band_count++; |
| ++head_token_costs; |
| ++tail_token_costs; |
| } |
| } |
| } |
| } |
| |
| return cost; |
| } |
| #endif // !CONFIG_LV_MAP |
| |
| int av1_cost_coeffs(const AV1_COMP *const cpi, MACROBLOCK *x, int plane, |
| int blk_row, int blk_col, int block, TX_SIZE tx_size, |
| const SCAN_ORDER *scan_order, const ENTROPY_CONTEXT *a, |
| const ENTROPY_CONTEXT *l, int use_fast_coef_costing) { |
| const AV1_COMMON *const cm = &cpi->common; |
| #if !CONFIG_LV_MAP |
| (void)blk_row; |
| (void)blk_col; |
| #if CONFIG_MRC_TX |
| const MACROBLOCKD *xd = &x->e_mbd; |
| const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; |
| const TX_TYPE tx_type = av1_get_tx_type(xd->plane[plane].plane_type, xd, |
| blk_row, blk_col, block, tx_size); |
| const int is_inter = is_inter_block(mbmi); |
| if (tx_type == MRC_DCT && ((is_inter && SIGNAL_MRC_MASK_INTER) || |
| (!is_inter && SIGNAL_MRC_MASK_INTRA))) { |
| const int mrc_mask_cost = |
| av1_cost_color_map(x, plane, block, mbmi->sb_type, tx_size, MRC_MAP); |
| return cost_coeffs(cm, x, plane, block, tx_size, scan_order, a, l, |
| use_fast_coef_costing) + |
| mrc_mask_cost; |
| } |
| #endif |
| return cost_coeffs(cm, x, plane, block, tx_size, scan_order, a, l, |
| use_fast_coef_costing); |
| #else // !CONFIG_LV_MAP |
| (void)scan_order; |
| (void)use_fast_coef_costing; |
| const MACROBLOCKD *xd = &x->e_mbd; |
| const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; |
| const struct macroblockd_plane *pd = &xd->plane[plane]; |
| const BLOCK_SIZE bsize = mbmi->sb_type; |
| const BLOCK_SIZE plane_bsize = |
| AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd)); |
| TXB_CTX txb_ctx; |
| get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx); |
| return av1_cost_coeffs_txb(cm, x, plane, blk_row, blk_col, block, tx_size, |
| &txb_ctx); |
| #endif // !CONFIG_LV_MAP |
| } |
| |
| // Get a transform block's visible dimensions, cropped to the MI units.
| static void get_txb_dimensions(const MACROBLOCKD *xd, int plane, |
| BLOCK_SIZE plane_bsize, int blk_row, int blk_col, |
| BLOCK_SIZE tx_bsize, int *width, int *height, |
| int *visible_width, int *visible_height) { |
| #if !(CONFIG_RECT_TX_EXT) |
| assert(tx_bsize <= plane_bsize); |
| #endif |
| int txb_height = block_size_high[tx_bsize]; |
| int txb_width = block_size_wide[tx_bsize]; |
| const int block_height = block_size_high[plane_bsize]; |
| const int block_width = block_size_wide[plane_bsize]; |
| const struct macroblockd_plane *const pd = &xd->plane[plane]; |
| // TODO(aconverse@google.com): Investigate using crop_width/height here rather |
| // than the MI size |
| const int block_rows = |
| (xd->mb_to_bottom_edge >= 0) |
| ? block_height |
| : (xd->mb_to_bottom_edge >> (3 + pd->subsampling_y)) + block_height; |
| const int block_cols = |
| (xd->mb_to_right_edge >= 0) |
| ? block_width |
| : (xd->mb_to_right_edge >> (3 + pd->subsampling_x)) + block_width; |
| const int tx_unit_size = tx_size_wide_log2[0]; |
| if (width) *width = txb_width; |
| if (height) *height = txb_height; |
| *visible_width = clamp(block_cols - (blk_col << tx_unit_size), 0, txb_width); |
| *visible_height = |
| clamp(block_rows - (blk_row << tx_unit_size), 0, txb_height); |
| } |
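| // Worked example for the clamping above (hypothetical numbers): a 16x16
| // plane block whose last 4 pixel columns fall outside the frame has
| // block_cols = 12; an 8x8 tx at blk_col = 2 (4x4 units, i.e. 8 pixels in)
| // then gets visible_width = clamp(12 - 8, 0, 8) = 4.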
| |
| // Compute the pixel domain distortion from src and dst on all visible 4x4s
| // in the transform block.
| static unsigned pixel_dist(const AV1_COMP *const cpi, const MACROBLOCK *x, |
| int plane, const uint8_t *src, const int src_stride, |
| const uint8_t *dst, const int dst_stride, |
| int blk_row, int blk_col, |
| const BLOCK_SIZE plane_bsize, |
| const BLOCK_SIZE tx_bsize) { |
| int txb_rows, txb_cols, visible_rows, visible_cols; |
| const MACROBLOCKD *xd = &x->e_mbd; |
| |
| get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize, |
| &txb_cols, &txb_rows, &visible_cols, &visible_rows); |
| assert(visible_rows > 0); |
| assert(visible_cols > 0); |
| |
| #if CONFIG_DIST_8X8 |
| if (x->using_dist_8x8 && plane == 0 && txb_cols >= 8 && txb_rows >= 8) |
| return (unsigned)av1_dist_8x8(cpi, x, src, src_stride, dst, dst_stride, |
| tx_bsize, txb_cols, txb_rows, visible_cols, |
| visible_rows, x->qindex); |
| #endif // CONFIG_DIST_8X8 |
| |
| unsigned sse = pixel_dist_visible_only(cpi, x, src, src_stride, dst, |
| dst_stride, tx_bsize, txb_rows, |
| txb_cols, visible_rows, visible_cols); |
| |
| return sse; |
| } |
| |
| // Compute the pixel domain distortion from diff on all visible 4x4s in the |
| // transform block. |
| static int64_t pixel_diff_dist(const MACROBLOCK *x, int plane, |
| const int16_t *diff, const int diff_stride, |
| int blk_row, int blk_col, |
| const BLOCK_SIZE plane_bsize, |
| const BLOCK_SIZE tx_bsize) { |
| int visible_rows, visible_cols; |
| const MACROBLOCKD *xd = &x->e_mbd; |
| #if CONFIG_DIST_8X8 |
| int txb_height = block_size_high[tx_bsize]; |
| int txb_width = block_size_wide[tx_bsize]; |
| const int src_stride = x->plane[plane].src.stride; |
| const int src_idx = (blk_row * src_stride + blk_col) << tx_size_wide_log2[0]; |
| const uint8_t *src = &x->plane[plane].src.buf[src_idx]; |
| #endif |
| |
| get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize, NULL, |
| NULL, &visible_cols, &visible_rows); |
| |
| #if CONFIG_DIST_8X8 |
| if (x->using_dist_8x8 && plane == 0 && txb_width >= 8 && txb_height >= 8) |
| return av1_dist_8x8_diff(x, src, src_stride, diff, diff_stride, txb_width, |
| txb_height, visible_cols, visible_rows, x->qindex); |
| else |
| #endif |
| return aom_sum_squares_2d_i16(diff, diff_stride, visible_cols, |
| visible_rows); |
| } |
| |
| int av1_count_colors(const uint8_t *src, int stride, int rows, int cols) { |
| int val_count[256]; |
| memset(val_count, 0, sizeof(val_count)); |
| for (int r = 0; r < rows; ++r) { |
| for (int c = 0; c < cols; ++c) { |
| ++val_count[src[r * stride + c]]; |
| } |
| } |
| int n = 0; |
| for (int i = 0; i < 256; ++i) { |
| if (val_count[i]) ++n; |
| } |
| return n; |
| } |
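| // E.g. a 2x2 patch with samples { 10, 10, 20, 30 } yields 3 distinct colors.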
| |
| #if CONFIG_HIGHBITDEPTH |
| int av1_count_colors_highbd(const uint8_t *src8, int stride, int rows, int cols, |
| int bit_depth) { |
| assert(bit_depth <= 12); |
| const uint16_t *src = CONVERT_TO_SHORTPTR(src8); |
| int val_count[1 << 12]; |
| memset(val_count, 0, (1 << 12) * sizeof(val_count[0])); |
| for (int r = 0; r < rows; ++r) { |
| for (int c = 0; c < cols; ++c) { |
| ++val_count[src[r * stride + c]]; |
| } |
| } |
| int n = 0; |
| for (int i = 0; i < (1 << bit_depth); ++i) { |
| if (val_count[i]) ++n; |
| } |
| return n; |
| } |
| #endif // CONFIG_HIGHBITDEPTH |
| |
| void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane, |
| BLOCK_SIZE plane_bsize, int block, int blk_row, int blk_col, |
| TX_SIZE tx_size, int64_t *out_dist, int64_t *out_sse, |
| OUTPUT_STATUS output_status) { |
| MACROBLOCKD *const xd = &x->e_mbd; |
| const struct macroblock_plane *const p = &x->plane[plane]; |
| #if CONFIG_DIST_8X8 |
| struct macroblockd_plane *const pd = &xd->plane[plane]; |
| #else // CONFIG_DIST_8X8 |
| const struct macroblockd_plane *const pd = &xd->plane[plane]; |
| #endif // CONFIG_DIST_8X8 |
| |
| if (cpi->sf.use_transform_domain_distortion |
| #if CONFIG_DIST_8X8 |
| && !x->using_dist_8x8 |
| #endif |
| ) { |
| // Transform domain distortion computation is more efficient as it does |
| // not involve an inverse transform, but it is less accurate. |
| const int buffer_length = tx_size_2d[tx_size]; |
| int64_t this_sse; |
| int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2; |
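| // Scaling sketch (assuming MAX_TX_SCALE == 1): coefficients carry an 8x
| // scale, so the raw squared error is 64x; for transforms with a tx scale of
| // 0 the shift of 2 leaves it on the same x16 scale as the pixel-domain path
| // below.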
| tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); |
| tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); |
| |
| #if CONFIG_HIGHBITDEPTH |
| if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) |
| *out_dist = av1_highbd_block_error(coeff, dqcoeff, buffer_length, |
| &this_sse, xd->bd); |
| else |
| #endif |
| *out_dist = av1_block_error(coeff, dqcoeff, buffer_length, &this_sse); |
| |
| *out_dist = RIGHT_SIGNED_SHIFT(*out_dist, shift); |
| *out_sse = RIGHT_SIGNED_SHIFT(this_sse, shift); |
| } else { |
| const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size]; |
| const int bsw = block_size_wide[tx_bsize]; |
| const int bsh = block_size_high[tx_bsize]; |
| const int src_stride = x->plane[plane].src.stride; |
| const int dst_stride = xd->plane[plane].dst.stride; |
| // Scale the transform block index to pixel units.
| const int src_idx = (blk_row * src_stride + blk_col) |
| << tx_size_wide_log2[0]; |
| const int dst_idx = (blk_row * dst_stride + blk_col) |
| << tx_size_wide_log2[0]; |
| const uint8_t *src = &x->plane[plane].src.buf[src_idx]; |
| const uint8_t *dst = &xd->plane[plane].dst.buf[dst_idx]; |
| const tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); |
| const uint16_t eob = p->eobs[block]; |
| |
| assert(cpi != NULL); |
| assert(tx_size_wide_log2[0] == tx_size_high_log2[0]); |
| |
| { |
| const int diff_stride = block_size_wide[plane_bsize]; |
| const int diff_idx = (blk_row * diff_stride + blk_col) |
| << tx_size_wide_log2[0]; |
| const int16_t *diff = &p->src_diff[diff_idx]; |
| *out_sse = pixel_diff_dist(x, plane, diff, diff_stride, blk_row, blk_col, |
| plane_bsize, tx_bsize); |
| #if CONFIG_HIGHBITDEPTH |
| if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) |
| *out_sse = ROUND_POWER_OF_TWO(*out_sse, (xd->bd - 8) * 2); |
| #endif // CONFIG_HIGHBITDEPTH |
| } |
| *out_sse *= 16; |
| |
| if (eob) { |
| if (output_status == OUTPUT_HAS_DECODED_PIXELS) { |
| *out_dist = pixel_dist(cpi, x, plane, src, src_stride, dst, dst_stride, |
| blk_row, blk_col, plane_bsize, tx_bsize); |
| } else { |
| #if CONFIG_HIGHBITDEPTH |
| uint8_t *recon; |
| DECLARE_ALIGNED(16, uint16_t, recon16[MAX_TX_SQUARE]); |
| |
| if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) |
| recon = CONVERT_TO_BYTEPTR(recon16); |
| else |
| recon = (uint8_t *)recon16; |
| #else |
| DECLARE_ALIGNED(16, uint8_t, recon[MAX_TX_SQUARE]); |
| #endif // CONFIG_HIGHBITDEPTH |
| |
| #if CONFIG_HIGHBITDEPTH |
| if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
| aom_highbd_convolve_copy(dst, dst_stride, recon, MAX_TX_SIZE, NULL, 0, |
| NULL, 0, bsw, bsh, xd->bd); |
| } else { |
| #endif // CONFIG_HIGHBITDEPTH |
| aom_convolve_copy(dst, dst_stride, recon, MAX_TX_SIZE, NULL, 0, NULL, |
| 0, bsw, bsh); |
| #if CONFIG_HIGHBITDEPTH |
| } |
| #endif // CONFIG_HIGHBITDEPTH |
| |
| #if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK |
| uint8_t *mrc_mask = BLOCK_OFFSET(xd->mrc_mask, block); |
| #endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK |
| const PLANE_TYPE plane_type = get_plane_type(plane); |
| TX_TYPE tx_type = |
| av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size); |
| av1_inverse_transform_block(xd, dqcoeff, |
| #if CONFIG_LGT_FROM_PRED |
| xd->mi[0]->mbmi.mode, |
| #endif |
| #if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK |
| mrc_mask, |
| #endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK |
| tx_type, tx_size, recon, MAX_TX_SIZE, eob); |
| |
| #if CONFIG_DIST_8X8 |
| if (x->using_dist_8x8 && plane == 0 && (bsw < 8 || bsh < 8)) { |
| // Save decoded pixels for inter blocks in pd->pred so that
| // block_8x8_rd_txfm_daala_dist() does not need to produce them
| // by calling av1_inverse_transform_block() again.
| const int pred_stride = block_size_wide[plane_bsize]; |
| const int pred_idx = (blk_row * pred_stride + blk_col) |
| << tx_size_wide_log2[0]; |
| int16_t *pred = &pd->pred[pred_idx]; |
| int i, j; |
| |
| #if CONFIG_HIGHBITDEPTH |
| if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
| for (j = 0; j < bsh; j++) |
| for (i = 0; i < bsw; i++) |
| pred[j * pred_stride + i] = |
| CONVERT_TO_SHORTPTR(recon)[j * MAX_TX_SIZE + i]; |
| } else { |
| #endif |
| for (j = 0; j < bsh; j++) |
| for (i = 0; i < bsw; i++) |
| pred[j * pred_stride + i] = recon[j * MAX_TX_SIZE + i]; |
| #if CONFIG_HIGHBITDEPTH |
| } |
| #endif // CONFIG_HIGHBITDEPTH |
| } |
| #endif // CONFIG_DIST_8X8 |
| *out_dist = |
| pixel_dist(cpi, x, plane, src, src_stride, recon, MAX_TX_SIZE, |
| blk_row, blk_col, plane_bsize, tx_bsize); |
| } |
| *out_dist *= 16; |
| } else { |
| *out_dist = *out_sse; |
| } |
| } |
| } |
| |
| static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, |
| BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { |
| struct rdcost_block_args *args = arg; |
| MACROBLOCK *const x = args->x; |
| MACROBLOCKD *const xd = &x->e_mbd; |
| const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; |
| const AV1_COMP *cpi = args->cpi; |
| ENTROPY_CONTEXT *a = args->t_above + blk_col; |
| ENTROPY_CONTEXT *l = args->t_left + blk_row; |
| const AV1_COMMON *cm = &cpi->common; |
| int64_t rd1, rd2, rd; |
| RD_STATS this_rd_stats; |
| |
| #if CONFIG_DIST_8X8 |
| // For a sub-8x8 transform inside an 8x8-or-larger luma partition, dist-8x8
| // disables early skip, because the distortion metric used for the sub-8x8
| // transform (MSE) and the reference distortion of the 8x8-or-larger
| // partition (the new distortion metric) are different.
| // The exception is when dist-8x8 is enabled but MSE is still used,
| // i.e. the "--tune=" encoder option is not given.
| int bw = block_size_wide[plane_bsize]; |
| int bh = block_size_high[plane_bsize]; |
| int disable_early_skip = |
| x->using_dist_8x8 && plane == 0 && bw >= 8 && bh >= 8 && |
| (tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4) && |
| x->tune_metric != AOM_TUNE_PSNR; |
| #endif // CONFIG_DIST_8X8 |
| |
| av1_init_rd_stats(&this_rd_stats); |
| |
| if (args->exit_early) return; |
| |
| if (!is_inter_block(mbmi)) { |
| av1_predict_intra_block_facade(cm, xd, plane, block, blk_col, blk_row, |
| tx_size); |
| av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size); |
| } |
| |
| #if !CONFIG_TXK_SEL |
| // full forward transform and quantization |
| const int coeff_ctx = combine_entropy_contexts(*a, *l); |
| #if DISABLE_TRELLISQ_SEARCH |
| av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, |
| coeff_ctx, AV1_XFORM_QUANT_B); |
| #else |
| av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, |
| coeff_ctx, AV1_XFORM_QUANT_FP); |
| |
| const int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2; |
| tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block); |
| tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block); |
| const int buffer_length = tx_size_2d[tx_size]; |
| int64_t tmp_dist; |
| int64_t tmp; |
| #if CONFIG_HIGHBITDEPTH |
| if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) |
| tmp_dist = |
| av1_highbd_block_error(coeff, dqcoeff, buffer_length, &tmp, xd->bd); |
| else |
| #endif |
| tmp_dist = av1_block_error(coeff, dqcoeff, buffer_length, &tmp); |
| tmp_dist = RIGHT_SIGNED_SHIFT(tmp_dist, shift); |
| |
| if ( |
| #if CONFIG_DIST_8X8 |
| disable_early_skip || |
| #endif |
| RDCOST(x->rdmult, 0, tmp_dist) + args->this_rd < args->best_rd) { |
| av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize, tx_size, |
| a, l, CONFIG_LV_MAP); |
| } else { |
| args->exit_early = 1; |
| return; |
| } |
| #endif // DISABLE_TRELLISQ_SEARCH |
| |
| #if CONFIG_MRC_TX |
| if (mbmi->tx_type == MRC_DCT && !mbmi->valid_mrc_mask) { |
| args->exit_early = 1; |
| return; |
| } |
| #endif // CONFIG_MRC_TX |
| |
| if (!is_inter_block(mbmi)) { |
| struct macroblock_plane *const p = &x->plane[plane]; |
| av1_inverse_transform_block_facade(xd, plane, block, blk_row, blk_col, |
| p->eobs[block]); |
| av1_dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col, |
| tx_size, &this_rd_stats.dist, &this_rd_stats.sse, |
| OUTPUT_HAS_DECODED_PIXELS); |
| } else { |
| av1_dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col, |
| tx_size, &this_rd_stats.dist, &this_rd_stats.sse, |
| OUTPUT_HAS_PREDICTED_PIXELS); |
| } |
| rd = RDCOST(x->rdmult, 0, this_rd_stats.dist); |
| if (args->this_rd + rd > args->best_rd) { |
| args->exit_early = 1; |
| return; |
| } |
| #if CONFIG_CFL |
| if (plane == AOM_PLANE_Y && xd->cfl->store_y) { |
| assert(!is_inter_block(mbmi) || plane_bsize < BLOCK_8X8); |
| cfl_store_tx(xd, blk_row, blk_col, tx_size, plane_bsize); |
| } |
| #endif // CONFIG_CFL |
| const PLANE_TYPE plane_type = get_plane_type(plane); |
| const TX_TYPE tx_type = |
| av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size); |
| const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, mbmi); |
| this_rd_stats.rate = |
| av1_cost_coeffs(cpi, x, plane, blk_row, blk_col, block, tx_size, |
| scan_order, a, l, args->use_fast_coef_costing); |
| #else // !CONFIG_TXK_SEL |
| av1_search_txk_type(cpi, x, plane, block, blk_row, blk_col, plane_bsize, |
| tx_size, a, l, args->use_fast_coef_costing, |
| &this_rd_stats); |
| #endif // !CONFIG_TXK_SEL |
| |
| #if CONFIG_RD_DEBUG |
| av1_update_txb_coeff_cost(&this_rd_stats, plane, tx_size, blk_row, blk_col, |
| this_rd_stats.rate); |
| #endif // CONFIG_RD_DEBUG |
| av1_set_txb_context(x, plane, block, tx_size, a, l); |
| |
| rd1 = RDCOST(x->rdmult, this_rd_stats.rate, this_rd_stats.dist); |
| rd2 = RDCOST(x->rdmult, 0, this_rd_stats.sse); |
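| // rd1 models coding the quantized coefficients as-is; rd2 models skipping
| // the block (zero rate, distortion equal to the prediction SSE). Taking
| // the minimum mirrors the encoder's per-block skip decision.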
| |
| // TODO(jingning): temporarily enabled only for luma component |
| rd = AOMMIN(rd1, rd2); |
| |
| this_rd_stats.skip &= !x->plane[plane].eobs[block]; |
| |
| av1_merge_rd_stats(&args->rd_stats, &this_rd_stats); |
| |
| args->this_rd += rd; |
| |
| #if CONFIG_DIST_8X8 |
| if (!disable_early_skip) |
| #endif |
| if (args->this_rd > args->best_rd) { |
| args->exit_early = 1; |
| return; |
| } |
| } |
| |
| #if CONFIG_DIST_8X8 |
| static void dist_8x8_sub8x8_txfm_rd(const AV1_COMP *const cpi, MACROBLOCK *x, |
| BLOCK_SIZE bsize, |
| struct rdcost_block_args *args) { |
| MACROBLOCKD *const xd = &x->e_mbd; |
| const struct macroblockd_plane *const pd = &xd->plane[0]; |
| const struct macroblock_plane *const p = &x->plane[0]; |
| MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; |
| const int src_stride = p->src.stride; |
| const int dst_stride = pd->dst.stride; |
| const uint8_t *src = &p->src.buf[0]; |
| const uint8_t *dst = &pd->dst.buf[0]; |
| const int16_t *pred = &pd->pred[0]; |
| int bw = block_size_wide[bsize]; |
| int bh = block_size_high[bsize]; |
| int visible_w = bw; |
| int visible_h = bh; |
| |
| int i, j; |
| int64_t rd, rd1, rd2; |
| unsigned int tmp1, tmp2; |
| int qindex = x->qindex; |
| |
| assert((bw & 0x07) == 0); |
| assert((bh & 0x07) == 0); |
| |
| get_txb_dimensions(xd, 0, bsize, 0, 0, bsize, &bw, &bh, &visible_w, |
| &visible_h); |
| |
| #if CONFIG_HIGHBITDEPTH |
| uint8_t *pred8; |
| DECLARE_ALIGNED(16, uint16_t, pred16[MAX_TX_SQUARE]); |
| |
| if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) |
| pred8 = CONVERT_TO_BYTEPTR(pred16); |
| else |
| pred8 = (uint8_t *)pred16; |
| #else |
| DECLARE_ALIGNED(16, uint8_t, pred8[MAX_TX_SQUARE]); |
| #endif // CONFIG_HIGHBITDEPTH |
| |
| #if CONFIG_HIGHBITDEPTH |
| if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
| for (j = 0; j < bh; j++) |
| for (i = 0; i < bw; i++) |
| CONVERT_TO_SHORTPTR(pred8)[j * bw + i] = pred[j * bw + i]; |
| } else { |
| #endif |
| for (j = 0; j < bh; j++) |
| for (i = 0; i < bw; i++) pred8[j * bw + i] = (uint8_t)pred[j * bw + i]; |
| #if CONFIG_HIGHBITDEPTH |
| } |
| #endif // CONFIG_HIGHBITDEPTH |
| |
| tmp1 = (unsigned)av1_dist_8x8(cpi, x, src, src_stride, pred8, bw, bsize, bw, |
| bh, visible_w, visible_h, qindex); |
| tmp2 = (unsigned)av1_dist_8x8(cpi, x, src, src_stride, dst, dst_stride, bsize, |
| bw, bh, visible_w, visible_h, qindex); |
| |
| if (!is_inter_block(mbmi)) { |
| if (x->tune_metric == AOM_TUNE_PSNR) { |
| assert(args->rd_stats.sse == tmp1 * 16); |
| assert(args->rd_stats.dist == tmp2 * 16); |
| } |
| args->rd_stats.sse = (int64_t)tmp1 * 16; |
| args->rd_stats.dist = (int64_t)tmp2 * 16; |
| } else { |
| // For inter mode, the decoded pixels are provided in pd->pred, |
| // while the predicted pixels are in dst. |
| if (x->tune_metric == AOM_TUNE_PSNR) { |
| assert(args->rd_stats.sse == tmp2 * 16); |
| assert(args->rd_stats.dist == tmp1 * 16); |
| } |
| args->rd_stats.sse = (int64_t)tmp2 * 16; |
| args->rd_stats.dist = (int64_t)tmp1 * 16; |
| } |
| |
| rd1 = RDCOST(x->rdmult, args->rd_stats.rate, args->rd_stats.dist); |
| rd2 = RDCOST(x->rdmult, 0, args->rd_stats.sse); |
| rd = AOMMIN(rd1, rd2); |
| |
| args->rd_stats.rdcost = rd; |
| args->this_rd = rd; |
| |
| if (args->this_rd > args->best_rd) args->exit_early = 1; |
| } |
| #endif // CONFIG_DIST_8X8 |
| |
| static void txfm_rd_in_plane(MACROBLOCK *x, const AV1_COMP *cpi, |
| RD_STATS *rd_stats, int64_t ref_best_rd, int plane, |
| BLOCK_SIZE bsize, TX_SIZE tx_size, |
| int use_fast_coef_costing) {
| MACROBLOCKD *const xd = &x->e_mbd; |
| const struct macroblockd_plane *const pd = &xd->plane[plane]; |
| struct rdcost_block_args args; |
| av1_zero(args); |
| args.x = x; |
| args.cpi = cpi; |
| args.best_rd = ref_best_rd; |
| args.use_fast_coef_costing = use_fast_coef_costing;
| av1_init_rd_stats(&args.rd_stats); |
| |
| if (plane == 0) xd->mi[0]->mbmi.tx_size = tx_size; |
| |
| av1_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left); |
| |
| av1_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm, |
| &args); |
| #if CONFIG_DIST_8X8 |
| int bw = block_size_wide[bsize]; |
| int bh = block_size_high[bsize]; |
| |
| if (x->using_dist_8x8 && !args.exit_early && plane == 0 && bw >= 8 && |
| bh >= 8 && (tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4)) |
| dist_8x8_sub8x8_txfm_rd(cpi, x, bsize, &args); |
| #endif |
| |
| if (args.exit_early) { |
| av1_invalid_rd_stats(rd_stats); |
| } else { |
| *rd_stats = args.rd_stats; |
| } |
| } |
| |
| static int tx_size_cost(const AV1_COMMON *const cm, const MACROBLOCK *const x, |
| BLOCK_SIZE bsize, TX_SIZE tx_size) { |
| const MACROBLOCKD *const xd = &x->e_mbd; |
| const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; |
| |
| if (cm->tx_mode == TX_MODE_SELECT && block_signals_txsize(mbmi->sb_type)) { |
| const int is_inter = is_inter_block(mbmi); |
| const int32_t tx_size_cat = is_inter ? inter_tx_size_cat_lookup[bsize] |
| : intra_tx_size_cat_lookup[bsize]; |
| const TX_SIZE coded_tx_size = txsize_sqr_up_map[tx_size]; |
| const int depth = tx_size_to_depth(coded_tx_size); |
| const int tx_size_ctx = get_tx_size_context(xd); |
| int r_tx_size = x->tx_size_cost[tx_size_cat][tx_size_ctx][depth]; |
| #if CONFIG_RECT_TX_EXT |
| if (is_quarter_tx_allowed(xd, mbmi, is_inter) && tx_size != coded_tx_size) |
| r_tx_size += |
| x->quarter_tx_size_cost[tx_size == quarter_txsize_lookup[bsize]]; |
| #endif |
| return r_tx_size; |
| } else { |
| return 0; |
| } |
| } |
| |
| #if CONFIG_LGT_FROM_PRED |
| int av1_lgt_cost(const AV1_COMMON *cm, const MACROBLOCK *x, |
| const MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane, |
| TX_SIZE tx_size, int use_lgt) { |
| if (plane > 0) return 0; |
| const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; |
| const int is_inter = is_inter_block(mbmi); |
| |
| assert(is_lgt_allowed(mbmi->mode, tx_size)); |
| if (get_ext_tx_types(tx_size, bsize, is_inter, cm->reduced_tx_set_used) > 1 && |
| !xd->lossless[xd->mi[0]->mbmi.segment_id]) { |
| const int ext_tx_set = |
| get_ext_tx_set(tx_size, bsize, is_inter, cm->reduced_tx_set_used); |
| if (LGT_FROM_PRED_INTRA && !is_inter && ext_tx_set > 0 && |
| ALLOW_INTRA_EXT_TX) |
| return x->intra_lgt_cost[txsize_sqr_map[tx_size]][mbmi->mode][use_lgt]; |
| if (LGT_FROM_PRED_INTRA && is_inter && ext_tx_set > 0) |
| return x->inter_lgt_cost[txsize_sqr_map[tx_size]][use_lgt]; |
| } |
| return 0; |
| } |
| #endif // CONFIG_LGT_FROM_PRED |
| |
| // TODO(angiebird): use this function wherever possible
| int av1_tx_type_cost(const AV1_COMMON *cm, const MACROBLOCK *x, |
| const MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane, |
| TX_SIZE tx_size, TX_TYPE tx_type) { |
| if (plane > 0) return 0; |
| |
| #if CONFIG_LGT_FROM_PRED |
| assert(!xd->mi[0]->mbmi.use_lgt); |
| #endif |
| tx_size = get_min_tx_size(tx_size); |
| |
| const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; |
| const int is_inter = is_inter_block(mbmi); |
| |