/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include <assert.h>
#include <math.h>
#include "./aom_dsp_rtcd.h"
#include "./av1_rtcd.h"
#include "aom_dsp/aom_dsp_common.h"
#include "aom_dsp/blend.h"
#include "aom_mem/aom_mem.h"
#include "aom_ports/mem.h"
#include "aom_ports/system_state.h"
#include "av1/common/common.h"
#include "av1/common/common_data.h"
#include "av1/common/entropy.h"
#include "av1/common/entropymode.h"
#include "av1/common/idct.h"
#include "av1/common/mvref_common.h"
#include "av1/common/pred_common.h"
#include "av1/common/quant_common.h"
#include "av1/common/reconinter.h"
#include "av1/common/reconintra.h"
#include "av1/common/scan.h"
#include "av1/common/seg_common.h"
#if CONFIG_WARPED_MOTION
#include "av1/common/warped_motion.h"
#endif // CONFIG_WARPED_MOTION
#include "av1/encoder/aq_variance.h"
#include "av1/encoder/cost.h"
#include "av1/encoder/encodemb.h"
#include "av1/encoder/encodemv.h"
#include "av1/encoder/encoder.h"
#include "av1/encoder/hybrid_fwd_txfm.h"
#include "av1/encoder/mcomp.h"
#if CONFIG_PALETTE
#include "av1/encoder/palette.h"
#endif // CONFIG_PALETTE
#include "av1/encoder/quantize.h"
#include "av1/encoder/ratectrl.h"
#include "av1/encoder/rd.h"
#include "av1/encoder/rdopt.h"
#include "av1/encoder/tokenize.h"
#if CONFIG_PVQ
#include "av1/encoder/pvq_encoder.h"
#endif
#if CONFIG_PVQ || CONFIG_DAALA_DIST
#include "av1/common/pvq.h"
#endif
#if CONFIG_DUAL_FILTER
#define DUAL_FILTER_SET_SIZE (SWITCHABLE_FILTERS * SWITCHABLE_FILTERS)
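// Each entry pairs two interpolation filter indices so the dual-filter
// rate-distortion search can evaluate every combination of the two filters.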
static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = {
{ 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 }, { 1, 0 }, { 1, 1 },
{ 1, 2 }, { 1, 3 }, { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 },
{ 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 },
};
#endif // CONFIG_DUAL_FILTER
#if CONFIG_EXT_REFS
#define LAST_FRAME_MODE_MASK \
((1 << INTRA_FRAME) | (1 << LAST2_FRAME) | (1 << LAST3_FRAME) | \
(1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define LAST2_FRAME_MODE_MASK \
((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST3_FRAME) | \
(1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define LAST3_FRAME_MODE_MASK \
((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
(1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define GOLDEN_FRAME_MODE_MASK \
((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
(1 << LAST3_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME))
#define BWDREF_FRAME_MODE_MASK \
((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
(1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME))
#define ALTREF_FRAME_MODE_MASK \
((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
(1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME))
#else
#define LAST_FRAME_MODE_MASK \
((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
#define GOLDEN_FRAME_MODE_MASK \
((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
#define ALTREF_FRAME_MODE_MASK \
((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | (1 << INTRA_FRAME))
#endif // CONFIG_EXT_REFS
#if CONFIG_EXT_REFS
#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | (1 << BWDREF_FRAME) | 0x01)
#else
#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | 0x01)
#endif // CONFIG_EXT_REFS
#define MIN_EARLY_TERM_INDEX 3
#define NEW_MV_DISCOUNT_FACTOR 8
#if CONFIG_EXT_INTRA
#define ANGLE_SKIP_THRESH 10
#define FILTER_FAST_SEARCH 1
#endif // CONFIG_EXT_INTRA
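// Linear classifier used by adst_vs_flipadst(): the first four entries are
// the weights and bias applied to the vertical error distribution, the last
// four apply to the horizontal one.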
const double ADST_FLIP_SVM[8] = { -6.6623, -2.8062, -3.2531, 3.1671, // vert
-7.7051, -3.2234, -3.6193, 3.4533 }; // horz
typedef struct {
PREDICTION_MODE mode;
MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;
typedef struct { MV_REFERENCE_FRAME ref_frame[2]; } REF_DEFINITION;
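// Accumulator state threaded through the per-transform-block RD callbacks
// (see block_rd_txfm()): rd_stats and this_rd collect the running totals,
// and exit_early is set once this_rd exceeds best_rd.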
struct rdcost_block_args {
const AV1_COMP *cpi;
MACROBLOCK *x;
ENTROPY_CONTEXT t_above[2 * MAX_MIB_SIZE];
ENTROPY_CONTEXT t_left[2 * MAX_MIB_SIZE];
RD_STATS rd_stats;
int64_t this_rd;
int64_t best_rd;
int exit_early;
int use_fast_coef_costing;
const SCAN_ORDER *scan_order;
};
#define LAST_NEW_MV_INDEX 6
static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
{ NEARESTMV, { LAST_FRAME, NONE_FRAME } },
#if CONFIG_EXT_REFS
{ NEARESTMV, { LAST2_FRAME, NONE_FRAME } },
{ NEARESTMV, { LAST3_FRAME, NONE_FRAME } },
{ NEARESTMV, { BWDREF_FRAME, NONE_FRAME } },
#endif // CONFIG_EXT_REFS
{ NEARESTMV, { ALTREF_FRAME, NONE_FRAME } },
{ NEARESTMV, { GOLDEN_FRAME, NONE_FRAME } },
{ DC_PRED, { INTRA_FRAME, NONE_FRAME } },
{ NEWMV, { LAST_FRAME, NONE_FRAME } },
#if CONFIG_EXT_REFS
{ NEWMV, { LAST2_FRAME, NONE_FRAME } },
{ NEWMV, { LAST3_FRAME, NONE_FRAME } },
{ NEWMV, { BWDREF_FRAME, NONE_FRAME } },
#endif // CONFIG_EXT_REFS
{ NEWMV, { ALTREF_FRAME, NONE_FRAME } },
{ NEWMV, { GOLDEN_FRAME, NONE_FRAME } },
{ NEARMV, { LAST_FRAME, NONE_FRAME } },
#if CONFIG_EXT_REFS
{ NEARMV, { LAST2_FRAME, NONE_FRAME } },
{ NEARMV, { LAST3_FRAME, NONE_FRAME } },
{ NEARMV, { BWDREF_FRAME, NONE_FRAME } },
#endif // CONFIG_EXT_REFS
{ NEARMV, { ALTREF_FRAME, NONE_FRAME } },
{ NEARMV, { GOLDEN_FRAME, NONE_FRAME } },
#if CONFIG_EXT_INTER
{ NEWFROMNEARMV, { LAST_FRAME, NONE_FRAME } },
#if CONFIG_EXT_REFS
{ NEWFROMNEARMV, { LAST2_FRAME, NONE_FRAME } },
{ NEWFROMNEARMV, { LAST3_FRAME, NONE_FRAME } },
{ NEWFROMNEARMV, { BWDREF_FRAME, NONE_FRAME } },
#endif // CONFIG_EXT_REFS
{ NEWFROMNEARMV, { ALTREF_FRAME, NONE_FRAME } },
{ NEWFROMNEARMV, { GOLDEN_FRAME, NONE_FRAME } },
#endif // CONFIG_EXT_INTER
{ ZEROMV, { LAST_FRAME, NONE_FRAME } },
#if CONFIG_EXT_REFS
{ ZEROMV, { LAST2_FRAME, NONE_FRAME } },
{ ZEROMV, { LAST3_FRAME, NONE_FRAME } },
{ ZEROMV, { BWDREF_FRAME, NONE_FRAME } },
#endif // CONFIG_EXT_REFS
{ ZEROMV, { GOLDEN_FRAME, NONE_FRAME } },
{ ZEROMV, { ALTREF_FRAME, NONE_FRAME } },
// TODO(zoeliu): May need to reconsider the order of the modes to check
#if CONFIG_EXT_INTER
{ NEAREST_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
#if CONFIG_EXT_REFS
{ NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
{ NEAREST_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
#endif // CONFIG_EXT_REFS
{ NEAREST_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
#if CONFIG_EXT_REFS
{ NEAREST_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
{ NEAREST_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
{ NEAREST_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
{ NEAREST_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
#endif // CONFIG_EXT_REFS
#else // CONFIG_EXT_INTER
{ NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
#if CONFIG_EXT_REFS
{ NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
{ NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
#endif // CONFIG_EXT_REFS
{ NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
#if CONFIG_EXT_REFS
{ NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
{ NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
{ NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
{ NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
#endif // CONFIG_EXT_REFS
#endif // CONFIG_EXT_INTER
{ TM_PRED, { INTRA_FRAME, NONE_FRAME } },
#if CONFIG_ALT_INTRA
{ SMOOTH_PRED, { INTRA_FRAME, NONE_FRAME } },
#endif // CONFIG_ALT_INTRA
#if CONFIG_EXT_INTER
{ NEAR_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
{ NEAREST_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
{ NEAR_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
{ NEW_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
{ NEAREST_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
{ NEW_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
{ NEAR_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
{ NEW_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
{ ZERO_ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
#if CONFIG_EXT_REFS
{ NEAR_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
{ NEAREST_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
{ NEAR_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
{ NEW_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
{ NEAREST_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
{ NEW_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
{ NEAR_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
{ NEW_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
{ ZERO_ZEROMV, { LAST2_FRAME, ALTREF_FRAME } },
{ NEAR_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
{ NEAREST_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
{ NEAR_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
{ NEW_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
{ NEAREST_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
{ NEW_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
{ NEAR_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
{ NEW_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
{ ZERO_ZEROMV, { LAST3_FRAME, ALTREF_FRAME } },
#endif // CONFIG_EXT_REFS
{ NEAR_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
{ NEAREST_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
{ NEAR_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
{ NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
{ NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
{ NEW_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
{ NEAR_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
{ NEW_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
{ ZERO_ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },
#if CONFIG_EXT_REFS
{ NEAR_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
{ NEAREST_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
{ NEAR_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
{ NEW_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
{ NEAREST_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
{ NEW_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
{ NEAR_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
{ NEW_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
{ ZERO_ZEROMV, { LAST_FRAME, BWDREF_FRAME } },
{ NEAR_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
{ NEAREST_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
{ NEAR_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
{ NEW_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
{ NEAREST_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
{ NEW_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
{ NEAR_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
{ NEW_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
{ ZERO_ZEROMV, { LAST2_FRAME, BWDREF_FRAME } },
{ NEAR_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
{ NEAREST_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
{ NEAR_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
{ NEW_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
{ NEAREST_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
{ NEW_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
{ NEAR_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
{ NEW_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
{ ZERO_ZEROMV, { LAST3_FRAME, BWDREF_FRAME } },
{ NEAR_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
{ NEAREST_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
{ NEAR_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
{ NEW_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
{ NEAREST_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
{ NEW_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
{ NEAR_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
{ NEW_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
{ ZERO_ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } },
#endif // CONFIG_EXT_REFS
#else // CONFIG_EXT_INTER
{ NEARMV, { LAST_FRAME, ALTREF_FRAME } },
{ NEWMV, { LAST_FRAME, ALTREF_FRAME } },
#if CONFIG_EXT_REFS
{ NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
{ NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
{ NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
{ NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
#endif // CONFIG_EXT_REFS
{ NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
{ NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
#if CONFIG_EXT_REFS
{ NEARMV, { LAST_FRAME, BWDREF_FRAME } },
{ NEWMV, { LAST_FRAME, BWDREF_FRAME } },
{ NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
{ NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
{ NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
{ NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
{ NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
{ NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
#endif // CONFIG_EXT_REFS
{ ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
#if CONFIG_EXT_REFS
{ ZEROMV, { LAST2_FRAME, ALTREF_FRAME } },
{ ZEROMV, { LAST3_FRAME, ALTREF_FRAME } },
#endif // CONFIG_EXT_REFS
{ ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },
#if CONFIG_EXT_REFS
{ ZEROMV, { LAST_FRAME, BWDREF_FRAME } },
{ ZEROMV, { LAST2_FRAME, BWDREF_FRAME } },
{ ZEROMV, { LAST3_FRAME, BWDREF_FRAME } },
{ ZEROMV, { GOLDEN_FRAME, BWDREF_FRAME } },
#endif // CONFIG_EXT_REFS
#endif // CONFIG_EXT_INTER
{ H_PRED, { INTRA_FRAME, NONE_FRAME } },
{ V_PRED, { INTRA_FRAME, NONE_FRAME } },
{ D135_PRED, { INTRA_FRAME, NONE_FRAME } },
{ D207_PRED, { INTRA_FRAME, NONE_FRAME } },
{ D153_PRED, { INTRA_FRAME, NONE_FRAME } },
{ D63_PRED, { INTRA_FRAME, NONE_FRAME } },
{ D117_PRED, { INTRA_FRAME, NONE_FRAME } },
{ D45_PRED, { INTRA_FRAME, NONE_FRAME } },
#if CONFIG_EXT_INTER
{ ZEROMV, { LAST_FRAME, INTRA_FRAME } },
{ NEARESTMV, { LAST_FRAME, INTRA_FRAME } },
{ NEARMV, { LAST_FRAME, INTRA_FRAME } },
{ NEWMV, { LAST_FRAME, INTRA_FRAME } },
#if CONFIG_EXT_REFS
{ ZEROMV, { LAST2_FRAME, INTRA_FRAME } },
{ NEARESTMV, { LAST2_FRAME, INTRA_FRAME } },
{ NEARMV, { LAST2_FRAME, INTRA_FRAME } },
{ NEWMV, { LAST2_FRAME, INTRA_FRAME } },
{ ZEROMV, { LAST3_FRAME, INTRA_FRAME } },
{ NEARESTMV, { LAST3_FRAME, INTRA_FRAME } },
{ NEARMV, { LAST3_FRAME, INTRA_FRAME } },
{ NEWMV, { LAST3_FRAME, INTRA_FRAME } },
#endif // CONFIG_EXT_REFS
{ ZEROMV, { GOLDEN_FRAME, INTRA_FRAME } },
{ NEARESTMV, { GOLDEN_FRAME, INTRA_FRAME } },
{ NEARMV, { GOLDEN_FRAME, INTRA_FRAME } },
{ NEWMV, { GOLDEN_FRAME, INTRA_FRAME } },
#if CONFIG_EXT_REFS
{ ZEROMV, { BWDREF_FRAME, INTRA_FRAME } },
{ NEARESTMV, { BWDREF_FRAME, INTRA_FRAME } },
{ NEARMV, { BWDREF_FRAME, INTRA_FRAME } },
{ NEWMV, { BWDREF_FRAME, INTRA_FRAME } },
#endif // CONFIG_EXT_REFS
{ ZEROMV, { ALTREF_FRAME, INTRA_FRAME } },
{ NEARESTMV, { ALTREF_FRAME, INTRA_FRAME } },
{ NEARMV, { ALTREF_FRAME, INTRA_FRAME } },
{ NEWMV, { ALTREF_FRAME, INTRA_FRAME } },
#endif // CONFIG_EXT_INTER
};
static const REF_DEFINITION av1_ref_order[MAX_REFS] = {
{ { LAST_FRAME, NONE_FRAME } },
#if CONFIG_EXT_REFS
{ { LAST2_FRAME, NONE_FRAME } }, { { LAST3_FRAME, NONE_FRAME } },
{ { BWDREF_FRAME, NONE_FRAME } },
#endif // CONFIG_EXT_REFS
{ { GOLDEN_FRAME, NONE_FRAME } }, { { ALTREF_FRAME, NONE_FRAME } },
{ { LAST_FRAME, ALTREF_FRAME } },
#if CONFIG_EXT_REFS
{ { LAST2_FRAME, ALTREF_FRAME } }, { { LAST3_FRAME, ALTREF_FRAME } },
#endif // CONFIG_EXT_REFS
{ { GOLDEN_FRAME, ALTREF_FRAME } },
#if CONFIG_EXT_REFS
{ { LAST_FRAME, BWDREF_FRAME } }, { { LAST2_FRAME, BWDREF_FRAME } },
{ { LAST3_FRAME, BWDREF_FRAME } }, { { GOLDEN_FRAME, BWDREF_FRAME } },
#endif // CONFIG_EXT_REFS
{ { INTRA_FRAME, NONE_FRAME } },
};
#if CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE
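// Cost, in units of av1_cost_bit(128, 0) (one raw bit), of coding value v
// with a quasi-uniform (truncated binary) code over n symbols: with
// l = get_unsigned_bits(n) and m = (1 << l) - n, the first m symbols take
// l - 1 bits and the rest take l bits. For example, with n == 5 and l == 3
// (so m == 3), v in {0, 1, 2} costs 2 bits and v in {3, 4} costs 3 bits.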
static INLINE int write_uniform_cost(int n, int v) {
int l = get_unsigned_bits(n), m = (1 << l) - n;
if (l == 0) return 0;
if (v < m)
return (l - 1) * av1_cost_bit(128, 0);
else
return l * av1_cost_bit(128, 0);
}
#endif // CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE
// Constants for the PRUNE_ONE and PRUNE_TWO decision boundaries
#define FAST_EXT_TX_CORR_MID 0.0
#define FAST_EXT_TX_EDST_MID 0.1
#define FAST_EXT_TX_CORR_MARGIN 0.5
#define FAST_EXT_TX_EDST_MARGIN 0.3
static const TX_TYPE_1D vtx_tab[TX_TYPES] = {
DCT_1D, ADST_1D, DCT_1D, ADST_1D,
#if CONFIG_EXT_TX
FLIPADST_1D, DCT_1D, FLIPADST_1D, ADST_1D, FLIPADST_1D, IDTX_1D,
DCT_1D, IDTX_1D, ADST_1D, IDTX_1D, FLIPADST_1D, IDTX_1D,
#endif // CONFIG_EXT_TX
};
static const TX_TYPE_1D htx_tab[TX_TYPES] = {
DCT_1D, DCT_1D, ADST_1D, ADST_1D,
#if CONFIG_EXT_TX
DCT_1D, FLIPADST_1D, FLIPADST_1D, FLIPADST_1D, ADST_1D, IDTX_1D,
IDTX_1D, DCT_1D, IDTX_1D, ADST_1D, IDTX_1D, FLIPADST_1D,
#endif // CONFIG_EXT_TX
};
#if CONFIG_DAALA_DIST
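// Integer variance of the 16 samples in a 4x4 block: s2 - sum^2 / 16 is
// 16x the variance, so the result is shifted back down by 4.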
static int od_compute_var_4x4(od_coeff *x, int stride) {
int sum;
int s2;
int i;
sum = 0;
s2 = 0;
for (i = 0; i < 4; i++) {
int j;
for (j = 0; j < 4; j++) {
int t;
t = x[i * stride + j];
sum += t;
s2 += t * t;
}
}
// TODO(yushin): Check whether any changes are required for high bit depth.
return (s2 - (sum * sum >> 4)) >> 4;
}
/* OD_DIST_LP_MID controls the frequency weighting filter used for computing
the distortion. For a value X, the filter is [1 X 1]/(X + 2) and
is applied both horizontally and vertically. For X=5, the filter is
a good approximation for the OD_QM8_Q4_HVS quantization matrix. */
#define OD_DIST_LP_MID (5)
#define OD_DIST_LP_NORM (OD_DIST_LP_MID + 2)
static double od_compute_dist_8x8(int qm, int use_activity_masking, od_coeff *x,
od_coeff *y, od_coeff *e_lp, int stride) {
double sum;
int min_var;
double mean_var;
double var_stat;
double activity;
double calibration;
int i;
int j;
double vardist;
vardist = 0;
OD_ASSERT(qm != OD_FLAT_QM);
#if 1
min_var = INT_MAX;
mean_var = 0;
for (i = 0; i < 3; i++) {
for (j = 0; j < 3; j++) {
int varx;
int vary;
varx = od_compute_var_4x4(x + 2 * i * stride + 2 * j, stride);
vary = od_compute_var_4x4(y + 2 * i * stride + 2 * j, stride);
min_var = OD_MINI(min_var, varx);
mean_var += 1. / (1 + varx);
/* The cast to (double) is to avoid an overflow before the sqrt.*/
vardist += varx - 2 * sqrt(varx * (double)vary) + vary;
}
}
/* We use a different variance statistic depending on whether activity
masking is used, since the harmonic mean appeared slightly worse with
masking off. The calibration constant just ensures that we preserve the
rate compared to activity=1. */
if (use_activity_masking) {
calibration = 1.95;
var_stat = 9. / mean_var;
} else {
calibration = 1.62;
var_stat = min_var;
}
/* 1.62 is a calibration constant, 0.25 is a noise floor and 1/6 is the
activity masking constant. */
activity = calibration * pow(.25 + var_stat, -1. / 6);
#else
activity = 1;
#endif
sum = 0;
for (i = 0; i < 8; i++) {
for (j = 0; j < 8; j++)
sum += e_lp[i * stride + j] * (double)e_lp[i * stride + j];
}
/* Normalize the filter to unit DC response. */
sum *= 1. / (OD_DIST_LP_NORM * OD_DIST_LP_NORM * OD_DIST_LP_NORM *
OD_DIST_LP_NORM);
return activity * activity * (sum + vardist);
}
// Note: inputs x and y are in the pixel domain.
static double od_compute_dist(int qm, int activity_masking, od_coeff *x,
od_coeff *y, int bsize_w, int bsize_h,
int qindex) {
int i;
double sum;
sum = 0;
assert(bsize_w >= 8 && bsize_h >= 8);
if (qm == OD_FLAT_QM) {
for (i = 0; i < bsize_w * bsize_h; i++) {
double tmp;
tmp = x[i] - y[i];
sum += tmp * tmp;
}
} else {
int j;
DECLARE_ALIGNED(16, od_coeff, e[MAX_TX_SQUARE]);
DECLARE_ALIGNED(16, od_coeff, tmp[MAX_TX_SQUARE]);
DECLARE_ALIGNED(16, od_coeff, e_lp[MAX_TX_SQUARE]);
int mid = OD_DIST_LP_MID;
for (i = 0; i < bsize_h; i++) {
for (j = 0; j < bsize_w; j++) {
e[i * bsize_w + j] = x[i * bsize_w + j] - y[i * bsize_w + j];
}
}
for (i = 0; i < bsize_h; i++) {
tmp[i * bsize_w] = mid * e[i * bsize_w] + 2 * e[i * bsize_w + 1];
tmp[i * bsize_w + bsize_w - 1] =
mid * e[i * bsize_w + bsize_w - 1] + 2 * e[i * bsize_w + bsize_w - 2];
for (j = 1; j < bsize_w - 1; j++) {
tmp[i * bsize_w + j] = mid * e[i * bsize_w + j] +
e[i * bsize_w + j - 1] + e[i * bsize_w + j + 1];
}
}
for (j = 0; j < bsize_w; j++) {
e_lp[j] = mid * tmp[j] + 2 * tmp[bsize_w + j];
e_lp[(bsize_h - 1) * bsize_w + j] =
mid * tmp[(bsize_h - 1) * bsize_w + j] +
2 * tmp[(bsize_h - 2) * bsize_w + j];
}
for (i = 1; i < bsize_h - 1; i++) {
for (j = 0; j < bsize_w; j++) {
e_lp[i * bsize_w + j] = mid * tmp[i * bsize_w + j] +
tmp[(i - 1) * bsize_w + j] +
tmp[(i + 1) * bsize_w + j];
}
}
for (i = 0; i < bsize_h; i += 8) {
for (j = 0; j < bsize_w; j += 8) {
sum += od_compute_dist_8x8(qm, activity_masking, &x[i * bsize_w + j],
&y[i * bsize_w + j], &e_lp[i * bsize_w + j],
bsize_w);
}
}
/* Scale according to linear regression against SSE, for 8x8 blocks. */
if (activity_masking) {
sum *= 2.2 + (1.7 - 2.2) * (qindex - 99) / (210 - 99) +
(qindex < 99 ? 2.5 * (qindex - 99) / 99 * (qindex - 99) / 99 : 0);
} else {
sum *= qindex >= 128
? 1.4 + (0.9 - 1.4) * (qindex - 128) / (209 - 128)
: qindex <= 43
? 1.5 + (2.0 - 1.5) * (qindex - 43) / (16 - 43)
: 1.5 + (1.4 - 1.5) * (qindex - 43) / (128 - 43);
}
}
return sum;
}
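// Daala-style perceptual distortion between the source and reconstruction:
// the 8-bit pixels are copied into od_coeff buffers and passed to
// od_compute_dist(), which requires blocks of at least 8x8.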
static int64_t av1_daala_dist(const uint8_t *src, int src_stride,
const uint8_t *dst, int dst_stride, int tx_size,
int qm, int use_activity_masking, int qindex) {
int i, j;
int64_t d;
const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
const int bsw = block_size_wide[tx_bsize];
const int bsh = block_size_high[tx_bsize];
DECLARE_ALIGNED(16, od_coeff, orig[MAX_TX_SQUARE]);
DECLARE_ALIGNED(16, od_coeff, rec[MAX_TX_SQUARE]);
assert(qm == OD_HVS_QM);
for (j = 0; j < bsh; j++)
for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i];
for (j = 0; j < bsh; j++)
for (i = 0; i < bsw; i++) rec[j * bsw + i] = dst[j * dst_stride + i];
d = (int64_t)od_compute_dist(qm, use_activity_masking, orig, rec, bsw, bsh,
qindex);
return d;
}
#endif // CONFIG_DAALA_DIST
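// Splits the block into a 4x4 grid of sub-blocks, accumulates the squared
// prediction error of each, and writes the normalized horizontal and
// vertical marginal error distributions (only the first three entries of
// each; the fourth is implied since the four marginals sum to 1).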
static void get_energy_distribution_fine(const AV1_COMP *cpi, BLOCK_SIZE bsize,
uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
double *hordist, double *verdist) {
int bw = block_size_wide[bsize];
int bh = block_size_high[bsize];
unsigned int esq[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
unsigned int var[16];
double total = 0;
const int f_index = bsize - BLOCK_16X16;
if (f_index < 0) {
int i, j, index;
int w_shift = bw == 8 ? 1 : 2;
int h_shift = bh == 8 ? 1 : 2;
#if CONFIG_AOM_HIGHBITDEPTH
if (cpi->common.use_highbitdepth) {
uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
for (i = 0; i < bh; ++i)
for (j = 0; j < bw; ++j) {
index = (j >> w_shift) + ((i >> h_shift) << 2);
esq[index] +=
(src16[j + i * src_stride] - dst16[j + i * dst_stride]) *
(src16[j + i * src_stride] - dst16[j + i * dst_stride]);
}
} else {
#endif // CONFIG_AOM_HIGHBITDEPTH
for (i = 0; i < bh; ++i)
for (j = 0; j < bw; ++j) {
index = (j >> w_shift) + ((i >> h_shift) << 2);
esq[index] += (src[j + i * src_stride] - dst[j + i * dst_stride]) *
(src[j + i * src_stride] - dst[j + i * dst_stride]);
}
#if CONFIG_AOM_HIGHBITDEPTH
}
#endif // CONFIG_AOM_HIGHBITDEPTH
} else {
var[0] = cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[0]);
var[1] = cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4,
dst_stride, &esq[1]);
var[2] = cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2,
dst_stride, &esq[2]);
var[3] = cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride,
dst + 3 * bw / 4, dst_stride, &esq[3]);
src += bh / 4 * src_stride;
dst += bh / 4 * dst_stride;
var[4] = cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[4]);
var[5] = cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4,
dst_stride, &esq[5]);
var[6] = cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2,
dst_stride, &esq[6]);
var[7] = cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride,
dst + 3 * bw / 4, dst_stride, &esq[7]);
src += bh / 4 * src_stride;
dst += bh / 4 * dst_stride;
var[8] = cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[8]);
var[9] = cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4,
dst_stride, &esq[9]);
var[10] = cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2,
dst_stride, &esq[10]);
var[11] = cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride,
dst + 3 * bw / 4, dst_stride, &esq[11]);
src += bh / 4 * src_stride;
dst += bh / 4 * dst_stride;
var[12] =
cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[12]);
var[13] = cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4,
dst_stride, &esq[13]);
var[14] = cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2,
dst_stride, &esq[14]);
var[15] = cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride,
dst + 3 * bw / 4, dst_stride, &esq[15]);
}
total = esq[0] + esq[1] + esq[2] + esq[3] + esq[4] + esq[5] + esq[6] +
esq[7] + esq[8] + esq[9] + esq[10] + esq[11] + esq[12] + esq[13] +
esq[14] + esq[15];
if (total > 0) {
const double e_recip = 1.0 / total;
hordist[0] =
((double)esq[0] + (double)esq[4] + (double)esq[8] + (double)esq[12]) *
e_recip;
hordist[1] =
((double)esq[1] + (double)esq[5] + (double)esq[9] + (double)esq[13]) *
e_recip;
hordist[2] =
((double)esq[2] + (double)esq[6] + (double)esq[10] + (double)esq[14]) *
e_recip;
verdist[0] =
((double)esq[0] + (double)esq[1] + (double)esq[2] + (double)esq[3]) *
e_recip;
verdist[1] =
((double)esq[4] + (double)esq[5] + (double)esq[6] + (double)esq[7]) *
e_recip;
verdist[2] =
((double)esq[8] + (double)esq[9] + (double)esq[10] + (double)esq[11]) *
e_recip;
} else {
hordist[0] = verdist[0] = 0.25;
hordist[1] = verdist[1] = 0.25;
hordist[2] = verdist[2] = 0.25;
}
(void)var[0];
(void)var[1];
(void)var[2];
(void)var[3];
(void)var[4];
(void)var[5];
(void)var[6];
(void)var[7];
(void)var[8];
(void)var[9];
(void)var[10];
(void)var[11];
(void)var[12];
(void)var[13];
(void)var[14];
(void)var[15];
}
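// Projects the energy distributions onto the ADST_FLIP_SVM classifier and
// returns a bitmask of 1D transforms to prune: the low byte refers to the
// vertical (column) transform, the high byte to the horizontal (row) one.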
static int adst_vs_flipadst(const AV1_COMP *cpi, BLOCK_SIZE bsize, uint8_t *src,
int src_stride, uint8_t *dst, int dst_stride,
double *hdist, double *vdist) {
int prune_bitmask = 0;
double svm_proj_h = 0, svm_proj_v = 0;
get_energy_distribution_fine(cpi, bsize, src, src_stride, dst, dst_stride,
hdist, vdist);
svm_proj_v = vdist[0] * ADST_FLIP_SVM[0] + vdist[1] * ADST_FLIP_SVM[1] +
vdist[2] * ADST_FLIP_SVM[2] + ADST_FLIP_SVM[3];
svm_proj_h = hdist[0] * ADST_FLIP_SVM[4] + hdist[1] * ADST_FLIP_SVM[5] +
hdist[2] * ADST_FLIP_SVM[6] + ADST_FLIP_SVM[7];
if (svm_proj_v > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
prune_bitmask |= 1 << FLIPADST_1D;
else if (svm_proj_v < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
prune_bitmask |= 1 << ADST_1D;
if (svm_proj_h > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
prune_bitmask |= 1 << (FLIPADST_1D + 8);
else if (svm_proj_h < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
prune_bitmask |= 1 << (ADST_1D + 8);
return prune_bitmask;
}
#if CONFIG_EXT_TX
static void get_horver_correlation(int16_t *diff, int stride, int w, int h,
double *hcorr, double *vcorr) {
// Returns hor/ver correlation coefficient
const int num = (h - 1) * (w - 1);
double num_r;
int i, j;
int64_t xy_sum = 0, xz_sum = 0;
int64_t x_sum = 0, y_sum = 0, z_sum = 0;
int64_t x2_sum = 0, y2_sum = 0, z2_sum = 0;
double x_var_n, y_var_n, z_var_n, xy_var_n, xz_var_n;
*hcorr = *vcorr = 1;
assert(num > 0);
num_r = 1.0 / num;
for (i = 1; i < h; ++i) {
for (j = 1; j < w; ++j) {
const int16_t x = diff[i * stride + j];
const int16_t y = diff[i * stride + j - 1];
const int16_t z = diff[(i - 1) * stride + j];
xy_sum += x * y;
xz_sum += x * z;
x_sum += x;
y_sum += y;
z_sum += z;
x2_sum += x * x;
y2_sum += y * y;
z2_sum += z * z;
}
}
x_var_n = x2_sum - (x_sum * x_sum) * num_r;
y_var_n = y2_sum - (y_sum * y_sum) * num_r;
z_var_n = z2_sum - (z_sum * z_sum) * num_r;
xy_var_n = xy_sum - (x_sum * y_sum) * num_r;
xz_var_n = xz_sum - (x_sum * z_sum) * num_r;
if (x_var_n > 0 && y_var_n > 0) {
*hcorr = xy_var_n / sqrt(x_var_n * y_var_n);
*hcorr = *hcorr < 0 ? 0 : *hcorr;
}
if (x_var_n > 0 && z_var_n > 0) {
*vcorr = xz_var_n / sqrt(x_var_n * z_var_n);
*vcorr = *vcorr < 0 ? 0 : *vcorr;
}
}
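// High residual correlation along a direction favours DCT over the identity
// transform in that direction, so IDTX is pruned; low correlation prunes
// DCT. The bitmask layout matches adst_vs_flipadst().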
int dct_vs_idtx(int16_t *diff, int stride, int w, int h, double *hcorr,
double *vcorr) {
int prune_bitmask = 0;
get_horver_correlation(diff, stride, w, h, hcorr, vcorr);
if (*vcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
prune_bitmask |= 1 << IDTX_1D;
else if (*vcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
prune_bitmask |= 1 << DCT_1D;
if (*hcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
prune_bitmask |= 1 << (IDTX_1D + 8);
else if (*hcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
prune_bitmask |= 1 << (DCT_1D + 8);
return prune_bitmask;
}
// Performance drop: 0.5%, Speed improvement: 24%
static int prune_two_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
MACROBLOCK *x, MACROBLOCKD *xd, int adst_flipadst,
int dct_idtx) {
struct macroblock_plane *const p = &x->plane[0];
struct macroblockd_plane *const pd = &xd->plane[0];
const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
const int bw = 4 << (b_width_log2_lookup[bs]);
const int bh = 4 << (b_height_log2_lookup[bs]);
double hdist[3] = { 0, 0, 0 }, vdist[3] = { 0, 0, 0 };
double hcorr, vcorr;
int prune = 0;
av1_subtract_plane(x, bsize, 0);
if (adst_flipadst)
prune |= adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride,
pd->dst.buf, pd->dst.stride, hdist, vdist);
if (dct_idtx) prune |= dct_vs_idtx(p->src_diff, bw, bw, bh, &hcorr, &vcorr);
return prune;
}
#endif // CONFIG_EXT_TX
// Performance drop: 0.3%, Speed improvement: 5%
static int prune_one_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
MACROBLOCK *x, MACROBLOCKD *xd) {
struct macroblock_plane *const p = &x->plane[0];
struct macroblockd_plane *const pd = &xd->plane[0];
double hdist[3] = { 0, 0, 0 }, vdist[3] = { 0, 0, 0 };
av1_subtract_plane(x, bsize, 0);
return adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride, pd->dst.buf,
pd->dst.stride, hdist, vdist);
}
static int prune_tx_types(const AV1_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x,
MACROBLOCKD *xd, int tx_set) {
#if CONFIG_EXT_TX
const int *tx_set_1D = ext_tx_used_inter_1D[tx_set];
#else
const int tx_set_1D[TX_TYPES_1D] = { 0 };
#endif
switch (cpi->sf.tx_type_search.prune_mode) {
case NO_PRUNE: return 0; break;
case PRUNE_ONE:
if ((tx_set >= 0) && !(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D]))
return 0;
return prune_one_for_sby(cpi, bsize, x, xd);
break;
#if CONFIG_EXT_TX
case PRUNE_TWO:
if ((tx_set >= 0) && !(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D])) {
if (!(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D])) return 0;
return prune_two_for_sby(cpi, bsize, x, xd, 0, 1);
}
if ((tx_set >= 0) && !(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D]))
return prune_two_for_sby(cpi, bsize, x, xd, 1, 0);
return prune_two_for_sby(cpi, bsize, x, xd, 1, 1);
break;
#endif
}
assert(0);
return 0;
}
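// Returns 1 if the given 2D transform type survives the prune bitmask,
// i.e. neither its vertical 1D component (low byte of 'prune') nor its
// horizontal 1D component (high byte) has been pruned.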
static int do_tx_type_search(TX_TYPE tx_type, int prune) {
// TODO(sarahparker) implement for non ext tx
#if CONFIG_EXT_TX
return !(((prune >> vtx_tab[tx_type]) & 1) |
((prune >> (htx_tab[tx_type] + 8)) & 1));
#else
// temporary to avoid compiler warnings
(void)vtx_tab;
(void)htx_tab;
(void)tx_type;
(void)prune;
return 1;
#endif
}
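// Estimates rate and distortion for a plane directly from its prediction
// SSE, either with a simple model based on the effective quantizer
// (dequant >> dequant_shift) or with the Laplacian model in
// av1_model_rd_from_var_lapndz(). The returned distortion is scaled by 16
// to match the distortion scale used elsewhere in the RD code.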
static void model_rd_from_sse(const AV1_COMP *const cpi,
const MACROBLOCKD *const xd, BLOCK_SIZE bsize,
int plane, int64_t sse, int *rate,
int64_t *dist) {
const struct macroblockd_plane *const pd = &xd->plane[plane];
const int dequant_shift =
#if CONFIG_AOM_HIGHBITDEPTH
(xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 :
#endif // CONFIG_AOM_HIGHBITDEPTH
3;
// Fast approximation of the modelling function.
if (cpi->sf.simple_model_rd_from_var) {
const int64_t square_error = sse;
int quantizer = (pd->dequant[1] >> dequant_shift);
if (quantizer < 120)
*rate = (int)((square_error * (280 - quantizer)) >>
(16 - AV1_PROB_COST_SHIFT));
else
*rate = 0;
*dist = (square_error * quantizer) >> 8;
} else {
av1_model_rd_from_var_lapndz(sse, num_pels_log2_lookup[bsize],
pd->dequant[1] >> dequant_shift, rate, dist);
}
*dist <<= 4;
}
static void model_rd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
MACROBLOCK *x, MACROBLOCKD *xd, int plane_from,
int plane_to, int *out_rate_sum,
int64_t *out_dist_sum, int *skip_txfm_sb,
int64_t *skip_sse_sb) {
// Note: our transform coefficients are 8 times those of an orthogonal
// transform, so the quantizer step is also scaled by 8. To get the
// effective quantizer we need to divide by 8 before sending it to the
// modeling function.
int plane;
const int ref = xd->mi[0]->mbmi.ref_frame[0];
int64_t rate_sum = 0;
int64_t dist_sum = 0;
int64_t total_sse = 0;
x->pred_sse[ref] = 0;
for (plane = plane_from; plane <= plane_to; ++plane) {
struct macroblock_plane *const p = &x->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
unsigned int sse;
int rate;
int64_t dist;
// TODO(geza): Write direct sse functions that do not compute
// variance as well.
cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
&sse);
if (plane == 0) x->pred_sse[ref] = sse;
total_sse += sse;
model_rd_from_sse(cpi, xd, bs, plane, sse, &rate, &dist);
rate_sum += rate;
dist_sum += dist;
}
*skip_txfm_sb = total_sse == 0;
*skip_sse_sb = total_sse << 4;
*out_rate_sum = (int)rate_sum;
*out_dist_sum = dist_sum;
}
#if CONFIG_PVQ
// Without PVQ, av1_block_error_c() returns two kinds of errors:
// 1) the reconstruction (i.e. decoded) error, and
// 2) the squared sum of the transformed residue (i.e. 'coeff').
// However, if PVQ is enabled, 'coeff' does not hold the transformed residue;
// a transformed original is kept instead.
// Hence a new parameter, the ref vector (i.e. the transformed predicted
// signal), is required to derive the residue:
// coeff - ref = residue (all in the transform domain).
// TODO(yushin): Since the 4x4 case does not need ssz, it would be better to
// refactor this into a separate function that skips the extra ssz
// computation.
static int64_t av1_block_error2_c(const tran_low_t *coeff,
const tran_low_t *dqcoeff,
const tran_low_t *ref, intptr_t block_size,
int64_t *ssz) {
int64_t error;
// Use the existing SSE code to calculate the distortion of the decoded
// signal, i.e. (orig - decoded)^2.
error = av1_block_error_fp(coeff, dqcoeff, block_size);
// prediction residue^2 = (orig - ref)^2
*ssz = av1_block_error_fp(coeff, ref, block_size);
return error;
}
#endif
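// Sum of squared differences between the original and dequantized
// coefficients; the squared sum of the original coefficients is returned
// via *ssz and is used as the SSE of a skipped block.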
int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
intptr_t block_size, int64_t *ssz) {
int i;
int64_t error = 0, sqcoeff = 0;
for (i = 0; i < block_size; i++) {
const int diff = coeff[i] - dqcoeff[i];
error += diff * diff;
sqcoeff += coeff[i] * coeff[i];
}
*ssz = sqcoeff;
return error;
}
int64_t av1_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff,
int block_size) {
int i;
int64_t error = 0;
for (i = 0; i < block_size; i++) {
const int diff = coeff[i] - dqcoeff[i];
error += diff * diff;
}
return error;
}
#if CONFIG_AOM_HIGHBITDEPTH
int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
const tran_low_t *dqcoeff, intptr_t block_size,
int64_t *ssz, int bd) {
int i;
int64_t error = 0, sqcoeff = 0;
int shift = 2 * (bd - 8);
int rounding = shift > 0 ? 1 << (shift - 1) : 0;
for (i = 0; i < block_size; i++) {
const int64_t diff = coeff[i] - dqcoeff[i];
error += diff * diff;
sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
}
assert(error >= 0 && sqcoeff >= 0);
error = (error + rounding) >> shift;
sqcoeff = (sqcoeff + rounding) >> shift;
*ssz = sqcoeff;
return error;
}
#endif // CONFIG_AOM_HIGHBITDEPTH
#if !CONFIG_PVQ
/* The trailing '0' is a terminator which is used inside av1_cost_coeffs() to
* decide whether to include the cost of a trailing EOB node or not (i.e. we
* can skip this if the last coefficient in this transform block, e.g. the
* 16th coefficient in a 4x4 block or the 64th coefficient in an 8x8 block,
* is non-zero). */
int av1_cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane,
int block, int coeff_ctx, TX_SIZE tx_size,
const int16_t *scan, const int16_t *nb,
int use_fast_coef_costing) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
const struct macroblock_plane *p = &x->plane[plane];
const struct macroblockd_plane *pd = &xd->plane[plane];
const PLANE_TYPE type = pd->plane_type;
const uint16_t *band_count = &band_count_table[tx_size][1];
const int eob = p->eobs[block];
const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
const int tx_size_ctx = txsize_sqr_map[tx_size];
unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
x->token_costs[tx_size_ctx][type][is_inter_block(mbmi)];
uint8_t token_cache[MAX_TX_SQUARE];
int pt = coeff_ctx;
int c, cost;
#if CONFIG_AOM_HIGHBITDEPTH
const int *cat6_high_cost = av1_get_high_cost_table(xd->bd);
#else
const int *cat6_high_cost = av1_get_high_cost_table(8);
#endif
#if !CONFIG_VAR_TX && !CONFIG_SUPERTX
// Check for consistency of tx_size with mode info
assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
: get_uv_tx_size(mbmi, pd) == tx_size);
#endif // !CONFIG_VAR_TX && !CONFIG_SUPERTX
(void)cm;
if (eob == 0) {
// single eob token
cost = token_costs[0][0][pt][EOB_TOKEN];
} else {
if (use_fast_coef_costing) {
int band_left = *band_count++;
// dc token
int v = qcoeff[0];
int16_t prev_t;
cost = av1_get_token_cost(v, &prev_t, cat6_high_cost);
cost += (*token_costs)[0][pt][prev_t];
token_cache[0] = av1_pt_energy_class[prev_t];
++token_costs;
// ac tokens
for (c = 1; c < eob; c++) {
const int rc = scan[c];
int16_t t;
v = qcoeff[rc];
cost += av1_get_token_cost(v, &t, cat6_high_cost);
cost += (*token_costs)[!prev_t][!prev_t][t];
prev_t = t;
if (!--band_left) {
band_left = *band_count++;
++token_costs;
}
}
// eob token
if (band_left) cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
} else { // !use_fast_coef_costing
int band_left = *band_count++;
// dc token
int v = qcoeff[0];
int16_t tok;
unsigned int(*tok_cost_ptr)[COEFF_CONTEXTS][ENTROPY_TOKENS];
cost = av1_get_token_cost(v, &tok, cat6_high_cost);
cost += (*token_costs)[0][pt][tok];
token_cache[0] = av1_pt_energy_class[tok];
++token_costs;
tok_cost_ptr = &((*token_costs)[!tok]);
// ac tokens
for (c = 1; c < eob; c++) {
const int rc = scan[c];
v = qcoeff[rc];
cost += av1_get_token_cost(v, &tok, cat6_high_cost);
pt = get_coef_context(nb, token_cache, c);
cost += (*tok_cost_ptr)[pt][tok];
token_cache[rc] = av1_pt_energy_class[tok];
if (!--band_left) {
band_left = *band_count++;
++token_costs;
}
tok_cost_ptr = &((*token_costs)[!tok]);
}
// eob token
if (band_left) {
pt = get_coef_context(nb, token_cache, c);
cost += (*token_costs)[0][pt][EOB_TOKEN];
}
}
}
return cost;
}
#endif
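// Computes the distortion and SSE of one transform block, either in the
// transform domain (from the quantization error of the coefficients) or in
// the pixel domain by reconstructing the block and comparing it against the
// source.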
static void dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane, int block,
int blk_row, int blk_col, TX_SIZE tx_size,
int64_t *out_dist, int64_t *out_sse) {
MACROBLOCKD *const xd = &x->e_mbd;
const struct macroblock_plane *const p = &x->plane[plane];
const struct macroblockd_plane *const pd = &xd->plane[plane];
#if CONFIG_DAALA_DIST
int qm = OD_HVS_QM;
int use_activity_masking = 0;
#if CONFIG_PVQ
use_activity_masking = x->daala_enc.use_activity_masking;
#endif
#endif
if (cpi->sf.use_transform_domain_distortion && !CONFIG_DAALA_DIST) {
// Transform domain distortion computation is more efficient as it does
// not involve an inverse transform, but it is less accurate.
const int buffer_length = tx_size_2d[tx_size];
int64_t this_sse;
int shift = (MAX_TX_SCALE - get_tx_scale(tx_size)) * 2;
tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
#if CONFIG_PVQ
tran_low_t *ref_coeff = BLOCK_OFFSET(pd->pvq_ref_coeff, block);
#endif
#if CONFIG_AOM_HIGHBITDEPTH
const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
*out_dist =
av1_highbd_block_error(coeff, dqcoeff, buffer_length, &this_sse, bd) >>
shift;
#elif CONFIG_PVQ
*out_dist = av1_block_error2_c(coeff, dqcoeff, ref_coeff, buffer_length,
&this_sse) >>
shift;
#else
*out_dist =
av1_block_error(coeff, dqcoeff, buffer_length, &this_sse) >> shift;
#endif // CONFIG_AOM_HIGHBITDEPTH
*out_sse = this_sse >> shift;
} else {
const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
const int bsw = block_size_wide[tx_bsize];
const int bsh = block_size_high[tx_bsize];
const int src_stride = x->plane[plane].src.stride;
const int dst_stride = xd->plane[plane].dst.stride;
// Scale the transform block index to pixel units.
const int src_idx = (blk_row * src_stride + blk_col)
<< tx_size_wide_log2[0];
const int dst_idx = (blk_row * dst_stride + blk_col)
<< tx_size_wide_log2[0];
const uint8_t *src = &x->plane[plane].src.buf[src_idx];
const uint8_t *dst = &xd->plane[plane].dst.buf[dst_idx];
const tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
const uint16_t eob = p->eobs[block];
unsigned int tmp;
assert(cpi != NULL);
assert(tx_size_wide_log2[0] == tx_size_high_log2[0]);
#if CONFIG_DAALA_DIST
if (plane == 0) {
if (bsw >= 8 && bsh >= 8)
tmp = av1_daala_dist(src, src_stride, dst, dst_stride, tx_size, qm,
use_activity_masking, x->qindex);
else
tmp = 0;
} else
#endif
cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &tmp);
*out_sse = (int64_t)tmp * 16;
if (eob) {
const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
#if CONFIG_AOM_HIGHBITDEPTH
DECLARE_ALIGNED(16, uint16_t, recon16[MAX_TX_SQUARE]);
uint8_t *recon = (uint8_t *)recon16;
#else
DECLARE_ALIGNED(16, uint8_t, recon[MAX_TX_SQUARE]);
#endif // CONFIG_AOM_HIGHBITDEPTH
const PLANE_TYPE plane_type = plane == 0 ? PLANE_TYPE_Y : PLANE_TYPE_UV;
INV_TXFM_PARAM inv_txfm_param;
const int block_raster_idx =
av1_block_index_to_raster_order(tx_size, block);
inv_txfm_param.tx_type =
get_tx_type(plane_type, xd, block_raster_idx, tx_size);
inv_txfm_param.tx_size = tx_size;
inv_txfm_param.eob = eob;
inv_txfm_param.lossless = xd->lossless[mbmi->segment_id];
#if CONFIG_AOM_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
recon = CONVERT_TO_BYTEPTR(recon);
inv_txfm_param.bd = xd->bd;
aom_highbd_convolve_copy(dst, dst_stride, recon, MAX_TX_SIZE, NULL, 0,
NULL, 0, bsw, bsh, xd->bd);
highbd_inv_txfm_add(dqcoeff, recon, MAX_TX_SIZE, &inv_txfm_param);
} else
#endif // CONFIG_AOM_HIGHBITDEPTH
{
#if !CONFIG_PVQ
aom_convolve_copy(dst, dst_stride, recon, MAX_TX_SIZE, NULL, 0, NULL, 0,
bsw, bsh);
#else
int i, j;
for (j = 0; j < bsh; j++)
for (i = 0; i < bsw; i++) recon[j * MAX_TX_SIZE + i] = 0;
#endif
inv_txfm_add(dqcoeff, recon, MAX_TX_SIZE, &inv_txfm_param);
}
#if CONFIG_DAALA_DIST
if (plane == 0) {
if (bsw >= 8 && bsh >= 8)
tmp = av1_daala_dist(src, src_stride, recon, MAX_TX_SIZE, tx_size, qm,
use_activity_masking, x->qindex);
else
tmp = 0;
} else
#endif
cpi->fn_ptr[tx_bsize].vf(src, src_stride, recon, MAX_TX_SIZE, &tmp);
}
*out_dist = (int64_t)tmp * 16;
}
}
#if !CONFIG_PVQ
static int rate_block(int plane, int block, int coeff_ctx, TX_SIZE tx_size,
struct rdcost_block_args *args) {
return av1_cost_coeffs(&args->cpi->common, args->x, plane, block, coeff_ctx,
tx_size, args->scan_order->scan,
args->scan_order->neighbors,
args->use_fast_coef_costing);
}
#endif
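// aom_sum_squares_2d_i16() operates on square regions, so rectangular
// transform sizes are handled by summing the squares of their two square
// halves.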
static uint64_t sum_squares_2d(const int16_t *diff, int diff_stride,
TX_SIZE tx_size) {
uint64_t sse;
switch (tx_size) {
#if CONFIG_CB4X4
case TX_2X2:
sse = aom_sum_squares_2d_i16_c(diff, diff_stride, tx_size_wide[tx_size]);
break;
#endif
case TX_4X8:
sse = aom_sum_squares_2d_i16(diff, diff_stride, 4) +
aom_sum_squares_2d_i16(diff + 4 * diff_stride, diff_stride, 4);
break;
case TX_8X4:
sse = aom_sum_squares_2d_i16(diff, diff_stride, 4) +
aom_sum_squares_2d_i16(diff + 4, diff_stride, 4);
break;
case TX_8X16:
sse = aom_sum_squares_2d_i16(diff, diff_stride, 8) +
aom_sum_squares_2d_i16(diff + 8 * diff_stride, diff_stride, 8);
break;
case TX_16X8:
sse = aom_sum_squares_2d_i16(diff, diff_stride, 8) +
aom_sum_squares_2d_i16(diff + 8, diff_stride, 8);
break;
case TX_16X32:
sse = aom_sum_squares_2d_i16(diff, diff_stride, 16) +
aom_sum_squares_2d_i16(diff + 16 * diff_stride, diff_stride, 16);
break;
case TX_32X16:
sse = aom_sum_squares_2d_i16(diff, diff_stride, 16) +
aom_sum_squares_2d_i16(diff + 16, diff_stride, 16);
break;
default:
assert(tx_size < TX_SIZES);
sse = aom_sum_squares_2d_i16(diff, diff_stride, tx_size_wide[tx_size]);
break;
}
return sse;
}
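// Per-transform-block callback for av1_foreach_transformed_block_in_plane():
// computes the rate and distortion of one block, merges them into
// args->rd_stats, and sets args->exit_early once the accumulated RD cost
// exceeds args->best_rd.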
static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
struct rdcost_block_args *args = arg;
MACROBLOCK *const x = args->x;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
const AV1_COMMON *cm = &args->cpi->common;
int64_t rd1, rd2, rd;
int coeff_ctx = combine_entropy_contexts(*(args->t_above + blk_col),
*(args->t_left + blk_row));
RD_STATS this_rd_stats;
#if CONFIG_DAALA_DIST
int qm = OD_HVS_QM;
int use_activity_masking = 0;
#if CONFIG_PVQ
use_activity_masking = x->daala_enc.use_activity_masking;
#endif
#endif
av1_init_rd_stats(&this_rd_stats);
if (args->exit_early) return;
if (!is_inter_block(mbmi)) {
struct encode_b_args b_args = {
(AV1_COMMON *)cm, x, NULL, &mbmi->skip, args->t_above, args->t_left, 1
};
av1_encode_block_intra(plane, block, blk_row, blk_col, plane_bsize, tx_size,
&b_args);
if (args->cpi->sf.use_transform_domain_distortion && !CONFIG_DAALA_DIST) {
dist_block(args->cpi, x, plane, block, blk_row, blk_col, tx_size,
&this_rd_stats.dist, &this_rd_stats.sse);
} else {
// Note that av1_encode_block_intra() above already calls inv_txfm_add(),
// so we can't just call dist_block() here.
const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
const aom_variance_fn_t variance = args->cpi->fn_ptr[tx_bsize].vf;
const struct macroblock_plane *const p = &x->plane[plane];
const struct macroblockd_plane *const pd = &xd->plane[plane];
const int src_stride = p->src.stride;
const int dst_stride = pd->dst.stride;
const int diff_stride = block_size_wide[plane_bsize];
const uint8_t *src =
&p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
const uint8_t *dst =
&pd->dst
.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
const int16_t *diff = &p->src_diff[(blk_row * diff_stride + blk_col)
<< tx_size_wide_log2[0]];
unsigned int tmp;
#if CONFIG_DAALA_DIST
if (plane == 0) {
const int bsw = block_size_wide[tx_bsize];
const int bsh = block_size_high[tx_bsize];
if (bsw >= 8 && bsh >= 8) {
const int16_t *pred = &pd->pred[(blk_row * diff_stride + blk_col)
<< tx_size_wide_log2[0]];
int i, j;
DECLARE_ALIGNED(16, uint8_t, pred8[MAX_TX_SQUARE]);
for (j = 0; j < bsh; j++)
for (i = 0; i < bsw; i++)
pred8[j * bsw + i] = pred[j * diff_stride + i];
this_rd_stats.sse =
av1_daala_dist(src, src_stride, pred8, bsw, tx_size, qm,
use_activity_masking, x->qindex);
} else {
this_rd_stats.sse = 0;
}
} else
#endif
{
this_rd_stats.sse = sum_squares_2d(diff, diff_stride, tx_size);
#if CONFIG_AOM_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
this_rd_stats.sse =
ROUND_POWER_OF_TWO(this_rd_stats.sse, (xd->bd - 8) * 2);
#endif // CONFIG_AOM_HIGHBITDEPTH
}
this_rd_stats.sse = this_rd_stats.sse * 16;
#if CONFIG_DAALA_DIST
if (plane == 0) {
const int bsw = block_size_wide[tx_bsize];
const int bsh = block_size_high[tx_bsize];
if (bsw >= 8 && bsh >= 8)
tmp = av1_daala_dist(src, src_stride, dst, dst_stride, tx_size, qm,
use_activity_masking, x->qindex);
else
tmp = 0;
} else
#endif
variance(src, src_stride, dst, dst_stride, &tmp);
this_rd_stats.dist = (int64_t)tmp * 16;
}
} else {
// full forward transform and quantization
#if CONFIG_NEW_QUANT
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
coeff_ctx, AV1_XFORM_QUANT_FP_NUQ);
#else
av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
coeff_ctx, AV1_XFORM_QUANT_FP);
#endif // CONFIG_NEW_QUANT
#if !CONFIG_PVQ
if (x->plane[plane].eobs[block] && !xd->lossless[mbmi->segment_id]) {
args->t_above[blk_col] = args->t_left[blk_row] =
(av1_optimize_b(cm, x, plane, block, tx_size, coeff_ctx) > 0);
} else {
args->t_above[blk_col] = (x->plane[plane].eobs[block] > 0);
args->t_left[blk_row] = (x->plane[plane].eobs[block] > 0);
}
#endif
dist_block(args->cpi, x, plane, block, blk_row, blk_col, tx_size,
&this_rd_stats.dist, &this_rd_stats.sse);
}
rd = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.dist);
if (args->this_rd + rd > args->best_rd) {
args->exit_early = 1;
return;
}
#if !CONFIG_PVQ
this_rd_stats.rate = rate_block(plane, block, coeff_ctx, tx_size, args);
#if CONFIG_RD_DEBUG
av1_update_txb_coeff_cost(&this_rd_stats, plane, tx_size, blk_row, blk_col,
this_rd_stats.rate);
#endif
#else
this_rd_stats.rate = x->rate;
args->t_above[blk_col] = !x->pvq_skip[plane];
args->t_left[blk_row] = !x->pvq_skip[plane];
#endif
rd1 = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate, this_rd_stats.dist);
rd2 = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.sse);
// TODO(jingning): temporarily enabled only for luma component
rd = AOMMIN(rd1, rd2);
#if CONFIG_DAALA_DIST
if (plane == 0 && tx_size <= TX_4X4) {
rd = 0;
x->rate_4x4[block] = this_rd_stats.rate;
}
#endif
#if !CONFIG_PVQ
this_rd_stats.skip &= !x->plane[plane].eobs[block];
#else
this_rd_stats.skip &= x->pvq_skip[plane];
#endif
av1_merge_rd_stats(&args->rd_stats, &this_rd_stats);
args->this_rd += rd;
if (args->this_rd > args->best_rd) {
args->exit_early = 1;
return;
}
}
#if CONFIG_DAALA_DIST
static void block_8x8_rd_txfm_daala_dist(int plane, int block, int blk_row,
int blk_col, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) {
struct rdcost_block_args *args = arg;
MACROBLOCK *const x = args->x;
MACROBLOCKD *const xd = &x->e_mbd;
// MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
// const AV1_COMMON *cm = &args->cpi->common;
int64_t rd1, rd2, rd;
RD_STATS this_rd_stats;
int qm = OD_HVS_QM;
int use_activity_masking = 0;
#if CONFIG_PVQ
use_activity_masking = x->daala_enc.use_activity_masking;
#endif
av1_init_rd_stats(&this_rd_stats);
if (args->exit_early) return;
{
const struct macroblock_plane *const p = &x->plane[plane];
const struct macroblockd_plane *const pd = &xd->plane[plane];
const int src_stride = p->src.stride;
const int dst_stride = pd->dst.stride;
const int diff_stride = block_size_wide[plane_bsize];
const uint8_t *src =
&p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
const uint8_t *dst =
&pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
unsigned int tmp;
int qindex = x->qindex;
const int16_t *pred =
&pd->pred[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
int i, j;
const int tx_blk_size = 1 << (tx_size + 2);
DECLARE_ALIGNED(16, uint8_t, pred8[MAX_TX_SQUARE]);
for (j = 0; j < tx_blk_size; j++)
for (i = 0; i < tx_blk_size; i++)
pred8[j * tx_blk_size + i] = pred[j * diff_stride + i];
this_rd_stats.sse =
av1_daala_dist(src, src_stride, pred8, tx_blk_size, tx_size, qm,
use_activity_masking, qindex);
this_rd_stats.sse = this_rd_stats.sse * 16;
tmp = av1_daala_dist(src, src_stride, dst, dst_stride, tx_size, qm,
use_activity_masking, qindex);
this_rd_stats.dist = (int64_t)tmp * 16;
}
rd = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.dist);
if (args->this_rd + rd > args->best_rd) {
args->exit_early = 1;
return;
}
{
const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
// The rate of the current 8x8 block is the sum of the rates of the four
// 4x4 blocks within it.
this_rd_stats.rate = x->rate_4x4[block - max_blocks_wide - 1] +
x->rate_4x4[block - max_blocks_wide] +
x->rate_4x4[block - 1] + x->rate_4x4[block];
}
rd1 = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate, this_rd_stats.dist);
rd2 = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.sse);
rd = AOMMIN(rd1, rd2);
args->rd_stats.dist += this_rd_stats.dist;
args->rd_stats.sse += this_rd_stats.sse;
args->this_rd += rd;
if (args->this_rd > args->best_rd) {
args->exit_early = 1;
return;
}
}
#endif // CONFIG_DAALA_DIST
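// Computes the RD stats of a whole plane at the given transform size by
// running block_rd_txfm() over every transform block; the stats are marked
// invalid if the search exits early.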
static void txfm_rd_in_plane(MACROBLOCK *x, const AV1_COMP *cpi,
RD_STATS *rd_stats, int64_t ref_best_rd, int plane,
BLOCK_SIZE bsize, TX_SIZE tx_size,
int use_fast_coef_casting) {
const AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
const struct macroblockd_plane *const pd = &xd->plane[plane];
TX_TYPE tx_type;
struct rdcost_block_args args;
av1_zero(args);
args.x = x;
args.cpi = cpi;
args.best_rd = ref_best_rd;
args.use_fast_coef_costing = use_fast_coef_casting;
av1_init_rd_stats(&args.rd_stats);
if (plane == 0) xd->mi[0]->mbmi.tx_size = tx_size;
av1_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
tx_type = get_tx_type(pd->plane_type, xd, 0, tx_size);
args.scan_order =
get_scan(cm, tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
#if CONFIG_DAALA_DIST
if (plane == 0 &&
(tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4))
av1_foreach_8x8_transformed_block_in_plane(
xd, bsize, plane, block_rd_txfm, block_8x8_rd_txfm_daala_dist, &args);
else
#endif
av1_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm,
&args);
if (args.exit_early) {
av1_invalid_rd_stats(rd_stats);
} else {
*rd_stats = args.rd_stats;
}
}
#if CONFIG_SUPERTX
void av1_txfm_rd_in_plane_supertx(MACROBLOCK *x, const AV1_COMP *cpi, int *rate,
int64_t *distortion, int *skippable,
int64_t *sse, int64_t ref_best_rd, int plane,
BLOCK_SIZE bsize, TX_SIZE tx_size,
int use_fast_coef_casting) {
const AV1_COMMON *cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
const struct macroblockd_plane *const pd = &xd->plane[plane];
struct rdcost_block_args args;
TX_TYPE tx_type;
av1_zero(args);
args.cpi = cpi;
args.x = x;
args.best_rd = ref_best_rd;
args.use_fast_coef_costing = use_fast_coef_casting;
#if CONFIG_EXT_TX
assert(tx_size < TX_SIZES);
#endif // CONFIG_EXT_TX
if (plane == 0) xd->mi[0]->mbmi.tx_size = tx_size;
av1_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
tx_type = get_tx_type(pd->plane_type, xd, 0, tx_size);
args.scan_order =
get_scan(cm, tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
block_rd_txfm(plane, 0, 0, 0, get_plane_block_size(bsize, pd), tx_size,
&args);
if (args.exit_early) {
*rate = INT_MAX;
*distortion = INT64_MAX;
*sse = INT64_MAX;
*skippable = 0;
} else {
*distortion = args.rd_stats.dist;
*rate = args.rd_stats.rate;
*sse = args.rd_stats.sse;
*skippable = !x->plane[plane].eobs[0];
}
}
#endif // CONFIG_SUPERTX
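// Rate cost of signalling the chosen transform size; zero when the
// transform size is fully determined by tx_mode and the block size.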
static int tx_size_cost(const AV1_COMP *const cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, TX_SIZE tx_size) {
const AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
const int tx_select =
cm->tx_mode == TX_MODE_SELECT && mbmi->sb_type >= BLOCK_8X8;
if (tx_select) {
const int is_inter = is_inter_block(mbmi);
const int tx_size_cat = is_inter ? inter_tx_size_cat_lookup[bsize]
: intra_tx_size_cat_lookup[bsize];
const TX_SIZE coded_tx_size = txsize_sqr_up_map[tx_size];
const int depth = tx_size_to_depth(coded_tx_size);
const int tx_size_ctx = get_tx_size_context(xd);
const int r_tx_size = cpi->tx_size_cost[tx_size_cat][tx_size_ctx][depth];
return r_tx_size;
} else {
return 0;
}
}
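// RD cost of the luma plane for a fixed transform type and size: runs the
// per-block transform RD, adds the transform-type (and, when selectable,
// transform-size) signalling cost, and returns the better of the coded and
// skip RD costs.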
static int64_t txfm_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
RD_STATS *rd_stats, int64_t ref_best_rd, BLOCK_SIZE bs,
TX_TYPE tx_type, int tx_size) {
const AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
int64_t rd = INT64_MAX;
aom_prob skip_prob = av1_get_skip_prob(cm, xd);
int s0, s1;
const int is_inter = is_inter_block(mbmi);
const int tx_select =
cm->tx_mode == TX_MODE_SELECT && mbmi->sb_type >= BLOCK_8X8;
const int r_tx_size = tx_size_cost(cpi, x, bs, tx_size);
assert(skip_prob > 0);
#if CONFIG_EXT_TX && CONFIG_RECT_TX
assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed_bsize(bs)));
#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
s0 = av1_cost_bit(skip_prob, 0);
s1 = av1_cost_bit(skip_prob, 1);
mbmi->tx_type = tx_type;
mbmi->tx_size = tx_size;
txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, 0, bs, tx_size,
cpi->sf.use_fast_coef_costing);
if (rd_stats->rate == INT_MAX) return INT64_MAX;
#if CONFIG_EXT_TX
if (get_ext_tx_types(tx_size, bs, is_inter) > 1 &&
!xd->lossless[xd->mi[0]->mbmi.segment_id]) {
const int ext_tx_set = get_ext_tx_set(tx_size, bs, is_inter);
if (is_inter) {
if (ext_tx_set > 0)
rd_stats->rate +=
cpi->inter_tx_type_costs[ext_tx_set][txsize_sqr_map[mbmi->tx_size]]
[mbmi->tx_type];
} else {
if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX)
rd_stats->rate +=
cpi->intra_tx_type_costs[ext_tx_set][txsize_sqr_map[mbmi->tx_size]]
[mbmi->mode][mbmi->tx_type];
}
}
#else
if (tx_size < TX_32X32 && !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
!FIXED_TX_TYPE) {
if (is_inter) {
rd_stats->rate += cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type];
} else {
rd_stats->rate +=
cpi->intra_tx_type_costs[mbmi->tx_size]
[intra_mode_to_tx_type_context[mbmi->mode]]
[mbmi->tx_type];
}
}
#endif // CONFIG_EXT_TX
if (rd_stats->skip) {
if (is_inter) {
rd = RDCOST(x->rdmult, x->rddiv, s1, rd_stats->sse);
} else {
rd = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size * tx_select,
rd_stats->sse);
}
} else {
rd = RDCOST(x->rdmult, x->rddiv,
rd_stats->rate + s0 + r_tx_size * tx_select, rd_stats->dist);
}
if (tx_select) rd_stats->rate += r_tx_size;
if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
!(rd_stats->skip))
rd = AOMMIN(rd, RDCOST(x->rdmult, x->rddiv, s1, rd_stats->sse));
return rd;
}
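// For a fixed transform type, searches over the allowed transform sizes
// (including the rectangular size when EXT_TX && RECT_TX permit it) and
// returns the best RD cost, with early termination controlled by
// sf.tx_size_search_breakout.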
static int64_t choose_tx_size_fix_type(const AV1_COMP *const cpi, BLOCK_SIZE bs,
MACROBLOCK *x, RD_STATS *rd_stats,
int64_t ref_best_rd, TX_TYPE tx_type,
#if CONFIG_PVQ
od_rollback_buffer buf,
#endif
int prune) {
const AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
int64_t rd = INT64_MAX;
int n;
int start_tx, end_tx;
int64_t best_rd = INT64_MAX, last_rd = INT64_MAX;
const TX_SIZE max_tx_size = max_txsize_lookup[bs];
TX_SIZE best_tx_size = max_tx_size;
const int tx_select = cm->tx_mode == TX_MODE_SELECT;
const int is_inter = is_inter_block(mbmi);
#if CONFIG_EXT_TX
#if CONFIG_RECT_TX
int evaluate_rect_tx = 0;
#endif // CONFIG_RECT_TX
int ext_tx_set;
#endif // CONFIG_EXT_TX
if (tx_select) {
#if CONFIG_EXT_TX && CONFIG_RECT_TX
evaluate_rect_tx = is_rect_tx_allowed(xd, mbmi);
#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
start_tx = max_tx_size;
end_tx = (max_tx_size >= TX_32X32) ? TX_8X8 : TX_4X4;
} else {
const TX_SIZE chosen_tx_size =
tx_size_from_tx_mode(bs, cm->tx_mode, is_inter);
#if CONFIG_EXT_TX && CONFIG_RECT_TX
evaluate_rect_tx = is_rect_tx(chosen_tx_size);
assert(IMPLIES(evaluate_rect_tx, is_rect_tx_allowed(xd, mbmi)));
#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
start_tx = chosen_tx_size;
end_tx = chosen_tx_size;
}
av1_invalid_rd_stats(rd_stats);
mbmi->tx_type = tx_type;
#if CONFIG_EXT_TX && CONFIG_RECT_TX
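// When allowed, evaluate the rectangular transform size for this block
// before the square sizes.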
if (evaluate_rect_tx) {
const TX_SIZE rect_tx_size = max_txsize_rect_lookup[bs];
RD_STATS this_rd_stats;
ext_tx_set = get_ext_tx_set(rect_tx_size, bs, is_inter);
if ((is_inter && ext_tx_used_inter[ext_tx_set][tx_type]) ||
(!is_inter && ext_tx_used_intra[ext_tx_set][tx_type])) {
rd = txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs, tx_type,
rect_tx_size);
best_tx_size = rect_tx_size;
best_rd = rd;
*rd_stats = this_rd_stats;
}
}
#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
last_rd = INT64_MAX;
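// Scan the square transform sizes from start_tx down to end_tx; the
// rectangular size, if any, was handled above.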
for (n = start_tx; n >= end_tx; --n) {
RD_STATS this_rd_stats;
#if CONFIG_EXT_TX && CONFIG_RECT_TX
if (is_rect_tx(n)) break;
#endif // CONFIG_EXT_TX && CONFIG_RECT_TX
if (FIXED_TX_TYPE && tx_type != get_default_tx_type(0, xd, 0, n)) continue;
if (!is_inter && x->use_default_intra_tx_type &&
tx_type != get_default_tx_type(0, xd, 0, n))
continue;
if (is_inter && x->use_default_inter_tx_type &&
tx_type != get_default_tx_type(0, xd, 0, n))
continue;
if (max_tx_size >= TX_32X32 && n == TX_4X4) continue;
#if CONFIG_EXT_TX
ext_tx_set = get_ext_tx_set(n, bs, is_inter);
if (is_inter) {
if (!ext_tx_used_inter[ext_tx_set][tx_type]) continue;
if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE) {
if (!do_tx_type_search(tx_type, prune)) continue;
}
} else {
if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) {
if (tx_type != intra_mode_to_tx_type_context[mbmi->mode]) continue;
}
if (!ext_tx_used_intra[ext_tx_set][tx_type]) continue;
}
#else // CONFIG_EXT_TX
if (n >= TX_32X32 && tx_type != DCT_DCT) continue;
if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE &&
!do_tx_type_search(tx_type, prune))
continue;
#endif // CONFIG_EXT_TX
rd = txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs, tx_type, n);
#if CONFIG_PVQ
od_encode_rollback(&x->daala_enc, &buf);
#endif
// Early termination in transform size search.
if (cpi->sf.tx_size_search_breakout &&
(rd == INT64_MAX ||
(this_rd_stats.skip == 1 && tx_type != DCT_DCT && n < start_tx) ||
(n < (int)max_tx_size && rd > last_rd)))
break;
last_rd = rd;
if (rd < best_rd) {
best_tx_size = n;
best_rd = rd;
*rd_stats = this_rd_stats;
}
}
mbmi->tx_size = best_tx_size;
return best_rd;
}
#if CONFIG_EXT_INTER
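// Thin wrapper around txfm_yrd() that estimates the luma RD cost of |bs|
// with DCT_DCT at the largest square transform size, returning rate,
// distortion, skip and sse through the output pointers.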
static int64_t estimate_yrd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bs,
MACROBLOCK *x, int *r, int64_t *d, int *s,
int64_t *sse, int64_t ref_best_rd) {
RD_STATS rd_stats;
int64_t rd = txfm_yrd(cpi, x, &rd_stats, ref_best_rd, bs, DCT_DCT,
max_txsize_lookup[bs]);
*r = rd_stats.rate;
*d = rd_stats.dist;
*s = rd_stats.skip;
*sse = rd_stats.sse;
return rd;
}
#endif // CONFIG_EXT_INTER
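// Keeps the transform size implied by the frame-level tx_mode and searches
// only over transform types (when more than one type is legal and the block
// is not lossless, otherwise DCT_DCT is used). The winning type is written
// to mbmi->tx_type and its stats to |rd_stats|.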
static void choose_largest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
RD_STATS *rd_stats, int64_t ref_best_rd,
BLOCK_SIZE bs) {
const AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
TX_TYPE tx_type, best_tx_type = DCT_DCT;
int64_t this_rd, best_rd = INT64_MAX;
aom_prob skip_prob = av1_get_skip_prob(cm, xd);
int s0 = av1_cost_bit(skip_prob, 0);
int s1 = av1_cost_bit(skip_prob, 1);
const int is_inter = is_inter_block(mbmi);
int prune = 0;
#if CONFIG_EXT_TX
int ext_tx_set;
#endif // CONFIG_EXT_TX
av1_invalid_rd_stats(rd_stats);
mbmi->tx_size = tx_size_from_tx_mode(bs, cm->tx_mode, is_inter);
#if CONFIG_VAR_TX
mbmi->min_tx_size = get_min_tx_size(mbmi->tx_size);
#endif
#if CONFIG_EXT_TX
ext_tx_set = get_ext_tx_set(mbmi->tx_size, bs, is_inter);
#endif // CONFIG_EXT_TX
if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE)
#if CONFIG_EXT_TX
prune = prune_tx_types(cpi, bs, x, xd, ext_tx_set);
#else
prune = prune_tx_types(cpi, bs, x, xd, 0);
#endif
#if CONFIG_EXT_TX
if (get_ext_tx_types(mbmi->tx_size, bs, is_inter) > 1 &&
!xd->lossless[mbmi->segment_id]) {
#if CONFIG_PVQ
od_rollback_buffer pre_buf, post_buf;
od_encode_checkpoint(&x->daala_enc, &pre_buf);
od_encode_checkpoint(&x->daala_enc, &post_buf);
#endif
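// Evaluate every transform type that is legal for this block and keep the
// one with the lowest RD cost.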
for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
RD_STATS this_rd_stats;
if (is_inter) {
if (x->use_default_inter_tx_type &&
tx_type != get_default_tx_type(0, xd, 0, mbmi->tx_size))
continue;
if (!ext_tx_used_inter[ext_tx_set][tx_type]) continue;
if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE) {
if (!do_tx_type_search(tx_type, prune)) continue;
}
} else {
if (x->use_default_intra_tx_type &&
tx_type != get_default_tx_type(0, xd, 0, mbmi->tx_size))
continue;
if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) {
if (tx_type != intra_mode_to_tx_type_context[mbmi->mode]) continue;
}
if (!ext_tx_used_intra[ext_tx_set][tx_type]) continue;
}
mbmi->tx_type = tx_type;
txfm_rd_in_plane(x, cpi, &this_rd_stats, ref_best_rd, 0, bs,
mbmi->tx_size, cpi->sf.use_fast_coef_costing);
#if CONFIG_PVQ
od_encode_rollback(&x->daala_enc, &pre_buf);
#endif
if (this_rd_stats.rate == INT_MAX) continue;
if (get_ext_tx_types(mbmi->tx_size, bs, is_inter) > 1) {
if (is_inter) {
if (ext_tx_set > 0)
this_rd_stats.rate +=
cpi->inter_tx_type_costs[ext_tx_set]
[txsize_sqr_map[mbmi->tx_size]]
[mbmi->tx_type];
} else {
if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX)
this_rd_stats.rate +=
cpi->intra_tx_type_costs[ext_tx_set]
[txsize_sqr_map[mbmi->tx_size]]
[mbmi->mode][mbmi->tx_type];
}
}
if (this_rd_stats.skip)
this_rd = RDCOST(x->rdmult, x->rddiv, s1, this_rd_stats.sse);
else
this_rd = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate + s0,
this_rd_stats.dist);
if (is_inter_block(mbmi) && !xd->lossless[mbmi->segment_id] &&
!this_rd_stats.skip)
this_rd =
AOMMIN(this_rd, RDCOST(x->rdmult, x->rddiv, s1, this_rd_stats.sse));
if (this_rd < best_rd) {
best_rd = this_rd;
best_tx_type = mbmi->tx_type;
*rd_stats = this_rd_stats;
#if CONFIG_PVQ
od_encode_checkpoint(&x->daala_enc, &post_buf);
#endif
}
}
#if CONFIG_PVQ
od_encode_rollback(&x->daala_enc, &post_buf);
#endif
} else {
mbmi->tx_type = DCT_DCT;
txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, 0, bs, mbmi->tx_size,
cpi->sf.use_fast_coef_costing);
}
#else // CONFIG_EXT_TX
if (mbmi->tx_size < TX_32X32 && !xd->lossless[mbmi->segment_id]) {
for (tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
RD_STATS this_rd_stats;
if (!is_inter && x->use_default_intra_tx_type &&
tx_type != get_default_tx_type(0, xd, 0, mbmi->tx_size))
continue;
if (is_inter && x->use_default_inter_tx_type &&
tx_type != get_default_tx_type(0, xd, 0, mbmi->tx_size))
continue;
mbmi->tx_type = tx_type;
txfm_rd_in_plane(x, cpi, &this_rd_stats, ref_best_rd, 0, bs,
mbmi->tx_size, cpi->sf.use_fast_coef_costing);
if (this_rd_stats.rate == INT_MAX) continue;
if (is_inter) {
this_rd_stats.rate +=
cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type];
if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE &&
!do_tx_type_search(tx_type, prune))
continue;
} else {
this_rd_stats.rate +=
cpi->intra_tx_type_costs[mbmi->tx_size]
[intra_mode_to_tx_type_context[mbmi->mode]]
[mbmi->tx_type];
}
if (this_rd_stats.skip)
this_rd = RDCOST(x->rdmult, x->rddiv, s1, this_rd_stats.sse);
else
this_rd = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate + s0,
this_rd_stats.dist);
if (is_inter && !xd->lossless[mbmi->segment_id] && !this_rd_stats.skip)
this_rd =
AOMMIN(this_rd, RDCOST(x->rdmult, x->rddiv, s1, this_rd_stats.sse));
if (this_rd < best_rd) {
best_rd = this_rd;
best_tx_type = mbmi->tx_type;
*rd_stats = this_rd_stats;
}
}
} else {
mbmi->tx_type = DCT_DCT;
txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, 0, bs, mbmi->tx_size,
cpi->sf.use_fast_coef_costing);
}
#endif // CONFIG_EXT_TX
mbmi->tx_type = best_tx_type;
}
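// Forces TX_4X4 with DCT_DCT and evaluates its cost; used by
// super_block_yrd() for lossless blocks.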
static void choose_smallest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
RD_STATS *rd_stats, int64_t ref_best_rd,
BLOCK_SIZE bs) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
mbmi->tx_size = TX_4X4;
mbmi->tx_type = DCT_DCT;
#if CONFIG_VAR_TX
mbmi->min_tx_size = get_min_tx_size(TX_4X4);
#endif
txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, 0, bs, mbmi->tx_size,
cpi->sf.use_fast_coef_costing);
}
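// Jointly searches transform type and transform size for the luma plane.
// The winning combination is written back to mbmi->tx_size / mbmi->tx_type
// (and mbmi->min_tx_size with CONFIG_VAR_TX) and its stats to |rd_stats|.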
static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi,
MACROBLOCK *x, RD_STATS *rd_stats,
int64_t ref_best_rd, BLOCK_SIZE bs) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
int64_t rd = INT64_MAX;
int64_t best_rd = INT64_MAX;
TX_SIZE best_tx = max_txsize_lookup[bs];
const int is_inter = is_inter_block(mbmi);
TX_TYPE tx_type, best_tx_type = DCT_DCT;
int prune = 0;
#if CONFIG_PVQ
od_rollback_buffer buf;
#endif
if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE)
// Passing -1 for tx_type indicates that all 1D
// transforms should be considered for pruning.
prune = prune_tx_types(cpi, bs, x, xd, -1);
av1_invalid_rd_stats(rd_stats);
#if CONFIG_PVQ
od_encode_checkpoint(&x->daala_enc, &buf);
#endif
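// For each candidate transform type, let choose_tx_size_fix_type() pick its
// best transform size, then keep the overall winner.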
for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
RD_STATS this_rd_stats;
#if CONFIG_REF_MV
if (mbmi->ref_mv_idx > 0 && tx_type != DCT_DCT) continue;
#endif
rd = choose_tx_size_fix_type(cpi, bs, x, &this_rd_stats, ref_best_rd,
tx_type,
#if CONFIG_PVQ
buf,
#endif
prune);
if (rd < best_rd) {
best_rd = rd;
*rd_stats = this_rd_stats;
best_tx_type = tx_type;
best_tx = mbmi->tx_size;
}
}
mbmi->tx_size = best_tx;
mbmi->tx_type = best_tx_type;
#if CONFIG_VAR_TX
mbmi->min_tx_size = get_min_tx_size(mbmi->tx_size);
#endif
#if !CONFIG_EXT_TX
if (mbmi->tx_size >= TX_32X32) assert(mbmi->tx_type == DCT_DCT);
#endif
#if CONFIG_PVQ
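// The per-candidate searches above rolled back the daala encoder state, so
// re-run the winning type/size combination to leave the encoder state and
// |rd_stats| consistent with the final choice.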
if (best_rd != INT64_MAX) {
txfm_yrd(cpi, x, rd_stats, ref_best_rd, bs, best_tx_type, best_tx);
}
#endif
}
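// Entry point for the luma transform RD search on a whole block: lossless
// blocks take the smallest-size path, USE_LARGESTALL restricts the search to
// the largest allowed size, and everything else runs the full size/type
// search.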
static void super_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
RD_STATS *rd_stats, BLOCK_SIZE bs,
int64_t ref_best_rd) {
MACROBLOCKD *xd = &x->e_mbd;
av1_init_rd_stats(rd_stats);
assert(bs == xd->mi[0]->mbmi.sb_type);
if (xd->lossless[0]) {
choose_smallest_tx_size(cpi, x, rd_stats, ref_best_rd, bs);
} else if (cpi->sf.tx_size_search_method == USE_LARGESTALL) {
choose_largest_tx_size(cpi, x, rd_stats, ref_best_rd, bs);
} else {
choose_tx_size_type_from_rd(cpi, x, rd_stats, ref_best_rd, bs);
}
}
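// Returns 1 when the diagonal intra mode |mode| can be skipped because
// neither of its two adjacent directional modes is the current best intra
// mode.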
static int conditional_skipintra(PREDICTION_MODE mode,
PREDICTION_MODE best_intra_mode) {
if (mode == D117_PRED && best_intra_mode != V_PRED &&
best_intra_mode != D135_PRED)
return 1;
if (mode == D63_PRED && best_intra_mode != V_PRED &&
best_intra_mode != D45_PRED)
return 1;
if (mode == D207_PRED && best_intra_mode != H_PRED &&
best_intra_mode != D45_PRED)
return 1;
if (mode == D153_PRED && best_intra_mode != H_PRED &&
best_intra_mode != D135_PRED)
return 1;
return 0;
}
// Model-based RD estimation for luma intra blocks.
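// The block is intra-predicted one transform block at a time and
// model_rd_for_sb() then converts the prediction residual into an estimated
// rate and distortion, avoiding a full transform/quantize/cost pass.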
static int64_t intra_model_yrd(const AV1_COMP *const cpi, MACROBLOCK *const x,
BLOCK_SIZE bsize, int mode_cost) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
RD_STATS this_rd_stats;
int row, col;
int64_t temp_sse, this_rd;
const TX_SIZE tx_size = tx_size_from_tx_mode(bsize, cpi->common.tx_mode, 0);
const int stepr = tx_size_high_unit[tx_size];
const int stepc = tx_size_wide_unit[tx_size];
const int max_blocks_wide = max_block_wide(xd, bsize, 0);
const int max_blocks_high = max_block_high(xd, bsize, 0);
mbmi->tx_size = tx_size;
// Prediction.
for (row = 0; row < max_blocks_high; row += stepr) {
for (col = 0; col < max_blocks_wide; col += stepc) {
struct macroblockd_plane *const pd = &xd->plane[0];
uint8_t *dst =
&pd->dst.buf[(row * pd->dst.stride + col) << tx_size_wide_log2[0]];
av1_predict_intra_block(xd, pd->width, pd->height,
txsize_to_bsize[tx_size], mbmi->mode, dst,
pd->dst.stride, dst, pd->dst.stride, col, row, 0);
}
}
// RD estimation.
model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &this_rd_stats.rate,
&this_rd_stats.dist, &this_rd_stats.skip, &temp_sse);
#if CONFIG_EXT_INTRA
if (av1_is_directional_mode(mbmi->mode, bsize)) {
const int max_angle_