blob: 4c13788fc5c776e6a248dc97def5eeb5118afc7d [file] [log] [blame]
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include <assert.h>
#include <math.h>
#include <stdbool.h>
#include "config/aom_dsp_rtcd.h"
#include "config/av1_rtcd.h"
#include "aom_dsp/aom_dsp_common.h"
#include "aom_dsp/blend.h"
#include "aom_mem/aom_mem.h"
#include "aom_ports/aom_timer.h"
#include "aom_ports/mem.h"
#include "aom_ports/system_state.h"
#include "av1/common/cfl.h"
#include "av1/common/common.h"
#include "av1/common/common_data.h"
#include "av1/common/entropy.h"
#include "av1/common/entropymode.h"
#include "av1/common/idct.h"
#include "av1/common/mvref_common.h"
#include "av1/common/obmc.h"
#include "av1/common/onyxc_int.h"
#include "av1/common/pred_common.h"
#include "av1/common/quant_common.h"
#include "av1/common/reconinter.h"
#include "av1/common/reconintra.h"
#include "av1/common/scan.h"
#include "av1/common/seg_common.h"
#include "av1/common/txb_common.h"
#include "av1/common/warped_motion.h"
#include "av1/encoder/aq_variance.h"
#include "av1/encoder/av1_quantize.h"
#include "av1/encoder/cost.h"
#include "av1/encoder/compound_type.h"
#include "av1/encoder/encodemb.h"
#include "av1/encoder/encodemv.h"
#include "av1/encoder/encoder.h"
#include "av1/encoder/encodetxb.h"
#include "av1/encoder/hybrid_fwd_txfm.h"
#include "av1/encoder/interp_search.h"
#include "av1/encoder/mcomp.h"
#include "av1/encoder/ml.h"
#include "av1/encoder/mode_prune_model_weights.h"
#include "av1/encoder/model_rd.h"
#include "av1/encoder/motion_search.h"
#include "av1/encoder/palette.h"
#include "av1/encoder/pustats.h"
#include "av1/encoder/random.h"
#include "av1/encoder/ratectrl.h"
#include "av1/encoder/rd.h"
#include "av1/encoder/rdopt.h"
#include "av1/encoder/rdopt_utils.h"
#include "av1/encoder/reconinter_enc.h"
#include "av1/encoder/tokenize.h"
#include "av1/encoder/tpl_model.h"
#include "av1/encoder/tx_search.h"
// One row of the mode table below: a prediction mode together with the pair of
// reference frames it is evaluated with.
typedef struct {
// Prediction mode of this row.
PREDICTION_MODE mode;
// Reference frame pair. In av1_mode_defs the second slot is NONE_FRAME for
// every single-reference and intra row.
MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;
// NOTE(review): not referenced in the visible part of this file. In the table
// below, index 6 is the last NEARESTMV row (GOLDEN_FRAME) -- confirm that this
// is still the intended meaning.
#define LAST_NEW_MV_INDEX 6
// This array defines the mapping from the enums in THR_MODES to the actual
// prediction modes and reference frames.
static const MODE_DEFINITION av1_mode_defs[MAX_MODES] = {
// Single reference: NEARESTMV on each reference frame.
{ NEARESTMV, { LAST_FRAME, NONE_FRAME } },
{ NEARESTMV, { LAST2_FRAME, NONE_FRAME } },
{ NEARESTMV, { LAST3_FRAME, NONE_FRAME } },
{ NEARESTMV, { BWDREF_FRAME, NONE_FRAME } },
{ NEARESTMV, { ALTREF2_FRAME, NONE_FRAME } },
{ NEARESTMV, { ALTREF_FRAME, NONE_FRAME } },
{ NEARESTMV, { GOLDEN_FRAME, NONE_FRAME } },
// Single reference: NEWMV on each reference frame.
{ NEWMV, { LAST_FRAME, NONE_FRAME } },
{ NEWMV, { LAST2_FRAME, NONE_FRAME } },
{ NEWMV, { LAST3_FRAME, NONE_FRAME } },
{ NEWMV, { BWDREF_FRAME, NONE_FRAME } },
{ NEWMV, { ALTREF2_FRAME, NONE_FRAME } },
{ NEWMV, { ALTREF_FRAME, NONE_FRAME } },
{ NEWMV, { GOLDEN_FRAME, NONE_FRAME } },
// Single reference: NEARMV on each reference frame.
{ NEARMV, { LAST_FRAME, NONE_FRAME } },
{ NEARMV, { LAST2_FRAME, NONE_FRAME } },
{ NEARMV, { LAST3_FRAME, NONE_FRAME } },
{ NEARMV, { BWDREF_FRAME, NONE_FRAME } },
{ NEARMV, { ALTREF2_FRAME, NONE_FRAME } },
{ NEARMV, { ALTREF_FRAME, NONE_FRAME } },
{ NEARMV, { GOLDEN_FRAME, NONE_FRAME } },
// Single reference: GLOBALMV on each reference frame.
{ GLOBALMV, { LAST_FRAME, NONE_FRAME } },
{ GLOBALMV, { LAST2_FRAME, NONE_FRAME } },
{ GLOBALMV, { LAST3_FRAME, NONE_FRAME } },
{ GLOBALMV, { BWDREF_FRAME, NONE_FRAME } },
{ GLOBALMV, { ALTREF2_FRAME, NONE_FRAME } },
{ GLOBALMV, { ALTREF_FRAME, NONE_FRAME } },
{ GLOBALMV, { GOLDEN_FRAME, NONE_FRAME } },
// TODO(zoeliu): May need to reconsider the order on the modes to check
// Compound: NEAREST_NEARESTMV for every reference frame pair.
{ NEAREST_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
{ NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
{ NEAREST_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
{ NEAREST_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
{ NEAREST_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
{ NEAREST_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
{ NEAREST_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
{ NEAREST_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
{ NEAREST_NEARESTMV, { LAST_FRAME, ALTREF2_FRAME } },
{ NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF2_FRAME } },
{ NEAREST_NEARESTMV, { LAST3_FRAME, ALTREF2_FRAME } },
{ NEAREST_NEARESTMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
{ NEAREST_NEARESTMV, { LAST_FRAME, LAST2_FRAME } },
{ NEAREST_NEARESTMV, { LAST_FRAME, LAST3_FRAME } },
{ NEAREST_NEARESTMV, { LAST_FRAME, GOLDEN_FRAME } },
{ NEAREST_NEARESTMV, { BWDREF_FRAME, ALTREF_FRAME } },
// Compound: the remaining seven compound modes, grouped per reference pair.
// (LAST, ALTREF)
{ NEAR_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
{ NEW_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
{ NEAREST_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
{ NEW_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
{ NEAR_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
{ NEW_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
{ GLOBAL_GLOBALMV, { LAST_FRAME, ALTREF_FRAME } },
// (LAST2, ALTREF)
{ NEAR_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
{ NEW_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
{ NEAREST_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
{ NEW_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
{ NEAR_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
{ NEW_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
{ GLOBAL_GLOBALMV, { LAST2_FRAME, ALTREF_FRAME } },
// (LAST3, ALTREF)
{ NEAR_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
{ NEW_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
{ NEAREST_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
{ NEW_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
{ NEAR_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
{ NEW_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
{ GLOBAL_GLOBALMV, { LAST3_FRAME, ALTREF_FRAME } },
// (GOLDEN, ALTREF)
{ NEAR_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
{ NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
{ NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
{ NEW_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
{ NEAR_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
{ NEW_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
{ GLOBAL_GLOBALMV, { GOLDEN_FRAME, ALTREF_FRAME } },
// (LAST, BWDREF)
{ NEAR_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
{ NEW_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
{ NEAREST_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
{ NEW_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
{ NEAR_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
{ NEW_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
{ GLOBAL_GLOBALMV, { LAST_FRAME, BWDREF_FRAME } },
// (LAST2, BWDREF)
{ NEAR_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
{ NEW_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
{ NEAREST_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
{ NEW_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
{ NEAR_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
{ NEW_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
{ GLOBAL_GLOBALMV, { LAST2_FRAME, BWDREF_FRAME } },
// (LAST3, BWDREF)
{ NEAR_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
{ NEW_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
{ NEAREST_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
{ NEW_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
{ NEAR_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
{ NEW_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
{ GLOBAL_GLOBALMV, { LAST3_FRAME, BWDREF_FRAME } },
// (GOLDEN, BWDREF)
{ NEAR_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
{ NEW_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
{ NEAREST_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
{ NEW_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
{ NEAR_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
{ NEW_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
{ GLOBAL_GLOBALMV, { GOLDEN_FRAME, BWDREF_FRAME } },
// (LAST, ALTREF2)
{ NEAR_NEARMV, { LAST_FRAME, ALTREF2_FRAME } },
{ NEW_NEARESTMV, { LAST_FRAME, ALTREF2_FRAME } },
{ NEAREST_NEWMV, { LAST_FRAME, ALTREF2_FRAME } },
{ NEW_NEARMV, { LAST_FRAME, ALTREF2_FRAME } },
{ NEAR_NEWMV, { LAST_FRAME, ALTREF2_FRAME } },
{ NEW_NEWMV, { LAST_FRAME, ALTREF2_FRAME } },
{ GLOBAL_GLOBALMV, { LAST_FRAME, ALTREF2_FRAME } },
// (LAST2, ALTREF2)
{ NEAR_NEARMV, { LAST2_FRAME, ALTREF2_FRAME } },
{ NEW_NEARESTMV, { LAST2_FRAME, ALTREF2_FRAME } },
{ NEAREST_NEWMV, { LAST2_FRAME, ALTREF2_FRAME } },
{ NEW_NEARMV, { LAST2_FRAME, ALTREF2_FRAME } },
{ NEAR_NEWMV, { LAST2_FRAME, ALTREF2_FRAME } },
{ NEW_NEWMV, { LAST2_FRAME, ALTREF2_FRAME } },
{ GLOBAL_GLOBALMV, { LAST2_FRAME, ALTREF2_FRAME } },
// (LAST3, ALTREF2)
{ NEAR_NEARMV, { LAST3_FRAME, ALTREF2_FRAME } },
{ NEW_NEARESTMV, { LAST3_FRAME, ALTREF2_FRAME } },
{ NEAREST_NEWMV, { LAST3_FRAME, ALTREF2_FRAME } },
{ NEW_NEARMV, { LAST3_FRAME, ALTREF2_FRAME } },
{ NEAR_NEWMV, { LAST3_FRAME, ALTREF2_FRAME } },
{ NEW_NEWMV, { LAST3_FRAME, ALTREF2_FRAME } },
{ GLOBAL_GLOBALMV, { LAST3_FRAME, ALTREF2_FRAME } },
// (GOLDEN, ALTREF2)
{ NEAR_NEARMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
{ NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
{ NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
{ NEW_NEARMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
{ NEAR_NEWMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
{ NEW_NEWMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
{ GLOBAL_GLOBALMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
// (LAST, LAST2)
{ NEAR_NEARMV, { LAST_FRAME, LAST2_FRAME } },
{ NEW_NEARESTMV, { LAST_FRAME, LAST2_FRAME } },
{ NEAREST_NEWMV, { LAST_FRAME, LAST2_FRAME } },
{ NEW_NEARMV, { LAST_FRAME, LAST2_FRAME } },
{ NEAR_NEWMV, { LAST_FRAME, LAST2_FRAME } },
{ NEW_NEWMV, { LAST_FRAME, LAST2_FRAME } },
{ GLOBAL_GLOBALMV, { LAST_FRAME, LAST2_FRAME } },
// (LAST, LAST3)
{ NEAR_NEARMV, { LAST_FRAME, LAST3_FRAME } },
{ NEW_NEARESTMV, { LAST_FRAME, LAST3_FRAME } },
{ NEAREST_NEWMV, { LAST_FRAME, LAST3_FRAME } },
{ NEW_NEARMV, { LAST_FRAME, LAST3_FRAME } },
{ NEAR_NEWMV, { LAST_FRAME, LAST3_FRAME } },
{ NEW_NEWMV, { LAST_FRAME, LAST3_FRAME } },
{ GLOBAL_GLOBALMV, { LAST_FRAME, LAST3_FRAME } },
// (LAST, GOLDEN)
{ NEAR_NEARMV, { LAST_FRAME, GOLDEN_FRAME } },
{ NEW_NEARESTMV, { LAST_FRAME, GOLDEN_FRAME } },
{ NEAREST_NEWMV, { LAST_FRAME, GOLDEN_FRAME } },
{ NEW_NEARMV, { LAST_FRAME, GOLDEN_FRAME } },
{ NEAR_NEWMV, { LAST_FRAME, GOLDEN_FRAME } },
{ NEW_NEWMV, { LAST_FRAME, GOLDEN_FRAME } },
{ GLOBAL_GLOBALMV, { LAST_FRAME, GOLDEN_FRAME } },
// (BWDREF, ALTREF)
{ NEAR_NEARMV, { BWDREF_FRAME, ALTREF_FRAME } },
{ NEW_NEARESTMV, { BWDREF_FRAME, ALTREF_FRAME } },
{ NEAREST_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } },
{ NEW_NEARMV, { BWDREF_FRAME, ALTREF_FRAME } },
{ NEAR_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } },
{ NEW_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } },
{ GLOBAL_GLOBALMV, { BWDREF_FRAME, ALTREF_FRAME } },
// intra modes
{ DC_PRED, { INTRA_FRAME, NONE_FRAME } },
{ PAETH_PRED, { INTRA_FRAME, NONE_FRAME } },
{ SMOOTH_PRED, { INTRA_FRAME, NONE_FRAME } },
{ SMOOTH_V_PRED, { INTRA_FRAME, NONE_FRAME } },
{ SMOOTH_H_PRED, { INTRA_FRAME, NONE_FRAME } },
{ H_PRED, { INTRA_FRAME, NONE_FRAME } },
{ V_PRED, { INTRA_FRAME, NONE_FRAME } },
{ D135_PRED, { INTRA_FRAME, NONE_FRAME } },
{ D203_PRED, { INTRA_FRAME, NONE_FRAME } },
{ D157_PRED, { INTRA_FRAME, NONE_FRAME } },
{ D67_PRED, { INTRA_FRAME, NONE_FRAME } },
{ D113_PRED, { INTRA_FRAME, NONE_FRAME } },
{ D45_PRED, { INTRA_FRAME, NONE_FRAME } },
};
// Default ordering of THR_MODES values. Judging by the enumerator names, the
// entries are listed in the same sequence as the rows of av1_mode_defs: single
// reference modes first, then compound modes, then intra modes.
// NOTE(review): the THR_MODES enum is declared elsewhere; confirm that this
// order matches its declaration order if that is relied upon.
static const THR_MODES av1_default_mode_order[MAX_MODES] = {
// Single reference modes.
THR_NEARESTMV,
THR_NEARESTL2,
THR_NEARESTL3,
THR_NEARESTB,
THR_NEARESTA2,
THR_NEARESTA,
THR_NEARESTG,
THR_NEWMV,
THR_NEWL2,
THR_NEWL3,
THR_NEWB,
THR_NEWA2,
THR_NEWA,
THR_NEWG,
THR_NEARMV,
THR_NEARL2,
THR_NEARL3,
THR_NEARB,
THR_NEARA2,
THR_NEARA,
THR_NEARG,
THR_GLOBALMV,
THR_GLOBALL2,
THR_GLOBALL3,
THR_GLOBALB,
THR_GLOBALA2,
THR_GLOBALA,
THR_GLOBALG,
// Compound modes.
THR_COMP_NEAREST_NEARESTLA,
THR_COMP_NEAREST_NEARESTL2A,
THR_COMP_NEAREST_NEARESTL3A,
THR_COMP_NEAREST_NEARESTGA,
THR_COMP_NEAREST_NEARESTLB,
THR_COMP_NEAREST_NEARESTL2B,
THR_COMP_NEAREST_NEARESTL3B,
THR_COMP_NEAREST_NEARESTGB,
THR_COMP_NEAREST_NEARESTLA2,
THR_COMP_NEAREST_NEARESTL2A2,
THR_COMP_NEAREST_NEARESTL3A2,
THR_COMP_NEAREST_NEARESTGA2,
THR_COMP_NEAREST_NEARESTLL2,
THR_COMP_NEAREST_NEARESTLL3,
THR_COMP_NEAREST_NEARESTLG,
THR_COMP_NEAREST_NEARESTBA,
THR_COMP_NEAR_NEARLA,
THR_COMP_NEW_NEARESTLA,
THR_COMP_NEAREST_NEWLA,
THR_COMP_NEW_NEARLA,
THR_COMP_NEAR_NEWLA,
THR_COMP_NEW_NEWLA,
THR_COMP_GLOBAL_GLOBALLA,
THR_COMP_NEAR_NEARL2A,
THR_COMP_NEW_NEARESTL2A,
THR_COMP_NEAREST_NEWL2A,
THR_COMP_NEW_NEARL2A,
THR_COMP_NEAR_NEWL2A,
THR_COMP_NEW_NEWL2A,
THR_COMP_GLOBAL_GLOBALL2A,
THR_COMP_NEAR_NEARL3A,
THR_COMP_NEW_NEARESTL3A,
THR_COMP_NEAREST_NEWL3A,
THR_COMP_NEW_NEARL3A,
THR_COMP_NEAR_NEWL3A,
THR_COMP_NEW_NEWL3A,
THR_COMP_GLOBAL_GLOBALL3A,
THR_COMP_NEAR_NEARGA,
THR_COMP_NEW_NEARESTGA,
THR_COMP_NEAREST_NEWGA,
THR_COMP_NEW_NEARGA,
THR_COMP_NEAR_NEWGA,
THR_COMP_NEW_NEWGA,
THR_COMP_GLOBAL_GLOBALGA,
THR_COMP_NEAR_NEARLB,
THR_COMP_NEW_NEARESTLB,
THR_COMP_NEAREST_NEWLB,
THR_COMP_NEW_NEARLB,
THR_COMP_NEAR_NEWLB,
THR_COMP_NEW_NEWLB,
THR_COMP_GLOBAL_GLOBALLB,
THR_COMP_NEAR_NEARL2B,
THR_COMP_NEW_NEARESTL2B,
THR_COMP_NEAREST_NEWL2B,
THR_COMP_NEW_NEARL2B,
THR_COMP_NEAR_NEWL2B,
THR_COMP_NEW_NEWL2B,
THR_COMP_GLOBAL_GLOBALL2B,
THR_COMP_NEAR_NEARL3B,
THR_COMP_NEW_NEARESTL3B,
THR_COMP_NEAREST_NEWL3B,
THR_COMP_NEW_NEARL3B,
THR_COMP_NEAR_NEWL3B,
THR_COMP_NEW_NEWL3B,
THR_COMP_GLOBAL_GLOBALL3B,
THR_COMP_NEAR_NEARGB,
THR_COMP_NEW_NEARESTGB,
THR_COMP_NEAREST_NEWGB,
THR_COMP_NEW_NEARGB,
THR_COMP_NEAR_NEWGB,
THR_COMP_NEW_NEWGB,
THR_COMP_GLOBAL_GLOBALGB,
THR_COMP_NEAR_NEARLA2,
THR_COMP_NEW_NEARESTLA2,
THR_COMP_NEAREST_NEWLA2,
THR_COMP_NEW_NEARLA2,
THR_COMP_NEAR_NEWLA2,
THR_COMP_NEW_NEWLA2,
THR_COMP_GLOBAL_GLOBALLA2,
THR_COMP_NEAR_NEARL2A2,
THR_COMP_NEW_NEARESTL2A2,
THR_COMP_NEAREST_NEWL2A2,
THR_COMP_NEW_NEARL2A2,
THR_COMP_NEAR_NEWL2A2,
THR_COMP_NEW_NEWL2A2,
THR_COMP_GLOBAL_GLOBALL2A2,
THR_COMP_NEAR_NEARL3A2,
THR_COMP_NEW_NEARESTL3A2,
THR_COMP_NEAREST_NEWL3A2,
THR_COMP_NEW_NEARL3A2,
THR_COMP_NEAR_NEWL3A2,
THR_COMP_NEW_NEWL3A2,
THR_COMP_GLOBAL_GLOBALL3A2,
THR_COMP_NEAR_NEARGA2,
THR_COMP_NEW_NEARESTGA2,
THR_COMP_NEAREST_NEWGA2,
THR_COMP_NEW_NEARGA2,
THR_COMP_NEAR_NEWGA2,
THR_COMP_NEW_NEWGA2,
THR_COMP_GLOBAL_GLOBALGA2,
THR_COMP_NEAR_NEARLL2,
THR_COMP_NEW_NEARESTLL2,
THR_COMP_NEAREST_NEWLL2,
THR_COMP_NEW_NEARLL2,
THR_COMP_NEAR_NEWLL2,
THR_COMP_NEW_NEWLL2,
THR_COMP_GLOBAL_GLOBALLL2,
THR_COMP_NEAR_NEARLL3,
THR_COMP_NEW_NEARESTLL3,
THR_COMP_NEAREST_NEWLL3,
THR_COMP_NEW_NEARLL3,
THR_COMP_NEAR_NEWLL3,
THR_COMP_NEW_NEWLL3,
THR_COMP_GLOBAL_GLOBALLL3,
THR_COMP_NEAR_NEARLG,
THR_COMP_NEW_NEARESTLG,
THR_COMP_NEAREST_NEWLG,
THR_COMP_NEW_NEARLG,
THR_COMP_NEAR_NEWLG,
THR_COMP_NEW_NEWLG,
THR_COMP_GLOBAL_GLOBALLG,
THR_COMP_NEAR_NEARBA,
THR_COMP_NEW_NEARESTBA,
THR_COMP_NEAREST_NEWBA,
THR_COMP_NEW_NEARBA,
THR_COMP_NEAR_NEWBA,
THR_COMP_NEW_NEWBA,
THR_COMP_GLOBAL_GLOBALBA,
// Intra modes.
THR_DC,
THR_PAETH,
THR_SMOOTH,
THR_SMOOTH_V,
THR_SMOOTH_H,
THR_H_PRED,
THR_V_PRED,
THR_D135_PRED,
THR_D203_PRED,
THR_D157_PRED,
THR_D67_PRED,
THR_D113_PRED,
THR_D45_PRED,
};
// Scans the first MAX_MODES entries of mode_order and returns the index at
// which the count of single-reference modes seen so far reaches
// NUM_SINGLE_REF_MODES, or -1 if that count is never reached.
//
// An entry counts as a single-reference mode when its value is below
// SINGLE_REF_MODE_END. Entries are counted, not de-duplicated, so the result is
// only meaningful when mode_order lists each mode at most once.
static int find_last_single_ref_mode_idx(const THR_MODES *mode_order) {
  int num_single_ref_modes_left = NUM_SINGLE_REF_MODES;
  for (int idx = 0; idx < MAX_MODES; idx++) {
    if (mode_order[idx] < SINGLE_REF_MODE_END) {
      num_single_ref_modes_left--;
    }
    // Checked on every iteration (not only after a decrement) to keep the
    // original behaviour for all values of NUM_SINGLE_REF_MODES.
    if (num_single_ref_modes_left == 0) {
      return idx;
    }
  }
  return -1;
}
// Ordering of the luma intra prediction modes, one entry per INTRA_MODES slot.
// NOTE(review): presumably the order in which the intra RD search visits the
// modes (per the name); the users are outside this chunk.
static const PREDICTION_MODE intra_rd_search_mode_order[INTRA_MODES] = {
DC_PRED, H_PRED, V_PRED, SMOOTH_PRED, PAETH_PRED,
SMOOTH_V_PRED, SMOOTH_H_PRED, D135_PRED, D203_PRED, D157_PRED,
D67_PRED, D113_PRED, D45_PRED,
};
// Ordering of the chroma intra prediction modes, one entry per UV_INTRA_MODES
// slot. Unlike the luma list it includes UV_CFL_PRED, placed second.
// NOTE(review): presumably the order in which the chroma RD search visits the
// modes (per the name); the users are outside this chunk.
static const UV_PREDICTION_MODE uv_rd_search_mode_order[UV_INTRA_MODES] = {
UV_DC_PRED, UV_CFL_PRED, UV_H_PRED, UV_V_PRED,
UV_SMOOTH_PRED, UV_PAETH_PRED, UV_SMOOTH_V_PRED, UV_SMOOTH_H_PRED,
UV_D135_PRED, UV_D203_PRED, UV_D157_PRED, UV_D67_PRED,
UV_D113_PRED, UV_D45_PRED,
};
// Record of one single-reference inter mode result: an RD value, the reference
// frame it belongs to, and a validity flag. Used as the element type of the
// single_state arrays in InterModeSearchState.
typedef struct SingleInterModeState {
// RD value recorded for this entry.
int64_t rd;
// Reference frame this entry refers to.
MV_REFERENCE_FRAME ref_frame;
// Validity flag. NOTE(review): presumably non-zero when rd is usable --
// confirm against the code that fills this in.
int valid;
} SingleInterModeState;
// State kept while intra modes are evaluated. Embedded in InterModeSearchState
// as intra_search_state.
// NOTE(review): the per-field notes below are inferred from the field names;
// the code that reads and writes them is outside this chunk.
typedef struct IntraModeSearchState {
// Presumably a flag telling the search to skip the remaining intra modes.
int skip_intra_modes;
// Presumably the best luma intra mode found so far.
PREDICTION_MODE best_intra_mode;
// Presumably set once directional_mode_skip_mask has been computed.
int angle_stats_ready;
// One entry per intra mode.
uint8_t directional_mode_skip_mask[INTRA_MODES];
// Chroma (uv) intra search results -- presumably cached for reuse.
int rate_uv_intra;
int rate_uv_tokenonly;
int64_t dist_uvs;
int skip_uvs;
UV_PREDICTION_MODE mode_uv;
PALETTE_MODE_INFO pmi_uv;
int8_t uv_angle_delta;
// One entry per reference mode.
int64_t best_pred_rd[REFERENCE_MODES];
} IntraModeSearchState;
// Aggregate state for an inter mode search: the best result so far, per-mode
// thresholds, cached single-reference search results and the embedded intra
// search state.
// NOTE(review): notes on fields without an original comment are inferred from
// the field names; the code that uses them is outside this chunk.
typedef struct InterModeSearchState {
// Best result found so far (RD value, mode info, rates, skip flags and the
// index of the winning mode).
int64_t best_rd;
MB_MODE_INFO best_mbmode;
int best_rate_y;
int best_rate_uv;
int best_mode_skippable;
int best_skip2;
THR_MODES best_mode_index;
// Per-reference-frame bookkeeping.
int num_available_refs;
int64_t dist_refs[REF_FRAMES];
int dist_order_refs[REF_FRAMES];
// One threshold per entry of the mode table.
int64_t mode_threshold[MAX_MODES];
int64_t best_intra_rd;
unsigned int best_pred_sse;
int64_t best_pred_diff[REFERENCE_MODES];
// Save a set of single_newmv for each checked ref_mv.
int_mv single_newmv[MAX_REF_MV_SEARCH][REF_FRAMES];
int single_newmv_rate[MAX_REF_MV_SEARCH][REF_FRAMES];
int single_newmv_valid[MAX_REF_MV_SEARCH][REF_FRAMES];
// Indexed by [mode][ref_mv index][reference frame].
int64_t modelled_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
// The rd of simple translation in single inter modes
int64_t simple_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
// Single search results by [directions][modes][reference frames]
SingleInterModeState single_state[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
// Number of entries stored per [direction][mode] -- presumably the fill
// count of single_state.
int single_state_cnt[2][SINGLE_INTER_MODE_NUM];
// Same layout as single_state -- presumably holding model-estimated results.
SingleInterModeState single_state_modelled[2][SINGLE_INTER_MODE_NUM]
[FWD_REFS];
int single_state_modelled_cnt[2][SINGLE_INTER_MODE_NUM];
// Reference frames per [direction][mode] -- presumably sorted by RD.
MV_REFERENCE_FRAME single_rd_order[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
// State of the intra mode search.
IntraModeSearchState intra_search_state;
} InterModeSearchState;
// Resets every per-block-size inter mode RD model of a tile: the model is
// marked as not ready and its sample count and running sums are cleared.
void av1_inter_mode_data_init(TileDataEnc *tile_data) {
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    InterModeRdModel *const model = &tile_data->inter_mode_rd_models[bsize];
    // No fit is available until av1_inter_mode_data_fit() has run.
    model->ready = 0;
    // Empty accumulator: no samples, all sums at zero.
    model->num = 0;
    model->dist_sum = 0;
    model->ld_sum = 0;
    model->sse_sum = 0;
    model->sse_sse_sum = 0;
    model->sse_ld_sum = 0;
  }
}
// Estimates residue cost and distortion for a block from its SSE, using the
// fitted linear model of the given block size.
//
// Returns 0 without touching the outputs when the model is not ready yet.
// Otherwise writes *est_residue_cost and *est_dist and returns 1.
static int get_est_rate_dist(const TileDataEnc *tile_data, BLOCK_SIZE bsize,
                             int64_t sse, int *est_residue_cost,
                             int64_t *est_dist) {
  aom_clear_system_state();
  const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
  if (!md->ready) return 0;

  // Below the mean distortion: no residue is coded and the distortion is the
  // SSE itself.
  if (sse < md->dist_mean) {
    *est_residue_cost = 0;
    *est_dist = sse;
    return 1;
  }

  *est_dist = (int64_t)round(md->dist_mean);
  const double est_ld = md->a * sse + md->b;
  // Clamp estimated rate cost by INT_MAX / 2.
  // TODO(angiebird@google.com): find better solution than clamping.
  if (fabs(est_ld) < 1e-2) {
    *est_residue_cost = INT_MAX / 2;
  } else {
    const double cost_dbl = ((sse - md->dist_mean) / est_ld);
    if (cost_dbl < 0) {
      *est_residue_cost = 0;
    } else {
      *est_residue_cost = (int)AOMMIN((int64_t)round(cost_dbl), INT_MAX / 2);
    }
  }

  // A non-positive cost means nothing is spent on the residue, so the
  // distortion falls back to the SSE.
  if (*est_residue_cost <= 0) {
    *est_residue_cost = 0;
    *est_dist = sse;
  }
  return 1;
}
// Refits the linear model (a, b) of every block size that has gathered enough
// samples, then clears that block size's accumulators.
//
// A first fit needs at least 200 samples; later refits need at least 64 and
// are blended with the previous means at a 3:1 (old:new) weight. Block sizes
// for which inter_mode_data_block_idx() returns -1 are skipped.
// rdmult is unused.
void av1_inter_mode_data_fit(TileDataEnc *tile_data, int rdmult) {
  (void)rdmult;
  aom_clear_system_state();
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    if (inter_mode_data_block_idx(bsize) == -1) continue;
    InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];

    // Not enough samples yet for a first fit or for an update.
    const int too_few_for_first_fit = (md->ready == 0 && md->num < 200);
    const int too_few_for_update = (md->ready == 1 && md->num < 64);
    if (too_few_for_first_fit || too_few_for_update) continue;

    if (md->ready == 0) {
      // First fit: the means come straight from the accumulated sums.
      md->dist_mean = md->dist_sum / md->num;
      md->ld_mean = md->ld_sum / md->num;
      md->sse_mean = md->sse_sum / md->num;
      md->sse_sse_mean = md->sse_sse_sum / md->num;
      md->sse_ld_mean = md->sse_ld_sum / md->num;
    } else {
      // Update: weighted average of the previous mean and the new batch mean.
      const double prev_weight = 3;
      md->dist_mean =
          (md->dist_mean * prev_weight + (md->dist_sum / md->num)) /
          (prev_weight + 1);
      md->ld_mean = (md->ld_mean * prev_weight + (md->ld_sum / md->num)) /
                    (prev_weight + 1);
      md->sse_mean = (md->sse_mean * prev_weight + (md->sse_sum / md->num)) /
                     (prev_weight + 1);
      md->sse_sse_mean =
          (md->sse_sse_mean * prev_weight + (md->sse_sse_sum / md->num)) /
          (prev_weight + 1);
      md->sse_ld_mean =
          (md->sse_ld_mean * prev_weight + (md->sse_ld_sum / md->num)) /
          (prev_weight + 1);
    }

    // Least-squares line ld = a * sse + b from the first and second moments.
    const double my = md->ld_mean;
    const double mx = md->sse_mean;
    const double dx = sqrt(md->sse_sse_mean);
    const double dxy = md->sse_ld_mean;
    md->a = (dxy - mx * my) / (dx * dx - mx * mx);
    md->b = my - md->a * mx;
    md->ready = 1;

    // Start a fresh batch of samples.
    md->num = 0;
    md->dist_sum = 0;
    md->ld_sum = 0;
    md->sse_sum = 0;
    md->sse_sse_sum = 0;
    md->sse_ld_sum = 0;
  }
}
// Adds one (sse, dist, residue_cost) sample to the RD model accumulators of
// the given block size.
//
// The sample is dropped when residue_cost is 0, when sse equals dist, when
// inter_mode_data_block_idx() returns -1 for bsize, or when the model already
// holds INTER_MODE_RD_DATA_OVERALL_SIZE samples.
static AOM_INLINE void inter_mode_data_push(TileDataEnc *tile_data,
                                            BLOCK_SIZE bsize, int64_t sse,
                                            int64_t dist, int residue_cost) {
  if (residue_cost == 0 || sse == dist) return;
  if (inter_mode_data_block_idx(bsize) == -1) return;

  InterModeRdModel *const model = &tile_data->inter_mode_rd_models[bsize];
  if (!(model->num < INTER_MODE_RD_DATA_OVERALL_SIZE)) return;

  aom_clear_system_state();
  // Distortion reduction per unit of residue cost.
  const double ld = (sse - dist) * 1. / residue_cost;
  ++model->num;
  model->dist_sum += dist;
  model->ld_sum += ld;
  model->sse_sum += sse;
  model->sse_sse_sum += (double)sse * (double)sse;
  model->sse_ld_sum += sse * ld;
}
// Appends one evaluated inter mode to inter_modes_info: its mode info, mode
// rate, SSE, estimated RD and the three RD stats are copied into the next free
// slot and the entry count is incremented. The caller must ensure there is
// room (asserted against MAX_INTER_MODES).
static AOM_INLINE void inter_modes_info_push(InterModesInfo *inter_modes_info,
                                             int mode_rate, int64_t sse,
                                             int64_t rd, RD_STATS *rd_cost,
                                             RD_STATS *rd_cost_y,
                                             RD_STATS *rd_cost_uv,
                                             const MB_MODE_INFO *mbmi) {
  const int slot = inter_modes_info->num;
  assert(slot < MAX_INTER_MODES);

  // Scalars first, then the struct copies.
  inter_modes_info->mode_rate_arr[slot] = mode_rate;
  inter_modes_info->sse_arr[slot] = sse;
  inter_modes_info->est_rd_arr[slot] = rd;
  inter_modes_info->mbmi_arr[slot] = *mbmi;
  inter_modes_info->rd_cost_arr[slot] = *rd_cost;
  inter_modes_info->rd_cost_y_arr[slot] = *rd_cost_y;
  inter_modes_info->rd_cost_uv_arr[slot] = *rd_cost_uv;

  inter_modes_info->num = slot + 1;
}
static int compare_rd_idx_pair(const void *a, const void *b) {
if (((RdIdxPair *)a)->rd == ((RdIdxPair *)b)->rd) {
return 0;
} else if (((const RdIdxPair *)a)->rd > ((const RdIdxPair *)b)->rd) {
return 1;
} else {
return -1;
}
}
// Fills rd_idx_pair_arr with one (index, estimated rd) pair per stored inter
// mode and sorts the pairs with compare_rd_idx_pair (ascending rd).
// Does nothing when no modes are stored. rd_idx_pair_arr must have room for
// inter_modes_info->num entries.
static AOM_INLINE void inter_modes_info_sort(
    const InterModesInfo *inter_modes_info, RdIdxPair *rd_idx_pair_arr) {
  const int count = inter_modes_info->num;
  if (count == 0) return;

  for (int i = 0; i < count; ++i) {
    RdIdxPair *const pair = &rd_idx_pair_arr[i];
    pair->idx = i;
    pair->rd = inter_modes_info->est_rd_arr[i];
  }
  qsort(rd_idx_pair_arr, count, sizeof(rd_idx_pair_arr[0]),
        compare_rd_idx_pair);
}
// Cost of coding value v out of n possible values: with
// l = get_unsigned_bits(n) and m = (1 << l) - n, values below m cost
// av1_cost_literal(l - 1) and the rest cost av1_cost_literal(l).
// Returns 0 when l is 0.
static INLINE int write_uniform_cost(int n, int v) {
  const int l = get_unsigned_bits(n);
  if (l == 0) return 0;
  const int m = (1 << l) - n;
  return (v < m) ? av1_cost_literal(l - 1) : av1_cost_literal(l);
}
// RDO-time counterpart of store_cfl_required(): decides whether luma pixels
// must be stored for CfL before it is known whether this block uses CfL.
static INLINE CFL_ALLOWED_TYPE store_cfl_required_rdo(const AV1_COMMON *cm,
                                                      const MACROBLOCK *x) {
  // No chroma to predict: monochrome sequence, or chroma RD is being skipped.
  if (cm->seq_params.monochrome || x->skip_chroma_rd) return CFL_DISALLOWED;

  const MACROBLOCKD *xd = &x->e_mbd;
  // Chroma reference blocks store data in the encoder iff CfL may be tried.
  // Non-chroma-reference blocks always store their luma pixels, in case the
  // corresponding chroma-reference block uses CfL. That case only arises for
  // block sizes that are <8 on their shortest side, as larger blocks are
  // always chroma reference blocks.
  return xd->cfl.is_chroma_reference ? is_cfl_allowed(xd) : CFL_ALLOWED;
}
#if CONFIG_DIST_8X8
static uint64_t cdef_dist_8x8_16bit(uint16_t *dst, int dstride, uint16_t *src,
int sstride, int coeff_shift) {
uint64_t svar = 0;
uint64_t dvar = 0;
uint64_t sum_s = 0;
uint64_t sum_d = 0;
uint64_t sum_s2 = 0;
uint64_t sum_d2 = 0;
uint64_t sum_sd = 0;
uint64_t dist = 0;
int i, j;
for (i = 0; i < 8; i++) {
for (j = 0; j < 8; j++) {
sum_s += src[i * sstride + j];
sum_d += dst[i * dstride + j];
sum_s2 += src[i * sstride + j] * src[i * sstride + j];
sum_d2 += dst[i * dstride + j] * dst[i * dstride + j];
sum_sd += src[i * sstride + j] * dst[i * dstride + j];
}
}
/* Compute the variance -- the calculation cannot go negative. */
svar = sum_s2 - ((sum_s * sum_s + 32) >> 6);
dvar = sum_d2 - ((sum_d * sum_d + 32) >> 6);
// Tuning of jm's original dering distortion metric used in CDEF tool,
// suggested by jm
const uint64_t a = 4;
const uint64_t b = 2;
const uint64_t c1 = (400 * a << 2 * coeff_shift);
const uint64_t c2 = (b * 20000 * a * a << 4 * coeff_shift);
dist = (uint64_t)floor(.5 + (sum_d2 + sum_s2 - 2 * sum_sd) * .5 *
(svar + dvar + c1) /
(sqrt(svar * (double)dvar + c2)));
// Calibrate dist to have similar rate for the same QP with MSE only
// distortion (as in master branch)
dist = (uint64_t)((float)dist * 0.75);
return dist;
}
// Returns the variance of a 4x4 block of 16-bit samples, computed as
// (sum of squares - (sum * sum >> 4)) >> 4.
//
// x:      top-left sample of the block.
// stride: distance between rows, in samples.
static int od_compute_var_4x4(uint16_t *x, int stride) {
  // 64-bit accumulators: with int, sum * sum overflows as soon as the sum of
  // the 16 samples exceeds 46340, which bright high-bit-depth blocks reach.
  int64_t sum = 0;
  int64_t s2 = 0;
  for (int i = 0; i < 4; i++) {
    for (int j = 0; j < 4; j++) {
      const int64_t t = x[i * stride + j];
      sum += t;
      s2 += t * t;
    }
  }
  return (int)((s2 - (sum * sum >> 4)) >> 4);
}
/* OD_DIST_LP_MID controls the frequency weighting filter used for computing
the distortion. For a value X, the filter is [1 X 1]/(X + 2) and
is applied both horizontally and vertically. For X=5, the filter is
a good approximation for the OD_QM8_Q4_HVS quantization matrix. */
#define OD_DIST_LP_MID (5)
#define OD_DIST_LP_NORM (OD_DIST_LP_MID + 2)
/* Computes the distortion of one 8x8 block.
   x, y:   the two 8x8 pixel blocks being compared.
   e_lp:   the low-pass filtered error for the same 8x8 block.
   stride: row stride, in elements, shared by x, y and e_lp.
   use_activity_masking selects the variance statistic and calibration
   constant used for the activity factor.
   Returns activity^2 * (normalized sum of squared e_lp + variance term). */
static double od_compute_dist_8x8(int use_activity_masking, uint16_t *x,
uint16_t *y, od_coeff *e_lp, int stride) {
double sum;
int min_var;
double mean_var;
double var_stat;
double activity;
double calibration;
int i;
int j;
double vardist;
vardist = 0;
#if 1
min_var = INT_MAX;
mean_var = 0;
/* Nine overlapping 4x4 windows at offsets 0, 2 and 4 in each direction. */
for (i = 0; i < 3; i++) {
for (j = 0; j < 3; j++) {
int varx;
int vary;
varx = od_compute_var_4x4(x + 2 * i * stride + 2 * j, stride);
vary = od_compute_var_4x4(y + 2 * i * stride + 2 * j, stride);
min_var = OD_MINI(min_var, varx);
/* Accumulates the reciprocals; 9 / mean_var below is then the harmonic
   mean of (1 + varx). */
mean_var += 1. / (1 + varx);
/* The cast to (double) is to avoid an overflow before the sqrt.*/
/* Equals (sqrt(varx) - sqrt(vary))^2. */
vardist += varx - 2 * sqrt(varx * (double)vary) + vary;
}
}
/* We use a different variance statistic depending on whether activity
masking is used, since the harmonic mean appeared slightly worse with
masking off. The calibration constant just ensures that we preserve the
rate compared to activity=1. */
if (use_activity_masking) {
calibration = 1.95;
var_stat = 9. / mean_var;
} else {
calibration = 1.62;
var_stat = min_var;
}
/* 1.62 is a calibration constant, 0.25 is a noise floor and 1/6 is the
activity masking constant. */
activity = calibration * pow(.25 + var_stat, -1. / 6);
#else
activity = 1;
#endif // 1
/* Energy of the low-pass filtered error over the 8x8 block. */
sum = 0;
for (i = 0; i < 8; i++) {
for (j = 0; j < 8; j++)
sum += e_lp[i * stride + j] * (double)e_lp[i * stride + j];
}
/* Normalize the filter to unit DC response. */
sum *= 1. / (OD_DIST_LP_NORM * OD_DIST_LP_NORM * OD_DIST_LP_NORM *
OD_DIST_LP_NORM);
return activity * activity * (sum + vardist);
}
// Note : Inputs x and y are in a pixel domain
// Shared tail of od_compute_dist() and od_compute_dist_diff().
// tmp holds the horizontally filtered error (stride bsize_w). This function
// applies the vertical pass into e_lp, sums od_compute_dist_8x8() over every
// 8x8 block and scales the total by a qindex-dependent factor.
// x, y, tmp and e_lp are all indexed with stride bsize_w.
static double od_compute_dist_common(int activity_masking, uint16_t *x,
uint16_t *y, int bsize_w, int bsize_h,
int qindex, od_coeff *tmp,
od_coeff *e_lp) {
int i, j;
double sum = 0;
const int mid = OD_DIST_LP_MID;
// First and last rows: only one vertical neighbour exists, so it is weighted
// by 2.
for (j = 0; j < bsize_w; j++) {
e_lp[j] = mid * tmp[j] + 2 * tmp[bsize_w + j];
e_lp[(bsize_h - 1) * bsize_w + j] = mid * tmp[(bsize_h - 1) * bsize_w + j] +
2 * tmp[(bsize_h - 2) * bsize_w + j];
}
// Interior rows: vertical [1 mid 1] filter.
for (i = 1; i < bsize_h - 1; i++) {
for (j = 0; j < bsize_w; j++) {
e_lp[i * bsize_w + j] = mid * tmp[i * bsize_w + j] +
tmp[(i - 1) * bsize_w + j] +
tmp[(i + 1) * bsize_w + j];
}
}
// Accumulate the per-8x8 distortion over the whole block.
for (i = 0; i < bsize_h; i += 8) {
for (j = 0; j < bsize_w; j += 8) {
sum += od_compute_dist_8x8(activity_masking, &x[i * bsize_w + j],
&y[i * bsize_w + j], &e_lp[i * bsize_w + j],
bsize_w);
}
}
/* Scale according to linear regression against SSE, for 8x8 blocks. */
if (activity_masking) {
sum *= 2.2 + (1.7 - 2.2) * (qindex - 99) / (210 - 99) +
(qindex < 99 ? 2.5 * (qindex - 99) / 99 * (qindex - 99) / 99 : 0);
} else {
// Piecewise-linear in qindex with breakpoints at 43 and 128.
sum *= qindex >= 128
? 1.4 + (0.9 - 1.4) * (qindex - 128) / (209 - 128)
: qindex <= 43 ? 1.5 + (2.0 - 1.5) * (qindex - 43) / (16 - 43)
: 1.5 + (1.4 - 1.5) * (qindex - 43) / (128 - 43);
}
return sum;
}
static double od_compute_dist(uint16_t *x, uint16_t *y, int bsize_w,
int bsize_h, int qindex) {
assert(bsize_w >= 8 && bsize_h >= 8);
int activity_masking = 0;
int i, j;
DECLARE_ALIGNED(16, od_coeff, e[MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, od_coeff, tmp[MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, od_coeff, e_lp[MAX_SB_SQUARE]);
for (i = 0; i < bsize_h; i++) {
for (j = 0; j < bsize_w; j++) {
e[i * bsize_w + j] = x[i * bsize_w + j] - y[i * bsize_w + j];
}
}
int mid = OD_DIST_LP_MID;
for (i = 0; i < bsize_h; i++) {
tmp[i * bsize_w] = mid * e[i * bsize_w] + 2 * e[i * bsize_w + 1];
tmp[i * bsize_w + bsize_w - 1] =
mid * e[i * bsize_w + bsize_w - 1] + 2 * e[i * bsize_w + bsize_w - 2];
for (j = 1; j < bsize_w - 1; j++) {
tmp[i * bsize_w + j] = mid * e[i * bsize_w + j] + e[i * bsize_w + j - 1] +
e[i * bsize_w + j + 1];
}
}
return od_compute_dist_common(activity_masking, x, y, bsize_w, bsize_h,
qindex, tmp, e_lp);
}
static double od_compute_dist_diff(uint16_t *x, int16_t *e, int bsize_w,
int bsize_h, int qindex) {
assert(bsize_w >= 8 && bsize_h >= 8);
int activity_masking = 0;
DECLARE_ALIGNED(16, uint16_t, y[MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, od_coeff, tmp[MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, od_coeff, e_lp[MAX_SB_SQUARE]);
int i, j;
for (i = 0; i < bsize_h; i++) {
for (j = 0; j < bsize_w; j++) {
y[i * bsize_w + j] = x[i * bsize_w + j] - e[i * bsize_w + j];
}
}
int mid = OD_DIST_LP_MID;
for (i = 0; i < bsize_h; i++) {
tmp[i * bsize_w] = mid * e[i * bsize_w] + 2 * e[i * bsize_w + 1];
tmp[i * bsize_w + bsize_w - 1] =
mid * e[i * bsize_w + bsize_w - 1] + 2 * e[i * bsize_w + bsize_w - 2];
for (j = 1; j < bsize_w - 1; j++) {
tmp[i * bsize_w + j] = mid * e[i * bsize_w + j] + e[i * bsize_w + j - 1] +
e[i * bsize_w + j + 1];
}
}
return od_compute_dist_common(activity_masking, x, y, bsize_w, bsize_h,
qindex, tmp, e_lp);
}
// Computes the distortion between src and dst for a bsw x bsh block using the
// metric selected by x->tune_metric: Daala distortion, CDEF distortion, or
// (by default) the squared error over the visible area only.
// bsw and bsh must be multiples of 8 and at least 8. visible_w / visible_h
// give the part of the block that holds real picture data.
int64_t av1_dist_8x8(const AV1_COMP *const cpi, const MACROBLOCK *x,
const uint8_t *src, int src_stride, const uint8_t *dst,
int dst_stride, const BLOCK_SIZE tx_bsize, int bsw,
int bsh, int visible_w, int visible_h, int qindex) {
int64_t d = 0;
int i, j;
const MACROBLOCKD *xd = &x->e_mbd;
DECLARE_ALIGNED(16, uint16_t, orig[MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, uint16_t, rec[MAX_SB_SQUARE]);
assert(bsw >= 8);
assert(bsh >= 8);
assert((bsw & 0x07) == 0);
assert((bsh & 0x07) == 0);
// For the two perceptual metrics, copy both blocks into dense 16-bit buffers
// with stride bsw. orig and rec are not filled for the MSE path.
if (x->tune_metric == AOM_TUNE_CDEF_DIST ||
x->tune_metric == AOM_TUNE_DAALA_DIST) {
if (is_cur_buf_hbd(xd)) {
// High bit depth: src and dst are accessed through CONVERT_TO_SHORTPTR.
for (j = 0; j < bsh; j++)
for (i = 0; i < bsw; i++)
orig[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
if ((bsw == visible_w) && (bsh == visible_h)) {
for (j = 0; j < bsh; j++)
for (i = 0; i < bsw; i++)
rec[j * bsw + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i];
} else {
// Partially visible block: take dst inside the visible area and src
// everywhere else, so the non-visible pixels have zero error.
for (j = 0; j < visible_h; j++)
for (i = 0; i < visible_w; i++)
rec[j * bsw + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i];
if (visible_w < bsw) {
for (j = 0; j < bsh; j++)
for (i = visible_w; i < bsw; i++)
rec[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
}
if (visible_h < bsh) {
for (j = visible_h; j < bsh; j++)
for (i = 0; i < bsw; i++)
rec[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
}
}
} else {
// 8-bit buffers: same copy, widening each sample to 16 bits.
for (j = 0; j < bsh; j++)
for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i];
if ((bsw == visible_w) && (bsh == visible_h)) {
for (j = 0; j < bsh; j++)
for (i = 0; i < bsw; i++) rec[j * bsw + i] = dst[j * dst_stride + i];
} else {
for (j = 0; j < visible_h; j++)
for (i = 0; i < visible_w; i++)
rec[j * bsw + i] = dst[j * dst_stride + i];
if (visible_w < bsw) {
for (j = 0; j < bsh; j++)
for (i = visible_w; i < bsw; i++)
rec[j * bsw + i] = src[j * src_stride + i];
}
if (visible_h < bsh) {
for (j = visible_h; j < bsh; j++)
for (i = 0; i < bsw; i++)
rec[j * bsw + i] = src[j * src_stride + i];
}
}
}
}
if (x->tune_metric == AOM_TUNE_DAALA_DIST) {
d = (int64_t)od_compute_dist(orig, rec, bsw, bsh, qindex);
} else if (x->tune_metric == AOM_TUNE_CDEF_DIST) {
int coeff_shift = AOMMAX(xd->bd - 8, 0);
// Sum the CDEF distortion over every 8x8 sub-block.
for (i = 0; i < bsh; i += 8) {
for (j = 0; j < bsw; j += 8) {
d += cdef_dist_8x8_16bit(&rec[i * bsw + j], bsw, &orig[i * bsw + j],
bsw, coeff_shift);
}
}
// For high bit depth buffers, scale the result down by 2 * coeff_shift bits.
if (is_cur_buf_hbd(xd)) d = ((uint64_t)d) >> 2 * coeff_shift;
} else {
// Otherwise, MSE by default
d = pixel_dist_visible_only(cpi, x, src, src_stride, dst, dst_stride,
tx_bsize, bsh, bsw, visible_h, visible_w);
}
return d;
}
// Variant of av1_dist_8x8() that takes the residual (diff) instead of the
// second block. Computes the distortion for a bsw x bsh block using the metric
// selected by x->tune_metric: Daala distortion, CDEF distortion, or (by
// default) the sum of squared diff values over the visible area.
// bsw and bsh must be multiples of 8 and at least 8.
static int64_t dist_8x8_diff(const MACROBLOCK *x, const uint8_t *src,
int src_stride, const int16_t *diff,
int diff_stride, int bsw, int bsh, int visible_w,
int visible_h, int qindex) {
int64_t d = 0;
int i, j;
const MACROBLOCKD *xd = &x->e_mbd;
DECLARE_ALIGNED(16, uint16_t, orig[MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, int16_t, diff16[MAX_SB_SQUARE]);
assert(bsw >= 8);
assert(bsh >= 8);
assert((bsw & 0x07) == 0);
assert((bsh & 0x07) == 0);
// For the two perceptual metrics, copy src and diff into dense buffers with
// stride bsw. orig and diff16 are not filled for the MSE path.
if (x->tune_metric == AOM_TUNE_CDEF_DIST ||
x->tune_metric == AOM_TUNE_DAALA_DIST) {
if (is_cur_buf_hbd(xd)) {
for (j = 0; j < bsh; j++)
for (i = 0; i < bsw; i++)
orig[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
} else {
for (j = 0; j < bsh; j++)
for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i];
}
if ((bsw == visible_w) && (bsh == visible_h)) {
for (j = 0; j < bsh; j++)
for (i = 0; i < bsw; i++)
diff16[j * bsw + i] = diff[j * diff_stride + i];
} else {
// Partially visible block: copy diff inside the visible area and use zero
// error everywhere else.
for (j = 0; j < visible_h; j++)
for (i = 0; i < visible_w; i++)
diff16[j * bsw + i] = diff[j * diff_stride + i];
if (visible_w < bsw) {
for (j = 0; j < bsh; j++)
for (i = visible_w; i < bsw; i++) diff16[j * bsw + i] = 0;
}
if (visible_h < bsh) {
for (j = visible_h; j < bsh; j++)
for (i = 0; i < bsw; i++) diff16[j * bsw + i] = 0;
}
}
}
if (x->tune_metric == AOM_TUNE_DAALA_DIST) {
d = (int64_t)od_compute_dist_diff(orig, diff16, bsw, bsh, qindex);
} else if (x->tune_metric == AOM_TUNE_CDEF_DIST) {
int coeff_shift = AOMMAX(xd->bd - 8, 0);
DECLARE_ALIGNED(16, uint16_t, dst16[MAX_SB_SQUARE]);
// Rebuild the second block as orig - diff16.
for (i = 0; i < bsh; i++) {
for (j = 0; j < bsw; j++) {
dst16[i * bsw + j] = orig[i * bsw + j] - diff16[i * bsw + j];
}
}
// Sum the CDEF distortion over every 8x8 sub-block.
for (i = 0; i < bsh; i += 8) {
for (j = 0; j < bsw; j += 8) {
d += cdef_dist_8x8_16bit(&dst16[i * bsw + j], bsw, &orig[i * bsw + j],
bsw, coeff_shift);
}
}
// Don't scale 'd' for HBD since it will be done by caller side for diff
// input
} else {
// Otherwise, MSE by default
d = aom_sum_squares_2d_i16(diff, diff_stride, visible_w, visible_h);
}
return d;
}
#endif // CONFIG_DIST_8X8
// Similar to get_horver_correlation, but also takes into account first
// row/column, when computing horizontal/vertical correlation.
void av1_get_horver_correlation_full_c(const int16_t *diff, int stride,
int width, int height, float *hcorr,
float *vcorr) {
// The following notation is used:
// x - current pixel
// y - left neighbor pixel
// z - top neighbor pixel
int64_t x_sum = 0, x2_sum = 0, xy_sum = 0, xz_sum = 0;
int64_t x_firstrow = 0, x_finalrow = 0, x_firstcol = 0, x_finalcol = 0;
int64_t x2_firstrow = 0, x2_finalrow = 0, x2_firstcol = 0, x2_finalcol = 0;
// First, process horizontal correlation on just the first row
x_sum += diff[0];
x2_sum += diff[0] * diff[0];
x_firstrow += diff[0];
x2_firstrow += diff[0] * diff[0];
for (int j = 1; j < width; ++j) {
const int16_t x = diff[j];
const int16_t y = diff[j - 1];
x_sum += x;
x_firstrow += x;
x2_sum += x * x;
x2_firstrow += x * x;
xy_sum += x * y;
}
// Process vertical correlation in the first column
x_firstcol += diff[0];
x2_firstcol += diff[0] * diff[0];
for (int i = 1; i < height; ++i) {
const int16_t x = diff[i * stride];
const int16_t z = diff[(i - 1) * stride];
x_sum += x;
x_firstcol += x;
x2_sum += x * x;
x2_firstcol += x * x;
xz_sum += x * z;
}
// Now process horiz and vert correlation through the rest unit
for (int i = 1; i < height; ++i) {
for (int j = 1; j < width; ++j) {
const int16_t x = diff[i * stride + j];
const int16_t y = diff[i * stride + j - 1];
const int16_t z = diff[(i - 1) * stride + j];
x_sum += x;
x2_sum += x * x;
xy_sum += x * y;
xz_sum += x * z;
}
}
for (int j = 0; j < width; ++j) {
x_finalrow += diff[(height - 1) * stride + j];
x2_finalrow +=
diff[(height - 1) * stride + j] * diff[(height - 1) * stride + j];
}
for (int i = 0; i < height; ++i) {
x_finalcol += diff[i * stride + width - 1];
x2_finalcol += diff[i * stride + width - 1] * diff[i * stride + width - 1];
}
int64_t xhor_sum = x_sum - x_finalcol;
int64_t xver_sum = x_sum - x_finalrow;
int64_t y_sum = x_sum - x_firstcol;
int64_t z_sum = x_sum - x_firstrow;
int64_t x2hor_sum = x2_sum - x2_finalcol;
int64_t x2ver_sum = x2_sum - x2_finalrow;
int64_t y2_sum = x2_sum - x2_firstcol;
int64_t z2_sum = x2_sum - x2_firstrow;
const float num_hor = (float)(height * (width - 1));
const float num_ver = (float)((height - 1) * width);
const float xhor_var_n = x2hor_sum - (xhor_sum * xhor_sum) / num_hor;
const float xver_var_n = x2ver_sum - (xver_sum * xver_sum) / num_ver;
const float y_var_n = y2_sum - (y_sum * y_sum) / num_hor;
const float z_var_n = z2_sum - (z_sum * z_sum) / num_ver;
const float xy_var_n = xy_sum - (xhor_sum * y_sum) / num_hor;
const float xz_var_n = xz_sum - (xver_sum * z_sum) / num_ver;
if (xhor_var_n > 0 && y_var_n > 0) {
*hcorr = xy_var_n / sqrtf(xhor_var_n * y_var_n);
*hcorr = *hcorr < 0 ? 0 : *hcorr;
} else {
*hcorr = 1.0;
}
if (xver_var_n > 0 && z_var_n > 0) {
*vcorr = xz_var_n / sqrtf(xver_var_n * z_var_n);
*vcorr = *vcorr < 0 ? 0 : *vcorr;
} else {
*vcorr = 1.0;
}
}
// Sums, over all coded planes, the SSE between the source and the current
// prediction/reconstruction buffer of the block, and returns it scaled by 16
// (left shift by 4). Chroma planes are left out when x->skip_chroma_rd is
// set.
static int64_t get_sse(const AV1_COMP *cpi, const MACROBLOCK *x) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const int plane_count = av1_num_planes(&cpi->common);
  int64_t sse_sum = 0;

  for (int pl = 0; pl < plane_count; ++pl) {
    const struct macroblock_plane *const src_plane = &x->plane[pl];
    const struct macroblockd_plane *const dst_plane = &xd->plane[pl];
    const BLOCK_SIZE plane_bsize = get_plane_block_size(
        mbmi->sb_type, dst_plane->subsampling_x, dst_plane->subsampling_y);
    unsigned int plane_sse;

    if (pl && x->skip_chroma_rd) continue;

    // The variance function reports the SSE through its last argument; its
    // return value (the variance) is not needed here.
    cpi->fn_ptr[plane_bsize].vf(src_plane->src.buf, src_plane->src.stride,
                                dst_plane->dst.buf, dst_plane->dst.stride,
                                &plane_sse);
    sse_sum += plane_sse;
  }

  return sse_sum << 4;
}
// Computes the squared error between the transform coefficients 'coeff' and
// their dequantized counterparts 'dqcoeff' over 'block_size' entries.
// Returns the sum of squared differences and stores the sum of squared
// original coefficients in *ssz.
//
// All products are evaluated in 64 bits. Squaring in 'int' overflows (which
// is undefined behavior) once a magnitude exceeds 46340; widening first
// keeps the result identical for small values and well defined for large
// ones, matching the arithmetic of the high-bitdepth variant.
int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int64_t error = 0, sqcoeff = 0;

  for (intptr_t i = 0; i < block_size; i++) {
    const int64_t c = coeff[i];
    const int64_t diff = c - dqcoeff[i];
    error += diff * diff;
    sqcoeff += c * c;
  }

  *ssz = sqcoeff;
  return error;
}
#if CONFIG_AV1_HIGHBITDEPTH
// High-bitdepth counterpart of av1_block_error_c(). Accumulates the squared
// coefficient error and the squared coefficient energy, then scales both
// down by 2 * (bd - 8) bits with rounding. Returns the scaled error and
// stores the scaled energy in *ssz.
int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff, intptr_t block_size,
                                 int64_t *ssz, int bd) {
  const int shift = 2 * (bd - 8);
  // Half of the divisor, so that the right shift rounds to nearest.
  const int rounding = shift > 0 ? 1 << (shift - 1) : 0;
  int64_t sq_err = 0, sq_coeff = 0;

  for (intptr_t idx = 0; idx < block_size; ++idx) {
    const int64_t delta = coeff[idx] - dqcoeff[idx];
    const int64_t value = coeff[idx];
    sq_err += delta * delta;
    sq_coeff += value * value;
  }
  assert(sq_err >= 0 && sq_coeff >= 0);

  *ssz = (sq_coeff + rounding) >> shift;
  return (sq_err + rounding) >> shift;
}
#endif
// Builds a histogram of the 8-bit samples in a 'rows' x 'cols' region of
// 'src' (row pitch 'stride') and returns the number of distinct values.
// 'val_count' must have room for 256 entries; it is cleared first and on
// return val_count[v] is the number of occurrences of value v.
int av1_count_colors(const uint8_t *src, int stride, int rows, int cols,
                     int *val_count) {
  const int max_pix_val = 1 << 8;
  int distinct = 0;

  memset(val_count, 0, max_pix_val * sizeof(val_count[0]));

  for (int r = 0; r < rows; ++r) {
    for (int c = 0; c < cols; ++c) {
      const int pixel = src[r * stride + c];
      assert(pixel < max_pix_val);
      // A bin going from zero to non-zero means a new color was found.
      if (val_count[pixel]++ == 0) ++distinct;
    }
  }

  return distinct;
}
// High-bitdepth version of av1_count_colors(). 'src8' is converted with
// CONVERT_TO_SHORTPTR() and read as 16-bit samples. 'val_count' must have
// room for (1 << bit_depth) entries; it is cleared first. Returns the number
// of distinct sample values, or 0 as soon as a sample outside
// [0, 1 << bit_depth) is encountered.
int av1_count_colors_highbd(const uint8_t *src8, int stride, int rows, int cols,
                            int bit_depth, int *val_count) {
  assert(bit_depth <= 12);
  const int max_pix_val = 1 << bit_depth;
  const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  int distinct = 0;

  memset(val_count, 0, max_pix_val * sizeof(val_count[0]));

  for (int r = 0; r < rows; ++r) {
    for (int c = 0; c < cols; ++c) {
      const int pixel = src[r * stride + c];
      assert(pixel < max_pix_val);
      // Release builds: bail out instead of writing past the histogram.
      if (pixel >= max_pix_val) return 0;
      // A bin going from zero to non-zero means a new color was found.
      if (val_count[pixel]++ == 0) ++distinct;
    }
  }

  return distinct;
}
// Return the rate cost for luma prediction mode info. of intra blocks.
// Starts from 'mode_cost' and adds, where applicable, the cost of the
// palette flag and palette data, the filter-intra flag and mode, the angle
// delta, and the intrabc flag.
static int intra_mode_info_cost_y(const AV1_COMP *cpi, const MACROBLOCK *x,
                                  const MB_MODE_INFO *mbmi, BLOCK_SIZE bsize,
                                  int mode_cost) {
  const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
  const int palette_on = pmi->palette_size[0] > 0;
  const int filter_intra_on = mbmi->filter_intra_mode_info.use_filter_intra;
  const int intrabc_on = mbmi->use_intrabc;
  int cost = mode_cost;

  // Can only activate one mode.
  assert(((mbmi->mode != DC_PRED) + palette_on + intrabc_on +
          filter_intra_on) <= 1);

  const int palette_allowed =
      av1_allow_palette(cpi->common.allow_screen_content_tools, mbmi->sb_type);
  if (palette_allowed && mbmi->mode == DC_PRED) {
    const MACROBLOCKD *xd = &x->e_mbd;
    const int bsize_ctx = av1_get_palette_bsize_ctx(bsize);
    const int mode_ctx = av1_get_palette_mode_ctx(xd);

    // Cost of the "palette used / not used" flag.
    cost += x->palette_y_mode_cost[bsize_ctx][mode_ctx][palette_on];

    if (palette_on) {
      const uint8_t *const color_map = xd->plane[0].color_index_map;
      int block_width, block_height, rows, cols;
      av1_get_block_dimensions(bsize, 0, xd, &block_width, &block_height, &rows,
                               &cols);
      const int plt_size = pmi->palette_size[0];

      // Palette size and the first color index.
      cost += x->palette_y_size_cost[bsize_ctx][plt_size - PALETTE_MIN_SIZE];
      cost += write_uniform_cost(plt_size, color_map[0]);

      // Palette colors, taking the neighbor color cache into account.
      uint16_t color_cache[2 * PALETTE_MAX_SIZE];
      const int n_cache = av1_get_palette_cache(xd, 0, color_cache);
      cost += av1_palette_color_cost_y(pmi, color_cache, n_cache,
                                       cpi->common.seq_params.bit_depth);

      // Remaining color indices.
      cost += av1_cost_color_map(x, 0, bsize, mbmi->tx_size, PALETTE_MAP);
    }
  }

  if (av1_filter_intra_allowed(&cpi->common, mbmi)) {
    cost += x->filter_intra_cost[mbmi->sb_type][filter_intra_on];
    if (filter_intra_on) {
      const int fi_mode = mbmi->filter_intra_mode_info.filter_intra_mode;
      cost += x->filter_intra_mode_cost[fi_mode];
    }
  }

  if (av1_is_directional_mode(mbmi->mode) && av1_use_angle_delta(bsize)) {
    const int delta_idx = MAX_ANGLE_DELTA + mbmi->angle_delta[PLANE_TYPE_Y];
    cost += x->angle_delta_cost[mbmi->mode - V_PRED][delta_idx];
  }

  if (av1_allow_intrabc(&cpi->common)) cost += x->intrabc_cost[intrabc_on];

  return cost;
}
// Return the rate cost for chroma prediction mode info. of intra blocks.
// Starts from 'mode_cost' and adds, where applicable, the cost of the chroma
// palette flag and palette data and the chroma angle delta.
static int intra_mode_info_cost_uv(const AV1_COMP *cpi, const MACROBLOCK *x,
                                   const MB_MODE_INFO *mbmi, BLOCK_SIZE bsize,
                                   int mode_cost) {
  const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
  const UV_PREDICTION_MODE mode = mbmi->uv_mode;
  const int palette_on = pmi->palette_size[1] > 0;
  int cost = mode_cost;

  // Can only activate one mode.
  assert(((mode != UV_DC_PRED) + palette_on + mbmi->use_intrabc) <= 1);

  const int palette_allowed =
      av1_allow_palette(cpi->common.allow_screen_content_tools, mbmi->sb_type);
  if (palette_allowed && mode == UV_DC_PRED) {
    // The flag cost is conditioned on whether luma uses a palette.
    cost += x->palette_uv_mode_cost[pmi->palette_size[0] > 0][palette_on];

    if (palette_on) {
      const int bsize_ctx = av1_get_palette_bsize_ctx(bsize);
      const int plt_size = pmi->palette_size[1];
      const MACROBLOCKD *xd = &x->e_mbd;
      const uint8_t *const color_map = xd->plane[1].color_index_map;

      // Palette size and the first color index.
      cost += x->palette_uv_size_cost[bsize_ctx][plt_size - PALETTE_MIN_SIZE];
      cost += write_uniform_cost(plt_size, color_map[0]);

      // Palette colors, taking the neighbor color cache into account.
      uint16_t color_cache[2 * PALETTE_MAX_SIZE];
      const int n_cache = av1_get_palette_cache(xd, 1, color_cache);
      cost += av1_palette_color_cost_uv(pmi, color_cache, n_cache,
                                        cpi->common.seq_params.bit_depth);

      // Remaining color indices.
      cost += av1_cost_color_map(x, 1, bsize, mbmi->tx_size, PALETTE_MAP);
    }
  }

  if (av1_is_directional_mode(get_uv_mode(mode)) &&
      av1_use_angle_delta(bsize)) {
    const int delta_idx = mbmi->angle_delta[PLANE_TYPE_UV] + MAX_ANGLE_DELTA;
    cost += x->angle_delta_cost[mode - V_PRED][delta_idx];
  }

  return cost;
}
// Returns 1 when the diagonal mode 'mode' should be skipped given the best
// intra mode found so far, and 0 otherwise. Each of the four diagonal modes
// listed below is only kept when 'best_intra_mode' is one of its two
// associated modes; every other 'mode' is never skipped.
static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  switch (mode) {
    case D113_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D135_PRED;
    case D67_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D45_PRED;
    case D203_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D45_PRED;
    case D157_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D135_PRED;
    default: return 0;
  }
}
// Model based RD estimation for luma intra blocks.
// Sets mbmi->tx_size from the block size and tx-mode search type, runs intra
// prediction for every transform block of the luma plane, and then estimates
// rate and distortion with a model (no transform/quantization is done here).
// Returns the RD cost of the modeled rate plus 'mode_cost' plus the
// angle-delta and filter-intra signaling cost, where applicable.
static int64_t intra_model_yrd(const AV1_COMP *const cpi, MACROBLOCK *const x,
                               BLOCK_SIZE bsize, int mode_cost) {
  const AV1_COMMON *cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  assert(!is_inter_block(mbmi));

  const TX_SIZE tx_size = tx_size_from_tx_mode(bsize, x->tx_mode_search_type);
  const int row_step = tx_size_high_unit[tx_size];
  const int col_step = tx_size_wide_unit[tx_size];
  const int blocks_wide = max_block_wide(xd, bsize, 0);
  const int blocks_high = max_block_high(xd, bsize, 0);
  mbmi->tx_size = tx_size;

  // Prediction.
  for (int blk_row = 0; blk_row < blocks_high; blk_row += row_step) {
    for (int blk_col = 0; blk_col < blocks_wide; blk_col += col_step) {
      av1_predict_intra_block_facade(cm, xd, 0, blk_col, blk_row, tx_size);
    }
  }

  // RD estimation.
  RD_STATS model_stats;
  int64_t model_sse;
  const int model_type = cpi->sf.rt_sf.use_simple_rd_model
                             ? MODELRD_LEGACY
                             : MODELRD_TYPE_INTRA;
  model_rd_sb_fn[model_type](cpi, bsize, x, xd, 0, 0, &model_stats.rate,
                             &model_stats.dist, &model_stats.skip, &model_sse,
                             NULL, NULL, NULL);

  // Angle delta signaling.
  if (av1_is_directional_mode(mbmi->mode) && av1_use_angle_delta(bsize)) {
    const int delta_idx = MAX_ANGLE_DELTA + mbmi->angle_delta[PLANE_TYPE_Y];
    mode_cost += x->angle_delta_cost[mbmi->mode - V_PRED][delta_idx];
  }

  // Filter-intra signaling: the flag, plus the filter mode when it is used.
  if (mbmi->mode == DC_PRED &&
      av1_filter_intra_allowed_bsize(cm, mbmi->sb_type)) {
    if (mbmi->filter_intra_mode_info.use_filter_intra) {
      const int fi_mode = mbmi->filter_intra_mode_info.filter_intra_mode;
      mode_cost += x->filter_intra_cost[mbmi->sb_type][1] +
                   x->filter_intra_mode_cost[fi_mode];
    } else {
      mode_cost += x->filter_intra_cost[mbmi->sb_type][0];
    }
  }

  return RDCOST(x->rdmult, model_stats.rate + mode_cost, model_stats.dist);
}
// Update the intra model yrd and prune the current mode if the new estimate
// y_rd > 1.5 * best_model_rd.
// Returns 1 when the mode should be pruned. Otherwise returns 0, after
// lowering *best_model_rd if the new estimate is smaller.
static AOM_INLINE int model_intra_yrd_and_prune(const AV1_COMP *const cpi,
                                                MACROBLOCK *x, BLOCK_SIZE bsize,
                                                int mode_info_cost,
                                                int64_t *best_model_rd) {
  const int64_t model_rd = intra_model_yrd(cpi, x, bsize, mode_info_cost);
  const int64_t best_so_far = *best_model_rd;

  // best + best / 2 is the 1.5x pruning threshold; it is only meaningful
  // once a best value has been recorded.
  if (best_so_far != INT64_MAX &&
      model_rd > best_so_far + (best_so_far >> 1)) {
    return 1;
  }

  if (model_rd < best_so_far) *best_model_rd = model_rd;
  return 0;
}
// Extends 'color_map' array from 'orig_width x orig_height' to 'new_width x
// new_height'. Extra rows and columns are filled in by copying last valid
// row/column.
// The extension is done in place: 'color_map' initially holds the original
// map with a row pitch of 'orig_width' and ends up holding the extended map
// with a row pitch of 'new_width'.
static AOM_INLINE void extend_palette_color_map(uint8_t *const color_map,
                                                int orig_width, int orig_height,
                                                int new_width, int new_height) {
  assert(new_width >= orig_width);
  assert(new_height >= orig_height);
  if (new_width == orig_width && new_height == orig_height) return;

  const int extra_cols = new_width - orig_width;

  // Walk the rows bottom-up so that moving a row to its wider-pitch position
  // never overwrites a row that has not been moved yet.
  for (int row = orig_height; row-- > 0;) {
    uint8_t *const dst_row = color_map + row * new_width;
    memmove(dst_row, color_map + row * orig_width, orig_width);
    // Copy last column to extra columns.
    memset(dst_row + orig_width, dst_row[orig_width - 1], extra_cols);
  }

  // Copy last row to extra rows.
  for (int row = orig_height; row < new_height; ++row) {
    memcpy(color_map + row * new_width,
           color_map + (orig_height - 1) * new_width, new_width);
  }
}
// Bias toward using colors in the cache.
// For each of the 'n_colors' centroids (spaced 'stride' entries apart in
// 'centroids'), finds the nearest entry of 'color_cache' and, if it is at
// most 1 away, replaces the centroid with that cached color. Does nothing
// when the cache is empty.
// TODO(huisu): Try other schemes to improve compression.
static AOM_INLINE void optimize_palette_colors(uint16_t *color_cache,
                                               int n_cache, int n_colors,
                                               int stride, int *centroids) {
  if (n_cache <= 0) return;

  const int limit = n_colors * stride;
  for (int pos = 0; pos < limit; pos += stride) {
    const int value = centroids[pos];
    int nearest = 0;
    int nearest_diff = abs(value - (int)color_cache[0]);

    for (int c = 1; c < n_cache; ++c) {
      const int cur_diff = abs(value - color_cache[c]);
      // Strict comparison: ties keep the earliest cache entry.
      if (cur_diff < nearest_diff) {
        nearest_diff = cur_diff;
        nearest = c;
      }
    }

    if (nearest_diff <= 1) centroids[pos] = color_cache[nearest];
  }
}
// Store best mode stats for winner mode processing
// x->winner_mode_stats is kept sorted by increasing rd. The current mode is
// inserted at the position of the first entry with a larger rd; entries from
// there on move one slot down and the last one drops out when the list is
// full. Nothing is stored when multi-winner processing is disabled, when
// this_rd is INT64_MAX, for palette modes in non-intra-only frames, or when
// the list is full and no entry has a larger rd.
static void store_winner_mode_stats(const AV1_COMMON *const cm, MACROBLOCK *x,
                                    MB_MODE_INFO *mbmi, RD_STATS *rd_cost,
                                    RD_STATS *rd_cost_y, RD_STATS *rd_cost_uv,
                                    THR_MODES mode_index, uint8_t *color_map,
                                    BLOCK_SIZE bsize, int64_t this_rd,
                                    int enable_multiwinner_mode_process,
                                    int txfm_search_done) {
  WinnerModeStats *const stats = x->winner_mode_stats;
  const int is_palette_mode =
      mbmi->palette_mode_info.palette_size[PLANE_TYPE_Y] > 0;

  // Mode stat is not required when multiwinner mode processing is disabled
  if (!enable_multiwinner_mode_process) return;

  // Ignore mode with maximum rd
  if (this_rd == INT64_MAX) return;

  // TODO(any): Winner mode processing is currently not applicable for palette
  // mode in Inter frames. Clean-up the following code, once support is added
  const int intra_only = frame_is_intra_only(cm);
  if (!intra_only && is_palette_mode) return;

  const int max_winner_mode_count =
      intra_only ? MAX_WINNER_MODE_COUNT_INTRA : MAX_WINNER_MODE_COUNT_INTER;
  assert(x->winner_mode_count >= 0 &&
         x->winner_mode_count <= max_winner_mode_count);

  int slot = 0;
  if (x->winner_mode_count) {
    // Find the mode which has higher rd cost than this_rd
    while (slot < x->winner_mode_count && stats[slot].rd <= this_rd) ++slot;

    // No mode has higher rd cost than this_rd
    if (slot == max_winner_mode_count) return;

    if (slot < max_winner_mode_count - 1) {
      // Create a slot for current mode and move others to the next slot
      memmove(&stats[slot + 1], &stats[slot],
              (max_winner_mode_count - slot - 1) * sizeof(*stats));
    }
  }

  // Add a mode stat for winner mode processing
  WinnerModeStats *const entry = &stats[slot];
  entry->mbmi = *mbmi;
  entry->rd = this_rd;
  entry->mode_index = mode_index;

  const MACROBLOCKD *const xd = &x->e_mbd;

  // Update rd stats required for inter frame
  if (!intra_only && rd_cost && rd_cost_y && rd_cost_uv) {
    const int skip_ctx = av1_get_skip_context(xd);
    const int is_intra_mode = av1_mode_defs[mode_index].mode < INTRA_MODE_END;
    const int skip = mbmi->skip && !is_intra_mode;

    entry->rd_cost = *rd_cost;
    if (txfm_search_done) {
      entry->rate_y =
          rd_cost_y->rate + x->skip_cost[skip_ctx][rd_cost->skip || skip];
      entry->rate_uv = rd_cost_uv->rate;
    }
  }

  if (color_map) {
    // Store color_index_map for palette mode
    int block_width, block_height;
    av1_get_block_dimensions(bsize, AOM_PLANE_Y, xd, &block_width,
                             &block_height, NULL, NULL);
    memcpy(entry->color_index_map, color_map,
           block_width * block_height * sizeof(color_map[0]));
  }

  x->winner_mode_count =
      AOMMIN(x->winner_mode_count + 1, max_winner_mode_count);
}
// Given the base colors as specified in centroids[], calculate the RD cost
// of palette mode.
// The 'n' centroids are first snapped to nearby cache colors and
// de-duplicated; the search stops if fewer than PALETTE_MIN_SIZE remain.
// The candidate is then written into 'mbmi', the color index map of the luma
// plane is computed, and the candidate is dropped if the model-based
// estimate prunes it or the full luma RD search reports INT_MAX rate.
// When the candidate's RD cost beats *best_rd, the best-so-far outputs
// (*best_rd, best_palette_color_map, *best_mbmi, blk_skip, tx_type_map and
// the optional rate/distortion/skip/flag outputs) are updated.
static AOM_INLINE void palette_rd_y(
    const AV1_COMP *const cpi, MACROBLOCK *x, MB_MODE_INFO *mbmi,
    BLOCK_SIZE bsize, int dc_mode_cost, const int *data, int *centroids, int n,
    uint16_t *color_cache, int n_cache, MB_MODE_INFO *best_mbmi,
    uint8_t *best_palette_color_map, int64_t *best_rd, int64_t *best_model_rd,
    int *rate, int *rate_tokenonly, int64_t *distortion, int *skippable,
    int *beat_best_rd, PICK_MODE_CONTEXT *ctx, uint8_t *blk_skip,
    uint8_t *tx_type_map, int *beat_best_pallette_rd) {
  optimize_palette_colors(color_cache, n_cache, n, 1, centroids);
  const int num_unique = av1_remove_duplicates(centroids, n);
  // Too few unique colors to create a palette. And DC_PRED will work
  // well for that case anyway. So skip.
  if (num_unique < PALETTE_MIN_SIZE) return;

  // Store the (clipped) palette colors and the palette size.
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
  const int use_hbd = cpi->common.seq_params.use_highbitdepth;
  for (int c = 0; c < num_unique; ++c) {
    pmi->palette_colors[c] =
        use_hbd ? clip_pixel_highbd((int)centroids[c],
                                    cpi->common.seq_params.bit_depth)
                : clip_pixel(centroids[c]);
  }
  pmi->palette_size[0] = num_unique;

  // Build the color index map for the visible area, then pad it out to the
  // full block.
  MACROBLOCKD *const xd = &x->e_mbd;
  uint8_t *const color_map = xd->plane[0].color_index_map;
  int block_width, block_height, rows, cols;
  av1_get_block_dimensions(bsize, 0, xd, &block_width, &block_height, &rows,
                           &cols);
  av1_calc_indices(data, centroids, color_map, rows * cols, num_unique, 1);
  extend_palette_color_map(color_map, cols, rows, block_width, block_height);

  // Cheap model-based check before the full search.
  const int palette_mode_cost =
      intra_mode_info_cost_y(cpi, x, mbmi, bsize, dc_mode_cost);
  if (model_intra_yrd_and_prune(cpi, x, bsize, palette_mode_cost,
                                best_model_rd)) {
    return;
  }

  RD_STATS tokenonly_rd_stats;
  super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
  if (tokenonly_rd_stats.rate == INT_MAX) return;

  const int this_rate = tokenonly_rd_stats.rate + palette_mode_cost;
  const int64_t this_rd =
      RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
  // The token-only rate reported to the caller excludes the tx size cost;
  // this_rate / this_rd above were computed before the subtraction.
  if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(mbmi->sb_type)) {
    tokenonly_rd_stats.rate -= tx_size_cost(x, bsize, mbmi->tx_size);
  }

  // Collect mode stats for multiwinner mode processing
  const int txfm_search_done = 1;
  store_winner_mode_stats(
      &cpi->common, x, mbmi, NULL, NULL, NULL, THR_DC, color_map, bsize,
      this_rd, cpi->sf.winner_mode_sf.enable_multiwinner_mode_process,
      txfm_search_done);

  if (this_rd >= *best_rd) return;

  *best_rd = this_rd;
  // Setting beat_best_rd flag because current mode rd is better than best_rd.
  // This flag need to be updated only for palette evaluation in key frames
  if (beat_best_rd) *beat_best_rd = 1;
  memcpy(best_palette_color_map, color_map,
         block_width * block_height * sizeof(color_map[0]));
  *best_mbmi = *mbmi;
  memcpy(blk_skip, x->blk_skip, sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
  av1_copy_array(tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
  if (rate) *rate = this_rate;
  if (rate_tokenonly) *rate_tokenonly = tokenonly_rd_stats.rate;
  if (distortion) *distortion = tokenonly_rd_stats.dist;
  if (skippable) *skippable = tokenonly_rd_stats.skip;
  if (beat_best_pallette_rd) *beat_best_pallette_rd = 1;
}
// Coarse palette search over the dominant colors: evaluates palettes made of
// the first n entries of 'top_colors' for n = start_n, start_n + step_size,
// ... while n <= end_n. Every candidate is evaluated (the loop never stops
// early). Returns the last n whose palette improved the best RD cost, or
// end_n + 1 if none did.
static AOM_INLINE int perform_top_color_coarse_palette_search(
    const AV1_COMP *const cpi, MACROBLOCK *x, MB_MODE_INFO *mbmi,
    BLOCK_SIZE bsize, int dc_mode_cost, const int *data,
    const int *const top_colors, int start_n, int end_n, int step_size,
    uint16_t *color_cache, int n_cache, MB_MODE_INFO *best_mbmi,
    uint8_t *best_palette_color_map, int64_t *best_rd, int64_t *best_model_rd,
    int *rate, int *rate_tokenonly, int64_t *distortion, int *skippable,
    int *beat_best_rd, PICK_MODE_CONTEXT *ctx, uint8_t *best_blk_skip,
    uint8_t *tx_type_map) {
  int palette[PALETTE_MAX_SIZE];
  int winner = end_n + 1;  // "No winner" sentinel.
  int n = start_n;

  do {
    int improved = 0;
    // palette_rd_y() modifies its centroid array, so work on a copy.
    for (int c = 0; c < n; ++c) palette[c] = top_colors[c];
    palette_rd_y(cpi, x, mbmi, bsize, dc_mode_cost, data, palette, n,
                 color_cache, n_cache, best_mbmi, best_palette_color_map,
                 best_rd, best_model_rd, rate, rate_tokenonly, distortion,
                 skippable, beat_best_rd, ctx, best_blk_skip, tx_type_map,
                 &improved);
    // Remember the most recent palette size that improved the best RD cost.
    if (improved) winner = n;
    n += step_size;
  } while (n <= end_n);

  return winner;
}
// Coarse palette search using k-means: for n = start_n, start_n + step_size,
// ... while n <= end_n, seeds n centroids evenly inside [lb, ub], refines
// them with av1_k_means() and evaluates the resulting palette. Every
// candidate is evaluated (the loop never stops early). Returns the last n
// whose palette improved the best RD cost, or end_n + 1 if none did.
static AOM_INLINE int perform_k_means_coarse_palette_search(
    const AV1_COMP *const cpi, MACROBLOCK *x, MB_MODE_INFO *mbmi,
    BLOCK_SIZE bsize, int dc_mode_cost, const int *data, int lb, int ub,
    int start_n, int end_n, int step_size, uint16_t *color_cache, int n_cache,
    MB_MODE_INFO *best_mbmi, uint8_t *best_palette_color_map, int64_t *best_rd,
    int64_t *best_model_rd, int *rate, int *rate_tokenonly, int64_t *distortion,
    int *skippable, int *beat_best_rd, PICK_MODE_CONTEXT *ctx,
    uint8_t *best_blk_skip, uint8_t *tx_type_map, uint8_t *color_map,
    int data_points) {
  const int max_itr = 50;
  int palette[PALETTE_MAX_SIZE];
  int winner = end_n + 1;  // "No winner" sentinel.
  int n = start_n;

  do {
    int improved = 0;
    // Seed centroid c at the middle of the c-th of n equal slices of
    // [lb, ub].
    for (int c = 0; c < n; ++c) {
      palette[c] = lb + (2 * c + 1) * (ub - lb) / n / 2;
    }
    av1_k_means(data, palette, color_map, data_points, n, 1, max_itr);
    palette_rd_y(cpi, x, mbmi, bsize, dc_mode_cost, data, palette, n,
                 color_cache, n_cache, best_mbmi, best_palette_color_map,
                 best_rd, best_model_rd, rate, rate_tokenonly, distortion,
                 skippable, beat_best_rd, ctx, best_blk_skip, tx_type_map,
                 &improved);
    // Remember the most recent palette size that improved the best RD cost.
    if (improved) winner = n;
    n += step_size;
  } while (n <= end_n);

  return winner;
}
// Perform palette search for top colors from minimum palette colors (/maximum)
// with a step-size of 1 (/-1)
// Palettes made of the first n entries of 'top_colors' are evaluated for
// n = start_n, start_n + step_size, ... until n reaches 'end_n' (exclusive).
// With prune_palette_search_level == 2 the search returns at the first n
// that does not improve the best RD cost. Returns the n at which the search
// stopped.
static AOM_INLINE int perform_top_color_palette_search(
    const AV1_COMP *const cpi, MACROBLOCK *x, MB_MODE_INFO *mbmi,
    BLOCK_SIZE bsize, int dc_mode_cost, const int *data, int *top_colors,
    int start_n, int end_n, int step_size, uint16_t *color_cache, int n_cache,
    MB_MODE_INFO *best_mbmi, uint8_t *best_palette_color_map, int64_t *best_rd,
    int64_t *best_model_rd, int *rate, int *rate_tokenonly, int64_t *distortion,
    int *skippable, int *beat_best_rd, PICK_MODE_CONTEXT *ctx,
    uint8_t *best_blk_skip, uint8_t *tx_type_map) {
  int palette[PALETTE_MAX_SIZE];
  int n = start_n;

  assert((step_size == -1) || (step_size == 1) || (step_size == 0) ||
         (step_size == 2));
  assert(IMPLIES(step_size == -1, start_n > end_n));
  assert(IMPLIES(step_size == 1, start_n < end_n));

  do {
    int improved = 0;
    // palette_rd_y() modifies its centroid array, so work on a copy.
    for (int c = 0; c < n; ++c) palette[c] = top_colors[c];
    palette_rd_y(cpi, x, mbmi, bsize, dc_mode_cost, data, palette, n,
                 color_cache, n_cache, best_mbmi, best_palette_color_map,
                 best_rd, best_model_rd, rate, rate_tokenonly, distortion,
                 skippable, beat_best_rd, ctx, best_blk_skip, tx_type_map,
                 &improved);
    // Break if current palette colors is not winning
    if ((cpi->sf.intra_sf.prune_palette_search_level == 2) && !improved) {
      return n;
    }
    n += step_size;
  } while (n != end_n);

  return n;
}
// Perform k-means based palette search from minimum palette colors (/maximum)
// with a step-size of 1 (/-1)
// For n = start_n, start_n + step_size, ... until n reaches 'end_n'
// (exclusive), seeds n centroids evenly inside [lb, ub], refines them with
// av1_k_means() and evaluates the resulting palette. With
// prune_palette_search_level == 2 the search returns at the first n that
// does not improve the best RD cost. Returns the n at which the search
// stopped.
static AOM_INLINE int perform_k_means_palette_search(
    const AV1_COMP *const cpi, MACROBLOCK *x, MB_MODE_INFO *mbmi,
    BLOCK_SIZE bsize, int dc_mode_cost, const int *data, int lb, int ub,
    int start_n, int end_n, int step_size, uint16_t *color_cache, int n_cache,
    MB_MODE_INFO *best_mbmi, uint8_t *best_palette_color_map, int64_t *best_rd,
    int64_t *best_model_rd, int *rate, int *rate_tokenonly, int64_t *distortion,
    int *skippable, int *beat_best_rd, PICK_MODE_CONTEXT *ctx,
    uint8_t *best_blk_skip, uint8_t *tx_type_map, uint8_t *color_map,
    int data_points) {
  const int max_itr = 50;
  int palette[PALETTE_MAX_SIZE];
  int n = start_n;

  assert((step_size == -1) || (step_size == 1) || (step_size == 0) ||
         (step_size == 2));
  assert(IMPLIES(step_size == -1, start_n > end_n));
  assert(IMPLIES(step_size == 1, start_n < end_n));

  do {
    int improved = 0;
    // Seed centroid c at the middle of the c-th of n equal slices of
    // [lb, ub].
    for (int c = 0; c < n; ++c) {
      palette[c] = lb + (2 * c + 1) * (ub - lb) / n / 2;
    }
    av1_k_means(data, palette, color_map, data_points, n, 1, max_itr);
    palette_rd_y(cpi, x, mbmi, bsize, dc_mode_cost, data, palette, n,
                 color_cache, n_cache, best_mbmi, best_palette_color_map,
                 best_rd, best_model_rd, rate, rate_tokenonly, distortion,
                 skippable, beat_best_rd, ctx, best_blk_skip, tx_type_map,
                 &improved);
    // Break if current palette colors is not winning
    if ((cpi->sf.intra_sf.prune_palette_search_level == 2) && !improved) {
      return n;
    }
    n += step_size;
  } while (n != end_n);

  return n;
}
// First palette size to try in the second (fine) search stage, given the
// coarse-stage winner 'x': the neighbor just below the winner, or the one
// just above it when the winner is already the minimum size.
// Arguments are parenthesized and the expansion carries no trailing
// semicolon, so the macro can be used inside any expression.
#define START_N_STAGE2(x)                           \
  (((x) == PALETTE_MIN_SIZE) ? PALETTE_MIN_SIZE + 1 \
                             : AOMMAX((x)-1, PALETTE_MIN_SIZE))
// Last palette size to try in the second search stage: the neighbor just
// above the winner 'x', or the one just below it when the winner is already
// 'end_n'.
#define END_N_STAGE2(x, end_n) \
  (((x) == (end_n)) ? (x)-1 : AOMMIN((x) + 1, PALETTE_MAX_SIZE))

// Derives the second-stage search range from the coarse-stage 'winner':
// *start_n_stage2 and *end_n_stage2 are the neighbor palette sizes to
// evaluate, and *step_size_stage2 is the distance between them (0 when both
// are the same size, 2 when they lie on either side of the winner, -1 when
// the winner is 'end_n' and both bounds collapse below it).
static AOM_INLINE void update_start_end_stage_2(int *start_n_stage2,
                                                int *end_n_stage2,
                                                int *step_size_stage2,
                                                int winner, int end_n) {
  *start_n_stage2 = START_N_STAGE2(winner);
  *end_n_stage2 = END_N_STAGE2(winner, end_n);
  *step_size_stage2 = *end_n_stage2 - *start_n_stage2;
}
// Start index and step size below are chosen to evaluate unique
// candidates in neighbor search, in case a winner candidate is found in
// coarse search. Example,
// 1) 8 colors (end_n = 8): 2,3,4,5,6,7,8. start_n is chosen as 2 and step
// size is chosen as 3. Therefore, coarse search will evaluate 2, 5 and 8.
// If winner is found at 5, then 4 and 6 are evaluated. Similarly, for 2
// (3) and 8 (7).
// 2) 7 colors (end_n = 7): 2,3,4,5,6,7. If start_n is chosen as 2 (same
// as for 8 colors) then step size should also be 2, to cover all
// candidates. Coarse search will evaluate 2, 4 and 6. If winner is either
// 2 or 4, 3 will be evaluated. Instead, if start_n=3 and step_size=3,
// coarse search will evaluate 3 and 6. For the winner, unique neighbors
// (3: 2,4 or 6: 5,7) would be evaluated.
//
// Both tables are indexed by end_n, the largest palette size to consider.

// start index for coarse palette search for dominant colors and k-means
static const uint8_t start_n_lookup_table[PALETTE_MAX_SIZE + 1] = {
  0, 0, 0,  // end_n = 0, 1, 2
  3, 3, 2,  // end_n = 3, 4, 5
  3, 3, 2,  // end_n = 6, 7, 8
};

// step size for coarse palette search for dominant colors and k-means
static const uint8_t step_size_lookup_table[PALETTE_MAX_SIZE + 1] = {
  0, 0, 0,  // end_n = 0, 1, 2
  3, 3, 3,  // end_n = 3, 4, 5
  3, 3, 3,  // end_n = 6, 7, 8
};
static void rd_pick_palette_intra_sby(
const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
int dc_mode_cost, MB_MODE_INFO *best_mbmi, uint8_t *best_palette_color_map,
int64_t *best_rd, int64_t *best_model_rd, int *rate, int *rate_tokenonly,
int64_t *distortion, int *skippable, int *beat_best_rd,
PICK_MODE_CONTEXT *ctx, uint8_t *best_blk_skip, uint8_t *tx_type_map) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = xd->mi[0];
assert(!is_inter_block(mbmi));
assert(av1_allow_palette(cpi->common.allow_screen_content_tools, bsize));
const int src_stride = x->plane[0].src.stride;
const uint8_t *const src = x->plane[0].src.buf;
int block_width, block_height, rows, cols;
av1_get_block_dimensions(bsize, 0, xd, &block_width, &block_height, &rows,
&cols);
const SequenceHeader *const seq_params = &cpi->common.seq_params;
const int is_hbd = seq_params->use_highbitdepth;
const int bit_depth = seq_params->bit_depth;
int count_buf[1 << 12]; // Maximum (1 << 12) color levels.
int colors;
if (is_hbd) {
colors = av1_count_colors_highbd(src, src_stride, rows, cols, bit_depth,
count_buf);
} else {
colors = av1_count_colors(src, src_stride, rows, cols, count_buf);
}
uint8_t *const color_map = xd->plane[0].color_index_map;
if (colors > 1 && colors <= 64) {
int *const data = x->palette_buffer->kmeans_data_buf;
int centroids[PALETTE_MAX_SIZE];
int lb, ub;
if (is_hbd) {
int *data_pt = data;
const uint16_t *src_pt = CONVERT_TO_SHORTPTR(src);
lb = ub = src_pt[0];
for (int r = 0; r < rows; ++r) {
for (int c = 0; c < cols; ++c) {
const int val = src_pt[c];
data_pt[c] = val;
lb = AOMMIN(lb, val);
ub = AOMMAX(ub, val);
}
src_pt += src_stride;
data_pt += cols;
}
} else {
int *data_pt = data;
const uint8_t *src_pt = src;
lb = ub = src[0];
for (int r = 0; r < rows; ++r) {
for (int c = 0; c < cols; ++c) {
const int val = src_pt[c];
data_pt[c] = val;
lb = AOMMIN(lb, val);
ub = AOMMAX(ub, val);
}
src_pt += src_stride;
data_pt += cols;
}
}
mbmi->mode = DC_PRED;
mbmi->filter_intra_mode_info.use_filter_intra = 0;
uint16_t color_cache[2 * PALETTE_MAX_SIZE];
const int n_cache = av1_get_palette_cache(xd, 0, color_cache);
// Find the dominant colors, stored in top_colors[].
int top_colors[PALETTE_MAX_SIZE] = { 0 };
for (int i = 0; i < AOMMIN(colors, PALETTE_MAX_SIZE); ++i) {
int max_count = 0;
for (int j = 0; j < (1 << bit_depth); ++j) {
if (count_buf[j] > max_count) {
max_count = count_buf[j];
top_colors[i] = j;
}
}
assert(max_count > 0);
count_buf[top_colors[i]] = 0;
}
// Try the dominant colors directly.
// TODO(huisu@google.com): Try to avoid duplicate computation in cases
// where the dominant colors and the k-means results are similar.
if ((cpi->sf.intra_sf.prune_palette_search_level == 1) &&
(colors > PALETTE_MIN_SIZE)) {
const int end_n = AOMMIN(colors, PALETTE_MAX_SIZE);
assert(PALETTE_MAX_SIZE == 8);
assert(PALETTE_MIN_SIZE == 2);
// Choose the start index and step size for coarse search based on number
// of colors
const int start_n = start_n_lookup_table[end_n];
const int step_size = step_size_lookup_table[end_n];
// Perform top color coarse palette search to find the winner candidate
const int top_color_winner = perform_top_color_coarse_palette_search(
cpi, x, mbmi, bsize, dc_mode_cost, data, top_colors, start_n, end_n,
step_size, color_cache, n_cache, best_mbmi, best_palette_color_map,
best_rd, best_model_rd, rate, rate_tokenonly, distortion, skippable,
beat_best_rd, ctx, best_blk_skip, tx_type_map);
// Evaluate neighbors for the winner color (if winner is found) in the
// above coarse search for dominant colors
if (top_color_winner <= end_n) {
int start_n_stage2, end_n_stage2, step_size_stage2;
update_start_end_stage_2(&start_n_stage2, &end_n_stage2,
&step_size_stage2, top_color_winner, end_n);
// perform finer search for the winner candidate
perform_top_color_palette_search(
cpi, x, mbmi, bsize, dc_mode_cost, data, top_colors, start_n_stage2,
end_n_stage2 + step_size_stage2, step_size_stage2, color_cache,
n_cache, best_mbmi, best_palette_color_map, best_rd, best_model_rd,
rate, rate_tokenonly, distortion, skippable, beat_best_rd, ctx,
best_blk_skip, tx_type_map);
}
// K-means clustering.
// Perform k-means coarse palette search to find the winner candidate
const int k_means_winner = perform_k_means_coarse_palette_search(
cpi, x, mbmi, bsize, dc_mode_cost, data, lb, ub, start_n, end_n,
step_size, color_cache, n_cache, best_mbmi, best_palette_color_map,
best_rd, best_model_rd, rate, rate_tokenonly, distortion, skippable,
beat_best_rd, ctx, best_blk_skip, tx_type_map, color_map,
rows * cols);
// Evaluate neighbors for the winner color (if winner is found) in the
// above coarse search for k-means
if (k_means_winner <= end_n) {
int start_n_stage2, end_n_stage2, step_size_stage2;
update_start_end_stage_2(&start_n_stage2, &end_n_stage2,
&step_size_stage2, k_means_winner, end_n);
// perform finer search for the winner candidate
perform_k_means_palette_search(
cpi, x, mbmi, bsize, dc_mode_cost, data, lb, ub, start_n_stage2,
end_n_stage2 + step_size_stage2, step_size_stage2, color_cache,
n_cache, best_mbmi, best_palette_color_map, best_rd, best_model_rd,
rate, rate_tokenonly, distortion, skippable, beat_best_rd, ctx,
best_blk_skip, tx_type_map, color_map, rows * cols);
}
} else {
const int start_n = AOMMIN(colors, PALETTE_MAX_SIZE),
end_n = PALETTE_MIN_SIZE;
// Perform top color palette search from start_n
const int top_color_winner = perform_top_color_palette_search(
cpi, x, mbmi, bsize, dc_mode_cost, data, top_colors, start_n,
end_n - 1, -1, color_cache, n_cache, best_mbmi,
best_palette_color_map, best_rd, best_model_rd, rate, rate_tokenonly,
distortion, skippable, beat_best_rd, ctx, best_blk_skip, tx_type_map);
if (top_color_winner > end_n) {
// Perform top color palette search in reverse order for the remaining
// colors
perform_top_color_palette_search(
cpi, x, mbmi, bsize, dc_mode_cost, data, top_colors, end_n,
top_color_winner, 1, color_cache, n_cache, best_mbmi,
best_palette_color_map, best_rd, best_model_rd, rate,
rate_tokenonly, distortion, skippable, beat_best_rd, ctx,
best_blk_skip, tx_type_map);
}
// K-means clustering.
if (colors == PALETTE_MIN_SIZE) {
// Special case: These colors automatically become the centroids.
assert(colors == 2);
centroids[0] = lb;
centroids[1] = ub;
palette_rd_y(cpi, x, mbmi, bsize, dc_mode_cost, data, centroids, colors,
color_cache, n_cache, best_mbmi, best_palette_color_map,
best_rd, best_model_rd, rate, rate_tokenonly, distortion,
skippable, beat_best_rd, ctx, best_blk_skip, tx_type_map,
NULL);
} else {
// Perform k-means palette search from start_n
const int k_means_winner = perform_k_means_palette_search(
cpi, x, mbmi, bsize, dc_mode_cost, data, lb, ub, start_n, end_n - 1,
-1, color_cache, n_cache, best_mbmi, best_palette_color_map,
best_rd, best_model_rd, rate, rate_tokenonly, distortion, skippable,
beat_best_rd, ctx, best_blk_skip, tx_type_map, color_map,
rows * cols);
if (k_means_winner > end_n) {
// Perform k-means palette search in reverse order for the remaining
// colors
perform_k_means_palette_search(
cpi, x, mbmi, bsize, dc_mode_cost, data, lb, ub, end_n,
k_means_winner, 1, color_cache, n_cache, best_mbmi,
best_palette_color_map, best_rd, best_model_rd, rate,
rate_tokenonly, distortion, skippable, beat_best_rd, ctx,
best_blk_skip, tx_type_map, color_map, rows * cols);
}
}
}
}
if (best_mbmi->palette_mode_info.palette_size[0] > 0) {
memcpy(color_map, best_palette_color_map,
block_width * block_height * sizeof(best_palette_color_map[0]));
}
*mbmi = *best_mbmi;
}
// Searches the filter intra modes for the luma plane of the current block.
//
// Every mode in [0, FILTER_INTRA_MODES) is tried on top of DC_PRED with the
// luma palette disabled. A mode is skipped when model_intra_yrd_and_prune()
// returns nonzero or when super_block_yrd() reports a rate of INT_MAX. Each
// mode that survives both checks is also passed to store_winner_mode_stats().
//
// Whenever a mode beats *best_rd, the outputs *best_rd, *rate,
// *rate_tokenonly, *distortion and *skippable are overwritten and
// x->blk_skip is saved into ctx->blk_skip.
//
// Returns 1 if a filter intra mode is selected; mbmi then holds the winning
// tx size and filter intra info, and ctx->tx_type_map holds the matching tx
// type map. Returns 0 otherwise; in that case mbmi is left as modified by the
// search (it is not restored here).
static int rd_pick_filter_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
                                    int *rate, int *rate_tokenonly,
                                    int64_t *distortion, int *skippable,
                                    BLOCK_SIZE bsize, int mode_cost,
                                    int64_t *best_rd, int64_t *best_model_rd,
                                    PICK_MODE_CONTEXT *ctx) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];

  // Snapshot of the best candidate found so far.
  int found_winner = 0;
  TX_SIZE winner_tx_size = TX_8X8;
  FILTER_INTRA_MODE_INFO winner_info;
  uint8_t winner_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  av1_zero(winner_info);

  // Filter intra is signalled on top of DC_PRED and excludes the palette.
  mbmi->filter_intra_mode_info.use_filter_intra = 1;
  mbmi->mode = DC_PRED;
  mbmi->palette_mode_info.palette_size[0] = 0;

  for (FILTER_INTRA_MODE fi_mode = 0; fi_mode < FILTER_INTRA_MODES;
       ++fi_mode) {
    mbmi->filter_intra_mode_info.filter_intra_mode = fi_mode;

    if (model_intra_yrd_and_prune(cpi, x, bsize, mode_cost, best_model_rd)) {
      continue;
    }

    RD_STATS tokenonly_rd_stats;
    super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
    if (tokenonly_rd_stats.rate == INT_MAX) continue;

    const int this_rate =
        tokenonly_rd_stats.rate +
        intra_mode_info_cost_y(cpi, x, mbmi, bsize, mode_cost);
    const int64_t this_rd =
        RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);

    // Collect mode stats for multiwinner mode processing. The trailing 1
    // is the txfm_search_done argument.
    store_winner_mode_stats(
        &cpi->common, x, mbmi, NULL, NULL, NULL, 0, NULL, bsize, this_rd,
        cpi->sf.winner_mode_sf.enable_multiwinner_mode_process, 1);

    if (this_rd >= *best_rd) continue;

    // New best: record everything needed to restore this candidate later.
    *best_rd = this_rd;
    winner_tx_size = mbmi->tx_size;
    winner_info = mbmi->filter_intra_mode_info;
    av1_copy_array(winner_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
    memcpy(ctx->blk_skip, x->blk_skip,
           sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
    *rate = this_rate;
    *rate_tokenonly = tokenonly_rd_stats.rate;
    *distortion = tokenonly_rd_stats.dist;
    *skippable = tokenonly_rd_stats.skip;
    found_winner = 1;
  }

  if (!found_winner) return 0;

  // Put the winning configuration back into the mode info and context.
  mbmi->mode = DC_PRED;
  mbmi->tx_size = winner_tx_size;
  mbmi->filter_intra_mode_info = winner_info;
  av1_copy_array(ctx->tx_type_map, winner_tx_type_map, ctx->num_4x4_blk);
  return 1;
}
// Runs the RD calculation for a single luma intra prediction angle delta.
//
// The function stores angle_delta in mbmi, runs model_intra_yrd_and_prune()
// unless skip_model_rd is nonzero, and then calls super_block_yrd() with
// best_rd_in as its RD reference.
//
// Returns the RD cost of this angle delta, or INT64_MAX when
// model_intra_yrd_and_prune() returns nonzero or super_block_yrd() reports a
// rate of INT_MAX. If the cost is lower than *best_rd, the best-so-far record
// is refreshed: best_blk_skip, best_tx_type_map, *best_rd, *best_angle_delta,
// *best_tx_size, *rate and the rate/dist/skip fields of rd_stats.
static int64_t calc_rd_given_intra_angle(
    const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mode_cost,
    int64_t best_rd_in, int8_t angle_delta, int max_angle_delta, int *rate,
    RD_STATS *rd_stats, int *best_angle_delta, TX_SIZE *best_tx_size,
    int64_t *best_rd, int64_t *best_model_rd, uint8_t *best_tx_type_map,
    uint8_t *best_blk_skip, int skip_model_rd) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  const int num_blk = bsize_to_num_blk(bsize);
  assert(!is_inter_block(mbmi));

  mbmi->angle_delta[PLANE_TYPE_Y] = angle_delta;

  // Optional pruning step before the full transform search.
  if (!skip_model_rd &&
      model_intra_yrd_and_prune(cpi, x, bsize, mode_cost, best_model_rd)) {
    return INT64_MAX;
  }

  RD_STATS y_stats;
  super_block_yrd(cpi, x, &y_stats, bsize, best_rd_in);
  if (y_stats.rate == INT_MAX) return INT64_MAX;

  // Total rate = mode cost + token rate + cost of signalling the angle delta.
  // The delta is offset by max_angle_delta to index the cost table.
  const int total_rate =
      mode_cost + y_stats.rate +
      x->angle_delta_cost[mbmi->mode - V_PRED][max_angle_delta + angle_delta];
  const int64_t cost = RDCOST(x->rdmult, total_rate, y_stats.dist);

  // Not an improvement: report the cost and leave the best record untouched.
  if (cost >= *best_rd) return cost;

  memcpy(best_blk_skip, x->blk_skip, sizeof(best_blk_skip[0]) * num_blk);
  av1_copy_array(best_tx_type_map, xd->tx_type_map, num_blk);
  *best_rd = cost;
  *best_angle_delta = mbmi->angle_delta[PLANE_TYPE_Y];
  *best_tx_size = mbmi->tx_size;
  *rate = total_rate;
  rd_stats->rate = y_stats.rate;
  rd_stats->dist = y_stats.dist;
  rd_stats->skip = y_stats.skip;
  return cost;
}
// With a given luma directional intra prediction mode, picks the best angle
// delta. Returns the RD cost corresponding to the best angle delta.
//
// The search runs in two stages:
//   1. Even deltas (0, +/-2, ...) up to MAX_ANGLE_DELTA are evaluated. The RD
//      reference passed down is best_rd plus a margin (best_rd >> 3 for the
//      very first candidate, best_rd >> 5 afterwards), or INT64_MAX if best_rd
//      is INT64_MAX. If the first candidate (delta 0) returns INT64_MAX the
//      search stops immediately and the current best_rd is returned.
//   2. Odd deltas are evaluated only when at least one of the neighboring even
//      deltas of the same sign has a cost within best_rd + (best_rd >> 5).
//
// rd_cost[2 * d + s] holds the stage-1 cost of delta d with sign s (s == 0 is
// +d, s == 1 is -d). Delta 0 is evaluated once and mirrored into both slots.
// Slots for d == MAX_ANGLE_DELTA + 1 are never written and stay INT64_MAX, so
// the stage-2 neighbor lookup at d + 1 stays inside the array.
//
// If skip_model_rd_for_zero_deg is nonzero, the model-RD pruning step is
// skipped for the zero-delta candidate only.
//
// On exit, if rd_stats->rate != INT_MAX, mbmi->tx_size, the luma angle delta,
// x->blk_skip and xd->tx_type_map are restored to the best candidate.
// NOTE(review): this appears to rely on the caller setting rd_stats->rate to
// INT_MAX before the call -- confirm against callers.
static int64_t rd_pick_intra_angle_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
                                       int *rate, RD_STATS *rd_stats,
                                       BLOCK_SIZE bsize, int mode_cost,
                                       int64_t best_rd, int64_t *best_model_rd,
                                       int skip_model_rd_for_zero_deg) {
  enum { RD_COST_SLOTS = 2 * (MAX_ANGLE_DELTA + 2) };

  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  assert(!is_inter_block(mbmi));

  int best_angle_delta = 0;
  int64_t rd_cost[RD_COST_SLOTS];
  TX_SIZE best_tx_size = mbmi->tx_size;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];

  for (int i = 0; i < RD_COST_SLOTS; ++i) rd_cost[i] = INT64_MAX;

  // Stage 1: even angle deltas, both signs.
  int first_try = 1;
  for (int angle_delta = 0; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
    for (int i = 0; i < 2; ++i) {
      const int64_t best_rd_in =
          (best_rd == INT64_MAX) ? INT64_MAX
                                 : (best_rd + (best_rd >> (first_try ? 3 : 5)));
      // Logical (not bitwise) AND, so that any nonzero flag value enables the
      // skip for the zero-delta candidate.
      const int skip_model_rd =
          (skip_model_rd_for_zero_deg && angle_delta == 0);
      const int64_t this_rd = calc_rd_given_intra_angle(
          cpi, x, bsize, mode_cost, best_rd_in, (1 - 2 * i) * angle_delta,
          MAX_ANGLE_DELTA, rate, rd_stats, &best_angle_delta, &best_tx_size,
          &best_rd, best_model_rd, best_tx_type_map, best_blk_skip,
          skip_model_rd);
      rd_cost[2 * angle_delta + i] = this_rd;
      // If the zero-delta candidate fails, give up on this mode entirely.
      if (first_try && this_rd == INT64_MAX) return best_rd;
      first_try = 0;
      if (angle_delta == 0) {
        // +0 and -0 are the same candidate; mirror the cost and move on.
        rd_cost[1] = this_rd;
        break;
      }
    }
  }

  assert(best_rd != INT64_MAX);

  // Stage 2: odd angle deltas, gated on the costs of their even neighbors.
  for (int angle_delta = 1; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
    for (int i = 0; i < 2; ++i) {
      const int64_t rd_thresh = best_rd + (best_rd >> 5);
      const int skip_search =
          rd_cost[2 * (angle_delta + 1) + i] > rd_thresh &&
          rd_cost[2 * (angle_delta - 1) + i] > rd_thresh;
      if (skip_search) continue;
      calc_rd_given_intra_angle(
          cpi, x, bsize, mode_cost, best_rd, (1 - 2 * i) * angle_delta,
          MAX_ANGLE_DELTA, rate, rd_stats, &best_angle_delta, &best_tx_size,
          &best_rd, best_model_rd, best_tx_type_map, best_blk_skip, 0);
    }
  }

  // Restore the state of the best candidate.
  if (rd_stats->rate != INT_MAX) {
    mbmi->tx_size = best_tx_size;
    mbmi->angle_delta[PLANE_TYPE_Y] = best_angle_delta;
    const int n4 = bsize_to_num_blk(bsize);
    memcpy(x->blk_skip, best_blk_skip, sizeof(best_blk_skip[0]) * n4);
    av1_copy_array(xd->tx_type_map, best_tx_type_map, n4);
  }
  return best_rd;
}
// Given selected prediction mode, search for the best tx type and size.
static AOM_INLINE int intra_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, const int *bmode_costs,
int64_t *best_rd, int *rate,
int *rate_tokenonly, int64_t *distortion,