blob: 78525e5bed245c4af3aa4cdfb6aaaa770dbc8fee [file] [log] [blame]
Yaowu Xuc27fc142016-08-22 16:08:15 -07001/*
Yaowu Xu2ab7ff02016-09-02 12:04:54 -07002 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
Yaowu Xuc27fc142016-08-22 16:08:15 -07003 *
Yaowu Xu2ab7ff02016-09-02 12:04:54 -07004 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
Yaowu Xuc27fc142016-08-22 16:08:15 -070010 */
11
12#include <assert.h>
13#include <math.h>
14
Yaowu Xuf883b422016-08-30 14:01:10 -070015#include "./aom_dsp_rtcd.h"
Jingning Han1aab8182016-06-03 11:09:06 -070016#include "./av1_rtcd.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070017
Yaowu Xuf883b422016-08-30 14:01:10 -070018#include "aom_dsp/aom_dsp_common.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070019#include "aom_dsp/blend.h"
Yaowu Xuf883b422016-08-30 14:01:10 -070020#include "aom_mem/aom_mem.h"
Angie Chiang4d55d762017-12-13 16:18:37 -080021#include "aom_ports/aom_timer.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070022#include "aom_ports/mem.h"
23#include "aom_ports/system_state.h"
24
David Michael Barr5b2021e2017-08-17 18:12:39 +090025#if CONFIG_CFL
26#include "av1/common/cfl.h"
27#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -070028#include "av1/common/common.h"
29#include "av1/common/common_data.h"
30#include "av1/common/entropy.h"
31#include "av1/common/entropymode.h"
32#include "av1/common/idct.h"
33#include "av1/common/mvref_common.h"
Rupert Swarbrickc0cea7f2017-08-22 14:06:56 +010034#include "av1/common/obmc.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070035#include "av1/common/pred_common.h"
36#include "av1/common/quant_common.h"
37#include "av1/common/reconinter.h"
38#include "av1/common/reconintra.h"
39#include "av1/common/scan.h"
40#include "av1/common/seg_common.h"
Angie Chiang47e4b362017-03-24 11:25:10 -070041#if CONFIG_LV_MAP
42#include "av1/common/txb_common.h"
43#endif
Yue Chen69f18e12016-09-08 14:48:15 -070044#include "av1/common/warped_motion.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070045
Jingning Han1aab8182016-06-03 11:09:06 -070046#include "av1/encoder/aq_variance.h"
Tom Finegan17ce8b12017-02-08 12:46:31 -080047#include "av1/encoder/av1_quantize.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070048#include "av1/encoder/cost.h"
49#include "av1/encoder/encodemb.h"
50#include "av1/encoder/encodemv.h"
51#include "av1/encoder/encoder.h"
Angie Chiang47e4b362017-03-24 11:25:10 -070052#if CONFIG_LV_MAP
53#include "av1/encoder/encodetxb.h"
54#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -070055#include "av1/encoder/hybrid_fwd_txfm.h"
56#include "av1/encoder/mcomp.h"
57#include "av1/encoder/palette.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070058#include "av1/encoder/ratectrl.h"
59#include "av1/encoder/rd.h"
60#include "av1/encoder/rdopt.h"
Debargha Mukherjeeceebb702016-10-11 05:26:50 -070061#include "av1/encoder/tokenize.h"
Alexander Bokov0c7eb102017-09-07 18:49:00 -070062#include "av1/encoder/tx_prune_model_weights.h"
Yushin Chod0b77ac2017-10-20 17:33:16 -070063
#if CONFIG_DUAL_FILTER
// Number of (row filter, column filter) combinations searched when dual
// interpolation filtering is enabled: one per ordered pair of the
// SWITCHABLE_FILTERS filter indices.
#define DUAL_FILTER_SET_SIZE (SWITCHABLE_FILTERS * SWITCHABLE_FILTERS)
// All { first, second } filter index pairs, enumerated in row-major order
// (0..2 x 0..2). NOTE(review): which element is row vs. column is not
// visible in this chunk -- confirm against the consumer of filter_sets.
static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = {
  { 0, 0 }, { 0, 1 }, { 0, 2 }, { 1, 0 }, { 1, 1 },
  { 1, 2 }, { 2, 0 }, { 2, 1 }, { 2, 2 },
};
#endif  // CONFIG_DUAL_FILTER
Yaowu Xuc27fc142016-08-22 16:08:15 -070071
// Each XXX_FRAME_MODE_MASK sets the bit of every reference frame EXCEPT
// frame XXX itself (INTRA_FRAME included), i.e. it is the complement of that
// single reference within the reference-frame bit space.
#define LAST_FRAME_MODE_MASK                                          \
  ((1 << INTRA_FRAME) | (1 << LAST2_FRAME) | (1 << LAST3_FRAME) |     \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF2_FRAME) | \
   (1 << ALTREF_FRAME))
#define LAST2_FRAME_MODE_MASK                                         \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST3_FRAME) |      \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF2_FRAME) | \
   (1 << ALTREF_FRAME))
#define LAST3_FRAME_MODE_MASK                                         \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) |      \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF2_FRAME) | \
   (1 << ALTREF_FRAME))
#define GOLDEN_FRAME_MODE_MASK                                        \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) |      \
   (1 << LAST3_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF2_FRAME) |  \
   (1 << ALTREF_FRAME))
#define BWDREF_FRAME_MODE_MASK                                        \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) |      \
   (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << ALTREF2_FRAME) |  \
   (1 << ALTREF_FRAME))
#define ALTREF2_FRAME_MODE_MASK                                       \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) |      \
   (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) |   \
   (1 << ALTREF_FRAME))
#define ALTREF_FRAME_MODE_MASK                                        \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) |      \
   (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) |   \
   (1 << ALTREF2_FRAME))

// References permitted as the SECOND frame of a compound pair. Bit 0 is set
// as well; NOTE(review): presumably a sentinel for the single-reference /
// NONE_FRAME case -- confirm against the mask's consumers.
#if CONFIG_EXT_COMP_REFS
#define SECOND_REF_FRAME_MASK                                         \
  ((1 << ALTREF_FRAME) | (1 << ALTREF2_FRAME) | (1 << BWDREF_FRAME) | \
   (1 << GOLDEN_FRAME) | (1 << LAST2_FRAME) | 0x01)
#else  // !CONFIG_EXT_COMP_REFS
#define SECOND_REF_FRAME_MASK \
  ((1 << ALTREF_FRAME) | (1 << ALTREF2_FRAME) | (1 << BWDREF_FRAME) | 0x01)
#endif  // CONFIG_EXT_COMP_REFS

// NOTE(review): rate-discount divisor applied to NEWMV candidates --
// exact usage is outside this chunk; verify at the use sites.
#define NEW_MV_DISCOUNT_FACTOR 8

// NOTE(review): threshold used when pruning directional intra angle search;
// semantics not visible here -- confirm at the use site.
#define ANGLE_SKIP_THRESH 10
Yaowu Xuc27fc142016-08-22 16:08:15 -0700113
// Linear classifier (SVM) weights used when deciding between ADST and
// flipped-ADST transforms: the first four entries apply to the vertical
// 1-D transform, the last four to the horizontal one.
// NOTE(review): the meaning of the individual features (and which entry is
// the bias) is not visible in this chunk -- confirm against the code that
// evaluates this model before editing the values.
static const double ADST_FLIP_SVM[8] = {
  /* vertical */
  -6.6623, -2.8062, -3.2531, 3.1671,
  /* horizontal */
  -7.7051, -3.2234, -3.6193, 3.4533
};
Yaowu Xuc27fc142016-08-22 16:08:15 -0700120
// One entry of the mode-search order: a prediction mode together with its
// (up to two) reference frames; ref_frame[1] is NONE_FRAME for
// single-reference and intra modes.
typedef struct {
  PREDICTION_MODE mode;
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;
125
// A reference-frame pair on its own, without an associated prediction mode.
typedef struct { MV_REFERENCE_FRAME ref_frame[2]; } REF_DEFINITION;
127
// State threaded through the per-transform-block rate-distortion costing
// loop (passed as the opaque argument to the foreach-transform-block
// walker elsewhere in this file).
struct rdcost_block_args {
  const AV1_COMP *cpi;
  MACROBLOCK *x;
  // Above/left entropy contexts used for coefficient cost estimation.
  ENTROPY_CONTEXT t_above[2 * MAX_MIB_SIZE];
  ENTROPY_CONTEXT t_left[2 * MAX_MIB_SIZE];
  // Accumulated rate/distortion statistics for the blocks visited so far.
  RD_STATS rd_stats;
  int64_t this_rd;
  // Best RD cost seen so far; enables early termination.
  int64_t best_rd;
  // Set when the accumulated cost exceeds best_rd and the walk should stop.
  int exit_early;
  int use_fast_coef_costing;
};
139
// Index of the last single-reference NEWMV entry below.
// NOTE(review): must be kept in sync with the table ordering -- verify if
// entries are added or reordered.
#define LAST_NEW_MV_INDEX 6

// Master ordering of (prediction mode, reference-frame pair) candidates
// evaluated by the RD mode search; earlier entries are tried first.
static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
  // Single-reference NEARESTMV.
  { NEARESTMV, { LAST_FRAME, NONE_FRAME } },
  { NEARESTMV, { LAST2_FRAME, NONE_FRAME } },
  { NEARESTMV, { LAST3_FRAME, NONE_FRAME } },
  { NEARESTMV, { BWDREF_FRAME, NONE_FRAME } },
  { NEARESTMV, { ALTREF2_FRAME, NONE_FRAME } },
  { NEARESTMV, { ALTREF_FRAME, NONE_FRAME } },
  { NEARESTMV, { GOLDEN_FRAME, NONE_FRAME } },

  { DC_PRED, { INTRA_FRAME, NONE_FRAME } },

  // Single-reference NEWMV (ends at LAST_NEW_MV_INDEX + 7 entries).
  { NEWMV, { LAST_FRAME, NONE_FRAME } },
  { NEWMV, { LAST2_FRAME, NONE_FRAME } },
  { NEWMV, { LAST3_FRAME, NONE_FRAME } },
  { NEWMV, { BWDREF_FRAME, NONE_FRAME } },
  { NEWMV, { ALTREF2_FRAME, NONE_FRAME } },
  { NEWMV, { ALTREF_FRAME, NONE_FRAME } },
  { NEWMV, { GOLDEN_FRAME, NONE_FRAME } },

  // Single-reference NEARMV.
  { NEARMV, { LAST_FRAME, NONE_FRAME } },
  { NEARMV, { LAST2_FRAME, NONE_FRAME } },
  { NEARMV, { LAST3_FRAME, NONE_FRAME } },
  { NEARMV, { BWDREF_FRAME, NONE_FRAME } },
  { NEARMV, { ALTREF2_FRAME, NONE_FRAME } },
  { NEARMV, { ALTREF_FRAME, NONE_FRAME } },
  { NEARMV, { GOLDEN_FRAME, NONE_FRAME } },

  // Single-reference GLOBALMV (note: GOLDEN before ALTREF here).
  { GLOBALMV, { LAST_FRAME, NONE_FRAME } },
  { GLOBALMV, { LAST2_FRAME, NONE_FRAME } },
  { GLOBALMV, { LAST3_FRAME, NONE_FRAME } },
  { GLOBALMV, { BWDREF_FRAME, NONE_FRAME } },
  { GLOBALMV, { ALTREF2_FRAME, NONE_FRAME } },
  { GLOBALMV, { GOLDEN_FRAME, NONE_FRAME } },
  { GLOBALMV, { ALTREF_FRAME, NONE_FRAME } },

  // TODO(zoeliu): May need to reconsider the order on the modes to check

  // Compound NEAREST_NEARESTMV over all forward/backward reference pairs.
  { NEAREST_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST_FRAME, ALTREF2_FRAME } },
  { NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF2_FRAME } },
  { NEAREST_NEARESTMV, { LAST3_FRAME, ALTREF2_FRAME } },
  { NEAREST_NEARESTMV, { GOLDEN_FRAME, ALTREF2_FRAME } },

#if CONFIG_EXT_COMP_REFS
  // Same-direction (uni-directional) compound pairs.
  { NEAREST_NEARESTMV, { LAST_FRAME, LAST2_FRAME } },
  { NEAREST_NEARESTMV, { LAST_FRAME, LAST3_FRAME } },
  { NEAREST_NEARESTMV, { LAST_FRAME, GOLDEN_FRAME } },
  { NEAREST_NEARESTMV, { BWDREF_FRAME, ALTREF_FRAME } },
#endif  // CONFIG_EXT_COMP_REFS

  { PAETH_PRED, { INTRA_FRAME, NONE_FRAME } },

  { SMOOTH_PRED, { INTRA_FRAME, NONE_FRAME } },
  { SMOOTH_V_PRED, { INTRA_FRAME, NONE_FRAME } },
  { SMOOTH_H_PRED, { INTRA_FRAME, NONE_FRAME } },

  // Remaining compound modes, one 7-entry group per reference pair:
  // NEAR_NEAR, NEW_NEAREST, NEAREST_NEW, NEW_NEAR, NEAR_NEW, NEW_NEW,
  // GLOBAL_GLOBAL.
  { NEAR_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { GLOBAL_GLOBALMV, { LAST_FRAME, ALTREF_FRAME } },

  { NEAR_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { GLOBAL_GLOBALMV, { LAST2_FRAME, ALTREF_FRAME } },

  { NEAR_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { GLOBAL_GLOBALMV, { LAST3_FRAME, ALTREF_FRAME } },

  { NEAR_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { GLOBAL_GLOBALMV, { GOLDEN_FRAME, ALTREF_FRAME } },

  { NEAR_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { GLOBAL_GLOBALMV, { LAST_FRAME, BWDREF_FRAME } },

  { NEAR_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { GLOBAL_GLOBALMV, { LAST2_FRAME, BWDREF_FRAME } },

  { NEAR_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { GLOBAL_GLOBALMV, { LAST3_FRAME, BWDREF_FRAME } },

  { NEAR_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { GLOBAL_GLOBALMV, { GOLDEN_FRAME, BWDREF_FRAME } },

  { NEAR_NEARMV, { LAST_FRAME, ALTREF2_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, ALTREF2_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, ALTREF2_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, ALTREF2_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, ALTREF2_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, ALTREF2_FRAME } },
  { GLOBAL_GLOBALMV, { LAST_FRAME, ALTREF2_FRAME } },

  { NEAR_NEARMV, { LAST2_FRAME, ALTREF2_FRAME } },
  { NEW_NEARESTMV, { LAST2_FRAME, ALTREF2_FRAME } },
  { NEAREST_NEWMV, { LAST2_FRAME, ALTREF2_FRAME } },
  { NEW_NEARMV, { LAST2_FRAME, ALTREF2_FRAME } },
  { NEAR_NEWMV, { LAST2_FRAME, ALTREF2_FRAME } },
  { NEW_NEWMV, { LAST2_FRAME, ALTREF2_FRAME } },
  { GLOBAL_GLOBALMV, { LAST2_FRAME, ALTREF2_FRAME } },

  { NEAR_NEARMV, { LAST3_FRAME, ALTREF2_FRAME } },
  { NEW_NEARESTMV, { LAST3_FRAME, ALTREF2_FRAME } },
  { NEAREST_NEWMV, { LAST3_FRAME, ALTREF2_FRAME } },
  { NEW_NEARMV, { LAST3_FRAME, ALTREF2_FRAME } },
  { NEAR_NEWMV, { LAST3_FRAME, ALTREF2_FRAME } },
  { NEW_NEWMV, { LAST3_FRAME, ALTREF2_FRAME } },
  { GLOBAL_GLOBALMV, { LAST3_FRAME, ALTREF2_FRAME } },

  { NEAR_NEARMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
  { NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
  { NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
  { NEW_NEARMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
  { NEAR_NEWMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
  { NEW_NEWMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
  { GLOBAL_GLOBALMV, { GOLDEN_FRAME, ALTREF2_FRAME } },

  // Remaining directional intra modes.
  { H_PRED, { INTRA_FRAME, NONE_FRAME } },
  { V_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D135_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D207_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D153_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D63_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D117_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D45_PRED, { INTRA_FRAME, NONE_FRAME } },

#if CONFIG_EXT_COMP_REFS
  // Full compound-mode groups for the uni-directional reference pairs.
  { NEAR_NEARMV, { LAST_FRAME, LAST2_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, LAST2_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, LAST2_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, LAST2_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, LAST2_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, LAST2_FRAME } },
  { GLOBAL_GLOBALMV, { LAST_FRAME, LAST2_FRAME } },

  { NEAR_NEARMV, { LAST_FRAME, LAST3_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, LAST3_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, LAST3_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, LAST3_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, LAST3_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, LAST3_FRAME } },
  { GLOBAL_GLOBALMV, { LAST_FRAME, LAST3_FRAME } },

  { NEAR_NEARMV, { LAST_FRAME, GOLDEN_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, GOLDEN_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, GOLDEN_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, GOLDEN_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, GOLDEN_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, GOLDEN_FRAME } },
  { GLOBAL_GLOBALMV, { LAST_FRAME, GOLDEN_FRAME } },

  { NEAR_NEARMV, { BWDREF_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { BWDREF_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { BWDREF_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } },
  { GLOBAL_GLOBALMV, { BWDREF_FRAME, ALTREF_FRAME } },
#endif  // CONFIG_EXT_COMP_REFS
};
343
// Order in which luma intra prediction modes are evaluated by the RD search.
static const PREDICTION_MODE intra_rd_search_mode_order[INTRA_MODES] = {
  DC_PRED,       H_PRED,        V_PRED,    SMOOTH_PRED, PAETH_PRED,
  SMOOTH_V_PRED, SMOOTH_H_PRED, D135_PRED, D207_PRED,   D153_PRED,
  D63_PRED,      D117_PRED,     D45_PRED,
};
349
#if CONFIG_CFL
// Chroma intra search order; CFL is tried immediately after DC when the
// chroma-from-luma tool is compiled in.
static const UV_PREDICTION_MODE uv_rd_search_mode_order[UV_INTRA_MODES] = {
  UV_DC_PRED,     UV_CFL_PRED,   UV_H_PRED,        UV_V_PRED,
  UV_SMOOTH_PRED, UV_PAETH_PRED, UV_SMOOTH_V_PRED, UV_SMOOTH_H_PRED,
  UV_D135_PRED,   UV_D207_PRED,  UV_D153_PRED,     UV_D63_PRED,
  UV_D117_PRED,   UV_D45_PRED,
};
#else
// Without CFL the chroma search simply reuses the luma ordering.
#define uv_rd_search_mode_order intra_rd_search_mode_order
#endif  // CONFIG_CFL
360
Yaowu Xuc27fc142016-08-22 16:08:15 -0700361static INLINE int write_uniform_cost(int n, int v) {
hui su37499292017-04-26 09:49:53 -0700362 const int l = get_unsigned_bits(n);
363 const int m = (1 << l) - n;
Yaowu Xuc27fc142016-08-22 16:08:15 -0700364 if (l == 0) return 0;
365 if (v < m)
Hui Su751a2332018-01-23 11:35:03 -0800366 return av1_cost_literal(l - 1);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700367 else
Hui Su751a2332018-01-23 11:35:03 -0800368 return av1_cost_literal(l);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700369}
370
// constants for prune 1 and prune 2 decision boundaries
// (mid = decision threshold, margin = uncertainty band around it; used by
// the fast extended-transform pruning -- semantics of CORR/EDST features are
// not visible in this chunk).
#define FAST_EXT_TX_CORR_MID 0.0
#define FAST_EXT_TX_EDST_MID 0.1
#define FAST_EXT_TX_CORR_MARGIN 0.5
#define FAST_EXT_TX_EDST_MARGIN 0.3

// Forward declarations; NOTE(review): presumably defined later in this
// file (non-static) -- confirm when the full file is in view.
int inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_stats,
                    BLOCK_SIZE bsize, int64_t ref_best_rd, int fast);
int inter_block_uvrd(const AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_stats,
                     BLOCK_SIZE bsize, int64_t ref_best_rd, int fast);
381
Yushin Cho2f025aa2017-09-28 17:39:21 -0700382static unsigned pixel_dist_visible_only(
383 const AV1_COMP *const cpi, const MACROBLOCK *x, const uint8_t *src,
384 const int src_stride, const uint8_t *dst, const int dst_stride,
385 const BLOCK_SIZE tx_bsize, int txb_rows, int txb_cols, int visible_rows,
386 int visible_cols) {
387 unsigned sse;
388
Debargha Mukherjee35a4db32017-11-14 11:58:16 -0800389 if (txb_rows == visible_rows && txb_cols == visible_cols) {
Yushin Cho2f025aa2017-09-28 17:39:21 -0700390 cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse);
391 return sse;
392 }
Yushin Cho2f025aa2017-09-28 17:39:21 -0700393 const MACROBLOCKD *xd = &x->e_mbd;
394
395 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
396 uint64_t sse64 = aom_highbd_sse_odd_size(src, src_stride, dst, dst_stride,
397 visible_cols, visible_rows);
398 return (unsigned int)ROUND_POWER_OF_TWO(sse64, (xd->bd - 8) * 2);
399 }
Yushin Cho2f025aa2017-09-28 17:39:21 -0700400 sse = aom_sse_odd_size(src, src_stride, dst, dst_stride, visible_cols,
401 visible_rows);
402 return sse;
403}
404
Yushin Choe30a47c2017-08-15 13:08:30 -0700405#if CONFIG_DIST_8X8
Yushin Choc49177e2017-07-18 17:18:09 -0700406static uint64_t cdef_dist_8x8_16bit(uint16_t *dst, int dstride, uint16_t *src,
407 int sstride, int coeff_shift) {
408 uint64_t svar = 0;
409 uint64_t dvar = 0;
410 uint64_t sum_s = 0;
411 uint64_t sum_d = 0;
412 uint64_t sum_s2 = 0;
413 uint64_t sum_d2 = 0;
414 uint64_t sum_sd = 0;
415 uint64_t dist = 0;
416
417 int i, j;
418 for (i = 0; i < 8; i++) {
419 for (j = 0; j < 8; j++) {
420 sum_s += src[i * sstride + j];
421 sum_d += dst[i * dstride + j];
422 sum_s2 += src[i * sstride + j] * src[i * sstride + j];
423 sum_d2 += dst[i * dstride + j] * dst[i * dstride + j];
424 sum_sd += src[i * sstride + j] * dst[i * dstride + j];
425 }
426 }
427 /* Compute the variance -- the calculation cannot go negative. */
428 svar = sum_s2 - ((sum_s * sum_s + 32) >> 6);
429 dvar = sum_d2 - ((sum_d * sum_d + 32) >> 6);
430
431 // Tuning of jm's original dering distortion metric used in CDEF tool,
432 // suggested by jm
433 const uint64_t a = 4;
434 const uint64_t b = 2;
435 const uint64_t c1 = (400 * a << 2 * coeff_shift);
436 const uint64_t c2 = (b * 20000 * a * a << 4 * coeff_shift);
437
438 dist =
439 (uint64_t)floor(.5 +
440 (sum_d2 + sum_s2 - 2 * sum_sd) * .5 * (svar + dvar + c1) /
441 (sqrt(svar * (double)dvar + c2)));
442
443 // Calibrate dist to have similar rate for the same QP with MSE only
444 // distortion (as in master branch)
445 dist = (uint64_t)((float)dist * 0.75);
446
447 return dist;
448}
Yushin Choc49177e2017-07-18 17:18:09 -0700449
// Variance of a 4x4 block of 16-bit samples, computed in integer arithmetic
// as (sum of squares - mean correction) >> 4.
static int od_compute_var_4x4(uint16_t *x, int stride) {
  int acc = 0;
  int acc_sq = 0;
  int row, col;
  for (row = 0; row < 4; row++) {
    for (col = 0; col < 4; col++) {
      const int v = x[row * stride + col];
      acc += v;
      acc_sq += v * v;
    }
  }
  // acc*acc >> 4 is the (scaled) squared mean over the 16 samples.
  return (acc_sq - (acc * acc >> 4)) >> 4;
}
469
/* OD_DIST_LP_MID controls the frequency weighting filter used for computing
   the distortion. For a value X, the filter is [1 X 1]/(X + 2) and
   is applied both horizontally and vertically. For X=5, the filter is
   a good approximation for the OD_QM8_Q4_HVS quantization matrix. */
#define OD_DIST_LP_MID (5)
/* DC gain of the unnormalized [1 X 1] filter (X + 2); used to renormalize
   the filtered error energy to a unit DC response. */
#define OD_DIST_LP_NORM (OD_DIST_LP_MID + 2)
476
/* Perceptual distortion of a single 8x8 block: combines the low-pass
   filtered error energy in e_lp with a variance-mismatch term (vardist) and
   scales the total by an "activity" factor derived from local 4x4 variances.
   x and y are source/reconstruction in the pixel domain; stride applies to
   x, y and e_lp alike. */
static double od_compute_dist_8x8(int use_activity_masking, uint16_t *x,
                                  uint16_t *y, od_coeff *e_lp, int stride) {
  double sum;
  int min_var;
  double mean_var;
  double var_stat;
  double activity;
  double calibration;
  int i;
  int j;
  double vardist;

  vardist = 0;

#if 1
  min_var = INT_MAX;
  mean_var = 0;
  /* Overlapping 4x4 variances sampled on a 3x3 grid of 2-pixel offsets. */
  for (i = 0; i < 3; i++) {
    for (j = 0; j < 3; j++) {
      int varx;
      int vary;
      varx = od_compute_var_4x4(x + 2 * i * stride + 2 * j, stride);
      vary = od_compute_var_4x4(y + 2 * i * stride + 2 * j, stride);
      min_var = OD_MINI(min_var, varx);
      mean_var += 1. / (1 + varx);
      /* The cast to (double) is to avoid an overflow before the sqrt.*/
      vardist += varx - 2 * sqrt(varx * (double)vary) + vary;
    }
  }
  /* We use a different variance statistic depending on whether activity
     masking is used, since the harmonic mean appeared slightly worse with
     masking off. The calibration constant just ensures that we preserve the
     rate compared to activity=1. */
  if (use_activity_masking) {
    calibration = 1.95;
    var_stat = 9. / mean_var;
  } else {
    calibration = 1.62;
    var_stat = min_var;
  }
  /* 1.62 is a calibration constant, 0.25 is a noise floor and 1/6 is the
     activity masking constant. */
  activity = calibration * pow(.25 + var_stat, -1. / 6);
#else
  activity = 1;
#endif  // 1
  sum = 0;
  for (i = 0; i < 8; i++) {
    for (j = 0; j < 8; j++)
      sum += e_lp[i * stride + j] * (double)e_lp[i * stride + j];
  }
  /* Normalize the filter to unit DC response. */
  sum *= 1. / (OD_DIST_LP_NORM * OD_DIST_LP_NORM * OD_DIST_LP_NORM *
               OD_DIST_LP_NORM);
  return activity * activity * (sum + vardist);
}
533
// Note : Inputs x and y are in a pixel domain
/* Shared tail of od_compute_dist()/od_compute_dist_diff(): applies the
   vertical pass of the [1 mid 1] low-pass filter to the horizontally
   filtered error in tmp (result in e_lp), accumulates the per-8x8
   perceptual distortion, and finally rescales by a qindex-dependent
   regression factor so the metric tracks SSE. */
static double od_compute_dist_common(int activity_masking, uint16_t *x,
                                     uint16_t *y, int bsize_w, int bsize_h,
                                     int qindex, od_coeff *tmp,
                                     od_coeff *e_lp) {
  int i, j;
  double sum = 0;
  const int mid = OD_DIST_LP_MID;

  /* Vertical filter pass; the top and bottom rows use a doubled neighbor
     in place of the missing one. */
  for (j = 0; j < bsize_w; j++) {
    e_lp[j] = mid * tmp[j] + 2 * tmp[bsize_w + j];
    e_lp[(bsize_h - 1) * bsize_w + j] = mid * tmp[(bsize_h - 1) * bsize_w + j] +
                                        2 * tmp[(bsize_h - 2) * bsize_w + j];
  }
  for (i = 1; i < bsize_h - 1; i++) {
    for (j = 0; j < bsize_w; j++) {
      e_lp[i * bsize_w + j] = mid * tmp[i * bsize_w + j] +
                              tmp[(i - 1) * bsize_w + j] +
                              tmp[(i + 1) * bsize_w + j];
    }
  }
  /* Accumulate the perceptual metric over non-overlapping 8x8 tiles. */
  for (i = 0; i < bsize_h; i += 8) {
    for (j = 0; j < bsize_w; j += 8) {
      sum += od_compute_dist_8x8(activity_masking, &x[i * bsize_w + j],
                                 &y[i * bsize_w + j], &e_lp[i * bsize_w + j],
                                 bsize_w);
    }
  }
  /* Scale according to linear regression against SSE, for 8x8 blocks. */
  if (activity_masking) {
    sum *= 2.2 + (1.7 - 2.2) * (qindex - 99) / (210 - 99) +
           (qindex < 99 ? 2.5 * (qindex - 99) / 99 * (qindex - 99) / 99 : 0);
  } else {
    sum *= qindex >= 128
               ? 1.4 + (0.9 - 1.4) * (qindex - 128) / (209 - 128)
               : qindex <= 43 ? 1.5 + (2.0 - 1.5) * (qindex - 43) / (16 - 43)
                              : 1.5 + (1.4 - 1.5) * (qindex - 43) / (128 - 43);
  }

  return sum;
}
575
Yushin Chob7b60c52017-07-14 16:18:52 -0700576static double od_compute_dist(uint16_t *x, uint16_t *y, int bsize_w,
577 int bsize_h, int qindex) {
Yushin Cho7a428ba2017-01-12 16:28:49 -0800578 assert(bsize_w >= 8 && bsize_h >= 8);
Yushin Chod0b77ac2017-10-20 17:33:16 -0700579
Yushin Chob7b60c52017-07-14 16:18:52 -0700580 int activity_masking = 0;
Yushin Chod0b77ac2017-10-20 17:33:16 -0700581
Luc Trudeau1f7c4112017-09-13 15:10:08 -0400582 int i, j;
583 DECLARE_ALIGNED(16, od_coeff, e[MAX_TX_SQUARE]);
584 DECLARE_ALIGNED(16, od_coeff, tmp[MAX_TX_SQUARE]);
585 DECLARE_ALIGNED(16, od_coeff, e_lp[MAX_TX_SQUARE]);
586 for (i = 0; i < bsize_h; i++) {
587 for (j = 0; j < bsize_w; j++) {
588 e[i * bsize_w + j] = x[i * bsize_w + j] - y[i * bsize_w + j];
Jean-Marc Valin79c0f322017-01-18 01:58:33 -0500589 }
Yushin Cho75b01002017-06-21 13:43:57 -0700590 }
Luc Trudeau1f7c4112017-09-13 15:10:08 -0400591 int mid = OD_DIST_LP_MID;
592 for (i = 0; i < bsize_h; i++) {
593 tmp[i * bsize_w] = mid * e[i * bsize_w] + 2 * e[i * bsize_w + 1];
594 tmp[i * bsize_w + bsize_w - 1] =
595 mid * e[i * bsize_w + bsize_w - 1] + 2 * e[i * bsize_w + bsize_w - 2];
596 for (j = 1; j < bsize_w - 1; j++) {
597 tmp[i * bsize_w + j] = mid * e[i * bsize_w + j] + e[i * bsize_w + j - 1] +
598 e[i * bsize_w + j + 1];
599 }
600 }
601 return od_compute_dist_common(activity_masking, x, y, bsize_w, bsize_h,
602 qindex, tmp, e_lp);
Yushin Cho75b01002017-06-21 13:43:57 -0700603}
604
Yushin Chob7b60c52017-07-14 16:18:52 -0700605static double od_compute_dist_diff(uint16_t *x, int16_t *e, int bsize_w,
606 int bsize_h, int qindex) {
Yushin Cho75b01002017-06-21 13:43:57 -0700607 assert(bsize_w >= 8 && bsize_h >= 8);
Yushin Chod0b77ac2017-10-20 17:33:16 -0700608
Yushin Chob7b60c52017-07-14 16:18:52 -0700609 int activity_masking = 0;
Yushin Chod0b77ac2017-10-20 17:33:16 -0700610
Luc Trudeau1f7c4112017-09-13 15:10:08 -0400611 DECLARE_ALIGNED(16, uint16_t, y[MAX_TX_SQUARE]);
612 DECLARE_ALIGNED(16, od_coeff, tmp[MAX_TX_SQUARE]);
613 DECLARE_ALIGNED(16, od_coeff, e_lp[MAX_TX_SQUARE]);
614 int i, j;
615 for (i = 0; i < bsize_h; i++) {
616 for (j = 0; j < bsize_w; j++) {
617 y[i * bsize_w + j] = x[i * bsize_w + j] - e[i * bsize_w + j];
Jean-Marc Valin79c0f322017-01-18 01:58:33 -0500618 }
Yushin Cho7a428ba2017-01-12 16:28:49 -0800619 }
Luc Trudeau1f7c4112017-09-13 15:10:08 -0400620 int mid = OD_DIST_LP_MID;
621 for (i = 0; i < bsize_h; i++) {
622 tmp[i * bsize_w] = mid * e[i * bsize_w] + 2 * e[i * bsize_w + 1];
623 tmp[i * bsize_w + bsize_w - 1] =
624 mid * e[i * bsize_w + bsize_w - 1] + 2 * e[i * bsize_w + bsize_w - 2];
625 for (j = 1; j < bsize_w - 1; j++) {
626 tmp[i * bsize_w + j] = mid * e[i * bsize_w + j] + e[i * bsize_w + j - 1] +
627 e[i * bsize_w + j + 1];
628 }
629 }
630 return od_compute_dist_common(activity_masking, x, y, bsize_w, bsize_h,
631 qindex, tmp, e_lp);
Yushin Cho7a428ba2017-01-12 16:28:49 -0800632}
633
Yushin Choe30a47c2017-08-15 13:08:30 -0700634int64_t av1_dist_8x8(const AV1_COMP *const cpi, const MACROBLOCK *x,
Yushin Chob7b60c52017-07-14 16:18:52 -0700635 const uint8_t *src, int src_stride, const uint8_t *dst,
636 int dst_stride, const BLOCK_SIZE tx_bsize, int bsw,
637 int bsh, int visible_w, int visible_h, int qindex) {
638 int64_t d = 0;
Yushin Cho7a428ba2017-01-12 16:28:49 -0800639 int i, j;
Yushin Choe30a47c2017-08-15 13:08:30 -0700640 const MACROBLOCKD *xd = &x->e_mbd;
Yushin Chob7b60c52017-07-14 16:18:52 -0700641
642 DECLARE_ALIGNED(16, uint16_t, orig[MAX_TX_SQUARE]);
643 DECLARE_ALIGNED(16, uint16_t, rec[MAX_TX_SQUARE]);
Yushin Chob7b60c52017-07-14 16:18:52 -0700644
Yushin Choee810272017-09-13 17:30:25 -0700645 assert(bsw >= 8);
646 assert(bsh >= 8);
647 assert((bsw & 0x07) == 0);
648 assert((bsh & 0x07) == 0);
649
Yushin Choe30a47c2017-08-15 13:08:30 -0700650 if (x->tune_metric == AOM_TUNE_CDEF_DIST ||
651 x->tune_metric == AOM_TUNE_DAALA_DIST) {
Yushin Choe30a47c2017-08-15 13:08:30 -0700652 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
Yushin Cho75b01002017-06-21 13:43:57 -0700653 for (j = 0; j < bsh; j++)
Yushin Cho8ab875d2017-06-23 14:47:21 -0700654 for (i = 0; i < bsw; i++)
Yushin Choe30a47c2017-08-15 13:08:30 -0700655 orig[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
Yushin Cho75b01002017-06-21 13:43:57 -0700656
Yushin Choe30a47c2017-08-15 13:08:30 -0700657 if ((bsw == visible_w) && (bsh == visible_h)) {
Yushin Cho8ab875d2017-06-23 14:47:21 -0700658 for (j = 0; j < bsh; j++)
Yushin Cho8ab875d2017-06-23 14:47:21 -0700659 for (i = 0; i < bsw; i++)
Yushin Choe30a47c2017-08-15 13:08:30 -0700660 rec[j * bsw + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i];
661 } else {
662 for (j = 0; j < visible_h; j++)
663 for (i = 0; i < visible_w; i++)
664 rec[j * bsw + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i];
Yushin Cho8ab875d2017-06-23 14:47:21 -0700665
Yushin Choe30a47c2017-08-15 13:08:30 -0700666 if (visible_w < bsw) {
667 for (j = 0; j < bsh; j++)
668 for (i = visible_w; i < bsw; i++)
669 rec[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
670 }
671
672 if (visible_h < bsh) {
673 for (j = visible_h; j < bsh; j++)
674 for (i = 0; i < bsw; i++)
675 rec[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
676 }
677 }
Yushin Cho8ab875d2017-06-23 14:47:21 -0700678 } else {
Yushin Choe30a47c2017-08-15 13:08:30 -0700679 for (j = 0; j < bsh; j++)
680 for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i];
Yushin Cho8ab875d2017-06-23 14:47:21 -0700681
Yushin Choe30a47c2017-08-15 13:08:30 -0700682 if ((bsw == visible_w) && (bsh == visible_h)) {
Yushin Cho8ab875d2017-06-23 14:47:21 -0700683 for (j = 0; j < bsh; j++)
Yushin Choe30a47c2017-08-15 13:08:30 -0700684 for (i = 0; i < bsw; i++) rec[j * bsw + i] = dst[j * dst_stride + i];
685 } else {
686 for (j = 0; j < visible_h; j++)
687 for (i = 0; i < visible_w; i++)
688 rec[j * bsw + i] = dst[j * dst_stride + i];
Yushin Cho8ab875d2017-06-23 14:47:21 -0700689
Yushin Choe30a47c2017-08-15 13:08:30 -0700690 if (visible_w < bsw) {
691 for (j = 0; j < bsh; j++)
692 for (i = visible_w; i < bsw; i++)
693 rec[j * bsw + i] = src[j * src_stride + i];
694 }
695
696 if (visible_h < bsh) {
697 for (j = visible_h; j < bsh; j++)
698 for (i = 0; i < bsw; i++)
699 rec[j * bsw + i] = src[j * src_stride + i];
700 }
Yushin Cho8ab875d2017-06-23 14:47:21 -0700701 }
Yushin Choe30a47c2017-08-15 13:08:30 -0700702 }
Yushin Choe30a47c2017-08-15 13:08:30 -0700703 }
Yushin Cho8ab875d2017-06-23 14:47:21 -0700704
Yushin Choe30a47c2017-08-15 13:08:30 -0700705 if (x->tune_metric == AOM_TUNE_DAALA_DIST) {
706 d = (int64_t)od_compute_dist(orig, rec, bsw, bsh, qindex);
707 } else if (x->tune_metric == AOM_TUNE_CDEF_DIST) {
Yushin Choc49177e2017-07-18 17:18:09 -0700708 int coeff_shift = AOMMAX(xd->bd - 8, 0);
709
710 for (i = 0; i < bsh; i += 8) {
711 for (j = 0; j < bsw; j += 8) {
712 d += cdef_dist_8x8_16bit(&rec[i * bsw + j], bsw, &orig[i * bsw + j],
713 bsw, coeff_shift);
714 }
715 }
Yushin Choc49177e2017-07-18 17:18:09 -0700716 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
717 d = ((uint64_t)d) >> 2 * coeff_shift;
Yushin Choe30a47c2017-08-15 13:08:30 -0700718 } else {
719 // Otherwise, MSE by default
Yushin Cho2f025aa2017-09-28 17:39:21 -0700720 d = pixel_dist_visible_only(cpi, x, src, src_stride, dst, dst_stride,
721 tx_bsize, bsh, bsw, visible_h, visible_w);
Yushin Choc49177e2017-07-18 17:18:09 -0700722 }
Yushin Chob7b60c52017-07-14 16:18:52 -0700723
Yushin Cho7a428ba2017-01-12 16:28:49 -0800724 return d;
725}
Yushin Cho75b01002017-06-21 13:43:57 -0700726
Cheng Chenf9cf5b62018-01-12 16:05:15 -0800727static int64_t dist_8x8_diff(const MACROBLOCK *x, const uint8_t *src,
728 int src_stride, const int16_t *diff,
729 int diff_stride, int bsw, int bsh, int visible_w,
730 int visible_h, int qindex) {
Yushin Chob7b60c52017-07-14 16:18:52 -0700731 int64_t d = 0;
Yushin Cho75b01002017-06-21 13:43:57 -0700732 int i, j;
Yushin Choe30a47c2017-08-15 13:08:30 -0700733 const MACROBLOCKD *xd = &x->e_mbd;
Yushin Chob7b60c52017-07-14 16:18:52 -0700734
735 DECLARE_ALIGNED(16, uint16_t, orig[MAX_TX_SQUARE]);
736 DECLARE_ALIGNED(16, int16_t, diff16[MAX_TX_SQUARE]);
Yushin Chob7b60c52017-07-14 16:18:52 -0700737
Yushin Choee810272017-09-13 17:30:25 -0700738 assert(bsw >= 8);
739 assert(bsh >= 8);
740 assert((bsw & 0x07) == 0);
741 assert((bsh & 0x07) == 0);
742
Yushin Choe30a47c2017-08-15 13:08:30 -0700743 if (x->tune_metric == AOM_TUNE_CDEF_DIST ||
744 x->tune_metric == AOM_TUNE_DAALA_DIST) {
Yushin Choe30a47c2017-08-15 13:08:30 -0700745 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
746 for (j = 0; j < bsh; j++)
747 for (i = 0; i < bsw; i++)
748 orig[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
749 } else {
Yushin Choe30a47c2017-08-15 13:08:30 -0700750 for (j = 0; j < bsh; j++)
751 for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i];
Yushin Choe30a47c2017-08-15 13:08:30 -0700752 }
Yushin Cho75b01002017-06-21 13:43:57 -0700753
Yushin Choe30a47c2017-08-15 13:08:30 -0700754 if ((bsw == visible_w) && (bsh == visible_h)) {
Yushin Cho75b01002017-06-21 13:43:57 -0700755 for (j = 0; j < bsh; j++)
Yushin Choe30a47c2017-08-15 13:08:30 -0700756 for (i = 0; i < bsw; i++)
757 diff16[j * bsw + i] = diff[j * diff_stride + i];
758 } else {
759 for (j = 0; j < visible_h; j++)
760 for (i = 0; i < visible_w; i++)
761 diff16[j * bsw + i] = diff[j * diff_stride + i];
Yushin Cho75b01002017-06-21 13:43:57 -0700762
Yushin Choe30a47c2017-08-15 13:08:30 -0700763 if (visible_w < bsw) {
764 for (j = 0; j < bsh; j++)
765 for (i = visible_w; i < bsw; i++) diff16[j * bsw + i] = 0;
766 }
767
768 if (visible_h < bsh) {
769 for (j = visible_h; j < bsh; j++)
770 for (i = 0; i < bsw; i++) diff16[j * bsw + i] = 0;
771 }
Yushin Cho75b01002017-06-21 13:43:57 -0700772 }
773 }
Yushin Cho8ab875d2017-06-23 14:47:21 -0700774
Yushin Choe30a47c2017-08-15 13:08:30 -0700775 if (x->tune_metric == AOM_TUNE_DAALA_DIST) {
776 d = (int64_t)od_compute_dist_diff(orig, diff16, bsw, bsh, qindex);
777 } else if (x->tune_metric == AOM_TUNE_CDEF_DIST) {
Yushin Choc49177e2017-07-18 17:18:09 -0700778 int coeff_shift = AOMMAX(xd->bd - 8, 0);
779 DECLARE_ALIGNED(16, uint16_t, dst16[MAX_TX_SQUARE]);
780
781 for (i = 0; i < bsh; i++) {
782 for (j = 0; j < bsw; j++) {
783 dst16[i * bsw + j] = orig[i * bsw + j] - diff16[i * bsw + j];
784 }
785 }
786
787 for (i = 0; i < bsh; i += 8) {
788 for (j = 0; j < bsw; j += 8) {
789 d += cdef_dist_8x8_16bit(&dst16[i * bsw + j], bsw, &orig[i * bsw + j],
790 bsw, coeff_shift);
791 }
792 }
793 // Don't scale 'd' for HBD since it will be done by caller side for diff
794 // input
Yushin Choe30a47c2017-08-15 13:08:30 -0700795 } else {
796 // Otherwise, MSE by default
Yushin Cho2f025aa2017-09-28 17:39:21 -0700797 d = aom_sum_squares_2d_i16(diff, diff_stride, visible_w, visible_h);
Yushin Choc49177e2017-07-18 17:18:09 -0700798 }
Yushin Cho75b01002017-06-21 13:43:57 -0700799
800 return d;
801}
Yushin Chob7b60c52017-07-14 16:18:52 -0700802#endif // CONFIG_DIST_8X8
Yushin Cho7a428ba2017-01-12 16:28:49 -0800803
Yaowu Xuf883b422016-08-30 14:01:10 -0700804static void get_energy_distribution_fine(const AV1_COMP *cpi, BLOCK_SIZE bsize,
Alex Converse9f217762017-04-20 15:34:54 -0700805 const uint8_t *src, int src_stride,
806 const uint8_t *dst, int dst_stride,
Yaowu Xuc27fc142016-08-22 16:08:15 -0700807 double *hordist, double *verdist) {
Alex Converse9f217762017-04-20 15:34:54 -0700808 const int bw = block_size_wide[bsize];
809 const int bh = block_size_high[bsize];
Yaowu Xuc27fc142016-08-22 16:08:15 -0700810 unsigned int esq[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
Yaowu Xuc27fc142016-08-22 16:08:15 -0700811
812 const int f_index = bsize - BLOCK_16X16;
813 if (f_index < 0) {
Alex Converse9f217762017-04-20 15:34:54 -0700814 const int w_shift = bw == 8 ? 1 : 2;
815 const int h_shift = bh == 8 ? 1 : 2;
Yaowu Xuc27fc142016-08-22 16:08:15 -0700816 if (cpi->common.use_highbitdepth) {
Alex Converse9f217762017-04-20 15:34:54 -0700817 const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
818 const uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
819 for (int i = 0; i < bh; ++i)
820 for (int j = 0; j < bw; ++j) {
821 const int index = (j >> w_shift) + ((i >> h_shift) << 2);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700822 esq[index] +=
823 (src16[j + i * src_stride] - dst16[j + i * dst_stride]) *
824 (src16[j + i * src_stride] - dst16[j + i * dst_stride]);
825 }
826 } else {
Alex Converse9f217762017-04-20 15:34:54 -0700827 for (int i = 0; i < bh; ++i)
828 for (int j = 0; j < bw; ++j) {
829 const int index = (j >> w_shift) + ((i >> h_shift) << 2);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700830 esq[index] += (src[j + i * src_stride] - dst[j + i * dst_stride]) *
831 (src[j + i * src_stride] - dst[j + i * dst_stride]);
832 }
Yaowu Xuc27fc142016-08-22 16:08:15 -0700833 }
Yaowu Xuc27fc142016-08-22 16:08:15 -0700834 } else {
Alex Converse9f217762017-04-20 15:34:54 -0700835 cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[0]);
836 cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
837 &esq[1]);
838 cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
839 &esq[2]);
840 cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
841 dst_stride, &esq[3]);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700842 src += bh / 4 * src_stride;
843 dst += bh / 4 * dst_stride;
844
Alex Converse9f217762017-04-20 15:34:54 -0700845 cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[4]);
846 cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
847 &esq[5]);
848 cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
849 &esq[6]);
850 cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
851 dst_stride, &esq[7]);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700852 src += bh / 4 * src_stride;
853 dst += bh / 4 * dst_stride;
854
Alex Converse9f217762017-04-20 15:34:54 -0700855 cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[8]);
856 cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
857 &esq[9]);
858 cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
859 &esq[10]);
860 cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
861 dst_stride, &esq[11]);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700862 src += bh / 4 * src_stride;
863 dst += bh / 4 * dst_stride;
864
Alex Converse9f217762017-04-20 15:34:54 -0700865 cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[12]);
866 cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
867 &esq[13]);
868 cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
869 &esq[14]);
870 cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
871 dst_stride, &esq[15]);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700872 }
873
Alex Converse9f217762017-04-20 15:34:54 -0700874 double total = (double)esq[0] + esq[1] + esq[2] + esq[3] + esq[4] + esq[5] +
875 esq[6] + esq[7] + esq[8] + esq[9] + esq[10] + esq[11] +
876 esq[12] + esq[13] + esq[14] + esq[15];
Yaowu Xuc27fc142016-08-22 16:08:15 -0700877 if (total > 0) {
878 const double e_recip = 1.0 / total;
Alex Converse9f217762017-04-20 15:34:54 -0700879 hordist[0] = ((double)esq[0] + esq[4] + esq[8] + esq[12]) * e_recip;
880 hordist[1] = ((double)esq[1] + esq[5] + esq[9] + esq[13]) * e_recip;
881 hordist[2] = ((double)esq[2] + esq[6] + esq[10] + esq[14]) * e_recip;
882 verdist[0] = ((double)esq[0] + esq[1] + esq[2] + esq[3]) * e_recip;
883 verdist[1] = ((double)esq[4] + esq[5] + esq[6] + esq[7]) * e_recip;
884 verdist[2] = ((double)esq[8] + esq[9] + esq[10] + esq[11]) * e_recip;
Yaowu Xuc27fc142016-08-22 16:08:15 -0700885 } else {
886 hordist[0] = verdist[0] = 0.25;
887 hordist[1] = verdist[1] = 0.25;
888 hordist[2] = verdist[2] = 0.25;
889 }
Yaowu Xuc27fc142016-08-22 16:08:15 -0700890}
891
Urvang Joshi0d1e4ff2017-04-27 16:17:25 -0700892static int adst_vs_flipadst(const AV1_COMP *cpi, BLOCK_SIZE bsize,
893 const uint8_t *src, int src_stride,
894 const uint8_t *dst, int dst_stride) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700895 int prune_bitmask = 0;
896 double svm_proj_h = 0, svm_proj_v = 0;
Alex Converse89912f92017-04-21 13:28:50 -0700897 double hdist[3] = { 0, 0, 0 }, vdist[3] = { 0, 0, 0 };
Yaowu Xuc27fc142016-08-22 16:08:15 -0700898 get_energy_distribution_fine(cpi, bsize, src, src_stride, dst, dst_stride,
899 hdist, vdist);
900
901 svm_proj_v = vdist[0] * ADST_FLIP_SVM[0] + vdist[1] * ADST_FLIP_SVM[1] +
902 vdist[2] * ADST_FLIP_SVM[2] + ADST_FLIP_SVM[3];
903 svm_proj_h = hdist[0] * ADST_FLIP_SVM[4] + hdist[1] * ADST_FLIP_SVM[5] +
904 hdist[2] * ADST_FLIP_SVM[6] + ADST_FLIP_SVM[7];
905 if (svm_proj_v > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
906 prune_bitmask |= 1 << FLIPADST_1D;
907 else if (svm_proj_v < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
908 prune_bitmask |= 1 << ADST_1D;
909
910 if (svm_proj_h > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
911 prune_bitmask |= 1 << (FLIPADST_1D + 8);
912 else if (svm_proj_h < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
913 prune_bitmask |= 1 << (ADST_1D + 8);
914
915 return prune_bitmask;
916}
917
Alex Converse89912f92017-04-21 13:28:50 -0700918static void get_horver_correlation(const int16_t *diff, int stride, int w,
919 int h, double *hcorr, double *vcorr) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700920 // Returns hor/ver correlation coefficient
921 const int num = (h - 1) * (w - 1);
922 double num_r;
923 int i, j;
924 int64_t xy_sum = 0, xz_sum = 0;
925 int64_t x_sum = 0, y_sum = 0, z_sum = 0;
926 int64_t x2_sum = 0, y2_sum = 0, z2_sum = 0;
927 double x_var_n, y_var_n, z_var_n, xy_var_n, xz_var_n;
928 *hcorr = *vcorr = 1;
929
930 assert(num > 0);
931 num_r = 1.0 / num;
932 for (i = 1; i < h; ++i) {
933 for (j = 1; j < w; ++j) {
934 const int16_t x = diff[i * stride + j];
935 const int16_t y = diff[i * stride + j - 1];
936 const int16_t z = diff[(i - 1) * stride + j];
937 xy_sum += x * y;
938 xz_sum += x * z;
939 x_sum += x;
940 y_sum += y;
941 z_sum += z;
942 x2_sum += x * x;
943 y2_sum += y * y;
944 z2_sum += z * z;
945 }
946 }
947 x_var_n = x2_sum - (x_sum * x_sum) * num_r;
948 y_var_n = y2_sum - (y_sum * y_sum) * num_r;
949 z_var_n = z2_sum - (z_sum * z_sum) * num_r;
950 xy_var_n = xy_sum - (x_sum * y_sum) * num_r;
951 xz_var_n = xz_sum - (x_sum * z_sum) * num_r;
952 if (x_var_n > 0 && y_var_n > 0) {
953 *hcorr = xy_var_n / sqrt(x_var_n * y_var_n);
954 *hcorr = *hcorr < 0 ? 0 : *hcorr;
955 }
956 if (x_var_n > 0 && z_var_n > 0) {
957 *vcorr = xz_var_n / sqrt(x_var_n * z_var_n);
958 *vcorr = *vcorr < 0 ? 0 : *vcorr;
959 }
960}
961
Cheng Chenf9cf5b62018-01-12 16:05:15 -0800962static int dct_vs_idtx(const int16_t *diff, int stride, int w, int h) {
Alex Converse89912f92017-04-21 13:28:50 -0700963 double hcorr, vcorr;
Yaowu Xuc27fc142016-08-22 16:08:15 -0700964 int prune_bitmask = 0;
Alex Converse89912f92017-04-21 13:28:50 -0700965 get_horver_correlation(diff, stride, w, h, &hcorr, &vcorr);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700966
Alex Converse89912f92017-04-21 13:28:50 -0700967 if (vcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
Yaowu Xuc27fc142016-08-22 16:08:15 -0700968 prune_bitmask |= 1 << IDTX_1D;
Alex Converse89912f92017-04-21 13:28:50 -0700969 else if (vcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
Yaowu Xuc27fc142016-08-22 16:08:15 -0700970 prune_bitmask |= 1 << DCT_1D;
971
Alex Converse89912f92017-04-21 13:28:50 -0700972 if (hcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
Yaowu Xuc27fc142016-08-22 16:08:15 -0700973 prune_bitmask |= 1 << (IDTX_1D + 8);
Alex Converse89912f92017-04-21 13:28:50 -0700974 else if (hcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
Yaowu Xuc27fc142016-08-22 16:08:15 -0700975 prune_bitmask |= 1 << (DCT_1D + 8);
976 return prune_bitmask;
977}
978
979// Performance drop: 0.5%, Speed improvement: 24%
Yaowu Xuf883b422016-08-30 14:01:10 -0700980static int prune_two_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
Alex Converse89912f92017-04-21 13:28:50 -0700981 MACROBLOCK *x, const MACROBLOCKD *xd,
982 int adst_flipadst, int dct_idtx) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700983 int prune = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -0700984
Alex Converse89912f92017-04-21 13:28:50 -0700985 if (adst_flipadst) {
986 const struct macroblock_plane *const p = &x->plane[0];
987 const struct macroblockd_plane *const pd = &xd->plane[0];
Yaowu Xuc27fc142016-08-22 16:08:15 -0700988 prune |= adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride,
Alex Converse89912f92017-04-21 13:28:50 -0700989 pd->dst.buf, pd->dst.stride);
990 }
991 if (dct_idtx) {
992 av1_subtract_plane(x, bsize, 0);
993 const struct macroblock_plane *const p = &x->plane[0];
994 const int bw = 4 << (b_width_log2_lookup[bsize]);
995 const int bh = 4 << (b_height_log2_lookup[bsize]);
996 prune |= dct_vs_idtx(p->src_diff, bw, bw, bh);
997 }
Yaowu Xuc27fc142016-08-22 16:08:15 -0700998
999 return prune;
1000}
Yaowu Xuc27fc142016-08-22 16:08:15 -07001001
1002// Performance drop: 0.3%, Speed improvement: 5%
Yaowu Xuf883b422016-08-30 14:01:10 -07001003static int prune_one_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
Alex Converse89912f92017-04-21 13:28:50 -07001004 const MACROBLOCK *x, const MACROBLOCKD *xd) {
1005 const struct macroblock_plane *const p = &x->plane[0];
1006 const struct macroblockd_plane *const pd = &xd->plane[0];
Yaowu Xuc27fc142016-08-22 16:08:15 -07001007 return adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride, pd->dst.buf,
Alex Converse89912f92017-04-21 13:28:50 -07001008 pd->dst.stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001009}
1010
Hui Su032ab8b2017-09-19 14:53:40 -07001011// 1D Transforms used in inter set, this needs to be changed if
1012// ext_tx_used_inter is changed
1013static const int ext_tx_used_inter_1D[EXT_TX_SETS_INTER][TX_TYPES_1D] = {
1014 { 1, 0, 0, 0 }, { 1, 1, 1, 1 }, { 1, 1, 1, 1 }, { 1, 0, 0, 1 },
Hui Su032ab8b2017-09-19 14:53:40 -07001015};
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001016
// Computes normalized 1D projections (column sums into hordist[], row
// sums into verdist[]) of the residual energy, measured on a grid of
// cells downscaled by at most 2x in each direction.  Only the first
// esq_w - 1 / esq_h - 1 entries are written; the last is implied since
// each projection sums to one.
static void get_energy_distribution_finer(const int16_t *diff, int stride,
                                          int bw, int bh, float *hordist,
                                          float *verdist) {
  unsigned int esq[256];
  // Cells are single pixels up to 8 wide/high, 2x2 pixels beyond that.
  const int w_shift = bw <= 8 ? 0 : 1;
  const int h_shift = bh <= 8 ? 0 : 1;
  const int esq_w = bw <= 8 ? bw : bw / 2;
  const int esq_h = bh <= 8 ? bh : bh / 2;
  const int esq_sz = esq_w * esq_h;
  memset(esq, 0, esq_sz * sizeof(esq[0]));
  for (int r = 0; r < bh; r++) {
    unsigned int *esq_row = esq + (r >> h_shift) * esq_w;
    const int16_t *diff_row = diff + r * stride;
    for (int c = 0; c < bw; c++)
      esq_row[c >> w_shift] += diff_row[c] * diff_row[c];
  }

  uint64_t total = 0;
  for (int k = 0; k < esq_sz; k++) total += esq[k];

  // With no energy at all there is nothing to normalize; report a flat
  // distribution instead.
  if (total == 0) {
    const float hor_val = 1.0f / esq_w;
    for (int c = 0; c < esq_w - 1; c++) hordist[c] = hor_val;
    const float ver_val = 1.0f / esq_h;
    for (int r = 0; r < esq_h - 1; r++) verdist[r] = ver_val;
    return;
  }

  // Single pass over the 2D energy map; the final column/row only feeds
  // the opposite projection.
  memset(hordist, 0, (esq_w - 1) * sizeof(hordist[0]));
  memset(verdist, 0, (esq_h - 1) * sizeof(verdist[0]));
  for (int r = 0; r < esq_h; r++) {
    const unsigned int *esq_row = esq + r * esq_w;
    for (int c = 0; c < esq_w; c++) {
      if (c < esq_w - 1) hordist[c] += (float)esq_row[c];
      if (r < esq_h - 1) verdist[r] += (float)esq_row[c];
    }
  }

  const float e_recip = 1.0f / (float)total;
  for (int c = 0; c < esq_w - 1; c++) hordist[c] *= e_recip;
  for (int r = 0; r < esq_h - 1; r++) verdist[r] *= e_recip;
}
1068
Alexander Bokov79a37242017-09-29 11:25:55 -07001069// Instead of 1D projections of the block energy distribution computed by
1070// get_energy_distribution_finer() this function computes a full
1071// two-dimensional energy distribution of the input block.
1072static void get_2D_energy_distribution(const int16_t *diff, int stride, int bw,
1073 int bh, float *edist) {
1074 unsigned int esq[256] = { 0 };
1075 const int esq_w = bw >> 2;
1076 const int esq_h = bh >> 2;
1077 const int esq_sz = esq_w * esq_h;
1078 uint64_t total = 0;
1079 for (int i = 0; i < bh; i += 4) {
1080 for (int j = 0; j < bw; j += 4) {
1081 unsigned int cur_sum_energy = 0;
1082 for (int k = 0; k < 4; k++) {
1083 const int16_t *cur_diff = diff + (i + k) * stride + j;
1084 cur_sum_energy += cur_diff[0] * cur_diff[0] +
1085 cur_diff[1] * cur_diff[1] +
1086 cur_diff[2] * cur_diff[2] + cur_diff[3] * cur_diff[3];
1087 }
1088 esq[(i >> 2) * esq_w + (j >> 2)] = cur_sum_energy;
1089 total += cur_sum_energy;
1090 }
1091 }
1092
1093 const float e_recip = 1.0f / (float)total;
1094 for (int i = 0; i < esq_sz - 1; i++) edist[i] = esq[i] * e_recip;
1095}
1096
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001097// Similar to get_horver_correlation, but also takes into account first
1098// row/column, when computing horizontal/vertical correlation.
1099static void get_horver_correlation_full(const int16_t *diff, int stride, int w,
1100 int h, float *hcorr, float *vcorr) {
Yaowu Xu29373ee2017-10-19 15:50:34 -07001101 const float num_hor = (float)(h * (w - 1));
1102 const float num_ver = (float)((h - 1) * w);
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001103 int i, j;
1104
1105 // The following notation is used:
1106 // x - current pixel
1107 // y - left neighbor pixel
1108 // z - top neighbor pixel
1109 int64_t xy_sum = 0, xz_sum = 0;
1110 int64_t xhor_sum = 0, xver_sum = 0, y_sum = 0, z_sum = 0;
1111 int64_t x2hor_sum = 0, x2ver_sum = 0, y2_sum = 0, z2_sum = 0;
1112
1113 int16_t x, y, z;
1114 for (j = 1; j < w; ++j) {
1115 x = diff[j];
1116 y = diff[j - 1];
1117 xy_sum += x * y;
1118 xhor_sum += x;
1119 y_sum += y;
1120 x2hor_sum += x * x;
1121 y2_sum += y * y;
1122 }
1123 for (i = 1; i < h; ++i) {
1124 x = diff[i * stride];
1125 z = diff[(i - 1) * stride];
1126 xz_sum += x * z;
1127 xver_sum += x;
1128 z_sum += z;
1129 x2ver_sum += x * x;
1130 z2_sum += z * z;
1131 for (j = 1; j < w; ++j) {
1132 x = diff[i * stride + j];
1133 y = diff[i * stride + j - 1];
1134 z = diff[(i - 1) * stride + j];
1135 xy_sum += x * y;
1136 xz_sum += x * z;
1137 xhor_sum += x;
1138 xver_sum += x;
1139 y_sum += y;
1140 z_sum += z;
1141 x2hor_sum += x * x;
1142 x2ver_sum += x * x;
1143 y2_sum += y * y;
1144 z2_sum += z * z;
1145 }
1146 }
1147 const float xhor_var_n = x2hor_sum - (xhor_sum * xhor_sum) / num_hor;
1148 const float y_var_n = y2_sum - (y_sum * y_sum) / num_hor;
1149 const float xy_var_n = xy_sum - (xhor_sum * y_sum) / num_hor;
1150 const float xver_var_n = x2ver_sum - (xver_sum * xver_sum) / num_ver;
1151 const float z_var_n = z2_sum - (z_sum * z_sum) / num_ver;
1152 const float xz_var_n = xz_sum - (xver_sum * z_sum) / num_ver;
1153
1154 *hcorr = *vcorr = 1;
1155 if (xhor_var_n > 0 && y_var_n > 0) {
1156 *hcorr = xy_var_n / sqrtf(xhor_var_n * y_var_n);
1157 *hcorr = *hcorr < 0 ? 0 : *hcorr;
1158 }
1159 if (xver_var_n > 0 && z_var_n > 0) {
1160 *vcorr = xz_var_n / sqrtf(xver_var_n * z_var_n);
1161 *vcorr = *vcorr < 0 ? 0 : *vcorr;
1162 }
1163}
1164
// Performs a forward pass through a neural network with 2 fully-connected
// layers, assuming ReLU as activation function. Number of output neurons
// is always equal to 4.
// fc1, fc2 - weight matrices of the respective layers.
// b1, b2 - bias vectors of the respective layers.
static void compute_1D_scores(float *features, int num_features,
                              const float *fc1, const float *b1,
                              const float *fc2, const float *b2,
                              int num_hidden_units, float *dst_scores) {
  assert(num_hidden_units <= 32);
  float hidden_layer[32];
  // Hidden layer: dot product, bias, then ReLU.
  for (int unit = 0; unit < num_hidden_units; unit++) {
    const float *weights = fc1 + unit * num_features;
    float acc = 0.0f;
    for (int k = 0; k < num_features; k++) acc += weights[k] * features[k];
    const float pre_act = acc + b1[unit];
    hidden_layer[unit] = pre_act > 0.0f ? pre_act : 0.0f;
  }
  // Output layer: linear activation, fixed at 4 neurons.
  for (int out = 0; out < 4; out++) {
    const float *weights = fc2 + out * num_hidden_units;
    float acc = 0.0f;
    for (int k = 0; k < num_hidden_units; k++)
      acc += weights[k] * hidden_layer[k];
    dst_scores[out] = acc + b2[out];
  }
}
1191
// Transforms raw scores into a probability distribution across 16 TX types
// by raising each shifted, non-negative score to the 8th power and
// normalizing so the entries sum to one.
static void score_2D_transform_pow8(float *scores_2D, float shift) {
  float norm = 0.0f;

  for (int k = 0; k < 16; k++) {
    const float shifted = scores_2D[k] + shift;
    const float clamped = shifted > 0.0f ? shifted : 0.0f;
    const float pow2 = clamped * clamped;
    const float pow4 = pow2 * pow2;
    scores_2D[k] = pow4 * pow4;
    norm += scores_2D[k];
  }
  for (int k = 0; k < 16; k++) scores_2D[k] /= norm;
}
1207
Alexander Bokov79a37242017-09-29 11:25:55 -07001208// Similarly to compute_1D_scores() performs a forward pass through a
1209// neural network with two fully-connected layers. The only difference
1210// is that it assumes 1 output neuron, as required by the classifier used
1211// for TX size pruning.
1212static float compute_tx_split_prune_score(float *features, int num_features,
1213 const float *fc1, const float *b1,
1214 const float *fc2, float b2,
1215 int num_hidden_units) {
1216 assert(num_hidden_units <= 64);
1217 float hidden_layer[64];
1218 for (int i = 0; i < num_hidden_units; i++) {
1219 const float *cur_coef = fc1 + i * num_features;
1220 hidden_layer[i] = 0.0f;
1221 for (int j = 0; j < num_features; j++)
1222 hidden_layer[i] += cur_coef[j] * features[j];
1223 hidden_layer[i] = AOMMAX(hidden_layer[i] + b1[i], 0.0f);
1224 }
1225 float dst_score = 0.0f;
1226 for (int j = 0; j < num_hidden_units; j++)
1227 dst_score += fc2[j] * hidden_layer[j];
1228 dst_score += b2;
1229 return dst_score;
1230}
1231
1232static int prune_tx_split(BLOCK_SIZE bsize, const int16_t *diff, float hcorr,
1233 float vcorr) {
1234 if (bsize <= BLOCK_4X4 || bsize > BLOCK_16X16) return 0;
1235
1236 float features[17];
1237 const int bw = block_size_wide[bsize], bh = block_size_high[bsize];
1238 const int feature_num = (bw / 4) * (bh / 4) + 1;
1239 assert(feature_num <= 17);
1240
1241 get_2D_energy_distribution(diff, bw, bw, bh, features);
1242 features[feature_num - 2] = hcorr;
1243 features[feature_num - 1] = vcorr;
1244
1245 const int bidx = bsize - BLOCK_4X4 - 1;
1246 const float *fc1 = av1_prune_tx_split_learned_weights[bidx];
1247 const float *b1 =
1248 fc1 + av1_prune_tx_split_num_hidden_units[bidx] * feature_num;
1249 const float *fc2 = b1 + av1_prune_tx_split_num_hidden_units[bidx];
1250 float b2 = *(fc2 + av1_prune_tx_split_num_hidden_units[bidx]);
1251 float score =
1252 compute_tx_split_prune_score(features, feature_num, fc1, b1, fc2, b2,
1253 av1_prune_tx_split_num_hidden_units[bidx]);
1254
1255 return (score > av1_prune_tx_split_thresholds[bidx]);
1256}
1257
// Neural-net based pruning of 2D transform types for blocks smaller than
// 32X32.  Scores each (vertical, horizontal) 1D transform pair and returns
// a bitmask of TX_TYPEs to skip; additionally, bit TX_TYPES of the result
// carries the TX-size-split pruning flag when use_tx_split_prune is set.
static int prune_tx_2D(BLOCK_SIZE bsize, const MACROBLOCK *x, int tx_set_type,
                       int tx_type_pruning_aggressiveness,
                       int use_tx_split_prune) {
  if (bsize >= BLOCK_32X32) return 0;
  aom_clear_system_state();
  const struct macroblock_plane *const p = &x->plane[0];
  const int bidx = AOMMAX(bsize - BLOCK_4X4, 0);
  // Pruning threshold picked per block size and aggressiveness level.
  const float score_thresh =
      av1_prune_2D_adaptive_thresholds[bidx]
                                      [tx_type_pruning_aggressiveness - 1];
  float hfeatures[16], vfeatures[16];
  float hscores[4], vscores[4];
  float scores_2D[16];
  // Maps the 4x4 grid of (vertical score, horizontal score) pairs to the
  // corresponding 2D TX type.
  int tx_type_table_2D[16] = {
    DCT_DCT,      DCT_ADST,      DCT_FLIPADST,      V_DCT,
    ADST_DCT,     ADST_ADST,     ADST_FLIPADST,     V_ADST,
    FLIPADST_DCT, FLIPADST_ADST, FLIPADST_FLIPADST, V_FLIPADST,
    H_DCT,        H_ADST,        H_FLIPADST,        IDTX
  };
  const int bw = block_size_wide[bsize], bh = block_size_high[bsize];
  const int hfeatures_num = bw <= 8 ? bw : bw / 2;
  const int vfeatures_num = bh <= 8 ? bh : bh / 2;
  assert(hfeatures_num <= 16);
  assert(vfeatures_num <= 16);

  // Features are the 1D energy projections of the residual, with the
  // matching correlation coefficient stored as the last entry.
  get_energy_distribution_finer(p->src_diff, bw, bw, bh, hfeatures, vfeatures);
  get_horver_correlation_full(p->src_diff, bw, bw, bh,
                              &hfeatures[hfeatures_num - 1],
                              &vfeatures[vfeatures_num - 1]);

  // Horizontal network: weights are packed per block size as
  // fc1 | b1 | fc2 | b2.
  const float *fc1_hor = av1_prune_2D_learned_weights_hor[bidx];
  const float *b1_hor =
      fc1_hor + av1_prune_2D_num_hidden_units_hor[bidx] * hfeatures_num;
  const float *fc2_hor = b1_hor + av1_prune_2D_num_hidden_units_hor[bidx];
  const float *b2_hor = fc2_hor + av1_prune_2D_num_hidden_units_hor[bidx] * 4;
  compute_1D_scores(hfeatures, hfeatures_num, fc1_hor, b1_hor, fc2_hor, b2_hor,
                    av1_prune_2D_num_hidden_units_hor[bidx], hscores);

  // Vertical network, packed identically.
  const float *fc1_ver = av1_prune_2D_learned_weights_ver[bidx];
  const float *b1_ver =
      fc1_ver + av1_prune_2D_num_hidden_units_ver[bidx] * vfeatures_num;
  const float *fc2_ver = b1_ver + av1_prune_2D_num_hidden_units_ver[bidx];
  const float *b2_ver = fc2_ver + av1_prune_2D_num_hidden_units_ver[bidx] * 4;
  compute_1D_scores(vfeatures, vfeatures_num, fc1_ver, b1_ver, fc2_ver, b2_ver,
                    av1_prune_2D_num_hidden_units_ver[bidx], vscores);

  // 2D score of a TX type is the product of its 1D scores; the average
  // feeds the normalization shift below.
  float score_2D_average = 0.0f;
  for (int i = 0; i < 4; i++) {
    float *cur_scores_2D = scores_2D + i * 4;
    cur_scores_2D[0] = vscores[i] * hscores[0];
    cur_scores_2D[1] = vscores[i] * hscores[1];
    cur_scores_2D[2] = vscores[i] * hscores[2];
    cur_scores_2D[3] = vscores[i] * hscores[3];
    score_2D_average += cur_scores_2D[0] + cur_scores_2D[1] + cur_scores_2D[2] +
                        cur_scores_2D[3];
  }
  score_2D_average /= 16;
  score_2D_transform_pow8(scores_2D, (20 - score_2D_average));

  // Always keep the TX type with the highest score, prune all others with
  // score below score_thresh.
  int max_score_i = 0;
  float max_score = 0.0f;
  for (int i = 0; i < 16; i++) {
    if (scores_2D[i] > max_score &&
        av1_ext_tx_used[tx_set_type][tx_type_table_2D[i]]) {
      max_score = scores_2D[i];
      max_score_i = i;
    }
  }

  int prune_bitmask = 0;
  for (int i = 0; i < 16; i++) {
    if (scores_2D[i] < score_thresh && i != max_score_i)
      prune_bitmask |= (1 << tx_type_table_2D[i]);
  }

  // Also apply TX size pruning if it's turned on. The value
  // of prune_tx_split_flag indicates whether we should do
  // full TX size search (flag=0) or use the largest available
  // TX size without performing any further search (flag=1).
  int prune_tx_split_flag = 0;
  if (use_tx_split_prune) {
    prune_tx_split_flag =
        prune_tx_split(bsize, p->src_diff, hfeatures[hfeatures_num - 1],
                       vfeatures[vfeatures_num - 1]);
  }
  prune_bitmask |= (prune_tx_split_flag << TX_TYPES);
  return prune_bitmask;
}
Hui Su032ab8b2017-09-19 14:53:40 -07001348
Alexander Bokov79a37242017-09-29 11:25:55 -07001349static int prune_tx(const AV1_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x,
1350 const MACROBLOCKD *const xd, int tx_set_type,
1351 int use_tx_split_prune) {
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001352 int tx_set = ext_tx_set_index[1][tx_set_type];
1353 assert(tx_set >= 0);
1354 const int *tx_set_1D = ext_tx_used_inter_1D[tx_set];
Yaowu Xuc27fc142016-08-22 16:08:15 -07001355
1356 switch (cpi->sf.tx_type_search.prune_mode) {
1357 case NO_PRUNE: return 0; break;
1358 case PRUNE_ONE:
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001359 if (!(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D])) return 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001360 return prune_one_for_sby(cpi, bsize, x, xd);
1361 break;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001362 case PRUNE_TWO:
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001363 if (!(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D])) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001364 if (!(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D])) return 0;
1365 return prune_two_for_sby(cpi, bsize, x, xd, 0, 1);
1366 }
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001367 if (!(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D]))
Yaowu Xuc27fc142016-08-22 16:08:15 -07001368 return prune_two_for_sby(cpi, bsize, x, xd, 1, 0);
1369 return prune_two_for_sby(cpi, bsize, x, xd, 1, 1);
1370 break;
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001371 case PRUNE_2D_ACCURATE:
1372 if (tx_set_type == EXT_TX_SET_ALL16)
Alexander Bokov79a37242017-09-29 11:25:55 -07001373 return prune_tx_2D(bsize, x, tx_set_type, 6, use_tx_split_prune);
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001374 else if (tx_set_type == EXT_TX_SET_DTT9_IDTX_1DDCT)
Alexander Bokov79a37242017-09-29 11:25:55 -07001375 return prune_tx_2D(bsize, x, tx_set_type, 4, use_tx_split_prune);
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001376 else
1377 return 0;
1378 break;
1379 case PRUNE_2D_FAST:
1380 if (tx_set_type == EXT_TX_SET_ALL16)
Alexander Bokov79a37242017-09-29 11:25:55 -07001381 return prune_tx_2D(bsize, x, tx_set_type, 10, use_tx_split_prune);
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001382 else if (tx_set_type == EXT_TX_SET_DTT9_IDTX_1DDCT)
Alexander Bokov79a37242017-09-29 11:25:55 -07001383 return prune_tx_2D(bsize, x, tx_set_type, 7, use_tx_split_prune);
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001384 else
1385 return 0;
1386 break;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001387 }
1388 assert(0);
1389 return 0;
1390}
1391
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001392static int do_tx_type_search(TX_TYPE tx_type, int prune,
1393 TX_TYPE_PRUNE_MODE mode) {
Sebastien Alaiwan3bac9922017-11-02 12:34:41 +01001394 // TODO(sarahparker) implement for non ext tx
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001395 if (mode >= PRUNE_2D_ACCURATE) {
1396 return !((prune >> tx_type) & 1);
1397 } else {
1398 return !(((prune >> vtx_tab[tx_type]) & 1) |
1399 ((prune >> (htx_tab[tx_type] + 8)) & 1));
1400 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07001401}
1402
Yaowu Xuf883b422016-08-30 14:01:10 -07001403static void model_rd_from_sse(const AV1_COMP *const cpi,
Yaowu Xuc27fc142016-08-22 16:08:15 -07001404 const MACROBLOCKD *const xd, BLOCK_SIZE bsize,
1405 int plane, int64_t sse, int *rate,
1406 int64_t *dist) {
1407 const struct macroblockd_plane *const pd = &xd->plane[plane];
1408 const int dequant_shift =
Yaowu Xud3e7c682017-12-21 14:08:25 -08001409 (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 : 3;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001410
1411 // Fast approximate the modelling function.
1412 if (cpi->sf.simple_model_rd_from_var) {
1413 const int64_t square_error = sse;
Monty Montgomery125c0fc2017-10-26 00:44:35 -04001414 int quantizer = (pd->dequant_Q3[1] >> dequant_shift);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001415
1416 if (quantizer < 120)
1417 *rate = (int)((square_error * (280 - quantizer)) >>
Yaowu Xuf883b422016-08-30 14:01:10 -07001418 (16 - AV1_PROB_COST_SHIFT));
Yaowu Xuc27fc142016-08-22 16:08:15 -07001419 else
1420 *rate = 0;
1421 *dist = (square_error * quantizer) >> 8;
1422 } else {
Yaowu Xuf883b422016-08-30 14:01:10 -07001423 av1_model_rd_from_var_lapndz(sse, num_pels_log2_lookup[bsize],
Monty Montgomery125c0fc2017-10-26 00:44:35 -04001424 pd->dequant_Q3[1] >> dequant_shift, rate,
1425 dist);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001426 }
1427
1428 *dist <<= 4;
1429}
1430
Yaowu Xuf883b422016-08-30 14:01:10 -07001431static void model_rd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
Yaowu Xuc27fc142016-08-22 16:08:15 -07001432 MACROBLOCK *x, MACROBLOCKD *xd, int plane_from,
1433 int plane_to, int *out_rate_sum,
1434 int64_t *out_dist_sum, int *skip_txfm_sb,
1435 int64_t *skip_sse_sb) {
1436 // Note our transform coeffs are 8 times an orthogonal transform.
1437 // Hence quantizer step is also 8 times. To get effective quantizer
1438 // we need to divide by 8 before sending to modeling function.
1439 int plane;
1440 const int ref = xd->mi[0]->mbmi.ref_frame[0];
1441
1442 int64_t rate_sum = 0;
1443 int64_t dist_sum = 0;
1444 int64_t total_sse = 0;
1445
1446 x->pred_sse[ref] = 0;
1447
1448 for (plane = plane_from; plane <= plane_to; ++plane) {
1449 struct macroblock_plane *const p = &x->plane[plane];
1450 struct macroblockd_plane *const pd = &xd->plane[plane];
Debargha Mukherjee5d149e12017-12-14 12:49:51 -08001451 const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001452 unsigned int sse;
1453 int rate;
1454 int64_t dist;
1455
Jingning Han9ce464c2017-02-20 15:36:30 -08001456 if (x->skip_chroma_rd && plane) continue;
Jingning Han9ce464c2017-02-20 15:36:30 -08001457
Yaowu Xuc27fc142016-08-22 16:08:15 -07001458 // TODO(geza): Write direct sse functions that do not compute
1459 // variance as well.
1460 cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
1461 &sse);
1462
1463 if (plane == 0) x->pred_sse[ref] = sse;
1464
1465 total_sse += sse;
1466
1467 model_rd_from_sse(cpi, xd, bs, plane, sse, &rate, &dist);
1468
1469 rate_sum += rate;
1470 dist_sum += dist;
1471 }
1472
1473 *skip_txfm_sb = total_sse == 0;
1474 *skip_sse_sb = total_sse << 4;
1475 *out_rate_sum = (int)rate_sum;
1476 *out_dist_sum = dist_sum;
1477}
1478
Yaowu Xuf883b422016-08-30 14:01:10 -07001479int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
1480 intptr_t block_size, int64_t *ssz) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001481 int i;
1482 int64_t error = 0, sqcoeff = 0;
1483
1484 for (i = 0; i < block_size; i++) {
1485 const int diff = coeff[i] - dqcoeff[i];
1486 error += diff * diff;
1487 sqcoeff += coeff[i] * coeff[i];
1488 }
1489
1490 *ssz = sqcoeff;
1491 return error;
1492}
1493
// Return the sum of squared differences between two int16 coefficient
// blocks of length block_size.
int64_t av1_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff,
                             int block_size) {
  int64_t error = 0;

  for (int i = 0; i < block_size; i++) {
    // Widen before squaring: |diff| can reach 65535, and 65535^2 overflows
    // 32-bit signed int arithmetic (undefined behavior).
    const int64_t diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
  }

  return error;
}
1506
Yaowu Xuf883b422016-08-30 14:01:10 -07001507int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
1508 const tran_low_t *dqcoeff, intptr_t block_size,
1509 int64_t *ssz, int bd) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001510 int i;
1511 int64_t error = 0, sqcoeff = 0;
1512 int shift = 2 * (bd - 8);
1513 int rounding = shift > 0 ? 1 << (shift - 1) : 0;
1514
1515 for (i = 0; i < block_size; i++) {
1516 const int64_t diff = coeff[i] - dqcoeff[i];
1517 error += diff * diff;
1518 sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
1519 }
1520 assert(error >= 0 && sqcoeff >= 0);
1521 error = (error + rounding) >> shift;
1522 sqcoeff = (sqcoeff + rounding) >> shift;
1523
1524 *ssz = sqcoeff;
1525 return error;
1526}
Yaowu Xuc27fc142016-08-22 16:08:15 -07001527
Alex Converse61f37b82017-03-29 15:26:03 -07001528// Get transform block visible dimensions cropped to the MI units.
1529static void get_txb_dimensions(const MACROBLOCKD *xd, int plane,
1530 BLOCK_SIZE plane_bsize, int blk_row, int blk_col,
1531 BLOCK_SIZE tx_bsize, int *width, int *height,
1532 int *visible_width, int *visible_height) {
1533 assert(tx_bsize <= plane_bsize);
1534 int txb_height = block_size_high[tx_bsize];
1535 int txb_width = block_size_wide[tx_bsize];
1536 const int block_height = block_size_high[plane_bsize];
1537 const int block_width = block_size_wide[plane_bsize];
1538 const struct macroblockd_plane *const pd = &xd->plane[plane];
1539 // TODO(aconverse@google.com): Investigate using crop_width/height here rather
1540 // than the MI size
1541 const int block_rows =
1542 (xd->mb_to_bottom_edge >= 0)
1543 ? block_height
1544 : (xd->mb_to_bottom_edge >> (3 + pd->subsampling_y)) + block_height;
1545 const int block_cols =
1546 (xd->mb_to_right_edge >= 0)
1547 ? block_width
1548 : (xd->mb_to_right_edge >> (3 + pd->subsampling_x)) + block_width;
1549 const int tx_unit_size = tx_size_wide_log2[0];
1550 if (width) *width = txb_width;
1551 if (height) *height = txb_height;
1552 *visible_width = clamp(block_cols - (blk_col << tx_unit_size), 0, txb_width);
1553 *visible_height =
1554 clamp(block_rows - (blk_row << tx_unit_size), 0, txb_height);
1555}
1556
// Compute the pixel-domain distortion between src and dst over all visible
// 4x4 units of the transform block; units cropped away by the frame boundary
// are excluded from the sum.
static unsigned pixel_dist(const AV1_COMP *const cpi, const MACROBLOCK *x,
                           int plane, const uint8_t *src, const int src_stride,
                           const uint8_t *dst, const int dst_stride,
                           int blk_row, int blk_col,
                           const BLOCK_SIZE plane_bsize,
                           const BLOCK_SIZE tx_bsize) {
  int txb_rows, txb_cols, visible_rows, visible_cols;
  const MACROBLOCKD *xd = &x->e_mbd;

  get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize,
                     &txb_cols, &txb_rows, &visible_cols, &visible_rows);
  assert(visible_rows > 0);
  assert(visible_cols > 0);

#if CONFIG_DIST_8X8
  // When dist-8x8 tuning is active, luma blocks of at least 8x8 are scored
  // with the alternative dist-8x8 metric instead of plain SSE.
  if (x->using_dist_8x8 && plane == 0 && txb_cols >= 8 && txb_rows >= 8)
    return (unsigned)av1_dist_8x8(cpi, x, src, src_stride, dst, dst_stride,
                                  tx_bsize, txb_cols, txb_rows, visible_cols,
                                  visible_rows, x->qindex);
#endif  // CONFIG_DIST_8X8

  unsigned sse = pixel_dist_visible_only(cpi, x, src, src_stride, dst,
                                         dst_stride, tx_bsize, txb_rows,
                                         txb_cols, visible_rows, visible_cols);

  return sse;
}
1587
// Compute the pixel-domain distortion from the residual (diff) over all
// visible 4x4 units of the transform block.
static int64_t pixel_diff_dist(const MACROBLOCK *x, int plane,
                               const int16_t *diff, const int diff_stride,
                               int blk_row, int blk_col,
                               const BLOCK_SIZE plane_bsize,
                               const BLOCK_SIZE tx_bsize) {
  int visible_rows, visible_cols;
  const MACROBLOCKD *xd = &x->e_mbd;
#if CONFIG_DIST_8X8
  // The dist-8x8 metric needs the source pixels as well as the residual.
  int txb_height = block_size_high[tx_bsize];
  int txb_width = block_size_wide[tx_bsize];
  const int src_stride = x->plane[plane].src.stride;
  const int src_idx = (blk_row * src_stride + blk_col) << tx_size_wide_log2[0];
  const uint8_t *src = &x->plane[plane].src.buf[src_idx];
#endif

  get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize, NULL,
                     NULL, &visible_cols, &visible_rows);

#if CONFIG_DIST_8X8
  // Luma blocks of at least 8x8 use the dist-8x8 metric when enabled;
  // everything else falls back to the plain sum of squared residuals.
  if (x->using_dist_8x8 && plane == 0 && txb_width >= 8 && txb_height >= 8)
    return dist_8x8_diff(x, src, src_stride, diff, diff_stride, txb_width,
                         txb_height, visible_cols, visible_rows, x->qindex);
  else
#endif
    return aom_sum_squares_2d_i16(diff, diff_stride, visible_cols,
                                  visible_rows);
}
1617
// Count the number of distinct 8-bit pixel values in a rows x cols block and
// fill val_count (size 256) with the block's pixel-value histogram.
int av1_count_colors(const uint8_t *src, int stride, int rows, int cols,
                     int *val_count) {
  const int num_pix_vals = 1 << 8;
  memset(val_count, 0, num_pix_vals * sizeof(*val_count));
  for (int r = 0; r < rows; ++r) {
    const uint8_t *const row = src + r * stride;
    for (int c = 0; c < cols; ++c) {
      assert(row[c] < num_pix_vals);
      ++val_count[row[c]];
    }
  }
  int num_colors = 0;
  for (int i = 0; i < num_pix_vals; ++i) {
    num_colors += (val_count[i] != 0);
  }
  return num_colors;
}
1635
// Count the number of distinct pixel values in a high-bit-depth rows x cols
// block and fill val_count (size 1 << bit_depth) with its histogram. src8 is
// a CONVERT_TO_BYTEPTR-style pointer to 16-bit samples.
int av1_count_colors_highbd(const uint8_t *src8, int stride, int rows, int cols,
                            int bit_depth, int *val_count) {
  assert(bit_depth <= 12);
  const int num_pix_vals = 1 << bit_depth;
  const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  memset(val_count, 0, num_pix_vals * sizeof(*val_count));
  for (int r = 0; r < rows; ++r) {
    const uint16_t *const row = src + r * stride;
    for (int c = 0; c < cols; ++c) {
      assert(row[c] < num_pix_vals);
      ++val_count[row[c]];
    }
  }
  int num_colors = 0;
  for (int i = 0; i < num_pix_vals; ++i) {
    num_colors += (val_count[i] != 0);
  }
  return num_colors;
}
hui sud9a812b2017-07-06 14:34:37 -07001655
Sebastien Alaiwan95137bd2017-11-13 14:47:37 +01001656void av1_inverse_transform_block_facade(MACROBLOCKD *xd, int plane, int block,
Frederic Barbier33b39f02017-11-21 11:11:24 +01001657 int blk_row, int blk_col, int eob,
1658 int reduced_tx_set) {
Sebastien Alaiwan95137bd2017-11-13 14:47:37 +01001659 struct macroblockd_plane *const pd = &xd->plane[plane];
1660 tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
Sebastien Alaiwan95137bd2017-11-13 14:47:37 +01001661 const PLANE_TYPE plane_type = get_plane_type(plane);
1662 const TX_SIZE tx_size = av1_get_tx_size(plane, xd);
1663 const TX_TYPE tx_type =
Luc Trudeau2eb9b842017-12-13 11:19:16 -05001664 av1_get_tx_type(plane_type, xd, blk_row, blk_col, tx_size);
Sebastien Alaiwan95137bd2017-11-13 14:47:37 +01001665 const int dst_stride = pd->dst.stride;
1666 uint8_t *dst =
1667 &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
Sebastien Alaiwan9f001f32017-11-28 16:32:33 +01001668 av1_inverse_transform_block(xd, dqcoeff, plane, tx_type, tx_size, dst,
1669 dst_stride, eob, reduced_tx_set);
Sebastien Alaiwan95137bd2017-11-13 14:47:37 +01001670}
1671
// Compute distortion (*out_dist) and SSE (*out_sse) for one transform block.
// Either measures error in the transform domain (fast, approximate) or in the
// pixel domain, reconstructing pixels via the inverse transform when only
// predicted pixels are available (output_status). Both outputs are scaled to
// match pixel-domain distortion units (see the in-line shift/*16 comments).
void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
                    BLOCK_SIZE plane_bsize, int block, int blk_row, int blk_col,
                    TX_SIZE tx_size, int64_t *out_dist, int64_t *out_sse,
                    OUTPUT_STATUS output_status) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
#if CONFIG_DIST_8X8
  // dist-8x8 writes decoded pixels into pd->pred below, so pd must be
  // non-const in that configuration.
  struct macroblockd_plane *const pd = &xd->plane[plane];
#else   // CONFIG_DIST_8X8
  const struct macroblockd_plane *const pd = &xd->plane[plane];
#endif  // CONFIG_DIST_8X8

  if (cpi->sf.use_transform_domain_distortion
#if CONFIG_TX64X64
      // Any 64-pt transforms only preserves half the coefficients.
      // Therefore transform domain distortion is not valid for these
      // transform sizes.
      && txsize_sqr_up_map[tx_size] != TX_64X64
#endif  // CONFIG_TX64X64
#if CONFIG_DIST_8X8
      && !x->using_dist_8x8
#endif
      ) {
    // Transform domain distortion computation is more efficient as it does
    // not involve an inverse transform, but it is less accurate.
    const int buffer_length = av1_get_max_eob(tx_size);
    int64_t this_sse;
    // TX-domain results need to shift down to Q2/D10 to match pixel
    // domain distortion values which are in Q2^2
    int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2;
    tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
    tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);

    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
      *out_dist = av1_highbd_block_error(coeff, dqcoeff, buffer_length,
                                         &this_sse, xd->bd);
    else
      *out_dist = av1_block_error(coeff, dqcoeff, buffer_length, &this_sse);

    *out_dist = RIGHT_SIGNED_SHIFT(*out_dist, shift);
    *out_sse = RIGHT_SIGNED_SHIFT(this_sse, shift);
  } else {
    // Pixel-domain path.
    const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
    const int bsw = block_size_wide[tx_bsize];
    const int bsh = block_size_high[tx_bsize];
    const int src_stride = x->plane[plane].src.stride;
    const int dst_stride = xd->plane[plane].dst.stride;
    // Scale the transform block index to pixel unit.
    const int src_idx = (blk_row * src_stride + blk_col)
                        << tx_size_wide_log2[0];
    const int dst_idx = (blk_row * dst_stride + blk_col)
                        << tx_size_wide_log2[0];
    const uint8_t *src = &x->plane[plane].src.buf[src_idx];
    const uint8_t *dst = &xd->plane[plane].dst.buf[dst_idx];
    const tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
    const uint16_t eob = p->eobs[block];

    assert(cpi != NULL);
    assert(tx_size_wide_log2[0] == tx_size_high_log2[0]);

    {
      // SSE is computed from the residual (source minus prediction).
      const int diff_stride = block_size_wide[plane_bsize];
      const int diff_idx = (blk_row * diff_stride + blk_col)
                           << tx_size_wide_log2[0];
      const int16_t *diff = &p->src_diff[diff_idx];
      *out_sse = pixel_diff_dist(x, plane, diff, diff_stride, blk_row, blk_col,
                                 plane_bsize, tx_bsize);
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
        *out_sse = ROUND_POWER_OF_TWO(*out_sse, (xd->bd - 8) * 2);
    }
    *out_sse *= 16;

    if (eob) {
      if (output_status == OUTPUT_HAS_DECODED_PIXELS) {
        // Decoded pixels are already in the dst buffer.
        *out_dist = pixel_dist(cpi, x, plane, src, src_stride, dst, dst_stride,
                               blk_row, blk_col, plane_bsize, tx_bsize);
      } else {
        // Only the prediction is in dst: copy it into a scratch buffer and
        // add the inverse-transformed residual to reconstruct the pixels.
        uint8_t *recon;
        DECLARE_ALIGNED(16, uint16_t, recon16[MAX_TX_SQUARE]);

        if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
          recon = CONVERT_TO_BYTEPTR(recon16);
        else
          recon = (uint8_t *)recon16;

        if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
          aom_highbd_convolve_copy(dst, dst_stride, recon, MAX_TX_SIZE, NULL, 0,
                                   NULL, 0, bsw, bsh, xd->bd);
        } else {
          aom_convolve_copy(dst, dst_stride, recon, MAX_TX_SIZE, NULL, 0, NULL,
                            0, bsw, bsh);
        }

        const PLANE_TYPE plane_type = get_plane_type(plane);
        TX_TYPE tx_type =
            av1_get_tx_type(plane_type, xd, blk_row, blk_col, tx_size);
        av1_inverse_transform_block(xd, dqcoeff, plane, tx_type, tx_size, recon,
                                    MAX_TX_SIZE, eob,
                                    cpi->common.reduced_tx_set_used);

#if CONFIG_DIST_8X8
        if (x->using_dist_8x8 && plane == 0 && (bsw < 8 || bsh < 8)) {
          // Save decoded pixels for inter block in pd->pred to avoid
          // block_8x8_rd_txfm_daala_dist() need to produce them
          // by calling av1_inverse_transform_block() again.
          const int pred_stride = block_size_wide[plane_bsize];
          const int pred_idx = (blk_row * pred_stride + blk_col)
                               << tx_size_wide_log2[0];
          int16_t *pred = &pd->pred[pred_idx];
          int i, j;

          if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
            for (j = 0; j < bsh; j++)
              for (i = 0; i < bsw; i++)
                pred[j * pred_stride + i] =
                    CONVERT_TO_SHORTPTR(recon)[j * MAX_TX_SIZE + i];
          } else {
            for (j = 0; j < bsh; j++)
              for (i = 0; i < bsw; i++)
                pred[j * pred_stride + i] = recon[j * MAX_TX_SIZE + i];
          }
        }
#endif  // CONFIG_DIST_8X8
        *out_dist =
            pixel_dist(cpi, x, plane, src, src_stride, recon, MAX_TX_SIZE,
                       blk_row, blk_col, plane_bsize, tx_bsize);
      }
      *out_dist *= 16;
    } else {
      // All coefficients quantized to zero: reconstruction equals the
      // prediction, so distortion is exactly the residual SSE.
      *out_dist = *out_sse;
    }
  }
}
1805
#if CONFIG_TXK_SEL
// Search transform types for one transform block and return the best RD cost.
// On return, mbmi->txk_type (for plane 0), the block's entropy context and
// eob reflect the winning type, and its rate/distortion stats have been
// merged into rd_stats. For intra blocks the winning reconstruction is also
// regenerated so later blocks can predict from it.
static int64_t search_txk_type(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
                               int block, int blk_row, int blk_col,
                               BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
                               const ENTROPY_CONTEXT *a,
                               const ENTROPY_CONTEXT *l,
                               int use_fast_coef_costing, RD_STATS *rd_stats) {
  const AV1_COMMON *cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  int rate_cost = 0;
  const int is_inter = is_inter_block(mbmi);
  TX_TYPE txk_start = DCT_DCT;
  // Under the low-complexity RD model, only DCT_DCT is evaluated.
  TX_TYPE txk_end = x->rd_model == LOW_TXFM_RD ? DCT_DCT : TX_TYPES - 1;
  TX_TYPE best_tx_type = txk_start;
  int64_t best_rd = INT64_MAX;
  uint8_t best_txb_ctx = 0;
  uint16_t best_eob = 0;
  RD_STATS best_rd_stats;
  av1_invalid_rd_stats(&best_rd_stats);
  for (TX_TYPE tx_type = txk_start; tx_type <= txk_end; ++tx_type) {
    // Tentatively store the candidate so av1_get_tx_type() sees it.
    if (plane == 0)
      mbmi->txk_type[(blk_row << MAX_MIB_SIZE_LOG2) + blk_col] = tx_type;
    TX_TYPE ref_tx_type =
        av1_get_tx_type(get_plane_type(plane), xd, blk_row, blk_col, tx_size);
    if (tx_type != ref_tx_type) {
      // use av1_get_tx_type() to check if the tx_type is valid for the current
      // mode if it's not, we skip it here.
      continue;
    }

    RD_STATS this_rd_stats;
    av1_invalid_rd_stats(&this_rd_stats);
    // Forward transform + quantization, optionally with trellis optimization.
    if (cpi->sf.optimize_coefficients != FULL_TRELLIS_OPT) {
      av1_xform_quant(
          cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
          USE_B_QUANT_NO_TRELLIS ? AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP);
    } else {
      av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize,
                      tx_size, AV1_XFORM_QUANT_FP);
      av1_optimize_b(cpi, x, plane, blk_row, blk_col, block, plane_bsize,
                     tx_size, a, l, 1, &rate_cost);
    }
    av1_dist_block(cpi, x, plane, plane_bsize, block, blk_row, blk_col, tx_size,
                   &this_rd_stats.dist, &this_rd_stats.sse,
                   OUTPUT_HAS_PREDICTED_PIXELS);

    const int eob = x->plane[plane].eobs[block];
    const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, mbmi);
    // With nonzero coefficients, add the cost of signaling the tx type on
    // top of the trellis rate; otherwise cost the (all-zero) coefficients.
    if (eob)
      rate_cost +=
          av1_tx_type_cost(cm, x, xd, mbmi->sb_type, plane, tx_size, tx_type);
    else
      rate_cost =
          av1_cost_coeffs(cpi, x, plane, blk_row, blk_col, block, tx_size,
                          scan_order, a, l, use_fast_coef_costing);
    this_rd_stats.rate = rate_cost;

    int64_t rd = RDCOST(x->rdmult, this_rd_stats.rate, this_rd_stats.dist);

    if (rd < best_rd) {
      best_rd = rd;
      best_rd_stats = this_rd_stats;
      best_tx_type = tx_type;
      best_txb_ctx = x->plane[plane].txb_entropy_ctx[block];
      best_eob = x->plane[plane].eobs[block];
    }

    // Skip transform type search when we found the block has been quantized to
    // all zero and at the same time, it has better rdcost than doing transform.
    if (cpi->sf.tx_type_search.skip_tx_search && !best_eob) break;
  }

  av1_merge_rd_stats(rd_stats, &best_rd_stats);

  // An all-zero block is always signaled as DCT_DCT.
  if (best_eob == 0) best_tx_type = DCT_DCT;

  // Commit the winning transform type and its side info.
  if (plane == 0)
    mbmi->txk_type[(blk_row << MAX_MIB_SIZE_LOG2) + blk_col] = best_tx_type;
  x->plane[plane].txb_entropy_ctx[block] = best_txb_ctx;
  x->plane[plane].eobs[block] = best_eob;

  if (!is_inter && best_eob) {
    // intra mode needs decoded result such that the next transform block
    // can use it for prediction.
    if (cpi->sf.optimize_coefficients != FULL_TRELLIS_OPT) {
      av1_xform_quant(
          cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
          USE_B_QUANT_NO_TRELLIS ? AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP);
    } else {
      av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize,
                      tx_size, AV1_XFORM_QUANT_FP);
      av1_optimize_b(cpi, x, plane, blk_row, blk_col, block, plane_bsize,
                     tx_size, a, l, 1, &rate_cost);
    }

    av1_inverse_transform_block_facade(xd, plane, block, blk_row, blk_col,
                                       x->plane[plane].eobs[block],
                                       cm->reduced_tx_set_used);
  }
  return best_rd;
}
#endif  // CONFIG_TXK_SEL
1909
Yaowu Xuc27fc142016-08-22 16:08:15 -07001910static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
1911 BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
1912 struct rdcost_block_args *args = arg;
1913 MACROBLOCK *const x = args->x;
1914 MACROBLOCKD *const xd = &x->e_mbd;
Urvang Joshi0d1e4ff2017-04-27 16:17:25 -07001915 const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
Angie Chiange50f3ec2017-04-10 15:50:33 -07001916 const AV1_COMP *cpi = args->cpi;
Angie Chiang65a39bb2017-04-11 16:50:04 -07001917 ENTROPY_CONTEXT *a = args->t_above + blk_col;
1918 ENTROPY_CONTEXT *l = args->t_left + blk_row;
Angie Chiang18ad8942017-04-11 12:37:07 -07001919 const AV1_COMMON *cm = &cpi->common;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001920 int64_t rd1, rd2, rd;
Angie Chiang7c2b7f22016-11-07 16:00:00 -08001921 RD_STATS this_rd_stats;
Yushin Choc00769a2017-09-14 14:44:30 -07001922
1923#if CONFIG_DIST_8X8
1924 // If sub8x8 tx, 8x8 or larger partition, and luma channel,
1925 // dist-8x8 disables early skip, because the distortion metrics for
1926 // sub8x8 tx (MSE) and reference distortion from 8x8 or larger partition
1927 // (new distortion metric) are different.
1928 // Exception is: dist-8x8 is enabled but still MSE is used,
1929 // i.e. "--tune=" encoder option is not used.
Yushin Chof9970a52017-10-13 12:57:13 -07001930 int bw = block_size_wide[plane_bsize];
1931 int bh = block_size_high[plane_bsize];
Yushin Choc00769a2017-09-14 14:44:30 -07001932 int disable_early_skip =
Luc Trudeau439ba632017-12-21 13:17:01 -05001933 x->using_dist_8x8 && plane == AOM_PLANE_Y && bw >= 8 && bh >= 8 &&
Yushin Choc00769a2017-09-14 14:44:30 -07001934 (tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4) &&
1935 x->tune_metric != AOM_TUNE_PSNR;
Yushin Choa4817a62017-07-27 13:09:43 -07001936#endif // CONFIG_DIST_8X8
Yushin Cho6341f5c2017-03-24 14:36:28 -07001937
Angie Chiang7c2b7f22016-11-07 16:00:00 -08001938 av1_init_rd_stats(&this_rd_stats);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001939
1940 if (args->exit_early) return;
1941
1942 if (!is_inter_block(mbmi)) {
Luc Trudeau2eb9b842017-12-13 11:19:16 -05001943 av1_predict_intra_block_facade(cm, xd, plane, blk_col, blk_row, tx_size);
Angie Chiang62e54cd2017-04-06 10:45:56 -07001944 av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size);
1945 }
1946
Angie Chiangcd9b03f2017-04-16 13:37:13 -07001947#if !CONFIG_TXK_SEL
Cheng Chen82775f62018-01-18 12:09:54 -08001948 const PLANE_TYPE plane_type = get_plane_type(plane);
1949 const TX_TYPE tx_type =
1950 av1_get_tx_type(plane_type, xd, blk_row, blk_col, tx_size);
1951 const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, mbmi);
1952 int rate_cost = 0;
1953
Sarah Parker792c2ec2017-12-21 16:08:22 -08001954 // full forward transform and quantization
Debargha Mukherjee6cf2b462018-01-12 15:53:50 -08001955 if (cpi->sf.optimize_coefficients != FULL_TRELLIS_OPT) {
1956 av1_xform_quant(
1957 cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
1958 USE_B_QUANT_NO_TRELLIS ? AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP);
Cheng Chen82775f62018-01-18 12:09:54 -08001959
1960 rate_cost = av1_cost_coeffs(cpi, x, plane, blk_row, blk_col, block, tx_size,
1961 scan_order, a, l, args->use_fast_coef_costing);
Sarah Parker792c2ec2017-12-21 16:08:22 -08001962 } else {
1963 av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
1964 AV1_XFORM_QUANT_FP);
Jingning Han3bce7542017-07-25 10:53:57 -07001965
Sebastien Alaiwan58596362018-01-26 10:11:35 +01001966 /// TX-domain results need to shift down to Q2/D10 to match pixel
1967 // domain distortion values which are in Q2^2
Sarah Parker792c2ec2017-12-21 16:08:22 -08001968 const int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2;
Sarah Parker792c2ec2017-12-21 16:08:22 -08001969 tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block);
1970 tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
1971 const int buffer_length = av1_get_max_eob(tx_size);
1972 int64_t tmp_dist;
1973 int64_t tmp;
Sarah Parker792c2ec2017-12-21 16:08:22 -08001974 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
1975 tmp_dist =
1976 av1_highbd_block_error(coeff, dqcoeff, buffer_length, &tmp, xd->bd);
1977 else
Sarah Parker792c2ec2017-12-21 16:08:22 -08001978 tmp_dist = av1_block_error(coeff, dqcoeff, buffer_length, &tmp);
Sarah Parker792c2ec2017-12-21 16:08:22 -08001979 tmp_dist = RIGHT_SIGNED_SHIFT(tmp_dist, shift);
Jingning Han3bce7542017-07-25 10:53:57 -07001980
Sarah Parker792c2ec2017-12-21 16:08:22 -08001981 if (
Yushin Choa4817a62017-07-27 13:09:43 -07001982#if CONFIG_DIST_8X8
Sarah Parker792c2ec2017-12-21 16:08:22 -08001983 disable_early_skip ||
Yushin Choa4817a62017-07-27 13:09:43 -07001984#endif
Sarah Parker792c2ec2017-12-21 16:08:22 -08001985 RDCOST(x->rdmult, 0, tmp_dist) + args->this_rd < args->best_rd) {
Michelle Findlay-Olynykfbab0622017-12-13 14:10:56 -08001986 av1_optimize_b(cpi, x, plane, blk_row, blk_col, block, plane_bsize,
Cheng Chen82775f62018-01-18 12:09:54 -08001987 tx_size, a, l, CONFIG_LV_MAP, &rate_cost);
1988
1989 const int eob = x->plane[plane].eobs[block];
1990 if (!eob)
1991 rate_cost =
1992 av1_cost_coeffs(cpi, x, plane, blk_row, blk_col, block, tx_size,
1993 scan_order, a, l, args->use_fast_coef_costing);
Sarah Parker792c2ec2017-12-21 16:08:22 -08001994 } else {
1995 args->exit_early = 1;
1996 return;
1997 }
Jingning Han3bce7542017-07-25 10:53:57 -07001998 }
Angie Chiang62e54cd2017-04-06 10:45:56 -07001999 if (!is_inter_block(mbmi)) {
2000 struct macroblock_plane *const p = &x->plane[plane];
Angie Chiangbc2288c2017-04-09 15:41:17 -07002001 av1_inverse_transform_block_facade(xd, plane, block, blk_row, blk_col,
Frederic Barbier33b39f02017-11-21 11:11:24 +01002002 p->eobs[block], cm->reduced_tx_set_used);
Angie Chiang808d8592017-04-06 18:36:55 -07002003 av1_dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
2004 tx_size, &this_rd_stats.dist, &this_rd_stats.sse,
2005 OUTPUT_HAS_DECODED_PIXELS);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002006 } else {
Angie Chiang808d8592017-04-06 18:36:55 -07002007 av1_dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
2008 tx_size, &this_rd_stats.dist, &this_rd_stats.sse,
2009 OUTPUT_HAS_PREDICTED_PIXELS);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002010 }
Luc Trudeauc7af36d2017-10-11 21:01:00 -04002011 rd = RDCOST(x->rdmult, 0, this_rd_stats.dist);
2012 if (args->this_rd + rd > args->best_rd) {
2013 args->exit_early = 1;
2014 return;
2015 }
Debargha Mukherjeed2cfbef2017-12-03 16:15:27 -08002016
Cheng Chen82775f62018-01-18 12:09:54 -08002017 this_rd_stats.rate = rate_cost;
Angie Chiangcd9b03f2017-04-16 13:37:13 -07002018#else // !CONFIG_TXK_SEL
Hui Su4a5c6cf2018-01-24 17:32:01 -08002019 search_txk_type(cpi, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
2020 a, l, args->use_fast_coef_costing, &this_rd_stats);
Angie Chiangcd9b03f2017-04-16 13:37:13 -07002021#endif // !CONFIG_TXK_SEL
Angie Chiang65a39bb2017-04-11 16:50:04 -07002022
Luc Trudeau439ba632017-12-21 13:17:01 -05002023#if CONFIG_CFL
2024 if (plane == AOM_PLANE_Y && xd->cfl.store_y && is_cfl_allowed(mbmi)) {
2025 assert(!is_inter_block(mbmi) || plane_bsize < BLOCK_8X8);
2026 cfl_store_tx(xd, blk_row, blk_col, tx_size, plane_bsize);
2027 }
2028#endif // CONFIG_CFL
2029
Angie Chiang3963d632016-11-10 18:41:40 -08002030#if CONFIG_RD_DEBUG
Angie Chiange94556b2016-11-09 10:59:30 -08002031 av1_update_txb_coeff_cost(&this_rd_stats, plane, tx_size, blk_row, blk_col,
2032 this_rd_stats.rate);
Fergus Simpson4063a682017-02-28 16:52:22 -08002033#endif // CONFIG_RD_DEBUG
Yushin Cho6341f5c2017-03-24 14:36:28 -07002034 av1_set_txb_context(x, plane, block, tx_size, a, l);
Angie Chiangb3a12b52017-03-23 14:53:10 -07002035
Jingning Hanc5c37032018-01-04 16:43:43 -08002036 if (plane == 0)
2037 x->blk_skip[plane][blk_row * (block_size_wide[plane_bsize] >>
2038 tx_size_wide_log2[0]) +
2039 blk_col] = (x->plane[plane].eobs[block] == 0);
2040
Urvang Joshi70006e42017-06-14 16:08:55 -07002041 rd1 = RDCOST(x->rdmult, this_rd_stats.rate, this_rd_stats.dist);
2042 rd2 = RDCOST(x->rdmult, 0, this_rd_stats.sse);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002043
2044 // TODO(jingning): temporarily enabled only for luma component
Yaowu Xuf883b422016-08-30 14:01:10 -07002045 rd = AOMMIN(rd1, rd2);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002046
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002047 this_rd_stats.skip &= !x->plane[plane].eobs[block];
Yushin Chod0b77ac2017-10-20 17:33:16 -07002048
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002049 av1_merge_rd_stats(&args->rd_stats, &this_rd_stats);
Yushin Cho7a428ba2017-01-12 16:28:49 -08002050
Yaowu Xuc27fc142016-08-22 16:08:15 -07002051 args->this_rd += rd;
2052
Yushin Chob7b60c52017-07-14 16:18:52 -07002053#if CONFIG_DIST_8X8
Yushin Choc00769a2017-09-14 14:44:30 -07002054 if (!disable_early_skip)
Yushin Cho30a2c5f2017-06-07 14:18:54 -07002055#endif
2056 if (args->this_rd > args->best_rd) {
2057 args->exit_early = 1;
2058 return;
2059 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07002060}
2061
Yushin Chob7b60c52017-07-14 16:18:52 -07002062#if CONFIG_DIST_8X8
// Recomputes the whole-block SSE and distortion of `args->rd_stats` using the
// 8x8-based perceptual distortion metric (CONFIG_DIST_8X8), then refreshes the
// cached RD cost and the early-exit decision. Only the luma plane (plane 0)
// is considered. Called after the per-transform-block pass when sub-8x8
// transform sizes were used, so the metric can be evaluated on full 8x8 units.
static void dist_8x8_sub8x8_txfm_rd(const AV1_COMP *const cpi, MACROBLOCK *x,
                                    BLOCK_SIZE bsize,
                                    struct rdcost_block_args *args) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblockd_plane *const pd = &xd->plane[0];
  const struct macroblock_plane *const p = &x->plane[0];
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const int src_stride = p->src.stride;
  const int dst_stride = pd->dst.stride;
  const uint8_t *src = &p->src.buf[0];
  const uint8_t *dst = &pd->dst.buf[0];
  const int16_t *pred = &pd->pred[0];
  int bw = block_size_wide[bsize];
  int bh = block_size_high[bsize];
  int visible_w = bw;
  int visible_h = bh;

  int i, j;
  int64_t rd, rd1, rd2;
  int64_t sse = INT64_MAX, dist = INT64_MAX;
  int qindex = x->qindex;

  // The 8x8 metric requires both dimensions to be multiples of 8.
  assert((bw & 0x07) == 0);
  assert((bh & 0x07) == 0);

  // Clamp the evaluated region to the visible part of the frame.
  get_txb_dimensions(xd, 0, bsize, 0, 0, bsize, &bw, &bh, &visible_w,
                     &visible_h);

  // SSE from the source-minus-prediction residual.
  const int diff_stride = block_size_wide[bsize];
  const int16_t *diff = p->src_diff;
  sse = dist_8x8_diff(x, src, src_stride, diff, diff_stride, bw, bh, visible_w,
                      visible_h, qindex);
  // Normalize the high-bitdepth error back to an 8-bit scale, then scale by
  // 16 (= 4^2) to match the Q2^2 pixel-domain distortion scale used by the
  // per-block path (see the TX-domain shift comment in block_rd_txfm).
  sse = ROUND_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
  sse *= 16;

  if (!is_inter_block(mbmi)) {
    // Intra: reconstructed pixels are already in pd->dst.
    dist = av1_dist_8x8(cpi, x, src, src_stride, dst, dst_stride, bsize, bw, bh,
                        visible_w, visible_h, qindex);
    dist *= 16;
  } else {
    // For inter mode, the decoded pixels are provided in pd->pred,
    // while the predicted pixels are in dst.
    uint8_t *pred8;
    DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]);

    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
      pred8 = CONVERT_TO_BYTEPTR(pred16);
    else
      pred8 = (uint8_t *)pred16;

    // Repack the int16_t prediction into the byte/short layout av1_dist_8x8
    // expects, using a tight stride of bw.
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      for (j = 0; j < bh; j++)
        for (i = 0; i < bw; i++)
          CONVERT_TO_SHORTPTR(pred8)[j * bw + i] = pred[j * bw + i];
    } else {
      for (j = 0; j < bh; j++)
        for (i = 0; i < bw; i++) pred8[j * bw + i] = (uint8_t)pred[j * bw + i];
    }

    dist = av1_dist_8x8(cpi, x, src, src_stride, pred8, bw, bsize, bw, bh,
                        visible_w, visible_h, qindex);
    dist *= 16;
  }

#ifdef DEBUG_DIST_8X8
  // Under PSNR tuning at 8-bit, the 8x8 metric must agree with the values
  // accumulated by the per-transform-block pass.
  if (x->tune_metric == AOM_TUNE_PSNR && xd->bd == 8) {
    assert(args->rd_stats.sse == sse);
    assert(args->rd_stats.dist == dist);
  }
#endif  // DEBUG_DIST_8X8

  // Overwrite the accumulated values with the 8x8-metric versions; the rate
  // term in args->rd_stats is kept as-is.
  args->rd_stats.sse = sse;
  args->rd_stats.dist = dist;

  // RD cost is the cheaper of "code the residual" vs "skip" (sse only).
  rd1 = RDCOST(x->rdmult, args->rd_stats.rate, args->rd_stats.dist);
  rd2 = RDCOST(x->rdmult, 0, args->rd_stats.sse);
  rd = AOMMIN(rd1, rd2);

  args->rd_stats.rdcost = rd;
  args->this_rd = rd;

  if (args->this_rd > args->best_rd) args->exit_early = 1;
}
Yushin Chob7b60c52017-07-14 16:18:52 -07002146#endif // CONFIG_DIST_8X8
Yushin Cho7a428ba2017-01-12 16:28:49 -08002147
Yue Chen4a8ea372017-12-21 16:41:37 -08002148#if CONFIG_FILTER_INTRA
2149static int skip_invalid_tx_size_for_filter_intra(const MB_MODE_INFO *mbmi,
2150 int plane,
2151 RD_STATS *rd_stats) {
2152 if (plane == 0 && !is_inter_block(mbmi) &&
2153 mbmi->filter_intra_mode_info.use_filter_intra &&
2154 !av1_filter_intra_allowed_txsize(mbmi->tx_size)) {
2155 rd_stats->rate = INT_MAX;
2156 rd_stats->dist = INT64_MAX;
2157 rd_stats->skip = 0;
2158 rd_stats->sse = INT64_MAX;
2159 return 1;
2160 } else {
2161 return 0;
2162 }
2163}
2164#endif
2165
// Computes the RD stats for one plane of the current block at a fixed
// transform size by running block_rd_txfm over every transform block in the
// plane. On any early exit (RD exceeding ref_best_rd, or an invalid
// filter-intra tx_size), rd_stats is set to the invalid sentinel values.
static void txfm_rd_in_plane(MACROBLOCK *x, const AV1_COMP *cpi,
                             RD_STATS *rd_stats, int64_t ref_best_rd, int plane,
                             BLOCK_SIZE bsize, TX_SIZE tx_size,
                             int use_fast_coef_casting) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  struct rdcost_block_args args;
  av1_zero(args);
  args.x = x;
  args.cpi = cpi;
  args.best_rd = ref_best_rd;
  args.use_fast_coef_costing = use_fast_coef_casting;
  av1_init_rd_stats(&args.rd_stats);

  // The luma tx_size must be in place before computing entropy contexts and
  // before the per-block callback reads it.
  if (plane == 0) xd->mi[0]->mbmi.tx_size = tx_size;

#if CONFIG_FILTER_INTRA
  if (skip_invalid_tx_size_for_filter_intra(&xd->mi[0]->mbmi, plane,
                                            rd_stats)) {
    return;
  }
#endif

  av1_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);

  av1_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm,
                                         &args);
#if CONFIG_DIST_8X8
  int bw = block_size_wide[bsize];
  int bh = block_size_high[bsize];

  // When the perceptual 8x8 metric is active and sub-8x8 transforms were
  // used on a >=8x8 luma block, recompute distortion on full 8x8 units.
  if (x->using_dist_8x8 && !args.exit_early && plane == 0 && bw >= 8 &&
      bh >= 8 && (tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4))
    dist_8x8_sub8x8_txfm_rd(cpi, x, bsize, &args);
#endif

  if (args.exit_early) {
    av1_invalid_rd_stats(rd_stats);
  } else {
    *rd_stats = args.rd_stats;
  }
}
2208
Yue Chen3dd03e32017-10-17 15:39:52 -07002209static int tx_size_cost(const AV1_COMMON *const cm, const MACROBLOCK *const x,
Urvang Joshiab8840e2017-10-06 16:38:24 -07002210 BLOCK_SIZE bsize, TX_SIZE tx_size) {
Urvang Joshi0d1e4ff2017-04-27 16:17:25 -07002211 const MACROBLOCKD *const xd = &x->e_mbd;
2212 const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
Urvang Joshifeb925f2016-12-05 10:37:29 -08002213
Rupert Swarbrickfcff0b22017-10-05 09:26:04 +01002214 if (cm->tx_mode == TX_MODE_SELECT && block_signals_txsize(mbmi->sb_type)) {
Urvang Joshifeb925f2016-12-05 10:37:29 -08002215 const int is_inter = is_inter_block(mbmi);
Debargha Mukherjeee4e18fc2017-12-06 23:43:24 -08002216 const int32_t tx_size_cat = bsize_to_tx_size_cat(bsize, is_inter);
Debargha Mukherjee0fa057f2017-12-06 17:06:29 -08002217 const int depth = tx_size_to_depth(tx_size, bsize, is_inter);
David Barker84dc6e92018-01-16 15:57:49 +00002218 const int tx_size_ctx = get_tx_size_context(xd, is_inter);
Yue Chenb23d00a2017-07-28 17:01:21 -07002219 int r_tx_size = x->tx_size_cost[tx_size_cat][tx_size_ctx][depth];
Urvang Joshifeb925f2016-12-05 10:37:29 -08002220 return r_tx_size;
2221 } else {
2222 return 0;
2223 }
2224}
2225
Hui Suddbcde22017-09-18 17:22:02 -07002226// TODO(angiebird): use this function whenever it's possible
Yue Chenb23d00a2017-07-28 17:01:21 -07002227int av1_tx_type_cost(const AV1_COMMON *cm, const MACROBLOCK *x,
2228 const MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
2229 TX_SIZE tx_size, TX_TYPE tx_type) {
Angie Chiang05917872017-04-15 12:28:56 -07002230 if (plane > 0) return 0;
2231
Debargha Mukherjee3ebb0d02017-12-14 05:05:18 -08002232 const TX_SIZE square_tx_size = txsize_sqr_map[tx_size];
Jingning Han243b66b2017-06-23 12:11:47 -07002233
Angie Chiang65201562017-04-10 15:23:28 -07002234 const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
2235 const int is_inter = is_inter_block(mbmi);
Angie Chiang65201562017-04-10 15:23:28 -07002236 if (get_ext_tx_types(tx_size, bsize, is_inter, cm->reduced_tx_set_used) > 1 &&
2237 !xd->lossless[xd->mi[0]->mbmi.segment_id]) {
2238 const int ext_tx_set =
2239 get_ext_tx_set(tx_size, bsize, is_inter, cm->reduced_tx_set_used);
2240 if (is_inter) {
2241 if (ext_tx_set > 0)
Rupert Swarbrickffbff572017-12-12 11:27:46 +00002242 return x->inter_tx_type_costs[ext_tx_set][square_tx_size][tx_type];
Angie Chiang65201562017-04-10 15:23:28 -07002243 } else {
Urvang Joshi978152a2018-01-09 12:59:25 -08002244 if (ext_tx_set > 0) {
Yue Chen57b8ff62017-10-10 23:37:31 -07002245#if CONFIG_FILTER_INTRA
2246 PREDICTION_MODE intra_dir;
Yue Chenb0571872017-12-18 18:12:59 -08002247 if (mbmi->filter_intra_mode_info.use_filter_intra)
Yue Chen57b8ff62017-10-10 23:37:31 -07002248 intra_dir = fimode_to_intradir[mbmi->filter_intra_mode_info
Yue Chenb0571872017-12-18 18:12:59 -08002249 .filter_intra_mode];
Yue Chen57b8ff62017-10-10 23:37:31 -07002250 else
2251 intra_dir = mbmi->mode;
Rupert Swarbrickffbff572017-12-12 11:27:46 +00002252 return x->intra_tx_type_costs[ext_tx_set][square_tx_size][intra_dir]
2253 [tx_type];
Yue Chen57b8ff62017-10-10 23:37:31 -07002254#else
Rupert Swarbrickffbff572017-12-12 11:27:46 +00002255 return x->intra_tx_type_costs[ext_tx_set][square_tx_size][mbmi->mode]
2256 [tx_type];
Yue Chen57b8ff62017-10-10 23:37:31 -07002257#endif
2258 }
Angie Chiang65201562017-04-10 15:23:28 -07002259 }
2260 }
Angie Chiang65201562017-04-10 15:23:28 -07002261 return 0;
2262}
Hui Su206d22f2018-01-23 14:05:49 -08002263
// Evaluates the luma RD cost of coding the block with the given (tx_type,
// tx_size) pair. Fills rd_stats with the per-plane stats (plus tx_type and
// tx_size signalling rate where applicable) and returns the combined RD cost,
// or INT64_MAX when the configuration is invalid or was pruned early.
static int64_t txfm_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
                        RD_STATS *rd_stats, int64_t ref_best_rd, BLOCK_SIZE bs,
                        TX_TYPE tx_type, TX_SIZE tx_size) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int64_t rd = INT64_MAX;
  const int skip_ctx = av1_get_skip_context(xd);
  int s0, s1;
  const int is_inter = is_inter_block(mbmi);
  // Non-zero only when the bitstream actually signals tx_size for this block.
  const int tx_select =
      cm->tx_mode == TX_MODE_SELECT && block_signals_txsize(mbmi->sb_type);

  const int r_tx_size = tx_size_cost(cm, x, bs, tx_size);

  assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed_bsize(bs)));

  // Bit costs of coding skip = 0 / skip = 1 in the current context.
  s0 = x->skip_cost[skip_ctx][0];
  s1 = x->skip_cost[skip_ctx][1];

  // Install the candidate transform configuration on the mode info so the
  // per-plane search reads it.
  mbmi->tx_type = tx_type;
  mbmi->tx_size = tx_size;
#if CONFIG_FILTER_INTRA
  if (skip_invalid_tx_size_for_filter_intra(mbmi, AOM_PLANE_Y, rd_stats)) {
    return INT64_MAX;
  }
#endif
  txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, AOM_PLANE_Y, bs, tx_size,
                   cpi->sf.use_fast_coef_costing);
  if (rd_stats->rate == INT_MAX) return INT64_MAX;
#if !CONFIG_TXK_SEL
  // With per-block tx_type search disabled, the tx_type signalling cost is
  // added here once for the whole block.
  rd_stats->rate +=
      av1_tx_type_cost(cm, x, xd, bs, AOM_PLANE_Y, tx_size, tx_type);
#endif

  if (rd_stats->skip) {
    if (is_inter) {
      // Inter skip: no tx_size is signalled for a skipped inter block.
      rd = RDCOST(x->rdmult, s1, rd_stats->sse);
    } else {
      // Intra skip still pays the tx_size signalling cost when selected.
      rd = RDCOST(x->rdmult, s1 + r_tx_size * tx_select, rd_stats->sse);
    }
  } else {
    rd = RDCOST(x->rdmult, rd_stats->rate + s0 + r_tx_size * tx_select,
                rd_stats->dist);
  }

  if (tx_select) rd_stats->rate += r_tx_size;

  // Non-lossless inter blocks may still be cheaper to code as skip.
  if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
      !(rd_stats->skip))
    rd = AOMMIN(rd, RDCOST(x->rdmult, s1, rd_stats->sse));

  return rd;
}
2318
Angie Chiang2d147c12017-04-05 11:23:59 -07002319static int skip_txfm_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs,
Alexander Bokov0c7eb102017-09-07 18:49:00 -07002320 TX_TYPE tx_type, TX_SIZE tx_size, int prune) {
Urvang Joshi0d1e4ff2017-04-27 16:17:25 -07002321 const MACROBLOCKD *const xd = &x->e_mbd;
2322 const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
Angie Chiang2d147c12017-04-05 11:23:59 -07002323 const int is_inter = is_inter_block(mbmi);
Angie Chiang2d147c12017-04-05 11:23:59 -07002324
Angie Chianga4fa1902017-04-05 15:26:09 -07002325 if (mbmi->ref_mv_idx > 0 && tx_type != DCT_DCT) return 1;
Angie Chiang2d147c12017-04-05 11:23:59 -07002326 if (!is_inter && x->use_default_intra_tx_type &&
Luc Trudeau2eb9b842017-12-13 11:19:16 -05002327 tx_type != get_default_tx_type(0, xd, tx_size))
Angie Chiang2d147c12017-04-05 11:23:59 -07002328 return 1;
2329 if (is_inter && x->use_default_inter_tx_type &&
Luc Trudeau2eb9b842017-12-13 11:19:16 -05002330 tx_type != get_default_tx_type(0, xd, tx_size))
Angie Chiang2d147c12017-04-05 11:23:59 -07002331 return 1;
Angie Chiang2d147c12017-04-05 11:23:59 -07002332 const AV1_COMMON *const cm = &cpi->common;
Hui Suddbcde22017-09-18 17:22:02 -07002333 const TxSetType tx_set_type =
2334 get_ext_tx_set_type(tx_size, bs, is_inter, cm->reduced_tx_set_used);
2335 if (!av1_ext_tx_used[tx_set_type][tx_type]) return 1;
Angie Chiang2d147c12017-04-05 11:23:59 -07002336 if (is_inter) {
Angie Chiang2d147c12017-04-05 11:23:59 -07002337 if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE) {
Alexander Bokov0c7eb102017-09-07 18:49:00 -07002338 if (!do_tx_type_search(tx_type, prune, cpi->sf.tx_type_search.prune_mode))
2339 return 1;
Angie Chiang2d147c12017-04-05 11:23:59 -07002340 }
Angie Chiang2d147c12017-04-05 11:23:59 -07002341 }
Angie Chiang2d147c12017-04-05 11:23:59 -07002342 return 0;
2343}
2344
Urvang Joshi52648442016-10-13 17:27:51 -07002345static int64_t estimate_yrd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bs,
2346 MACROBLOCK *x, int *r, int64_t *d, int *s,
2347 int64_t *sse, int64_t ref_best_rd) {
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002348 RD_STATS rd_stats;
Jingning Han66965a22018-01-25 09:53:41 -08002349 x->rd_model = LOW_TXFM_RD;
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002350 int64_t rd = txfm_yrd(cpi, x, &rd_stats, ref_best_rd, bs, DCT_DCT,
2351 max_txsize_lookup[bs]);
Jingning Han66965a22018-01-25 09:53:41 -08002352 x->rd_model = FULL_TXFM_RD;
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002353 *r = rd_stats.rate;
2354 *d = rd_stats.dist;
2355 *s = rd_stats.skip;
2356 *sse = rd_stats.sse;
2357 return rd;
Yaowu Xuc27fc142016-08-22 16:08:15 -07002358}
Yaowu Xuc27fc142016-08-22 16:08:15 -07002359
// Fixes the transform size to the largest one allowed by the current tx mode
// and searches over the permitted transform types for the best RD, leaving
// the winning tx_type installed in mbmi and its stats in rd_stats.
static void choose_largest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
                                   RD_STATS *rd_stats, int64_t ref_best_rd,
                                   BLOCK_SIZE bs) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  TX_TYPE tx_type, best_tx_type = DCT_DCT;
  int64_t this_rd, best_rd = INT64_MAX;
  const int skip_ctx = av1_get_skip_context(xd);
  // Bit costs of coding skip = 0 / skip = 1 in the current context.
  int s0 = x->skip_cost[skip_ctx][0];
  int s1 = x->skip_cost[skip_ctx][1];
  const int is_inter = is_inter_block(mbmi);
  int prune = 0;
  av1_invalid_rd_stats(rd_stats);

  mbmi->tx_size = tx_size_from_tx_mode(bs, cm->tx_mode, is_inter);
  mbmi->min_tx_size = mbmi->tx_size;
  const TxSetType tx_set_type =
      get_ext_tx_set_type(mbmi->tx_size, bs, is_inter, cm->reduced_tx_set_used);

  // Precompute the tx-type pruning mask for inter blocks when the pruning
  // speed feature is active and no default type is forced.
  if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE &&
      !x->use_default_inter_tx_type) {
    prune = prune_tx(cpi, bs, x, xd, tx_set_type, 0);
  }
#if CONFIG_FILTER_INTRA
  if (skip_invalid_tx_size_for_filter_intra(mbmi, AOM_PLANE_Y, rd_stats)) {
    return;
  }
#endif
  // Multiple transform types are only searched when the extended-tx set has
  // more than one entry and the segment is not lossless; otherwise DCT_DCT
  // is evaluated directly in the else branch below.
  if (get_ext_tx_types(mbmi->tx_size, bs, is_inter, cm->reduced_tx_set_used) >
          1 &&
      !xd->lossless[mbmi->segment_id]) {
    for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
      if (!av1_ext_tx_used[tx_set_type][tx_type]) continue;
      RD_STATS this_rd_stats;
      if (is_inter) {
        if (x->use_default_inter_tx_type &&
            tx_type != get_default_tx_type(0, xd, mbmi->tx_size))
          continue;
        if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE) {
          if (!do_tx_type_search(tx_type, prune,
                                 cpi->sf.tx_type_search.prune_mode))
            continue;
        }
      } else {
        if (x->use_default_intra_tx_type &&
            tx_type != get_default_tx_type(0, xd, mbmi->tx_size))
          continue;
      }

      mbmi->tx_type = tx_type;

      txfm_rd_in_plane(x, cpi, &this_rd_stats, ref_best_rd, AOM_PLANE_Y, bs,
                       mbmi->tx_size, cpi->sf.use_fast_coef_costing);

      if (this_rd_stats.rate == INT_MAX) continue;

      if (this_rd_stats.skip) {
        this_rd = RDCOST(x->rdmult, s1, this_rd_stats.sse);
      } else {
        // The tx_type signalling cost only applies when coefficients are
        // actually coded (not skipped).
        this_rd_stats.rate += av1_tx_type_cost(cm, x, xd, bs, AOM_PLANE_Y,
                                               mbmi->tx_size, tx_type);
        this_rd =
            RDCOST(x->rdmult, this_rd_stats.rate + s0, this_rd_stats.dist);
      }
      // Non-lossless inter blocks may still be cheaper coded as skip.
      if (is_inter_block(mbmi) && !xd->lossless[mbmi->segment_id] &&
          !this_rd_stats.skip)
        this_rd = AOMMIN(this_rd, RDCOST(x->rdmult, s1, this_rd_stats.sse));

      if (this_rd < best_rd) {
        best_rd = this_rd;
        best_tx_type = mbmi->tx_type;
        *rd_stats = this_rd_stats;
      }
    }
  } else {
    mbmi->tx_type = DCT_DCT;
    txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, AOM_PLANE_Y, bs,
                     mbmi->tx_size, cpi->sf.use_fast_coef_costing);
  }
  mbmi->tx_type = best_tx_type;
}
2442
Urvang Joshi52648442016-10-13 17:27:51 -07002443static void choose_smallest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002444 RD_STATS *rd_stats, int64_t ref_best_rd,
Yaowu Xuc27fc142016-08-22 16:08:15 -07002445 BLOCK_SIZE bs) {
2446 MACROBLOCKD *const xd = &x->e_mbd;
2447 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
2448
2449 mbmi->tx_size = TX_4X4;
2450 mbmi->tx_type = DCT_DCT;
Debargha Mukherjee3ebb0d02017-12-14 05:05:18 -08002451 mbmi->min_tx_size = TX_4X4;
Yaowu Xuc27fc142016-08-22 16:08:15 -07002452
Yue Chen4a8ea372017-12-21 16:41:37 -08002453#if CONFIG_FILTER_INTRA
2454 if (skip_invalid_tx_size_for_filter_intra(mbmi, AOM_PLANE_Y, rd_stats)) {
2455 return;
2456 }
2457#endif
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002458 txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, 0, bs, mbmi->tx_size,
2459 cpi->sf.use_fast_coef_costing);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002460}
2461
Angie Chiangf1cb0752017-04-10 16:01:20 -07002462static INLINE int bsize_to_num_blk(BLOCK_SIZE bsize) {
2463 int num_blk = 1 << (num_pels_log2_lookup[bsize] - 2 * tx_size_wide_log2[0]);
2464 return num_blk;
2465}
Angie Chiangf1cb0752017-04-10 16:01:20 -07002466
Hui Sufbb75a92018-01-11 16:14:48 -08002467static int get_search_init_depth(int mi_width, int mi_height,
2468 const SPEED_FEATURES *sf) {
2469 if (sf->tx_size_search_method == USE_LARGESTALL) return MAX_VARTX_DEPTH;
2470 return (mi_height != mi_width) ? sf->tx_size_search_init_depth_rect
2471 : sf->tx_size_search_init_depth_sqr;
2472}
2473
Urvang Joshi52648442016-10-13 17:27:51 -07002474static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi,
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002475 MACROBLOCK *x, RD_STATS *rd_stats,
2476 int64_t ref_best_rd, BLOCK_SIZE bs) {
Angie Chianga4fa1902017-04-05 15:26:09 -07002477 const AV1_COMMON *const cm = &cpi->common;
Yaowu Xuc27fc142016-08-22 16:08:15 -07002478 MACROBLOCKD *const xd = &x->e_mbd;
2479 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
Yaowu Xuc27fc142016-08-22 16:08:15 -07002480 int64_t rd = INT64_MAX;
Angie Chianga4fa1902017-04-05 15:26:09 -07002481 int n;
Debargha Mukherjee0fa057f2017-12-06 17:06:29 -08002482 int start_tx;
2483 int depth;
Angie Chianga4fa1902017-04-05 15:26:09 -07002484 int64_t best_rd = INT64_MAX, last_rd = INT64_MAX;
Debargha Mukherjee0fa057f2017-12-06 17:06:29 -08002485 const int is_inter = is_inter_block(mbmi);
2486 const TX_SIZE max_rect_tx_size = get_max_rect_tx_size(bs, is_inter);
2487 TX_SIZE best_tx_size = max_rect_tx_size;
Angie Chianga4fa1902017-04-05 15:26:09 -07002488 TX_TYPE best_tx_type = DCT_DCT;
Angie Chiangcd9b03f2017-04-16 13:37:13 -07002489#if CONFIG_TXK_SEL
Angie Chiangf1cb0752017-04-10 16:01:20 -07002490 TX_TYPE best_txk_type[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
Angie Chiangcd9b03f2017-04-16 13:37:13 -07002491#endif // CONFIG_TXK_SEL
Jingning Hanc5c37032018-01-04 16:43:43 -08002492 uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 8];
2493 const int n4 = bsize_to_num_blk(bs);
Angie Chianga4fa1902017-04-05 15:26:09 -07002494 const int tx_select = cm->tx_mode == TX_MODE_SELECT;
Angie Chianga4fa1902017-04-05 15:26:09 -07002495
2496 av1_invalid_rd_stats(rd_stats);
2497
Angie Chianga4fa1902017-04-05 15:26:09 -07002498 if (tx_select) {
Debargha Mukherjee0fa057f2017-12-06 17:06:29 -08002499 start_tx = max_rect_tx_size;
Hui Sufbb75a92018-01-11 16:14:48 -08002500 depth = get_search_init_depth(mi_size_wide[bs], mi_size_high[bs], &cpi->sf);
Angie Chianga4fa1902017-04-05 15:26:09 -07002501 } else {
2502 const TX_SIZE chosen_tx_size =
2503 tx_size_from_tx_mode(bs, cm->tx_mode, is_inter);
2504 start_tx = chosen_tx_size;
Debargha Mukherjee0fa057f2017-12-06 17:06:29 -08002505 depth = MAX_TX_DEPTH;
Yaowu Xuc27fc142016-08-22 16:08:15 -07002506 }
2507
Alexander Bokov0c7eb102017-09-07 18:49:00 -07002508 int prune = 0;
2509 if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE &&
2510 !x->use_default_inter_tx_type) {
Alexander Bokov79a37242017-09-29 11:25:55 -07002511 prune = prune_tx(cpi, bs, x, xd, EXT_TX_SET_ALL16, 0);
Alexander Bokov0c7eb102017-09-07 18:49:00 -07002512 }
2513
Angie Chianga4fa1902017-04-05 15:26:09 -07002514 last_rd = INT64_MAX;
Debargha Mukherjeee4e18fc2017-12-06 23:43:24 -08002515 for (n = start_tx; depth <= MAX_TX_DEPTH;
2516 depth++, n = sub_tx_size_map[0][n]) {
Angie Chiangf1cb0752017-04-10 16:01:20 -07002517 TX_TYPE tx_start = DCT_DCT;
2518 TX_TYPE tx_end = TX_TYPES;
Angie Chiangcd9b03f2017-04-16 13:37:13 -07002519#if CONFIG_TXK_SEL
Angie Chiangf1cb0752017-04-10 16:01:20 -07002520 // The tx_type becomes dummy when lv_map is on. The tx_type search will be
Hui Su4a5c6cf2018-01-24 17:32:01 -08002521 // performed in search_txk_type()
Angie Chiangf1cb0752017-04-10 16:01:20 -07002522 tx_end = DCT_DCT + 1;
2523#endif
Angie Chianga4fa1902017-04-05 15:26:09 -07002524 TX_TYPE tx_type;
Angie Chiangf1cb0752017-04-10 16:01:20 -07002525 for (tx_type = tx_start; tx_type < tx_end; ++tx_type) {
Angie Chianga4fa1902017-04-05 15:26:09 -07002526 RD_STATS this_rd_stats;
Alexander Bokov0c7eb102017-09-07 18:49:00 -07002527 if (skip_txfm_search(cpi, x, bs, tx_type, n, prune)) continue;
Jingning Handd8600f2018-01-23 09:06:32 -08002528
Jingning Han66965a22018-01-25 09:53:41 -08002529 if (mbmi->ref_mv_idx > 0) x->rd_model = LOW_TXFM_RD;
Angie Chianga4fa1902017-04-05 15:26:09 -07002530 rd = txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs, tx_type, n);
Jingning Han66965a22018-01-25 09:53:41 -08002531 x->rd_model = FULL_TXFM_RD;
Yushin Chod0b77ac2017-10-20 17:33:16 -07002532
Angie Chianga4fa1902017-04-05 15:26:09 -07002533 // Early termination in transform size search.
2534 if (cpi->sf.tx_size_search_breakout &&
2535 (rd == INT64_MAX ||
Debargha Mukherjee0fa057f2017-12-06 17:06:29 -08002536 (this_rd_stats.skip == 1 && tx_type != DCT_DCT && n != start_tx) ||
2537 (n != (int)start_tx && rd > last_rd))) {
Angie Chianga4fa1902017-04-05 15:26:09 -07002538 break;
Lester Lu432012f2017-08-17 14:39:29 -07002539 }
Angie Chianga4fa1902017-04-05 15:26:09 -07002540
2541 last_rd = rd;
Hui Suda816a12017-08-18 14:46:02 -07002542 ref_best_rd = AOMMIN(rd, ref_best_rd);
Angie Chianga4fa1902017-04-05 15:26:09 -07002543 if (rd < best_rd) {
Angie Chiangcd9b03f2017-04-16 13:37:13 -07002544#if CONFIG_TXK_SEL
Angie Chiangbce07f12017-12-01 16:34:31 -08002545 memcpy(best_txk_type, mbmi->txk_type,
2546 sizeof(best_txk_type[0]) * MAX_SB_SQUARE /
2547 (TX_SIZE_W_MIN * TX_SIZE_H_MIN));
Angie Chiangf1cb0752017-04-10 16:01:20 -07002548#endif
Jingning Hanc5c37032018-01-04 16:43:43 -08002549 memcpy(best_blk_skip, x->blk_skip[0], sizeof(best_blk_skip[0]) * n4);
Angie Chianga4fa1902017-04-05 15:26:09 -07002550 best_tx_type = tx_type;
2551 best_tx_size = n;
2552 best_rd = rd;
2553 *rd_stats = this_rd_stats;
2554 }
Debargha Mukherjee6ea917e2017-10-19 09:31:29 -07002555#if !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
Angie Chianga4fa1902017-04-05 15:26:09 -07002556 const int is_inter = is_inter_block(mbmi);
2557 if (mbmi->sb_type < BLOCK_8X8 && is_inter) break;
Debargha Mukherjee6ea917e2017-10-19 09:31:29 -07002558#endif // !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
Cheng Chen3c222602018-01-22 19:34:18 -08002559
2560#if !CONFIG_TXK_SEL
2561 // stop searching other tx types if skip has better rdcost than transform
2562 // all tx blocks.
2563 if (cpi->sf.tx_type_search.skip_tx_search && !is_inter && rd_stats->skip)
2564 break;
2565#endif
Angie Chianga4fa1902017-04-05 15:26:09 -07002566 }
Jingning Han62177822018-01-11 08:02:06 -08002567 if (n == TX_4X4) break;
Angie Chianga4fa1902017-04-05 15:26:09 -07002568 }
2569 mbmi->tx_size = best_tx_size;
Yaowu Xuc27fc142016-08-22 16:08:15 -07002570 mbmi->tx_type = best_tx_type;
Angie Chiangcd9b03f2017-04-16 13:37:13 -07002571#if CONFIG_TXK_SEL
Angie Chiangbce07f12017-12-01 16:34:31 -08002572 memcpy(mbmi->txk_type, best_txk_type,
2573 sizeof(best_txk_type[0]) * MAX_SB_SQUARE /
2574 (TX_SIZE_W_MIN * TX_SIZE_H_MIN));
Angie Chiangf1cb0752017-04-10 16:01:20 -07002575#endif
Jingning Hanc5c37032018-01-04 16:43:43 -08002576 memcpy(x->blk_skip[0], best_blk_skip, sizeof(best_blk_skip[0]) * n4);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002577
Debargha Mukherjee3ebb0d02017-12-14 05:05:18 -08002578 mbmi->min_tx_size = mbmi->tx_size;
Yaowu Xuc27fc142016-08-22 16:08:15 -07002579}
2580
Angie Chiang0e9a2e92016-11-08 09:45:40 -08002581static void super_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
2582 RD_STATS *rd_stats, BLOCK_SIZE bs,
2583 int64_t ref_best_rd) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07002584 MACROBLOCKD *xd = &x->e_mbd;
Angie Chiang0e9a2e92016-11-08 09:45:40 -08002585 av1_init_rd_stats(rd_stats);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002586
2587 assert(bs == xd->mi[0]->mbmi.sb_type);
2588
Yaowu Xu1e2aae12017-02-27 16:33:14 -08002589 if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
Angie Chiang0e9a2e92016-11-08 09:45:40 -08002590 choose_smallest_tx_size(cpi, x, rd_stats, ref_best_rd, bs);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002591 } else if (cpi->sf.tx_size_search_method == USE_LARGESTALL) {
Angie Chiang0e9a2e92016-11-08 09:45:40 -08002592 choose_largest_tx_size(cpi, x, rd_stats, ref_best_rd, bs);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002593 } else {
Angie Chiang0e9a2e92016-11-08 09:45:40 -08002594 choose_tx_size_type_from_rd(cpi, x, rd_stats, ref_best_rd, bs);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002595 }
2596}
2597
// Return the rate cost for luma prediction mode info. of intra blocks.
// The cost starts from |mode_cost| (the cost of signaling the prediction
// mode itself) and adds the cost of every auxiliary luma intra tool that
// applies: palette, filter-intra, angle delta and (when enabled) intrabc.
static int intra_mode_info_cost_y(const AV1_COMP *cpi, const MACROBLOCK *x,
                                  const MB_MODE_INFO *mbmi, BLOCK_SIZE bsize,
                                  int mode_cost) {
  int total_rate = mode_cost;
  const int use_palette = mbmi->palette_mode_info.palette_size[0] > 0;
#if CONFIG_FILTER_INTRA
  const int use_filter_intra = mbmi->filter_intra_mode_info.use_filter_intra;
#endif  // CONFIG_FILTER_INTRA
// Can only activate one mode.
#if CONFIG_INTRABC
  const int use_intrabc = mbmi->use_intrabc;
  // Palette, filter-intra and intrabc are only legal on top of DC_PRED,
  // and at most one of them may be active at a time.
  assert(((mbmi->mode != DC_PRED) + use_palette + use_intrabc +
          use_filter_intra) <= 1);
#else
  assert((mbmi->mode != DC_PRED) + use_palette + use_filter_intra <= 1);
#endif  // CONFIG_INTRABC
  const int try_palette =
      av1_allow_palette(cpi->common.allow_screen_content_tools, mbmi->sb_type);
  if (try_palette && mbmi->mode == DC_PRED) {
    const MACROBLOCKD *xd = &x->e_mbd;
    const int bsize_ctx = av1_get_palette_bsize_ctx(bsize);
    const int mode_ctx = av1_get_palette_mode_ctx(xd);
    // Cost of the palette on/off flag, contexted on block size and neighbors.
    total_rate += x->palette_y_mode_cost[bsize_ctx][mode_ctx][use_palette];
    if (use_palette) {
      const uint8_t *const color_map = xd->plane[0].color_index_map;
      int block_width, block_height, rows, cols;
      av1_get_block_dimensions(bsize, 0, xd, &block_width, &block_height, &rows,
                               &cols);
      const int plt_size = mbmi->palette_mode_info.palette_size[0];
      // Palette size + first color index, then the base colors (delta-coded
      // against the color cache) and the per-pixel index map.
      int palette_mode_cost =
          x->palette_y_size_cost[bsize_ctx][plt_size - PALETTE_MIN_SIZE] +
          write_uniform_cost(plt_size, color_map[0]);
      uint16_t color_cache[2 * PALETTE_MAX_SIZE];
      const int n_cache = av1_get_palette_cache(xd, 0, color_cache);
      palette_mode_cost +=
          av1_palette_color_cost_y(&mbmi->palette_mode_info, color_cache,
                                   n_cache, cpi->common.bit_depth);
      palette_mode_cost +=
          av1_cost_color_map(x, 0, bsize, mbmi->tx_size, PALETTE_MAP);
      total_rate += palette_mode_cost;
    }
  }
#if CONFIG_FILTER_INTRA
  if (mbmi->mode == DC_PRED && av1_filter_intra_allowed_txsize(mbmi->tx_size)) {
    // Filter-intra flag, plus the chosen filter mode when enabled.
    total_rate += x->filter_intra_cost[mbmi->tx_size][use_filter_intra];
    if (use_filter_intra) {
      total_rate += x->filter_intra_mode_cost[mbmi->filter_intra_mode_info
                                                  .filter_intra_mode];
    }
  }
#endif  // CONFIG_FILTER_INTRA
  if (av1_is_directional_mode(mbmi->mode, bsize)) {
    if (av1_use_angle_delta(bsize)) {
      // Cost of the angle offset around the base directional mode.
#if CONFIG_EXT_INTRA_MOD
      total_rate += x->angle_delta_cost[mbmi->mode - V_PRED]
                                       [MAX_ANGLE_DELTA + mbmi->angle_delta[0]];
#else
      total_rate += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
                                       MAX_ANGLE_DELTA + mbmi->angle_delta[0]);
#endif  // CONFIG_EXT_INTRA_MOD
    }
  }
#if CONFIG_INTRABC
  if (av1_allow_intrabc(&cpi->common))
    total_rate += x->intrabc_cost[use_intrabc];
#endif  // CONFIG_INTRABC
  return total_rate;
}
2667
// Return the rate cost for chroma prediction mode info. of intra blocks.
// Mirrors intra_mode_info_cost_y() for plane 1: starts from |mode_cost|
// and adds palette and angle-delta signaling costs where applicable.
static int intra_mode_info_cost_uv(const AV1_COMP *cpi, const MACROBLOCK *x,
                                   const MB_MODE_INFO *mbmi, BLOCK_SIZE bsize,
                                   int mode_cost) {
  int total_rate = mode_cost;
  const int use_palette = mbmi->palette_mode_info.palette_size[1] > 0;
  const UV_PREDICTION_MODE mode = mbmi->uv_mode;
// Can only activate one mode.
#if CONFIG_INTRABC
  // Chroma palette requires UV_DC_PRED; intrabc excludes both.
  assert(((mode != UV_DC_PRED) + use_palette + mbmi->use_intrabc) <= 1);
#else
  assert((mode != UV_DC_PRED) + use_palette <= 1);
#endif  // CONFIG_INTRABC
  const int try_palette =
      av1_allow_palette(cpi->common.allow_screen_content_tools, mbmi->sb_type);
  if (try_palette && mode == UV_DC_PRED) {
    const PALETTE_MODE_INFO *pmi = &mbmi->palette_mode_info;
    // UV palette flag is contexted on whether the Y palette is in use.
    total_rate +=
        x->palette_uv_mode_cost[pmi->palette_size[0] > 0][use_palette];
    if (use_palette) {
      const int bsize_ctx = av1_get_palette_bsize_ctx(bsize);
      const int plt_size = pmi->palette_size[1];
      const MACROBLOCKD *xd = &x->e_mbd;
      const uint8_t *const color_map = xd->plane[1].color_index_map;
      // Palette size + first index, then cache-coded base colors and the
      // per-pixel index map for plane 1.
      int palette_mode_cost =
          x->palette_uv_size_cost[bsize_ctx][plt_size - PALETTE_MIN_SIZE] +
          write_uniform_cost(plt_size, color_map[0]);
      uint16_t color_cache[2 * PALETTE_MAX_SIZE];
      const int n_cache = av1_get_palette_cache(xd, 1, color_cache);
      palette_mode_cost += av1_palette_color_cost_uv(pmi, color_cache, n_cache,
                                                     cpi->common.bit_depth);
      palette_mode_cost +=
          av1_cost_color_map(x, 1, bsize, mbmi->tx_size, PALETTE_MAP);
      total_rate += palette_mode_cost;
    }
  }
  if (av1_is_directional_mode(get_uv_mode(mode), mbmi->sb_type)) {
    if (av1_use_angle_delta(bsize)) {
      // Chroma angle delta uses index 1 of angle_delta[].
#if CONFIG_EXT_INTRA_MOD
      total_rate += x->angle_delta_cost[mode - V_PRED]
                                       [mbmi->angle_delta[1] + MAX_ANGLE_DELTA];
#else
      total_rate += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
                                       MAX_ANGLE_DELTA + mbmi->angle_delta[1]);
#endif  // CONFIG_EXT_INTRA_MOD
    }
  }
  return total_rate;
}
2717
Yaowu Xuc27fc142016-08-22 16:08:15 -07002718static int conditional_skipintra(PREDICTION_MODE mode,
2719 PREDICTION_MODE best_intra_mode) {
2720 if (mode == D117_PRED && best_intra_mode != V_PRED &&
2721 best_intra_mode != D135_PRED)
2722 return 1;
2723 if (mode == D63_PRED && best_intra_mode != V_PRED &&
2724 best_intra_mode != D45_PRED)
2725 return 1;
2726 if (mode == D207_PRED && best_intra_mode != H_PRED &&
2727 best_intra_mode != D45_PRED)
2728 return 1;
2729 if (mode == D153_PRED && best_intra_mode != H_PRED &&
2730 best_intra_mode != D135_PRED)
2731 return 1;
2732 return 0;
2733}
2734
// Model based RD estimation for luma intra blocks.
// Runs the actual intra prediction for every transform block, then uses the
// cheap model_rd_for_sb() estimate (no transform/quantization) to produce an
// approximate RD cost for early pruning.  |mode_cost| is augmented here with
// angle-delta and filter-intra signaling costs before being folded in.
static int64_t intra_model_yrd(const AV1_COMP *const cpi, MACROBLOCK *const x,
                               BLOCK_SIZE bsize, int mode_cost) {
  const AV1_COMMON *cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  assert(!is_inter_block(mbmi));
  RD_STATS this_rd_stats;
  int row, col;
  int64_t temp_sse, this_rd;
  TX_SIZE tx_size = tx_size_from_tx_mode(bsize, cm->tx_mode, 0);
#if CONFIG_FILTER_INTRA
  if (mbmi->filter_intra_mode_info.use_filter_intra) {
    // Filter intra restricts the usable transform sizes; bail out with an
    // infinite cost if none is legal for this block.
    tx_size = av1_max_tx_size_for_filter_intra(bsize, cm->tx_mode);
    if (!av1_filter_intra_allowed_txsize(tx_size)) return INT64_MAX;
  }
#endif
  const int stepr = tx_size_high_unit[tx_size];
  const int stepc = tx_size_wide_unit[tx_size];
  const int max_blocks_wide = max_block_wide(xd, bsize, 0);
  const int max_blocks_high = max_block_high(xd, bsize, 0);
  // NOTE: mbmi->tx_size is updated as a side effect and read by callers.
  mbmi->tx_size = tx_size;
  // Prediction.
  for (row = 0; row < max_blocks_high; row += stepr) {
    for (col = 0; col < max_blocks_wide; col += stepc) {
      av1_predict_intra_block_facade(cm, xd, 0, col, row, tx_size);
    }
  }
  // RD estimation.
  model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &this_rd_stats.rate,
                  &this_rd_stats.dist, &this_rd_stats.skip, &temp_sse);
  if (av1_is_directional_mode(mbmi->mode, bsize) &&
      av1_use_angle_delta(bsize)) {
    // Account for signaling the angle offset of directional modes.
#if CONFIG_EXT_INTRA_MOD
    mode_cost += x->angle_delta_cost[mbmi->mode - V_PRED]
                                    [MAX_ANGLE_DELTA + mbmi->angle_delta[0]];
#else
    mode_cost += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
                                    MAX_ANGLE_DELTA + mbmi->angle_delta[0]);
#endif  // CONFIG_EXT_INTRA_MOD
  }
#if CONFIG_FILTER_INTRA
  if (mbmi->mode == DC_PRED && av1_filter_intra_allowed_txsize(mbmi->tx_size)) {
    // Account for the filter-intra flag and, when on, the filter mode.
    if (mbmi->filter_intra_mode_info.use_filter_intra) {
      const int mode = mbmi->filter_intra_mode_info.filter_intra_mode;
      mode_cost += x->filter_intra_cost[mbmi->tx_size][1] +
                   x->filter_intra_mode_cost[mode];
    } else {
      mode_cost += x->filter_intra_cost[mbmi->tx_size][0];
    }
  }
#endif  // CONFIG_FILTER_INTRA
  this_rd =
      RDCOST(x->rdmult, this_rd_stats.rate + mode_cost, this_rd_stats.dist);
  return this_rd;
}
2791
// Extends 'color_map' array from 'orig_width x orig_height' to 'new_width x
// new_height' in place.  Newly exposed columns repeat each row's last valid
// entry and newly exposed rows repeat the last valid row.
static void extend_palette_color_map(uint8_t *const color_map, int orig_width,
                                     int orig_height, int new_width,
                                     int new_height) {
  assert(new_width >= orig_width);
  assert(new_height >= orig_height);
  if (new_width == orig_width && new_height == orig_height) return;

  // Re-stride the existing rows from bottom to top so no source data is
  // overwritten before it has been moved, then replicate the last column.
  for (int r = orig_height - 1; r >= 0; --r) {
    uint8_t *const dst_row = color_map + r * new_width;
    memmove(dst_row, color_map + r * orig_width, orig_width);
    memset(dst_row + orig_width, dst_row[orig_width - 1],
           new_width - orig_width);
  }
  // Replicate the last valid (already widened) row into the added rows.
  const uint8_t *const last_row = color_map + (orig_height - 1) * new_width;
  for (int r = orig_height; r < new_height; ++r) {
    memcpy(color_map + r * new_width, last_row, new_width);
  }
}
2815
// Bias toward using colors in the cache.
// Snap each candidate centroid to its nearest cached color whenever the two
// differ by at most one level; cached colors are cheaper to signal.
// TODO(huisu): Try other schemes to improve compression.
static void optimize_palette_colors(uint16_t *color_cache, int n_cache,
                                    int n_colors, int stride, int *centroids) {
  if (n_cache <= 0) return;
  for (int c = 0; c < n_colors; ++c) {
    int *const centroid = &centroids[c * stride];
    int best_idx = 0;
    int best_diff = abs(*centroid - (int)color_cache[0]);
    for (int j = 1; j < n_cache; ++j) {
      const int diff = abs(*centroid - (int)color_cache[j]);
      if (diff < best_diff) {
        best_diff = diff;
        best_idx = j;
      }
    }
    // Only snap near-identical colors; larger moves would distort the map.
    if (best_diff <= 1) *centroid = color_cache[best_idx];
  }
}
hui su33567b22017-04-30 16:40:19 -07002834
// Given the base colors as specified in centroids[], calculate the RD cost
// of palette mode.
// On improvement over *best_rd, updates *best_mbmi, the best color index
// map, the block-skip flags and the various output rate/distortion fields.
// Uses the model-based estimate first to prune clearly losing candidates
// before running the full super_block_yrd() search.
static void palette_rd_y(
    const AV1_COMP *const cpi, MACROBLOCK *x, MB_MODE_INFO *mbmi,
    BLOCK_SIZE bsize, int dc_mode_cost, const int *data, int *centroids, int n,
    uint16_t *color_cache, int n_cache, MB_MODE_INFO *best_mbmi,
    uint8_t *best_palette_color_map, int64_t *best_rd, int64_t *best_model_rd,
    int *rate, int *rate_tokenonly, int *rate_overhead, int64_t *distortion,
    int *skippable, PICK_MODE_CONTEXT *ctx, uint8_t *blk_skip) {
  // Snap centroids to cached colors, then drop duplicates.
  optimize_palette_colors(color_cache, n_cache, n, 1, centroids);
  int k = av1_remove_duplicates(centroids, n);
  if (k < PALETTE_MIN_SIZE) {
    // Too few unique colors to create a palette. And DC_PRED will work
    // well for that case anyway. So skip.
    return;
  }
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
  // Store the palette base colors, clipped to the coding bit depth.
  if (cpi->common.use_highbitdepth)
    for (int i = 0; i < k; ++i)
      pmi->palette_colors[i] =
          clip_pixel_highbd((int)centroids[i], cpi->common.bit_depth);
  else
    for (int i = 0; i < k; ++i)
      pmi->palette_colors[i] = clip_pixel(centroids[i]);
  pmi->palette_size[0] = k;
  MACROBLOCKD *const xd = &x->e_mbd;
  uint8_t *const color_map = xd->plane[0].color_index_map;
  int block_width, block_height, rows, cols;
  av1_get_block_dimensions(bsize, 0, xd, &block_width, &block_height, &rows,
                           &cols);
  // Map every pixel to its nearest base color, then pad the index map out
  // to the full block dimensions.
  av1_calc_indices(data, centroids, color_map, rows * cols, k, 1);
  extend_palette_color_map(color_map, cols, rows, block_width, block_height);
  const int palette_mode_cost =
      intra_mode_info_cost_y(cpi, x, mbmi, bsize, dc_mode_cost);
  // Cheap model-based RD gate: give up if clearly worse than the best model
  // RD seen so far (by more than 50%).
  int64_t this_model_rd = intra_model_yrd(cpi, x, bsize, palette_mode_cost);
  if (*best_model_rd != INT64_MAX &&
      this_model_rd > *best_model_rd + (*best_model_rd >> 1))
    return;
  if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd;
  RD_STATS tokenonly_rd_stats;
  super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
  if (tokenonly_rd_stats.rate == INT_MAX) return;
  int this_rate = tokenonly_rd_stats.rate + palette_mode_cost;
  int64_t this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
  if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(mbmi->sb_type)) {
    // Exclude the tx_size signaling cost from the token-only rate reported
    // back to the caller.
    tokenonly_rd_stats.rate -=
        tx_size_cost(&cpi->common, x, bsize, mbmi->tx_size);
  }
  if (this_rd < *best_rd) {
    *best_rd = this_rd;
    memcpy(best_palette_color_map, color_map,
           block_width * block_height * sizeof(color_map[0]));
    *best_mbmi = *mbmi;
    memcpy(blk_skip, x->blk_skip[0], sizeof(uint8_t) * ctx->num_4x4_blk);
    *rate_overhead = this_rate - tokenonly_rd_stats.rate;
    // Output pointers are optional; only fill the ones the caller provided.
    if (rate) *rate = this_rate;
    if (rate_tokenonly) *rate_tokenonly = tokenonly_rd_stats.rate;
    if (distortion) *distortion = tokenonly_rd_stats.dist;
    if (skippable) *skippable = tokenonly_rd_stats.skip;
  }
}
2896
// RD search for the luma palette intra mode.
// Counts the distinct source colors; when the block is a palette candidate
// (2..64 colors), tries candidate palettes of every size from two phases —
// the dominant source colors directly, then k-means clustering — and keeps
// the best via palette_rd_y().  Returns the rate overhead of the winning
// palette (0 if none was selected); *mbmi is overwritten with *best_mbmi.
static int rd_pick_palette_intra_sby(
    const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
    int dc_mode_cost, MB_MODE_INFO *best_mbmi, uint8_t *best_palette_color_map,
    int64_t *best_rd, int64_t *best_model_rd, int *rate, int *rate_tokenonly,
    int64_t *distortion, int *skippable, PICK_MODE_CONTEXT *ctx,
    uint8_t *best_blk_skip) {
  int rate_overhead = 0;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  MB_MODE_INFO *const mbmi = &mic->mbmi;
  assert(!is_inter_block(mbmi));
  assert(av1_allow_palette(cpi->common.allow_screen_content_tools, bsize));
  int colors, n;
  const int src_stride = x->plane[0].src.stride;
  const uint8_t *const src = x->plane[0].src.buf;
  uint8_t *const color_map = xd->plane[0].color_index_map;
  int block_width, block_height, rows, cols;
  av1_get_block_dimensions(bsize, 0, xd, &block_width, &block_height, &rows,
                           &cols);

  // Histogram of source pixel values, filled by the color-count helpers.
  int count_buf[1 << 12];  // Maximum (1 << 12) color levels.
  if (cpi->common.use_highbitdepth)
    colors = av1_count_colors_highbd(src, src_stride, rows, cols,
                                     cpi->common.bit_depth, count_buf);
  else
    colors = av1_count_colors(src, src_stride, rows, cols, count_buf);
#if CONFIG_FILTER_INTRA
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
#endif  // CONFIG_FILTER_INTRA

  // Palette only pays off for a small number of distinct colors.
  if (colors > 1 && colors <= 64) {
    int r, c, i;
    const int max_itr = 50;  // k-means iteration cap
    int *const data = x->palette_buffer->kmeans_data_buf;
    int centroids[PALETTE_MAX_SIZE];
    int lb, ub, val;  // lower/upper bound of observed pixel values
    uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
    if (cpi->common.use_highbitdepth)
      lb = ub = src16[0];
    else
      lb = ub = src[0];

    // Copy the source pixels into the (densely packed) k-means buffer and
    // track the value range.
    if (cpi->common.use_highbitdepth) {
      for (r = 0; r < rows; ++r) {
        for (c = 0; c < cols; ++c) {
          val = src16[r * src_stride + c];
          data[r * cols + c] = val;
          if (val < lb)
            lb = val;
          else if (val > ub)
            ub = val;
        }
      }
    } else {
      for (r = 0; r < rows; ++r) {
        for (c = 0; c < cols; ++c) {
          val = src[r * src_stride + c];
          data[r * cols + c] = val;
          if (val < lb)
            lb = val;
          else if (val > ub)
            ub = val;
        }
      }
    }

    // Palette is coded on top of DC_PRED.
    mbmi->mode = DC_PRED;
#if CONFIG_FILTER_INTRA
    mbmi->filter_intra_mode_info.use_filter_intra = 0;
#endif  // CONFIG_FILTER_INTRA

    uint16_t color_cache[2 * PALETTE_MAX_SIZE];
    const int n_cache = av1_get_palette_cache(xd, 0, color_cache);

    // Find the dominant colors, stored in top_colors[].
    // Repeatedly pick the most frequent remaining histogram bin.
    int top_colors[PALETTE_MAX_SIZE] = { 0 };
    for (i = 0; i < AOMMIN(colors, PALETTE_MAX_SIZE); ++i) {
      int max_count = 0;
      for (int j = 0; j < (1 << cpi->common.bit_depth); ++j) {
        if (count_buf[j] > max_count) {
          max_count = count_buf[j];
          top_colors[i] = j;
        }
      }
      assert(max_count > 0);
      count_buf[top_colors[i]] = 0;  // remove bin so the next pick differs
    }

    // Try the dominant colors directly.
    // TODO(huisu@google.com): Try to avoid duplicate computation in cases
    // where the dominant colors and the k-means results are similar.
    for (n = AOMMIN(colors, PALETTE_MAX_SIZE); n >= 2; --n) {
      for (i = 0; i < n; ++i) centroids[i] = top_colors[i];
      palette_rd_y(cpi, x, mbmi, bsize, dc_mode_cost, data, centroids, n,
                   color_cache, n_cache, best_mbmi, best_palette_color_map,
                   best_rd, best_model_rd, rate, rate_tokenonly, &rate_overhead,
                   distortion, skippable, ctx, best_blk_skip);
    }

    // K-means clustering.
    for (n = AOMMIN(colors, PALETTE_MAX_SIZE); n >= 2; --n) {
      if (colors == PALETTE_MIN_SIZE) {
        // Special case: These colors automatically become the centroids.
        assert(colors == n);
        assert(colors == 2);
        centroids[0] = lb;
        centroids[1] = ub;
      } else {
        // Seed the centroids evenly across the observed value range.
        for (i = 0; i < n; ++i) {
          centroids[i] = lb + (2 * i + 1) * (ub - lb) / n / 2;
        }
        av1_k_means(data, centroids, color_map, rows * cols, n, 1, max_itr);
      }
      palette_rd_y(cpi, x, mbmi, bsize, dc_mode_cost, data, centroids, n,
                   color_cache, n_cache, best_mbmi, best_palette_color_map,
                   best_rd, best_model_rd, rate, rate_tokenonly, &rate_overhead,
                   distortion, skippable, ctx, best_blk_skip);
    }
  }

  // Restore the winning color index map (palette_rd_y may have left the
  // map of a losing candidate in place).
  if (best_mbmi->palette_mode_info.palette_size[0] > 0) {
    memcpy(color_map, best_palette_color_map,
           block_width * block_height * sizeof(best_palette_color_map[0]));
  }
  *mbmi = *best_mbmi;
  return rate_overhead;
}
3024
#if CONFIG_FILTER_INTRA
// Return 1 if an filter intra mode is selected; return 0 otherwise.
// Searches all FILTER_INTRA_MODES on top of DC_PRED, pruning with the
// model-based RD estimate, and records the best mode's tx size/type and
// block-skip flags.  On success *mbmi holds the winning configuration.
static int rd_pick_filter_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
                                    int *rate, int *rate_tokenonly,
                                    int64_t *distortion, int *skippable,
                                    BLOCK_SIZE bsize, int mode_cost,
                                    int64_t *best_rd, int64_t *best_model_rd,
                                    PICK_MODE_CONTEXT *ctx) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  MB_MODE_INFO *mbmi = &mic->mbmi;
  int filter_intra_selected_flag = 0;
  FILTER_INTRA_MODE mode;
  TX_SIZE best_tx_size = TX_8X8;
  FILTER_INTRA_MODE_INFO filter_intra_mode_info;
  TX_TYPE best_tx_type;
#if CONFIG_TXK_SEL
  TX_TYPE best_txk_type[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
#endif
  (void)ctx;
  av1_zero(filter_intra_mode_info);
  // Filter intra is coded on top of DC_PRED and excludes palette.
  mbmi->filter_intra_mode_info.use_filter_intra = 1;
  mbmi->mode = DC_PRED;
  mbmi->palette_mode_info.palette_size[0] = 0;

  for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) {
    int64_t this_rd, this_model_rd;
    RD_STATS tokenonly_rd_stats;
    mbmi->filter_intra_mode_info.filter_intra_mode = mode;
    // Model-based gate: skip modes clearly worse (by >50%) than the best
    // model RD seen so far.
    this_model_rd = intra_model_yrd(cpi, x, bsize, mode_cost);
    if (*best_model_rd != INT64_MAX &&
        this_model_rd > *best_model_rd + (*best_model_rd >> 1))
      continue;
    if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd;
    super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
    if (tokenonly_rd_stats.rate == INT_MAX) continue;
    const int this_rate =
        tokenonly_rd_stats.rate +
        intra_mode_info_cost_y(cpi, x, mbmi, bsize, mode_cost);
    this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);

    if (this_rd < *best_rd) {
      // New best: snapshot everything needed to restore this mode later.
      *best_rd = this_rd;
      best_tx_size = mbmi->tx_size;
      filter_intra_mode_info = mbmi->filter_intra_mode_info;
      best_tx_type = mbmi->tx_type;
#if CONFIG_TXK_SEL
      memcpy(best_txk_type, mbmi->txk_type,
             sizeof(*best_txk_type) *
                 (MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)));
#endif
      memcpy(ctx->blk_skip[0], x->blk_skip[0],
             sizeof(uint8_t) * ctx->num_4x4_blk);
      *rate = this_rate;
      *rate_tokenonly = tokenonly_rd_stats.rate;
      *distortion = tokenonly_rd_stats.dist;
      *skippable = tokenonly_rd_stats.skip;
      filter_intra_selected_flag = 1;
    }
  }

  if (filter_intra_selected_flag) {
    // Restore the winning configuration into mbmi for the caller.
    mbmi->mode = DC_PRED;
    mbmi->tx_size = best_tx_size;
    mbmi->filter_intra_mode_info = filter_intra_mode_info;
    mbmi->tx_type = best_tx_type;
#if CONFIG_TXK_SEL
    memcpy(mbmi->txk_type, best_txk_type,
           sizeof(*best_txk_type) *
               (MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)));
#endif
    return 1;
  } else {
    return 0;
  }
}
#endif  // CONFIG_FILTER_INTRA
Yaowu Xuc27fc142016-08-22 16:08:15 -07003102
// Run RD calculation with a given luma intra prediction angle, and return
// the RD cost. Update the best mode info. if the RD cost is the best so far.
// Returns INT64_MAX when the candidate is pruned by the model-based gate or
// the token search fails to beat |best_rd_in|.
static int64_t calc_rd_given_intra_angle(
    const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mode_cost,
    int64_t best_rd_in, int8_t angle_delta, int max_angle_delta, int *rate,
    RD_STATS *rd_stats, int *best_angle_delta, TX_SIZE *best_tx_size,
    TX_TYPE *best_tx_type, int64_t *best_rd, int64_t *best_model_rd,
    TX_TYPE *best_txk_type, uint8_t *best_blk_skip) {
  int this_rate;
  RD_STATS tokenonly_rd_stats;
  int64_t this_rd, this_model_rd;
  MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
  const int n4 = bsize_to_num_blk(bsize);
  assert(!is_inter_block(mbmi));

#if !CONFIG_TXK_SEL
  (void)best_txk_type;
#endif

  mbmi->angle_delta[0] = angle_delta;
  // Model-based gate: drop angles clearly worse (by >50%) than the best
  // model RD seen so far.
  this_model_rd = intra_model_yrd(cpi, x, bsize, mode_cost);
  if (*best_model_rd != INT64_MAX &&
      this_model_rd > *best_model_rd + (*best_model_rd >> 1))
    return INT64_MAX;
  if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd;
  super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd_in);
  if (tokenonly_rd_stats.rate == INT_MAX) return INT64_MAX;

  // Total rate = token rate + mode cost + angle-delta signaling cost.
  this_rate = tokenonly_rd_stats.rate + mode_cost +
#if CONFIG_EXT_INTRA_MOD
              x->angle_delta_cost[mbmi->mode - V_PRED]
                                 [max_angle_delta + mbmi->angle_delta[0]];
#else
              write_uniform_cost(2 * max_angle_delta + 1,
                                 mbmi->angle_delta[0] + max_angle_delta);
#endif  // CONFIG_EXT_INTRA_MOD
  this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);

  if (this_rd < *best_rd) {
    // New best angle: snapshot tx selection and skip flags for the caller.
#if CONFIG_TXK_SEL
    memcpy(best_txk_type, mbmi->txk_type,
           sizeof(*best_txk_type) *
               (MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)));
#endif
    memcpy(best_blk_skip, x->blk_skip[0], sizeof(best_blk_skip[0]) * n4);
    *best_rd = this_rd;
    *best_angle_delta = mbmi->angle_delta[0];
    *best_tx_size = mbmi->tx_size;
    *best_tx_type = mbmi->tx_type;
    *rate = this_rate;
    rd_stats->rate = tokenonly_rd_stats.rate;
    rd_stats->dist = tokenonly_rd_stats.dist;
    rd_stats->skip = tokenonly_rd_stats.skip;
  }
  return this_rd;
}
3159
hui su45dc5972016-12-08 17:42:50 -08003160// With given luma directional intra prediction mode, pick the best angle delta
3161// Return the RD cost corresponding to the best angle delta.
Urvang Joshi52648442016-10-13 17:27:51 -07003162static int64_t rd_pick_intra_angle_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
hui su45dc5972016-12-08 17:42:50 -08003163 int *rate, RD_STATS *rd_stats,
3164 BLOCK_SIZE bsize, int mode_cost,
hui su9a416f52017-01-13 11:37:53 -08003165 int64_t best_rd,
3166 int64_t *best_model_rd) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07003167 MACROBLOCKD *const xd = &x->e_mbd;
3168 MODE_INFO *const mic = xd->mi[0];
3169 MB_MODE_INFO *mbmi = &mic->mbmi;
Urvang Joshi330aec82017-05-08 15:37:42 -07003170 assert(!is_inter_block(mbmi));
hui su45dc5972016-12-08 17:42:50 -08003171 int i, angle_delta, best_angle_delta = 0;
hui su45dc5972016-12-08 17:42:50 -08003172 int first_try = 1;
Debargha Mukherjeedf0e0d72017-04-27 15:16:53 -07003173 int64_t this_rd, best_rd_in, rd_cost[2 * (MAX_ANGLE_DELTA + 2)];
Zoe Liu083e8422018-01-17 10:41:12 -08003174 TX_SIZE best_tx_size = mbmi->tx_size;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003175 TX_TYPE best_tx_type = mbmi->tx_type;
Angie Chiang53bf1e92017-11-29 16:53:07 -08003176#if CONFIG_TXK_SEL
Jingning Hanc5c37032018-01-04 16:43:43 -08003177 const int n4 = bsize_to_num_blk(bsize);
Angie Chiang53bf1e92017-11-29 16:53:07 -08003178 TX_TYPE best_txk_type[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
3179#else
3180 TX_TYPE *best_txk_type = NULL;
3181#endif
Jingning Hanc5c37032018-01-04 16:43:43 -08003182 uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 8];
Yaowu Xuc27fc142016-08-22 16:08:15 -07003183
hui su0a6731f2017-04-26 15:23:47 -07003184 for (i = 0; i < 2 * (MAX_ANGLE_DELTA + 2); ++i) rd_cost[i] = INT64_MAX;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003185
hui su0a6731f2017-04-26 15:23:47 -07003186 for (angle_delta = 0; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
Hui Su259d4422017-10-13 10:08:17 -07003187 for (i = 0; i < 2; ++i) {
3188 best_rd_in = (best_rd == INT64_MAX)
3189 ? INT64_MAX
3190 : (best_rd + (best_rd >> (first_try ? 3 : 5)));
3191 this_rd = calc_rd_given_intra_angle(
3192 cpi, x, bsize, mode_cost, best_rd_in, (1 - 2 * i) * angle_delta,
3193 MAX_ANGLE_DELTA, rate, rd_stats, &best_angle_delta, &best_tx_size,
Jingning Hanc5c37032018-01-04 16:43:43 -08003194 &best_tx_type, &best_rd, best_model_rd, best_txk_type, best_blk_skip);
Hui Su259d4422017-10-13 10:08:17 -07003195 rd_cost[2 * angle_delta + i] = this_rd;
3196 if (first_try && this_rd == INT64_MAX) return best_rd;
3197 first_try = 0;
3198 if (angle_delta == 0) {
3199 rd_cost[1] = this_rd;
3200 break;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003201 }
3202 }
hui su45dc5972016-12-08 17:42:50 -08003203 }
3204
3205 assert(best_rd != INT64_MAX);
hui su0a6731f2017-04-26 15:23:47 -07003206 for (angle_delta = 1; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
hui su45dc5972016-12-08 17:42:50 -08003207 int64_t rd_thresh;
Hui Su259d4422017-10-13 10:08:17 -07003208 for (i = 0; i < 2; ++i) {
3209 int skip_search = 0;
3210 rd_thresh = best_rd + (best_rd >> 5);
3211 if (rd_cost[2 * (angle_delta + 1) + i] > rd_thresh &&
3212 rd_cost[2 * (angle_delta - 1) + i] > rd_thresh)
3213 skip_search = 1;
3214 if (!skip_search) {
Jingning Hanc5c37032018-01-04 16:43:43 -08003215 calc_rd_given_intra_angle(cpi, x, bsize, mode_cost, best_rd,
3216 (1 - 2 * i) * angle_delta, MAX_ANGLE_DELTA,
3217 rate, rd_stats, &best_angle_delta,
3218 &best_tx_size, &best_tx_type, &best_rd,
3219 best_model_rd, best_txk_type, best_blk_skip);
Yaowu Xuc27fc142016-08-22 16:08:15 -07003220 }
3221 }
3222 }
3223
3224 mbmi->tx_size = best_tx_size;
3225 mbmi->angle_delta[0] = best_angle_delta;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003226 mbmi->tx_type = best_tx_type;
Angie Chiang53bf1e92017-11-29 16:53:07 -08003227#if CONFIG_TXK_SEL
3228 memcpy(mbmi->txk_type, best_txk_type,
3229 sizeof(*best_txk_type) *
3230 (MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)));
Jingning Hanc5c37032018-01-04 16:43:43 -08003231 memcpy(x->blk_skip[0], best_blk_skip, sizeof(best_blk_skip[0]) * n4);
Angie Chiang53bf1e92017-11-29 16:53:07 -08003232#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -07003233 return best_rd;
3234}
3235
// Lookup table mapping a quantized local gradient direction to one of the
// 8 angle-histogram bins used by angle_estimation()/mode_to_angle_bin.
// Indices are sign, integer, and fractional part of the gradient value
// (the integer and fractional parts of |dx|/|dy| are each clamped by the
// callers via AOMMIN before indexing).
static const uint8_t gradient_to_angle_bin[2][7][16] = {
  {
    { 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0 },
    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 },
    { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
    { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
    { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
    { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
    { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
  },
  {
    { 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4 },
    { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3 },
    { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
    { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
    { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
    { 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
    { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
  },
};
3257
/* clang-format off */
// Histogram bin associated with each intra prediction mode. Only entries for
// modes where av1_is_directional_mode() is true are consulted (see
// angle_estimation()); the remaining entries are unused placeholders.
static const uint8_t mode_to_angle_bin[INTRA_MODES] = {
  0, 2, 6, 0, 4, 3, 5, 7, 1, 0,
  0,
};
/* clang-format on */
Yaowu Xuc27fc142016-08-22 16:08:15 -07003264
3265static void angle_estimation(const uint8_t *src, int src_stride, int rows,
hui su9cc10652017-04-27 17:22:07 -07003266 int cols, BLOCK_SIZE bsize,
3267 uint8_t *directional_mode_skip_mask) {
3268 memset(directional_mode_skip_mask, 0,
3269 INTRA_MODES * sizeof(*directional_mode_skip_mask));
Joe Young830d4ce2017-05-30 17:48:13 -07003270 // Check if angle_delta is used
3271 if (!av1_use_angle_delta(bsize)) return;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003272 uint64_t hist[DIRECTIONAL_MODES];
Yaowu Xuc27fc142016-08-22 16:08:15 -07003273 memset(hist, 0, DIRECTIONAL_MODES * sizeof(hist[0]));
3274 src += src_stride;
hui su9cc10652017-04-27 17:22:07 -07003275 int r, c, dx, dy;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003276 for (r = 1; r < rows; ++r) {
3277 for (c = 1; c < cols; ++c) {
3278 dx = src[c] - src[c - 1];
3279 dy = src[c] - src[c - src_stride];
hui su9cc10652017-04-27 17:22:07 -07003280 int index;
3281 const int temp = dx * dx + dy * dy;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003282 if (dy == 0) {
3283 index = 2;
3284 } else {
hui su9cc10652017-04-27 17:22:07 -07003285 const int sn = (dx > 0) ^ (dy > 0);
Yaowu Xuc27fc142016-08-22 16:08:15 -07003286 dx = abs(dx);
3287 dy = abs(dy);
hui su9cc10652017-04-27 17:22:07 -07003288 const int remd = (dx % dy) * 16 / dy;
3289 const int quot = dx / dy;
Yaowu Xuf883b422016-08-30 14:01:10 -07003290 index = gradient_to_angle_bin[sn][AOMMIN(quot, 6)][AOMMIN(remd, 15)];
Yaowu Xuc27fc142016-08-22 16:08:15 -07003291 }
3292 hist[index] += temp;
3293 }
3294 src += src_stride;
3295 }
3296
hui su9cc10652017-04-27 17:22:07 -07003297 int i;
3298 uint64_t hist_sum = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003299 for (i = 0; i < DIRECTIONAL_MODES; ++i) hist_sum += hist[i];
3300 for (i = 0; i < INTRA_MODES; ++i) {
hui su9cc10652017-04-27 17:22:07 -07003301 if (av1_is_directional_mode(i, bsize)) {
Urvang Joshida70e7b2016-10-19 11:48:54 -07003302 const uint8_t angle_bin = mode_to_angle_bin[i];
3303 uint64_t score = 2 * hist[angle_bin];
Yaowu Xuc27fc142016-08-22 16:08:15 -07003304 int weight = 2;
Urvang Joshida70e7b2016-10-19 11:48:54 -07003305 if (angle_bin > 0) {
3306 score += hist[angle_bin - 1];
3307 ++weight;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003308 }
Urvang Joshida70e7b2016-10-19 11:48:54 -07003309 if (angle_bin < DIRECTIONAL_MODES - 1) {
3310 score += hist[angle_bin + 1];
3311 ++weight;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003312 }
3313 if (score * ANGLE_SKIP_THRESH < hist_sum * weight)
3314 directional_mode_skip_mask[i] = 1;
3315 }
3316 }
3317}
3318
Yaowu Xuc27fc142016-08-22 16:08:15 -07003319static void highbd_angle_estimation(const uint8_t *src8, int src_stride,
hui su9cc10652017-04-27 17:22:07 -07003320 int rows, int cols, BLOCK_SIZE bsize,
Yaowu Xuc27fc142016-08-22 16:08:15 -07003321 uint8_t *directional_mode_skip_mask) {
hui su9cc10652017-04-27 17:22:07 -07003322 memset(directional_mode_skip_mask, 0,
3323 INTRA_MODES * sizeof(*directional_mode_skip_mask));
Joe Young830d4ce2017-05-30 17:48:13 -07003324 // Check if angle_delta is used
3325 if (!av1_use_angle_delta(bsize)) return;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003326 uint16_t *src = CONVERT_TO_SHORTPTR(src8);
hui su9cc10652017-04-27 17:22:07 -07003327 uint64_t hist[DIRECTIONAL_MODES];
Yaowu Xuc27fc142016-08-22 16:08:15 -07003328 memset(hist, 0, DIRECTIONAL_MODES * sizeof(hist[0]));
3329 src += src_stride;
hui su9cc10652017-04-27 17:22:07 -07003330 int r, c, dx, dy;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003331 for (r = 1; r < rows; ++r) {
3332 for (c = 1; c < cols; ++c) {
3333 dx = src[c] - src[c - 1];
3334 dy = src[c] - src[c - src_stride];
hui su9cc10652017-04-27 17:22:07 -07003335 int index;
3336 const int temp = dx * dx + dy * dy;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003337 if (dy == 0) {
3338 index = 2;
3339 } else {
hui su9cc10652017-04-27 17:22:07 -07003340 const int sn = (dx > 0) ^ (dy > 0);
Yaowu Xuc27fc142016-08-22 16:08:15 -07003341 dx = abs(dx);
3342 dy = abs(dy);
hui su9cc10652017-04-27 17:22:07 -07003343 const int remd = (dx % dy) * 16 / dy;
3344 const int quot = dx / dy;
Yaowu Xuf883b422016-08-30 14:01:10 -07003345 index = gradient_to_angle_bin[sn][AOMMIN(quot, 6)][AOMMIN(remd, 15)];
Yaowu Xuc27fc142016-08-22 16:08:15 -07003346 }
3347 hist[index] += temp;
3348 }
3349 src += src_stride;
3350 }
3351
hui su9cc10652017-04-27 17:22:07 -07003352 int i;
3353 uint64_t hist_sum = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003354 for (i = 0; i < DIRECTIONAL_MODES; ++i) hist_sum += hist[i];
3355 for (i = 0; i < INTRA_MODES; ++i) {
hui su9cc10652017-04-27 17:22:07 -07003356 if (av1_is_directional_mode(i, bsize)) {
Urvang Joshida70e7b2016-10-19 11:48:54 -07003357 const uint8_t angle_bin = mode_to_angle_bin[i];
3358 uint64_t score = 2 * hist[angle_bin];
Yaowu Xuc27fc142016-08-22 16:08:15 -07003359 int weight = 2;
Urvang Joshida70e7b2016-10-19 11:48:54 -07003360 if (angle_bin > 0) {
3361 score += hist[angle_bin - 1];
3362 ++weight;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003363 }
Urvang Joshida70e7b2016-10-19 11:48:54 -07003364 if (angle_bin < DIRECTIONAL_MODES - 1) {
3365 score += hist[angle_bin + 1];
3366 ++weight;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003367 }
3368 if (score * ANGLE_SKIP_THRESH < hist_sum * weight)
3369 directional_mode_skip_mask[i] = 1;
3370 }
3371 }
3372}
Yaowu Xuc27fc142016-08-22 16:08:15 -07003373
Hui Su00121ed2018-01-12 09:41:47 -08003374// Given selected prediction mode, search for the best tx type and size.
3375static void intra_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
3376 BLOCK_SIZE bsize, const int *bmode_costs,
Hui Su4665f092018-01-17 18:10:06 -08003377 int64_t *best_rd, int *rate, int *rate_tokenonly,
3378 int64_t *distortion, int *skippable,
3379 MB_MODE_INFO *best_mbmi, PICK_MODE_CONTEXT *ctx) {
Hui Su00121ed2018-01-12 09:41:47 -08003380 MACROBLOCKD *const xd = &x->e_mbd;
3381 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
3382 RD_STATS rd_stats;
3383 super_block_yrd(cpi, x, &rd_stats, bsize, *best_rd);
3384 if (rd_stats.rate == INT_MAX) return;
3385 int this_rate_tokenonly = rd_stats.rate;
3386 if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(mbmi->sb_type)) {
3387 // super_block_yrd above includes the cost of the tx_size in the
3388 // tokenonly rate, but for intra blocks, tx_size is always coded
3389 // (prediction granularity), so we account for it in the full rate,
3390 // not the tokenonly rate.
3391 this_rate_tokenonly -= tx_size_cost(&cpi->common, x, bsize, mbmi->tx_size);
3392 }
Hui Su4665f092018-01-17 18:10:06 -08003393 const int this_rate =
3394 rd_stats.rate +
3395 intra_mode_info_cost_y(cpi, x, mbmi, bsize, bmode_costs[mbmi->mode]);
Hui Su00121ed2018-01-12 09:41:47 -08003396 const int64_t this_rd = RDCOST(x->rdmult, this_rate, rd_stats.dist);
3397 if (this_rd < *best_rd) {
3398 *best_mbmi = *mbmi;
3399 *best_rd = this_rd;
3400 *rate = this_rate;
3401 *rate_tokenonly = this_rate_tokenonly;
3402 *distortion = rd_stats.dist;
3403 *skippable = rd_stats.skip;
3404 memcpy(ctx->blk_skip[0], x->blk_skip[0],
3405 sizeof(uint8_t) * ctx->num_4x4_blk);
3406 }
3407}
3408
// This function is used only for intra_only frames.
//
// Searches all luma intra prediction modes (plus palette and, when enabled,
// filter-intra) for the block and returns the best RD cost found. On return,
// mbmi holds the winning mode configuration and the rate/rate_tokenonly/
// distortion/skippable outputs describe the winner. best_rd acts as the
// pruning threshold throughout.
static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
                                      int *rate, int *rate_tokenonly,
                                      int64_t *distortion, int *skippable,
                                      BLOCK_SIZE bsize, int64_t best_rd,
                                      PICK_MODE_CONTEXT *ctx) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  MB_MODE_INFO *const mbmi = &mic->mbmi;
  assert(!is_inter_block(mbmi));
  int64_t best_model_rd = INT64_MAX;
  const int rows = block_size_high[bsize];
  const int cols = block_size_wide[bsize];
  int is_directional_mode;
  uint8_t directional_mode_skip_mask[INTRA_MODES];
  const int src_stride = x->plane[0].src.stride;
  const uint8_t *src = x->plane[0].src.buf;
#if CONFIG_FILTER_INTRA
  // Set once any regular mode improves on the incoming best_rd; gates the
  // filter-intra search below.
  int beat_best_rd = 0;
#endif  // CONFIG_FILTER_INTRA
  const int *bmode_costs;
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
  const int try_palette =
      av1_allow_palette(cpi->common.allow_screen_content_tools, mbmi->sb_type);
  uint8_t *best_palette_color_map =
      try_palette ? x->palette_buffer->best_palette_color_map : NULL;
  const MODE_INFO *above_mi = xd->above_mi;
  const MODE_INFO *left_mi = xd->left_mi;
  const PREDICTION_MODE A = av1_above_block_mode(above_mi);
  const PREDICTION_MODE L = av1_left_block_mode(left_mi);

  // Mode signalling cost is conditioned on the above/left neighbor modes.
#if CONFIG_KF_CTX
  const int above_ctx = intra_mode_context[A];
  const int left_ctx = intra_mode_context[L];
  bmode_costs = x->y_mode_costs[above_ctx][left_ctx];
#else
  bmode_costs = x->y_mode_costs[A][L];
#endif

  mbmi->angle_delta[0] = 0;
  // Pre-compute which directional modes are unlikely from the source
  // gradient statistics, so they can be skipped in the loop below.
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
    highbd_angle_estimation(src, src_stride, rows, cols, bsize,
                            directional_mode_skip_mask);
  else
    angle_estimation(src, src_stride, rows, cols, bsize,
                     directional_mode_skip_mask);
#if CONFIG_FILTER_INTRA
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
#endif  // CONFIG_FILTER_INTRA
  pmi->palette_size[0] = 0;

  // Speed feature: restrict the main loop to the default tx type; a final
  // full tx-type search is run on the winner at the end.
  if (cpi->sf.tx_type_search.fast_intra_tx_type_search)
    x->use_default_intra_tx_type = 1;
  else
    x->use_default_intra_tx_type = 0;

  MB_MODE_INFO best_mbmi = *mbmi;
  /* Y Search for intra prediction mode */
  for (int mode_idx = DC_PRED; mode_idx < INTRA_MODES; ++mode_idx) {
    RD_STATS this_rd_stats;
    int this_rate, this_rate_tokenonly, s;
    int64_t this_distortion, this_rd, this_model_rd;
    mbmi->mode = intra_rd_search_mode_order[mode_idx];
    mbmi->angle_delta[0] = 0;
    // Cheap model-based estimate prunes modes >1.5x worse than the best
    // model rd before the full transform search.
    this_model_rd = intra_model_yrd(cpi, x, bsize, bmode_costs[mbmi->mode]);
    if (best_model_rd != INT64_MAX &&
        this_model_rd > best_model_rd + (best_model_rd >> 1))
      continue;
    if (this_model_rd < best_model_rd) best_model_rd = this_model_rd;
    is_directional_mode = av1_is_directional_mode(mbmi->mode, bsize);
    if (is_directional_mode && directional_mode_skip_mask[mbmi->mode]) continue;
    if (is_directional_mode && av1_use_angle_delta(bsize)) {
      // Directional mode with angle deltas: sweep the deltas too.
      this_rd_stats.rate = INT_MAX;
      rd_pick_intra_angle_sby(cpi, x, &this_rate, &this_rd_stats, bsize,
                              bmode_costs[mbmi->mode], best_rd, &best_model_rd);
    } else {
      super_block_yrd(cpi, x, &this_rd_stats, bsize, best_rd);
    }
    this_rate_tokenonly = this_rd_stats.rate;
    this_distortion = this_rd_stats.dist;
    s = this_rd_stats.skip;

    // INT_MAX rate means the search above was pruned/terminated early.
    if (this_rate_tokenonly == INT_MAX) continue;

    if (!xd->lossless[mbmi->segment_id] &&
        block_signals_txsize(mbmi->sb_type)) {
      // super_block_yrd above includes the cost of the tx_size in the
      // tokenonly rate, but for intra blocks, tx_size is always coded
      // (prediction granularity), so we account for it in the full rate,
      // not the tokenonly rate.
      this_rate_tokenonly -=
          tx_size_cost(&cpi->common, x, bsize, mbmi->tx_size);
    }
    this_rate =
        this_rd_stats.rate +
        intra_mode_info_cost_y(cpi, x, mbmi, bsize, bmode_costs[mbmi->mode]);
    this_rd = RDCOST(x->rdmult, this_rate, this_distortion);
    if (this_rd < best_rd) {
      best_mbmi = *mbmi;
      best_rd = this_rd;
#if CONFIG_FILTER_INTRA
      beat_best_rd = 1;
#endif  // CONFIG_FILTER_INTRA
      *rate = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion = this_distortion;
      *skippable = s;
      memcpy(ctx->blk_skip[0], x->blk_skip[0],
             sizeof(uint8_t) * ctx->num_4x4_blk);
    }
  }

  // Palette mode competes against the regular intra modes found above.
  if (try_palette) {
    rd_pick_palette_intra_sby(cpi, x, bsize, bmode_costs[DC_PRED], &best_mbmi,
                              best_palette_color_map, &best_rd, &best_model_rd,
                              rate, rate_tokenonly, distortion, skippable, ctx,
                              ctx->blk_skip[0]);
  }

#if CONFIG_FILTER_INTRA
  if (beat_best_rd && !xd->lossless[mbmi->segment_id]) {
    if (rd_pick_filter_intra_sby(cpi, x, rate, rate_tokenonly, distortion,
                                 skippable, bsize, bmode_costs[DC_PRED],
                                 &best_rd, &best_model_rd, ctx)) {
      best_mbmi = *mbmi;
    }
  }
#endif  // CONFIG_FILTER_INTRA

  // If previous searches use only the default tx type, do an extra search for
  // the best tx type.
  if (x->use_default_intra_tx_type) {
    *mbmi = best_mbmi;
    x->use_default_intra_tx_type = 0;
    intra_block_yrd(cpi, x, bsize, bmode_costs, &best_rd, rate, rate_tokenonly,
                    distortion, skippable, &best_mbmi, ctx);
  }

  *mbmi = best_mbmi;
  return best_rd;
}
3550
Yue Chena1e48dc2016-08-29 17:29:33 -07003551// Return value 0: early termination triggered, no valid rd cost available;
3552// 1: rd cost values are valid.
Angie Chiang284d7772016-11-08 11:06:45 -08003553static int super_block_uvrd(const AV1_COMP *const cpi, MACROBLOCK *x,
3554 RD_STATS *rd_stats, BLOCK_SIZE bsize,
3555 int64_t ref_best_rd) {
Yue Chena1e48dc2016-08-29 17:29:33 -07003556 MACROBLOCKD *const xd = &x->e_mbd;
3557 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
Luc Trudeau52301a22017-11-29 11:02:55 -05003558 struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_U];
Debargha Mukherjee80592c72017-12-16 08:23:34 -08003559 const TX_SIZE uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
Yue Chena1e48dc2016-08-29 17:29:33 -07003560 int plane;
Yue Chena1e48dc2016-08-29 17:29:33 -07003561 int is_cost_valid = 1;
Angie Chiang284d7772016-11-08 11:06:45 -08003562 av1_init_rd_stats(rd_stats);
Yue Chena1e48dc2016-08-29 17:29:33 -07003563
3564 if (ref_best_rd < 0) is_cost_valid = 0;
Jingning Han9ce464c2017-02-20 15:36:30 -08003565
Jingning Han9ce464c2017-02-20 15:36:30 -08003566 if (x->skip_chroma_rd) return is_cost_valid;
Jingning Han2d2dac22017-04-11 09:41:10 -07003567
Luc Trudeau52301a22017-11-29 11:02:55 -05003568 bsize = scale_chroma_bsize(bsize, pd->subsampling_x, pd->subsampling_y);
Jingning Han9ce464c2017-02-20 15:36:30 -08003569
Yue Chena1e48dc2016-08-29 17:29:33 -07003570 if (is_inter_block(mbmi) && is_cost_valid) {
Yue Chena1e48dc2016-08-29 17:29:33 -07003571 for (plane = 1; plane < MAX_MB_PLANE; ++plane)
3572 av1_subtract_plane(x, bsize, plane);
3573 }
Yue Chena1e48dc2016-08-29 17:29:33 -07003574
Yushin Cho09de28b2016-06-21 14:51:23 -07003575 if (is_cost_valid) {
3576 for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
Angie Chiang7c2b7f22016-11-07 16:00:00 -08003577 RD_STATS pn_rd_stats;
3578 txfm_rd_in_plane(x, cpi, &pn_rd_stats, ref_best_rd, plane, bsize,
3579 uv_tx_size, cpi->sf.use_fast_coef_costing);
3580 if (pn_rd_stats.rate == INT_MAX) {
Yushin Cho09de28b2016-06-21 14:51:23 -07003581 is_cost_valid = 0;
3582 break;
3583 }
Angie Chiang284d7772016-11-08 11:06:45 -08003584 av1_merge_rd_stats(rd_stats, &pn_rd_stats);
Urvang Joshi70006e42017-06-14 16:08:55 -07003585 if (RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) > ref_best_rd &&
3586 RDCOST(x->rdmult, 0, rd_stats->sse) > ref_best_rd) {
Yushin Cho09de28b2016-06-21 14:51:23 -07003587 is_cost_valid = 0;
3588 break;
3589 }
Yue Chena1e48dc2016-08-29 17:29:33 -07003590 }
3591 }
3592
3593 if (!is_cost_valid) {
3594 // reset cost value
Angie Chiang284d7772016-11-08 11:06:45 -08003595 av1_invalid_rd_stats(rd_stats);
Yue Chena1e48dc2016-08-29 17:29:33 -07003596 }
3597
3598 return is_cost_valid;
3599}
3600
Yaowu Xuf883b422016-08-30 14:01:10 -07003601void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
3602 int blk_row, int blk_col, int plane, int block,
Angie Chiang77368af2017-03-23 16:22:07 -07003603 int plane_bsize, const ENTROPY_CONTEXT *a,
Alexander Bokovc5ddf062017-10-17 16:41:46 -07003604 const ENTROPY_CONTEXT *l, RD_STATS *rd_stats, int fast,
3605 TX_SIZE_RD_INFO *rd_info_array) {
Angie Chiang22ba7512016-10-20 17:10:33 -07003606 const AV1_COMMON *const cm = &cpi->common;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003607 MACROBLOCKD *xd = &x->e_mbd;
3608 const struct macroblock_plane *const p = &x->plane[plane];
3609 struct macroblockd_plane *const pd = &xd->plane[plane];
Jingning Han243b66b2017-06-23 12:11:47 -07003610#if CONFIG_TXK_SEL
Hui Su4a5c6cf2018-01-24 17:32:01 -08003611 search_txk_type(cpi, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
3612 a, l, 0, rd_stats);
Jingning Han243b66b2017-06-23 12:11:47 -07003613 return;
3614#endif
Debargha Mukherjeed2cfbef2017-12-03 16:15:27 -08003615 // This function is used only for inter
3616 assert(is_inter_block(&xd->mi[0]->mbmi));
Yaowu Xuc27fc142016-08-22 16:08:15 -07003617 int64_t tmp;
3618 tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
Luc Trudeau005feb62017-02-22 13:34:01 -05003619 PLANE_TYPE plane_type = get_plane_type(plane);
Luc Trudeau2eb9b842017-12-13 11:19:16 -05003620 TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col, tx_size);
Urvang Joshi03f6fdc2016-10-14 15:53:39 -07003621 const SCAN_ORDER *const scan_order =
Angie Chiangbd99b382017-06-20 15:11:16 -07003622 get_scan(cm, tx_size, tx_type, &xd->mi[0]->mbmi);
Yaowu Xuc27fc142016-08-22 16:08:15 -07003623 BLOCK_SIZE txm_bsize = txsize_to_bsize[tx_size];
Jingning Han9fdc4222016-10-27 21:32:19 -07003624 int bh = block_size_high[txm_bsize];
3625 int bw = block_size_wide[txm_bsize];
Yaowu Xuc27fc142016-08-22 16:08:15 -07003626 int src_stride = p->src.stride;
Jingning Han9ca05b72017-01-03 14:41:36 -08003627 uint8_t *src =
3628 &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
3629 uint8_t *dst =
3630 &pd->dst
3631 .buf[(blk_row * pd->dst.stride + blk_col) << tx_size_wide_log2[0]];
Yaowu Xuc27fc142016-08-22 16:08:15 -07003632 DECLARE_ALIGNED(16, uint16_t, rec_buffer16[MAX_TX_SQUARE]);
3633 uint8_t *rec_buffer;
Timothy B. Terriberryd62e2a32017-06-10 16:04:21 -07003634 const int diff_stride = block_size_wide[plane_bsize];
Jingning Han9ca05b72017-01-03 14:41:36 -08003635 const int16_t *diff =
3636 &p->src_diff[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
Angie Chiangd81fdb42016-11-03 12:20:58 -07003637 int txb_coeff_cost;
Cheng Chen82775f62018-01-18 12:09:54 -08003638 int rate_cost = 0;
Jingning Hand3fada82016-11-22 10:46:55 -08003639
3640 assert(tx_size < TX_SIZES_ALL);
Yaowu Xuc27fc142016-08-22 16:08:15 -07003641
Jingning Han45027c62017-12-11 11:47:15 -08003642#if CONFIG_LV_MAP
3643 TXB_CTX txb_ctx;
3644 get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx);
3645 uint16_t cur_joint_ctx = (txb_ctx.dc_sign_ctx << 8) + txb_ctx.txb_skip_ctx;
3646#else
Alexander Bokovc5ddf062017-10-17 16:41:46 -07003647 const int coeff_ctx = get_entropy_context(tx_size, a, l);
3648 const int coeff_ctx_one_byte = combine_entropy_contexts(*a, *l);
3649 const uint8_t cur_joint_ctx = (coeff_ctx << 2) + coeff_ctx_one_byte;
Jingning Han45027c62017-12-11 11:47:15 -08003650#endif
Angie Chiang77368af2017-03-23 16:22:07 -07003651
Monty Montgomery4a05a582017-11-01 21:21:07 -04003652 // Note: tmp below is pixel distortion, not TX domain
Jingning Han1a7f0a82017-07-27 09:48:05 -07003653 tmp = pixel_diff_dist(x, plane, diff, diff_stride, blk_row, blk_col,
3654 plane_bsize, txm_bsize);
3655
Jingning Han1a7f0a82017-07-27 09:48:05 -07003656 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
3657 tmp = ROUND_POWER_OF_TWO(tmp, (xd->bd - 8) * 2);
Alexander Bokovc5ddf062017-10-17 16:41:46 -07003658
Jingning Han1a7f0a82017-07-27 09:48:05 -07003659 rd_stats->sse += tmp << 4;
3660
3661 if (rd_stats->invalid_rate) {
3662 rd_stats->dist += tmp << 4;
3663 rd_stats->rate += rd_stats->zero_rate;
3664 rd_stats->skip = 1;
3665 return;
3666 }
3667
Alexander Bokovc5ddf062017-10-17 16:41:46 -07003668 // Look up RD and terminate early in case when we've already processed exactly
3669 // the same residual with exactly the same entropy context.
3670 if (rd_info_array != NULL && rd_info_array[tx_type].valid &&
3671 rd_info_array[tx_type].entropy_context == cur_joint_ctx &&
3672 rd_info_array[tx_type].fast == fast) {
3673 rd_stats->dist += rd_info_array[tx_type].dist;
3674 rd_stats->rate += rd_info_array[tx_type].rate;
Hui Su8c2b9132017-12-09 10:40:15 -08003675 rd_stats->skip &= rd_info_array[tx_type].eob == 0;
3676 p->eobs[block] = rd_info_array[tx_type].eob;
Jingning Hand7e99112017-12-13 09:47:45 -08003677#if CONFIG_LV_MAP
3678 p->txb_entropy_ctx[block] = rd_info_array[tx_type].txb_entropy_ctx;
3679#endif
Alexander Bokovc5ddf062017-10-17 16:41:46 -07003680 return;
3681 }
3682
3683 int64_t cur_dist = 0;
3684 int cur_rate = 0;
3685 uint8_t cur_skip = 1;
3686
Yaowu Xud3e7c682017-12-21 14:08:25 -08003687 // TODO(any): Use av1_dist_block to compute distortion
Yaowu Xuc27fc142016-08-22 16:08:15 -07003688 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
3689 rec_buffer = CONVERT_TO_BYTEPTR(rec_buffer16);
Yaowu Xuf883b422016-08-30 14:01:10 -07003690 aom_highbd_convolve_copy(dst, pd->dst.stride, rec_buffer, MAX_TX_SIZE, NULL,
Jingning Han9fdc4222016-10-27 21:32:19 -07003691 0, NULL, 0, bw, bh, xd->bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07003692 } else {
3693 rec_buffer = (uint8_t *)rec_buffer16;
Yaowu Xuf883b422016-08-30 14:01:10 -07003694 aom_convolve_copy(dst, pd->dst.stride, rec_buffer, MAX_TX_SIZE, NULL, 0,
Jingning Han9fdc4222016-10-27 21:32:19 -07003695 NULL, 0, bw, bh);
Yaowu Xuc27fc142016-08-22 16:08:15 -07003696 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07003697
Debargha Mukherjee6cf2b462018-01-12 15:53:50 -08003698 if (cpi->sf.optimize_coefficients != FULL_TRELLIS_OPT) {
3699 av1_xform_quant(
3700 cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
3701 USE_B_QUANT_NO_TRELLIS ? AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP);
Jingning Han3bce7542017-07-25 10:53:57 -07003702
Cheng Chen82775f62018-01-18 12:09:54 -08003703 rate_cost = av1_cost_coeffs(cpi, x, plane, blk_row, blk_col, block, tx_size,
3704 scan_order, a, l, 0);
Sarah Parker792c2ec2017-12-21 16:08:22 -08003705 } else {
3706 av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
3707 AV1_XFORM_QUANT_FP);
Jingning Han3bce7542017-07-25 10:53:57 -07003708
Sebastien Alaiwan58596362018-01-26 10:11:35 +01003709 // TX-domain results need to shift down to Q2/D10 to match pixel
3710 // domain distortion values which are in Q2^2
Sarah Parker792c2ec2017-12-21 16:08:22 -08003711 const int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2;
Sarah Parker792c2ec2017-12-21 16:08:22 -08003712 tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
3713 const int buffer_length = av1_get_max_eob(tx_size);
3714 int64_t tmp_dist, tmp_sse;
Yunqing Wang24d2d5d2017-09-20 09:45:13 -07003715#if CONFIG_DIST_8X8
Sarah Parker792c2ec2017-12-21 16:08:22 -08003716 int blk_w = block_size_wide[plane_bsize];
3717 int blk_h = block_size_high[plane_bsize];
3718 int disable_early_skip =
3719 x->using_dist_8x8 && plane == 0 && blk_w >= 8 && blk_h >= 8 &&
3720 (tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4) &&
3721 x->tune_metric != AOM_TUNE_PSNR;
Yunqing Wang24d2d5d2017-09-20 09:45:13 -07003722#endif // CONFIG_DIST_8X8
3723
Sarah Parker792c2ec2017-12-21 16:08:22 -08003724 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
3725 tmp_dist = av1_highbd_block_error(coeff, dqcoeff, buffer_length, &tmp_sse,
3726 xd->bd);
3727 else
Sarah Parker792c2ec2017-12-21 16:08:22 -08003728 tmp_dist = av1_block_error(coeff, dqcoeff, buffer_length, &tmp_sse);
Debargha Mukherjeeb02d2f32017-10-03 11:06:40 -07003729
Sarah Parker792c2ec2017-12-21 16:08:22 -08003730 tmp_dist = RIGHT_SIGNED_SHIFT(tmp_dist, shift);
Jingning Han3bce7542017-07-25 10:53:57 -07003731
Sarah Parker792c2ec2017-12-21 16:08:22 -08003732 if (
Yushin Choa4817a62017-07-27 13:09:43 -07003733#if CONFIG_DIST_8X8
Sarah Parker792c2ec2017-12-21 16:08:22 -08003734 disable_early_skip ||
Yushin Choa4817a62017-07-27 13:09:43 -07003735#endif
Sarah Parker792c2ec2017-12-21 16:08:22 -08003736 RDCOST(x->rdmult, 0, tmp_dist) < rd_stats->ref_rdcost) {
Michelle Findlay-Olynykfbab0622017-12-13 14:10:56 -08003737 av1_optimize_b(cpi, x, plane, blk_row, blk_col, block, plane_bsize,
Cheng Chen82775f62018-01-18 12:09:54 -08003738 tx_size, a, l, fast, &rate_cost);
3739
3740 const int eob = x->plane[plane].eobs[block];
3741 if (eob) {
3742#if CONFIG_TXK_SEL
3743 rate_cost += av1_tx_type_cost(cm, x, xd, xd->mi[0]->mbmi.sb_type, plane,
3744 tx_size, tx_type);
3745#endif
3746 } else {
3747 rate_cost = av1_cost_coeffs(cpi, x, plane, blk_row, blk_col, block,
3748 tx_size, scan_order, a, l, 0);
3749 }
Sarah Parker792c2ec2017-12-21 16:08:22 -08003750 } else {
3751 rd_stats->rate += rd_stats->zero_rate;
3752 rd_stats->dist += tmp << 4;
3753 rd_stats->skip = 1;
3754 rd_stats->invalid_rate = 1;
3755 return;
3756 }
Jingning Han3bce7542017-07-25 10:53:57 -07003757 }
Jingning Han3bce7542017-07-25 10:53:57 -07003758
Angie Chiang41fffae2017-04-03 10:33:18 -07003759 const int eob = p->eobs[block];
Yaowu Xuc27fc142016-08-22 16:08:15 -07003760
Sebastien Alaiwan9f001f32017-11-28 16:32:33 +01003761 av1_inverse_transform_block(xd, dqcoeff, plane, tx_type, tx_size, rec_buffer,
3762 MAX_TX_SIZE, eob, cm->reduced_tx_set_used);
Angie Chiang41fffae2017-04-03 10:33:18 -07003763 if (eob > 0) {
Yushin Chob7b60c52017-07-14 16:18:52 -07003764#if CONFIG_DIST_8X8
Yushin Cho55104332017-08-14 16:15:43 -07003765 if (x->using_dist_8x8 && plane == 0 && (bw < 8 && bh < 8)) {
Yushin Cho04749122017-05-25 14:19:07 -07003766 // Save sub8x8 luma decoded pixels
3767 // since 8x8 luma decoded pixels are not available for daala-dist
3768 // after recursive split of BLOCK_8x8 is done.
3769 const int pred_stride = block_size_wide[plane_bsize];
3770 const int pred_idx = (blk_row * pred_stride + blk_col)
3771 << tx_size_wide_log2[0];
3772 int16_t *decoded = &pd->pred[pred_idx];
3773 int i, j;
3774
Yushin Cho8ab875d2017-06-23 14:47:21 -07003775 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
3776 for (j = 0; j < bh; j++)
3777 for (i = 0; i < bw; i++)
3778 decoded[j * pred_stride + i] =
3779 CONVERT_TO_SHORTPTR(rec_buffer)[j * MAX_TX_SIZE + i];
3780 } else {
Yushin Cho8ab875d2017-06-23 14:47:21 -07003781 for (j = 0; j < bh; j++)
3782 for (i = 0; i < bw; i++)
3783 decoded[j * pred_stride + i] = rec_buffer[j * MAX_TX_SIZE + i];
Yushin Cho8ab875d2017-06-23 14:47:21 -07003784 }
Yushin Cho04749122017-05-25 14:19:07 -07003785 }
Yushin Chob7b60c52017-07-14 16:18:52 -07003786#endif // CONFIG_DIST_8X8
Yushin Cho75b01002017-06-21 13:43:57 -07003787 tmp = pixel_dist(cpi, x, plane, src, src_stride, rec_buffer, MAX_TX_SIZE,
3788 blk_row, blk_col, plane_bsize, txm_bsize);
Yaowu Xuc27fc142016-08-22 16:08:15 -07003789 }
Alexander Bokovc5ddf062017-10-17 16:41:46 -07003790 cur_dist = tmp * 16;
Cheng Chen82775f62018-01-18 12:09:54 -08003791 txb_coeff_cost = rate_cost;
Alexander Bokovc5ddf062017-10-17 16:41:46 -07003792 cur_rate = txb_coeff_cost;
3793 cur_skip = (eob == 0);
3794
3795 // Save RD results for possible reuse in future.
3796 if (rd_info_array != NULL) {
3797 rd_info_array[tx_type].valid = 1;
3798 rd_info_array[tx_type].entropy_context = cur_joint_ctx;
3799 rd_info_array[tx_type].fast = fast;
3800 rd_info_array[tx_type].dist = cur_dist;
3801 rd_info_array[tx_type].rate = cur_rate;
Hui Su8c2b9132017-12-09 10:40:15 -08003802 rd_info_array[tx_type].eob = eob;
Jingning Hand7e99112017-12-13 09:47:45 -08003803#if CONFIG_LV_MAP
3804 rd_info_array[tx_type].txb_entropy_ctx = p->txb_entropy_ctx[block];
3805#endif
Alexander Bokovc5ddf062017-10-17 16:41:46 -07003806 }
3807
3808 rd_stats->dist += cur_dist;
3809 rd_stats->rate += cur_rate;
3810 rd_stats->skip &= cur_skip;
Jingning Han63cbf342016-11-09 15:37:48 -08003811
Angie Chiangd81fdb42016-11-03 12:20:58 -07003812#if CONFIG_RD_DEBUG
Angie Chiange94556b2016-11-09 10:59:30 -08003813 av1_update_txb_coeff_cost(rd_stats, plane, tx_size, blk_row, blk_col,
3814 txb_coeff_cost);
Fergus Simpson4063a682017-02-28 16:52:22 -08003815#endif // CONFIG_RD_DEBUG
Yaowu Xuc27fc142016-08-22 16:08:15 -07003816}
3817
// Recursively searches the rate-distortion-optimal transform partitioning for
// one transform block: it evaluates coding the block whole at |tx_size|
// versus splitting it into four sub-blocks (recursing one level deeper), and
// commits whichever choice has the lower RD cost into the entropy contexts,
// mbmi->inter_tx_size[], mbmi->tx_size and x->blk_skip[].
//
// blk_row/blk_col        position of this block within the plane, in 4x4 units.
// plane/block            plane index and linear coefficient-block index.
// tx_size/depth          transform size tried at this node and recursion depth.
// ta/tl                  above/left entropy contexts, updated for the winner.
// tx_above/tx_left       txfm-partition contexts, updated for the winner.
// rd_stats               output RD stats for the chosen partitioning.
// ref_best_rd            best RD found so far; used for early termination.
// is_cost_valid          cleared to 0 when no valid cost could be computed.
// fast                   nonzero selects the fast (non-trellis) coeff search.
// tx_split_prune_flag    nonzero prunes the split branch entirely.
// rd_info_node           optional cache node for reusing per-tx-type RD
//                        results; children[] mirror the 4-way split.
static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
                            int blk_col, int plane, int block, TX_SIZE tx_size,
                            int depth, BLOCK_SIZE plane_bsize,
                            ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl,
                            TXFM_CONTEXT *tx_above, TXFM_CONTEXT *tx_left,
                            RD_STATS *rd_stats, int64_t ref_best_rd,
                            int *is_cost_valid, int fast,
                            int tx_split_prune_flag,
                            TX_SIZE_RD_INFO_NODE *rd_info_node) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int tx_row = blk_row >> (1 - pd->subsampling_y);
  const int tx_col = blk_col >> (1 - pd->subsampling_x);
  // View into the per-mi transform-size grid, anchored at this block so that
  // inter_tx_size[0][0] is this node's entry.
  TX_SIZE(*const inter_tx_size)
  [MAX_MIB_SIZE] =
      (TX_SIZE(*)[MAX_MIB_SIZE]) & mbmi->inter_tx_size[tx_row][tx_col];
  const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
  const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
  const int bw = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
  // RD cost of coding this block whole at tx_size (no split).
  int64_t this_rd = INT64_MAX;
  ENTROPY_CONTEXT *pta = ta + blk_col;
  ENTROPY_CONTEXT *ptl = tl + blk_row;
  int ctx = txfm_partition_context(tx_above + blk_col, tx_left + blk_row,
                                   mbmi->sb_type, tx_size);
  // RD cost of the 4-way split alternative.
  int64_t sum_rd = INT64_MAX;
  int tmp_eob = 0;
  int zero_blk_rate;
  RD_STATS sum_rd_stats;
#if CONFIG_TXK_SEL
  TX_TYPE best_tx_type = TX_TYPES;
  int txk_idx = (blk_row << MAX_MIB_SIZE_LOG2) + blk_col;
#endif

  av1_init_rd_stats(&sum_rd_stats);

  assert(tx_size < TX_SIZES_ALL);

  if (ref_best_rd < 0) {
    *is_cost_valid = 0;
    return;
  }

  av1_init_rd_stats(rd_stats);

  // Blocks entirely outside the visible frame area carry no cost.
  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;

#if CONFIG_LV_MAP
  TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
  TXB_CTX txb_ctx;
  get_txb_ctx(plane_bsize, tx_size, plane, pta, ptl, &txb_ctx);

  // Rate of signalling an all-zero (skipped) transform block in this context.
  zero_blk_rate = x->coeff_costs[txs_ctx][get_plane_type(plane)]
                      .txb_skip_cost[txb_ctx.txb_skip_ctx][1];
#else
  TX_SIZE tx_size_ctx = get_txsize_entropy_ctx(tx_size);
  int coeff_ctx = get_entropy_context(tx_size, pta, ptl);
  zero_blk_rate =
      x->token_head_costs[tx_size_ctx][pd->plane_type][1][0][coeff_ctx][0];
#endif

  rd_stats->ref_rdcost = ref_best_rd;
  rd_stats->zero_rate = zero_blk_rate;
  // --- Option 1: code the whole block at the current tx_size. ---
  if (cpi->common.tx_mode == TX_MODE_SELECT || tx_size == TX_4X4) {
    inter_tx_size[0][0] = tx_size;
    av1_tx_block_rd_b(
        cpi, x, tx_size, blk_row, blk_col, plane, block, plane_bsize, pta, ptl,
        rd_stats, fast,
        rd_info_node != NULL ? rd_info_node->rd_info_array : NULL);
    if (rd_stats->rate == INT_MAX) return;

    // If forcing all coefficients to zero is no worse in RD terms, take the
    // skip path (not available in lossless mode, where residuals must be
    // coded exactly).
    if ((RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) >=
             RDCOST(x->rdmult, zero_blk_rate, rd_stats->sse) ||
         rd_stats->skip == 1) &&
        !xd->lossless[mbmi->segment_id]) {
#if CONFIG_RD_DEBUG
      av1_update_txb_coeff_cost(rd_stats, plane, tx_size, blk_row, blk_col,
                                zero_blk_rate - rd_stats->rate);
#endif  // CONFIG_RD_DEBUG
      rd_stats->rate = zero_blk_rate;
      rd_stats->dist = rd_stats->sse;
      rd_stats->skip = 1;
      x->blk_skip[plane][blk_row * bw + blk_col] = 1;
      p->eobs[block] = 0;
#if CONFIG_TXK_SEL
      mbmi->txk_type[txk_idx] = DCT_DCT;
#endif
    } else {
      x->blk_skip[plane][blk_row * bw + blk_col] = 0;
      rd_stats->skip = 0;
    }

    // Add the cost of signalling "no split" where a split was possible.
    if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH)
      rd_stats->rate += x->txfm_partition_cost[ctx][0];
    this_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
    // Remember the no-split entropy state so it can be restored if the
    // no-split option wins after the split recursion has clobbered it.
#if CONFIG_LV_MAP
    tmp_eob = p->txb_entropy_ctx[block];
#else
    tmp_eob = p->eobs[block];
#endif

#if CONFIG_TXK_SEL
    best_tx_type = mbmi->txk_type[txk_idx];
#endif
  }

  // --- Option 2: split into four sub-blocks and recurse. ---
  if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH && tx_split_prune_flag == 0) {
    const TX_SIZE sub_txs = sub_tx_size_map[1][tx_size];
    const int bsw = tx_size_wide_unit[sub_txs];
    const int bsh = tx_size_high_unit[sub_txs];
    int sub_step = bsw * bsh;
    RD_STATS this_rd_stats;
    int this_cost_valid = 1;
    int64_t tmp_rd = 0;
#if CONFIG_DIST_8X8
    int sub8x8_eob[4] = { 0, 0, 0, 0 };
#endif
    // Start from the cost of signalling the split itself.
    sum_rd_stats.rate = x->txfm_partition_cost[ctx][1];

    assert(tx_size < TX_SIZES_ALL);

    // Tighten the recursion budget with the no-split result.
    ref_best_rd = AOMMIN(this_rd, ref_best_rd);

    int blk_idx = 0;
    for (int r = 0; r < tx_size_high_unit[tx_size]; r += bsh) {
      for (int c = 0; c < tx_size_wide_unit[tx_size]; c += bsw, ++blk_idx) {
        const int offsetr = blk_row + r;
        const int offsetc = blk_col + c;
        if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
        assert(blk_idx < 4);
        select_tx_block(
            cpi, x, offsetr, offsetc, plane, block, sub_txs, depth + 1,
            plane_bsize, ta, tl, tx_above, tx_left, &this_rd_stats,
            ref_best_rd - tmp_rd, &this_cost_valid, fast, 0,
            (rd_info_node != NULL) ? rd_info_node->children[blk_idx] : NULL);

        // Early exits below are suppressed in dist-8x8 mode because the
        // distortion is recomputed over the full 8x8 after the loop.
#if CONFIG_DIST_8X8
        if (!x->using_dist_8x8)
#endif
          if (!this_cost_valid) break;
#if CONFIG_DIST_8X8
        if (x->using_dist_8x8 && plane == 0 && tx_size == TX_8X8) {
          sub8x8_eob[2 * (r / bsh) + (c / bsw)] = p->eobs[block];
        }
#endif  // CONFIG_DIST_8X8
        av1_merge_rd_stats(&sum_rd_stats, &this_rd_stats);

        tmp_rd = RDCOST(x->rdmult, sum_rd_stats.rate, sum_rd_stats.dist);
#if CONFIG_DIST_8X8
        if (!x->using_dist_8x8)
#endif
          // Split already worse than coding whole: stop recursing.
          if (this_rd < tmp_rd) break;
        block += sub_step;
      }
    }
#if CONFIG_DIST_8X8
    // For dist-8x8 tuning, replace the accumulated sub-block SSE/distortion
    // with metrics computed over the whole 8x8 luma block, reconstructing an
    // 8x8 prediction image from saved sub-block pixels (coded sub-blocks) or
    // the current destination (skipped sub-blocks).
    if (x->using_dist_8x8 && this_cost_valid && plane == 0 &&
        tx_size == TX_8X8) {
      const int src_stride = p->src.stride;
      const int dst_stride = pd->dst.stride;

      const uint8_t *src =
          &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
      const uint8_t *dst =
          &pd->dst
               .buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];

      int64_t dist_8x8;
      int qindex = x->qindex;
      const int pred_stride = block_size_wide[plane_bsize];
      const int pred_idx = (blk_row * pred_stride + blk_col)
                           << tx_size_wide_log2[0];
      int16_t *pred = &pd->pred[pred_idx];
      int i, j;
      int row, col;

      uint8_t *pred8;
      DECLARE_ALIGNED(16, uint16_t, pred8_16[8 * 8]);

      // SSE of the full 8x8 against the source (x16 to match the Q scale of
      // the accumulated pixel-domain distortions).
      dist_8x8 = av1_dist_8x8(cpi, x, src, src_stride, dst, dst_stride,
                              BLOCK_8X8, 8, 8, 8, 8, qindex) *
                 16;

#ifdef DEBUG_DIST_8X8
      if (x->tune_metric == AOM_TUNE_PSNR && xd->bd == 8)
        assert(sum_rd_stats.sse == dist_8x8);
#endif  // DEBUG_DIST_8X8

      sum_rd_stats.sse = dist_8x8;

      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
        pred8 = CONVERT_TO_BYTEPTR(pred8_16);
      else
        pred8 = (uint8_t *)pred8_16;

      // Assemble the 8x8 reconstruction: per 4x4 quadrant, take the saved
      // decoded pixels when the quadrant had coefficients (eob > 0), else the
      // already-reconstructed destination pixels.
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        for (row = 0; row < 2; ++row) {
          for (col = 0; col < 2; ++col) {
            int idx = row * 2 + col;
            int eob = sub8x8_eob[idx];

            if (eob > 0) {
              for (j = 0; j < 4; j++)
                for (i = 0; i < 4; i++)
                  CONVERT_TO_SHORTPTR(pred8)
                  [(row * 4 + j) * 8 + 4 * col + i] =
                      pred[(row * 4 + j) * pred_stride + 4 * col + i];
            } else {
              for (j = 0; j < 4; j++)
                for (i = 0; i < 4; i++)
                  CONVERT_TO_SHORTPTR(pred8)
                  [(row * 4 + j) * 8 + 4 * col + i] = CONVERT_TO_SHORTPTR(
                      dst)[(row * 4 + j) * dst_stride + 4 * col + i];
            }
          }
        }
      } else {
        for (row = 0; row < 2; ++row) {
          for (col = 0; col < 2; ++col) {
            int idx = row * 2 + col;
            int eob = sub8x8_eob[idx];

            if (eob > 0) {
              for (j = 0; j < 4; j++)
                for (i = 0; i < 4; i++)
                  pred8[(row * 4 + j) * 8 + 4 * col + i] =
                      (uint8_t)pred[(row * 4 + j) * pred_stride + 4 * col + i];
            } else {
              for (j = 0; j < 4; j++)
                for (i = 0; i < 4; i++)
                  pred8[(row * 4 + j) * 8 + 4 * col + i] =
                      dst[(row * 4 + j) * dst_stride + 4 * col + i];
            }
          }
        }
      }
      dist_8x8 = av1_dist_8x8(cpi, x, src, src_stride, pred8, 8, BLOCK_8X8, 8,
                              8, 8, 8, qindex) *
                 16;

#ifdef DEBUG_DIST_8X8
      if (x->tune_metric == AOM_TUNE_PSNR && xd->bd == 8)
        assert(sum_rd_stats.dist == dist_8x8);
#endif  // DEBUG_DIST_8X8

      sum_rd_stats.dist = dist_8x8;
      tmp_rd = RDCOST(x->rdmult, sum_rd_stats.rate, sum_rd_stats.dist);
    }
#endif  // CONFIG_DIST_8X8
    if (this_cost_valid) sum_rd = tmp_rd;
  }

  // --- Commit the cheaper of the two options. ---
  if (this_rd < sum_rd) {
    // No-split wins: restore its entropy state and propagate the chosen
    // tx_size through the per-mi grid and the partition contexts.
    int idx, idy;
    TX_SIZE tx_size_selected = tx_size;

#if CONFIG_LV_MAP
    p->txb_entropy_ctx[block] = tmp_eob;
#else
    p->eobs[block] = tmp_eob;
#endif

    av1_set_txb_context(x, plane, block, tx_size_selected, pta, ptl);

    txfm_partition_update(tx_above + blk_col, tx_left + blk_row, tx_size,
                          tx_size);
    inter_tx_size[0][0] = tx_size_selected;
    for (idy = 0; idy < AOMMAX(1, tx_size_high_unit[tx_size] / 2); ++idy)
      for (idx = 0; idx < AOMMAX(1, tx_size_wide_unit[tx_size] / 2); ++idx)
        inter_tx_size[idy][idx] = tx_size_selected;
    mbmi->tx_size = tx_size_selected;
#if CONFIG_TXK_SEL
    mbmi->txk_type[txk_idx] = best_tx_type;
#endif
    if (this_rd == INT64_MAX) *is_cost_valid = 0;
    x->blk_skip[plane][blk_row * bw + blk_col] = rd_stats->skip;
  } else {
    // Split wins: the recursion already committed the sub-block state;
    // report its accumulated stats.
    *rd_stats = sum_rd_stats;
    if (sum_rd == INT64_MAX) *is_cost_valid = 0;
  }
}
4100
Debargha Mukherjee51666862017-10-24 14:29:13 -07004101static void select_inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
4102 RD_STATS *rd_stats, BLOCK_SIZE bsize,
Alexander Bokov79a37242017-09-29 11:25:55 -07004103 int64_t ref_best_rd, int fast,
Alexander Bokovc5ddf062017-10-17 16:41:46 -07004104 int tx_split_prune_flag,
4105 TX_SIZE_RD_INFO_NODE *rd_info_tree) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07004106 MACROBLOCKD *const xd = &x->e_mbd;
4107 int is_cost_valid = 1;
4108 int64_t this_rd = 0;
4109
4110 if (ref_best_rd < 0) is_cost_valid = 0;
4111
Angie Chiangc0feea82016-11-03 15:36:18 -07004112 av1_init_rd_stats(rd_stats);
Yaowu Xuc27fc142016-08-22 16:08:15 -07004113
4114 if (is_cost_valid) {
4115 const struct macroblockd_plane *const pd = &xd->plane[0];
4116 const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
Hui Suae9ec672018-01-11 16:32:19 -08004117 const int mi_width = mi_size_wide[plane_bsize];
4118 const int mi_height = mi_size_high[plane_bsize];
Yue Chen0797a202017-10-27 17:24:56 -07004119 const TX_SIZE max_tx_size = get_max_rect_tx_size(plane_bsize, 1);
Jingning Han18482fe2016-11-02 17:01:58 -07004120 const int bh = tx_size_high_unit[max_tx_size];
4121 const int bw = tx_size_wide_unit[max_tx_size];
Yaowu Xuc27fc142016-08-22 16:08:15 -07004122 int idx, idy;
4123 int block = 0;
Jingning Han18482fe2016-11-02 17:01:58 -07004124 int step = tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size];
Yaowu Xuc27fc142016-08-22 16:08:15 -07004125 ENTROPY_CONTEXT ctxa[2 * MAX_MIB_SIZE];
4126 ENTROPY_CONTEXT ctxl[2 * MAX_MIB_SIZE];
Jingning Han331662e2017-05-30 17:03:32 -07004127 TXFM_CONTEXT tx_above[MAX_MIB_SIZE * 2];
4128 TXFM_CONTEXT tx_left[MAX_MIB_SIZE * 2];
Yaowu Xuc27fc142016-08-22 16:08:15 -07004129
Angie Chiangb5dda482016-11-02 16:19:58 -07004130 RD_STATS pn_rd_stats;
Debargha Mukherjeeedc73462017-10-31 15:13:32 -07004131 const int init_depth = get_search_init_depth(mi_width, mi_height, &cpi->sf);
Angie Chiangc0feea82016-11-03 15:36:18 -07004132 av1_init_rd_stats(&pn_rd_stats);
Yaowu Xuc27fc142016-08-22 16:08:15 -07004133
Jingning Han9ca05b72017-01-03 14:41:36 -08004134 av1_get_entropy_contexts(bsize, 0, pd, ctxa, ctxl);
Jingning Han331662e2017-05-30 17:03:32 -07004135 memcpy(tx_above, xd->above_txfm_context, sizeof(TXFM_CONTEXT) * mi_width);
4136 memcpy(tx_left, xd->left_txfm_context, sizeof(TXFM_CONTEXT) * mi_height);
Yaowu Xuc27fc142016-08-22 16:08:15 -07004137
4138 for (idy = 0; idy < mi_height; idy += bh) {
Jingning Han18482fe2016-11-02 17:01:58 -07004139 for (idx = 0; idx < mi_width; idx += bw) {
Sarah Parkerd25ef8c2017-10-06 12:17:30 -07004140 select_tx_block(cpi, x, idy, idx, 0, block, max_tx_size, init_depth,
4141 plane_bsize, ctxa, ctxl, tx_above, tx_left,
Debargha Mukherjee51666862017-10-24 14:29:13 -07004142 &pn_rd_stats, ref_best_rd - this_rd, &is_cost_valid,
Alexander Bokovc5ddf062017-10-17 16:41:46 -07004143 fast, tx_split_prune_flag, rd_info_tree);
Debargha Mukherjee51666862017-10-24 14:29:13 -07004144 if (!is_cost_valid || pn_rd_stats.rate == INT_MAX) {
Sarah Parkerde6f0722017-08-07 20:23:46 -07004145 av1_invalid_rd_stats(rd_stats);
4146 return;
4147 }
Angie Chiangc0feea82016-11-03 15:36:18 -07004148 av1_merge_rd_stats(rd_stats, &pn_rd_stats);
Debargha Mukherjee2c50f9a2017-11-15 08:04:57 -08004149 this_rd +=
4150 AOMMIN(RDCOST(x->rdmult, pn_rd_stats.rate, pn_rd_stats.dist),
4151 RDCOST(x->rdmult, pn_rd_stats.zero_rate, pn_rd_stats.sse));
Yaowu Xuc27fc142016-08-22 16:08:15 -07004152 block += step;
Alexander Bokovc5ddf062017-10-17 16:41:46 -07004153 if (rd_info_tree != NULL) rd_info_tree += 1;
Yaowu Xuc27fc142016-08-22 16:08:15 -07004154 }
4155 }
4156 }
Debargha Mukherjee9c8decb2017-12-01 15:14:40 -08004157 int64_t zero_rd = RDCOST(x->rdmult, rd_stats->zero_rate, rd_stats->sse);
4158 this_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
4159 if (zero_rd < this_rd) {
4160 this_rd = zero_rd;
Debargha Mukherjee644e9662017-12-10 15:36:28 -08004161 rd_stats->rate = rd_stats->zero_rate;
4162 rd_stats->dist = rd_stats->sse;
Debargha Mukherjee9c8decb2017-12-01 15:14:40 -08004163 rd_stats->skip = 1;
4164 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07004165 if (this_rd > ref_best_rd) is_cost_valid = 0;
4166
4167 if (!is_cost_valid) {
4168 // reset cost value
Angie Chiangc0feea82016-11-03 15:36:18 -07004169 av1_invalid_rd_stats(rd_stats);
Yaowu Xuc27fc142016-08-22 16:08:15 -07004170 }
4171}
4172
Yaowu Xuf883b422016-08-30 14:01:10 -07004173static int64_t select_tx_size_fix_type(const AV1_COMP *cpi, MACROBLOCK *x,
Angie Chiangb5dda482016-11-02 16:19:58 -07004174 RD_STATS *rd_stats, BLOCK_SIZE bsize,
Debargha Mukherjee51666862017-10-24 14:29:13 -07004175 int mi_row, int mi_col,
Alexander Bokov79a37242017-09-29 11:25:55 -07004176 int64_t ref_best_rd, TX_TYPE tx_type,
Alexander Bokovc5ddf062017-10-17 16:41:46 -07004177 int tx_split_prune_flag,
4178 TX_SIZE_RD_INFO_NODE *rd_info_tree) {
Debargha Mukherjee51666862017-10-24 14:29:13 -07004179 const int fast = cpi->sf.tx_size_search_method > USE_FULL_RD;
Yaowu Xuf883b422016-08-30 14:01:10 -07004180 const AV1_COMMON *const cm = &cpi->common;
Yaowu Xuc27fc142016-08-22 16:08:15 -07004181 MACROBLOCKD *const xd = &x->e_mbd;
4182 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
Yaowu Xuc27fc142016-08-22 16:08:15 -07004183 const int is_inter = is_inter_block(mbmi);
Zoe Liu1eed2df2017-10-16 17:13:15 -07004184 const int skip_ctx = av1_get_skip_context(xd);
4185 int s0 = x->skip_cost[skip_ctx][0];
4186 int s1 = x->skip_cost[skip_ctx][1];
Yaowu Xuc27fc142016-08-22 16:08:15 -07004187 int64_t rd;
Jingning Hane67b38a2016-11-04 10:30:00 -07004188 int row, col;
4189 const int max_blocks_high = max_block_high(xd, bsize, 0);
4190 const int max_blocks_wide = max_block_wide(xd, bsize, 0);
Yaowu Xuc27fc142016-08-22 16:08:15 -07004191
Debargha Mukherjee51666862017-10-24 14:29:13 -07004192 // TODO(debargha): enable this as a speed feature where the
4193 // select_inter_block_yrd() function above will use a simplified search
4194 // such as not using full optimize, but the inter_block_yrd() function
4195 // will use more complex search given that the transform partitions have
4196 // already been decided.
4197
Zoe Liu1eed2df2017-10-16 17:13:15 -07004198 (void)cm;
Debargha Mukherjee51666862017-10-24 14:29:13 -07004199 (void)mi_row;
4200 (void)mi_col;
Zoe Liu1eed2df2017-10-16 17:13:15 -07004201
Yaowu Xuc27fc142016-08-22 16:08:15 -07004202 mbmi->tx_type = tx_type;
Alexander Bokov79a37242017-09-29 11:25:55 -07004203 select_inter_block_yrd(cpi, x, rd_stats, bsize, ref_best_rd, fast,
Alexander Bokovc5ddf062017-10-17 16:41:46 -07004204 tx_split_prune_flag, rd_info_tree);
Angie Chiangb5dda482016-11-02 16:19:58 -07004205 if (rd_stats->rate == INT_MAX) return INT64_MAX;
Yaowu Xuc27fc142016-08-22 16:08:15 -07004206
Debargha Mukherjee3ebb0d02017-12-14 05:05:18 -08004207 mbmi->min_tx_size = mbmi->inter_tx_size[0][0];
Jingning Hane67b38a2016-11-04 10:30:00 -07004208 for (row = 0; row < max_blocks_high / 2; ++row)
4209 for (col = 0; col < max_blocks_wide / 2; ++col)
Debargha Mukherjee3ebb0d02017-12-14 05:05:18 -08004210 mbmi->min_tx_size =
4211 TXSIZEMIN(mbmi->min_tx_size, mbmi->inter_tx_size[row][col]);
Jingning Hane67b38a2016-11-04 10:30:00 -07004212
Debargha Mukherjee51666862017-10-24 14:29:13 -07004213 if (fast) {
4214 // Do a better (non-fast) search with tx sizes already decided.
4215 // Currently, trellis optimization is turned on only for this pass, and
4216 // the function below performs a more accurate rd cost calculation based
4217 // on that.
4218 if (!inter_block_yrd(cpi, x, rd_stats, bsize, ref_best_rd, 0))
4219 return INT64_MAX;
4220 }
4221
Jingning Han1643a0a2017-07-05 15:48:25 -07004222#if !CONFIG_TXK_SEL
Sarah Parkere68a3e42017-02-16 14:03:24 -08004223 if (get_ext_tx_types(mbmi->min_tx_size, bsize, is_inter,
4224 cm->reduced_tx_set_used) > 1 &&
Yaowu Xuc27fc142016-08-22 16:08:15 -07004225 !xd->lossless[xd->mi[0]->mbmi.segment_id]) {
Sarah Parkere68a3e42017-02-16 14:03:24 -08004226 const int ext_tx_set = get_ext_tx_set(mbmi->min_tx_size, bsize, is_inter,
4227 cm->reduced_tx_set_used);
Jingning Han13c42772017-12-11 14:31:56 -08004228 if (ext_tx_set > 0)
4229 rd_stats->rate +=
4230 x->inter_tx_type_costs[ext_tx_set][txsize_sqr_map[mbmi->min_tx_size]]
4231 [mbmi->tx_type];
Yaowu Xuc27fc142016-08-22 16:08:15 -07004232 }
Jingning Han1643a0a2017-07-05 15:48:25 -07004233#endif // CONFIG_TXK_SEL
Yaowu Xuc27fc142016-08-22 16:08:15 -07004234
Angie Chiangb5dda482016-11-02 16:19:58 -07004235 if (rd_stats->skip)
Urvang Joshi70006e42017-06-14 16:08:55 -07004236 rd = RDCOST(x->rdmult, s1, rd_stats->sse);
Yaowu Xuc27fc142016-08-22 16:08:15 -07004237 else
Urvang Joshi70006e42017-06-14 16:08:55 -07004238 rd = RDCOST(x->rdmult, rd_stats->rate + s0, rd_stats->dist);
Yaowu Xuc27fc142016-08-22 16:08:15 -07004239
Angie Chiangb5dda482016-11-02 16:19:58 -07004240 if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
4241 !(rd_stats->skip))
Urvang Joshi70006e42017-06-14 16:08:55 -07004242 rd = AOMMIN(rd, RDCOST(x->rdmult, s1, rd_stats->sse));
Yaowu Xuc27fc142016-08-22 16:08:15 -07004243
4244 return rd;
4245}
4246
// Finds rd cost for a y block, given the transform size partitions
//
// Recursively walks the transform partition tree rooted at (blk_row, blk_col)
// for the given plane. At a leaf (the tx size recorded for this position, or
// any chroma block), it computes the coefficient rate/distortion and may
// force the block to "skip" when coding zero coefficients is cheaper in RD
// terms. Otherwise it splits into the sub transform size and accumulates the
// children's stats. Results are returned in |rd_stats|; entropy and txfm
// partition contexts are updated in place as coding would.
static void tx_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
                         int blk_col, int plane, int block, TX_SIZE tx_size,
                         BLOCK_SIZE plane_bsize, int depth,
                         ENTROPY_CONTEXT *above_ctx, ENTROPY_CONTEXT *left_ctx,
                         TXFM_CONTEXT *tx_above, TXFM_CONTEXT *tx_left,
                         int64_t ref_best_rd, RD_STATS *rd_stats, int fast) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  struct macroblockd_plane *const pd = &xd->plane[plane];
  // Luma-grid position used to look up the recorded inter tx size.
  const int tx_row = blk_row >> (1 - pd->subsampling_y);
  const int tx_col = blk_col >> (1 - pd->subsampling_x);
  const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
  const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);

  assert(tx_size < TX_SIZES_ALL);

  // Positions past the visible edge carry no cost.
  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;

  const TX_SIZE plane_tx_size =
      plane ? av1_get_uv_tx_size(mbmi, pd->subsampling_x, pd->subsampling_y)
            : mbmi->inter_tx_size[tx_row][tx_col];

  int ctx = txfm_partition_context(tx_above + blk_col, tx_left + blk_row,
                                   mbmi->sb_type, tx_size);

  av1_init_rd_stats(rd_stats);
  // Chroma planes never split below their derived tx size, so any |plane|!=0
  // is treated as a leaf.
  if (tx_size == plane_tx_size || plane) {
    ENTROPY_CONTEXT *ta = above_ctx + blk_col;
    ENTROPY_CONTEXT *tl = left_ctx + blk_row;
#if CONFIG_LV_MAP
    const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
    TXB_CTX txb_ctx;
    get_txb_ctx(plane_bsize, tx_size, plane, ta, tl, &txb_ctx);

    // Rate of signalling an all-zero (skipped) transform block.
    const int zero_blk_rate = x->coeff_costs[txs_ctx][get_plane_type(plane)]
                                  .txb_skip_cost[txb_ctx.txb_skip_ctx][1];
#else
    const int coeff_ctx = get_entropy_context(tx_size, ta, tl);
    const TX_SIZE tx_size_ctx = get_txsize_entropy_ctx(tx_size);
    const int zero_blk_rate =
        x->token_head_costs[tx_size_ctx][pd->plane_type][1][0][coeff_ctx][0];
#endif  // CONFIG_LV_MAP
    rd_stats->zero_rate = zero_blk_rate;
    rd_stats->ref_rdcost = ref_best_rd;
    av1_tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
                      plane_bsize, ta, tl, rd_stats, fast, NULL);
    const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
    // If signalling all-zero coefficients is cheaper in RD terms, force skip:
    // distortion becomes the sse and the coefficients are dropped.
    if (RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) >=
            RDCOST(x->rdmult, zero_blk_rate, rd_stats->sse) ||
        rd_stats->skip == 1) {
      rd_stats->rate = zero_blk_rate;
      rd_stats->dist = rd_stats->sse;
      rd_stats->skip = 1;
      x->blk_skip[plane][blk_row * mi_width + blk_col] = 1;
      x->plane[plane].eobs[block] = 0;
#if CONFIG_LV_MAP
      x->plane[plane].txb_entropy_ctx[block] = 0;
#endif  // CONFIG_LV_MAP
    } else {
      rd_stats->skip = 0;
      x->blk_skip[plane][blk_row * mi_width + blk_col] = 0;
    }
    // Cost of signalling "no further split" at this tree depth.
    if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH)
      rd_stats->rate += x->txfm_partition_cost[ctx][0];
    av1_set_txb_context(x, plane, block, tx_size, ta, tl);
    txfm_partition_update(tx_above + blk_col, tx_left + blk_row, tx_size,
                          tx_size);
  } else {
    // Split: recurse into the sub transform size and accumulate.
    const TX_SIZE sub_txs = sub_tx_size_map[1][tx_size];
    const int bsw = tx_size_wide_unit[sub_txs];
    const int bsh = tx_size_high_unit[sub_txs];
    const int step = bsh * bsw;  // coefficient-block advance per child
    RD_STATS pn_rd_stats;
    int64_t this_rd = 0;
    assert(bsw > 0 && bsh > 0);

    for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) {
      for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) {
        const int offsetr = blk_row + row;
        const int offsetc = blk_col + col;

        if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;

        av1_init_rd_stats(&pn_rd_stats);
        // Tighten the RD budget by what the earlier children already spent.
        tx_block_yrd(cpi, x, offsetr, offsetc, plane, block, sub_txs,
                     plane_bsize, depth + 1, above_ctx, left_ctx, tx_above,
                     tx_left, ref_best_rd - this_rd, &pn_rd_stats, fast);
        if (pn_rd_stats.rate == INT_MAX) {
          // Child exceeded the budget; propagate the early termination.
          av1_invalid_rd_stats(rd_stats);
          return;
        }
        av1_merge_rd_stats(rd_stats, &pn_rd_stats);
        this_rd += RDCOST(x->rdmult, pn_rd_stats.rate, pn_rd_stats.dist);
        block += step;
      }
    }

    // Cost of signalling "split" at this tree depth.
    if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH)
      rd_stats->rate += x->txfm_partition_cost[ctx][1];
  }
}
4349
// Return value 0: early termination triggered, no valid rd cost available;
// 1: rd cost values are valid.
//
// Computes the luma rate/distortion of an inter block by tiling it with the
// maximum var-tx size and evaluating each tile with tx_block_yrd(). After
// accumulation, the whole-block "code everything as zero" alternative is
// compared against the accumulated cost and substituted when cheaper.
int inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_stats,
                    BLOCK_SIZE bsize, int64_t ref_best_rd, int fast) {
  MACROBLOCKD *const xd = &x->e_mbd;
  int is_cost_valid = 1;
  int64_t this_rd = 0;

  // A negative budget means the caller has nothing left to spend.
  if (ref_best_rd < 0) is_cost_valid = 0;

  av1_init_rd_stats(rd_stats);

  if (is_cost_valid) {
    const struct macroblockd_plane *const pd = &xd->plane[0];
    const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
    const int mi_width = mi_size_wide[plane_bsize];
    const int mi_height = mi_size_high[plane_bsize];
    const TX_SIZE max_tx_size = get_vartx_max_txsize(
        xd, plane_bsize, pd->subsampling_x || pd->subsampling_y);
    const int bh = tx_size_high_unit[max_tx_size];
    const int bw = tx_size_wide_unit[max_tx_size];
    const int init_depth = get_search_init_depth(mi_width, mi_height, &cpi->sf);
    int idx, idy;
    int block = 0;
    int step = tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size];
    ENTROPY_CONTEXT ctxa[2 * MAX_MIB_SIZE];
    ENTROPY_CONTEXT ctxl[2 * MAX_MIB_SIZE];
    TXFM_CONTEXT tx_above[MAX_MIB_SIZE * 2];
    TXFM_CONTEXT tx_left[MAX_MIB_SIZE * 2];
    RD_STATS pn_rd_stats;

    // Local copies of the coding contexts so the search does not disturb
    // the encoder's state.
    av1_get_entropy_contexts(bsize, 0, pd, ctxa, ctxl);
    memcpy(tx_above, xd->above_txfm_context, sizeof(TXFM_CONTEXT) * mi_width);
    memcpy(tx_left, xd->left_txfm_context, sizeof(TXFM_CONTEXT) * mi_height);

    // Tile the block with max-size transform units.
    for (idy = 0; idy < mi_height; idy += bh) {
      for (idx = 0; idx < mi_width; idx += bw) {
        av1_init_rd_stats(&pn_rd_stats);
        tx_block_yrd(cpi, x, idy, idx, 0, block, max_tx_size, plane_bsize,
                     init_depth, ctxa, ctxl, tx_above, tx_left,
                     ref_best_rd - this_rd, &pn_rd_stats, fast);
        if (pn_rd_stats.rate == INT_MAX) {
          av1_invalid_rd_stats(rd_stats);
          return 0;
        }
        av1_merge_rd_stats(rd_stats, &pn_rd_stats);
        // Track spent RD using the cheaper of coding coefficients vs. coding
        // the tile as all-zero, so the remaining budget is not understated.
        this_rd +=
            AOMMIN(RDCOST(x->rdmult, pn_rd_stats.rate, pn_rd_stats.dist),
                   RDCOST(x->rdmult, pn_rd_stats.zero_rate, pn_rd_stats.sse));
        block += step;
      }
    }
  }
  // Whole-block override: if coding the entire block as zero beats the
  // accumulated per-tile decision, take the skip path.
  int64_t zero_rd = RDCOST(x->rdmult, rd_stats->zero_rate, rd_stats->sse);
  this_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
  if (zero_rd < this_rd) {
    this_rd = zero_rd;
    rd_stats->rate = rd_stats->zero_rate;
    rd_stats->dist = rd_stats->sse;
    rd_stats->skip = 1;
  }
  if (this_rd > ref_best_rd) is_cost_valid = 0;

  if (!is_cost_valid) {
    // reset cost value
    av1_invalid_rd_stats(rd_stats);
  }
  return is_cost_valid;
}
4419
Hui Su1ddf2312017-08-19 15:21:34 -07004420static uint32_t get_block_residue_hash(MACROBLOCK *x, BLOCK_SIZE bsize) {
4421 const int rows = block_size_high[bsize];
4422 const int cols = block_size_wide[bsize];
Hui Su1ddf2312017-08-19 15:21:34 -07004423 const struct macroblock_plane *const p = &x->plane[0];
4424 const int16_t *diff = &p->src_diff[0];
Debargha Mukherjeefd65c8d2017-11-07 15:45:55 -08004425 uint16_t hash_data[MAX_SB_SQUARE];
4426 memcpy(hash_data, diff, sizeof(*hash_data) * rows * cols);
4427 return (av1_get_crc_value(&x->tx_rd_record.crc_calculator,
4428 (uint8_t *)hash_data, 2 * rows * cols)
Hui Su1ddf2312017-08-19 15:21:34 -07004429 << 7) +
4430 bsize;
4431}
4432
4433static void save_tx_rd_info(int n4, uint32_t hash, const MACROBLOCK *const x,
4434 const RD_STATS *const rd_stats,
Hui Su89ef4932017-11-28 10:54:31 -08004435 TX_RD_RECORD *tx_rd_record) {
4436 int index;
4437 if (tx_rd_record->num < RD_RECORD_BUFFER_LEN) {
4438 index =
4439 (tx_rd_record->index_start + tx_rd_record->num) % RD_RECORD_BUFFER_LEN;
4440 ++tx_rd_record->num;
4441 } else {
4442 index = tx_rd_record->index_start;
4443 tx_rd_record->index_start =
4444 (tx_rd_record->index_start + 1) % RD_RECORD_BUFFER_LEN;
4445 }
4446 TX_RD_INFO *const tx_rd_info = &tx_rd_record->tx_rd_info[index];
Hui Su1ddf2312017-08-19 15:21:34 -07004447 const MACROBLOCKD *const xd = &x->e_mbd;
4448 const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
4449 tx_rd_info->hash_value = hash;
4450 tx_rd_info->tx_type = mbmi->tx_type;
4451 tx_rd_info->tx_size = mbmi->tx_size;
Hui Su1ddf2312017-08-19 15:21:34 -07004452 tx_rd_info->min_tx_size = mbmi->min_tx_size;
4453 memcpy(tx_rd_info->blk_skip, x->blk_skip[0],
4454 sizeof(tx_rd_info->blk_skip[0]) * n4);
4455 for (int idy = 0; idy < xd->n8_h; ++idy)
4456 for (int idx = 0; idx < xd->n8_w; ++idx)
4457 tx_rd_info->inter_tx_size[idy][idx] = mbmi->inter_tx_size[idy][idx];
Hui Su1ddf2312017-08-19 15:21:34 -07004458#if CONFIG_TXK_SEL
4459 av1_copy(tx_rd_info->txk_type, mbmi->txk_type);
4460#endif // CONFIG_TXK_SEL
4461 tx_rd_info->rd_stats = *rd_stats;
4462}
4463
4464static void fetch_tx_rd_info(int n4, const TX_RD_INFO *const tx_rd_info,
4465 RD_STATS *const rd_stats, MACROBLOCK *const x) {
4466 MACROBLOCKD *const xd = &x->e_mbd;
4467 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
4468 mbmi->tx_type = tx_rd_info->tx_type;
4469 mbmi->tx_size = tx_rd_info->tx_size;
Hui Su1ddf2312017-08-19 15:21:34 -07004470 mbmi->min_tx_size = tx_rd_info->min_tx_size;
4471 memcpy(x->blk_skip[0], tx_rd_info->blk_skip,
4472 sizeof(tx_rd_info->blk_skip[0]) * n4);
4473 for (int idy = 0; idy < xd->n8_h; ++idy)
4474 for (int idx = 0; idx < xd->n8_w; ++idx)
4475 mbmi->inter_tx_size[idy][idx] = tx_rd_info->inter_tx_size[idy][idx];
Hui Su1ddf2312017-08-19 15:21:34 -07004476#if CONFIG_TXK_SEL
4477 av1_copy(mbmi->txk_type, tx_rd_info->txk_type);
4478#endif // CONFIG_TXK_SEL
4479 *rd_stats = tx_rd_info->rd_stats;
4480}
4481
Alexander Bokovc5ddf062017-10-17 16:41:46 -07004482static int find_tx_size_rd_info(TX_SIZE_RD_RECORD *cur_record,
4483 const uint32_t hash) {
4484 // Linear search through the circular buffer to find matching hash.
4485 int index;
4486 for (int i = cur_record->num - 1; i >= 0; i--) {
4487 index = (cur_record->index_start + i) % TX_SIZE_RD_RECORD_BUFFER_LEN;
4488 if (cur_record->hash_vals[index] == hash) return index;
4489 }
4490
4491 // If not found - add new RD info into the buffer and return its index
4492 if (cur_record->num < TX_SIZE_RD_RECORD_BUFFER_LEN) {
4493 index = (cur_record->index_start + cur_record->num) %
4494 TX_SIZE_RD_RECORD_BUFFER_LEN;
4495 cur_record->num++;
4496 } else {
4497 index = cur_record->index_start;
4498 cur_record->index_start =
4499 (cur_record->index_start + 1) % TX_SIZE_RD_RECORD_BUFFER_LEN;
4500 }
4501
4502 cur_record->hash_vals[index] = hash;
4503 av1_zero(cur_record->tx_rd_info[index]);
4504 return index;
4505}
4506
4507// Go through all TX blocks that could be used in TX size search, compute
4508// residual hash values for them and find matching RD info that stores previous
4509// RD search results for these TX blocks. The idea is to prevent repeated
4510// rate/distortion computations that happen because of the combination of
4511// partition and TX size search. The resulting RD info records are returned in
4512// the form of a quadtree for easier access in actual TX size search.
4513static int find_tx_size_rd_records(MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row,
4514 int mi_col,
4515 TX_SIZE_RD_INFO_NODE *dst_rd_info) {
4516#if CONFIG_TX64X64
4517 TX_SIZE_RD_RECORD *rd_records_table[4] = { x->tx_size_rd_record_8X8,
4518 x->tx_size_rd_record_16X16,
4519 x->tx_size_rd_record_32X32,
4520 x->tx_size_rd_record_64X64 };
4521#else
4522 TX_SIZE_RD_RECORD *rd_records_table[3] = { x->tx_size_rd_record_8X8,
4523 x->tx_size_rd_record_16X16,
4524 x->tx_size_rd_record_32X32 };
4525#endif
4526 const TX_SIZE max_square_tx_size = max_txsize_lookup[bsize];
Alexander Bokovc5ddf062017-10-17 16:41:46 -07004527 const int bw = block_size_wide[bsize];
4528 const int bh = block_size_high[bsize];
Debargha Mukherjeede80e762017-11-30 13:58:56 -08004529
4530 // Hashing is performed only for square TX sizes larger than TX_4X4
Hui Sua4812ac2017-11-30 18:01:51 -08004531 if (max_square_tx_size < TX_8X8) return 0;
Debargha Mukherjeede80e762017-11-30 13:58:56 -08004532
Hui Sucdb89832017-11-30 17:36:21 -08004533 const int bw_mi = mi_size_wide[bsize];
Alexander Bokovc5ddf062017-10-17 16:41:46 -07004534 const int diff_stride = bw;
4535 const struct macroblock_plane *const p = &x->plane[0];
4536 const int16_t *diff = &p->src_diff[0];
4537
4538 // Coordinates of the top-left corner of current block within the superblock
4539 // measured in pixels:
4540 const int mi_row_in_sb = (mi_row % MAX_MIB_SIZE) << MI_SIZE_LOG2;
4541 const int mi_col_in_sb = (mi_col % MAX_MIB_SIZE) << MI_SIZE_LOG2;
4542 int cur_rd_info_idx = 0;
4543 int cur_tx_depth = 0;
Hui Sucdb89832017-11-30 17:36:21 -08004544 uint8_t parent_idx_buf[MAX_MIB_SIZE * MAX_MIB_SIZE] = { 0 };
Hui Sua4812ac2017-11-30 18:01:51 -08004545 uint8_t child_idx_buf[MAX_MIB_SIZE * MAX_MIB_SIZE] = { 0 };
Alexander Bokovc5ddf062017-10-17 16:41:46 -07004546
Hui Sua4812ac2017-11-30 18:01:51 -08004547 TX_SIZE cur_tx_size = max_txsize_rect_lookup[1][bsize];
Alexander Bokovc5ddf062017-10-17 16:41:46 -07004548 while (cur_tx_depth <= MAX_VARTX_DEPTH) {
Hui Sucdb89832017-11-30 17:36:21 -08004549 const int cur_tx_bw = tx_size_wide[cur_tx_size];
4550 const int cur_tx_bh = tx_size_high[cur_tx_size];
Alexander Bokovc5ddf062017-10-17 16:41:46 -07004551 if (cur_tx_bw < 8 || cur_tx_bh < 8) break;
Hui Sua4812ac2017-11-30 18:01:51 -08004552 const TX_SIZE next_tx_size = sub_tx_size_map[1][cur_tx_size];
Alexander Bokovc5ddf062017-10-17 16:41:46 -07004553 for (int row = 0; row < bh; row += cur_tx_bh) {
4554 for (int col = 0; col < bw; col += cur_tx_bw) {
4555 if (cur_tx_bw != cur_tx_bh) {
4556 // Use dummy nodes for all rectangular transforms within the
4557 // TX size search tree.
4558 dst_rd_info[cur_rd_info_idx].rd_info_array = NULL;
4559 } else {
4560 // Get spatial location of this TX block within the superblock
4561 // (measured in cur_tx_bsize units).
4562 const int row_in_sb = (mi_row_in_sb + row) / cur_tx_bh;
4563 const int col_in_sb = (mi_col_in_sb + col) / cur_tx_bw;
4564
4565 // Compute FNV-1a hash for this TX block.
4566 uint32_t hash = 2166136261;
4567 for (int i = 0; i < cur_tx_bh; i++) {
4568 const int16_t *cur_diff_row = diff + (row + i) * diff_stride + col;
4569 for (int j = 0; j < cur_tx_bw; j++) {
4570 hash = hash ^ clip_pixel(cur_diff_row[j] + 128);
Yaowu Xu51021542017-11-20 18:23:42 -08004571 hash = (uint32_t)((int64_t)hash * 16777619);
Alexander Bokovc5ddf062017-10-17 16:41:46 -07004572 }
4573 }
4574
4575 // Find corresponding RD info based on the hash value.
4576 const int rd_record_idx =
4577 row_in_sb * (MAX_MIB_SIZE >> (cur_tx_size + 1 - TX_8X8)) +
4578 col_in_sb;
4579 int idx = find_tx_size_rd_info(
4580 &rd_records_table[cur_tx_size - TX_8X8][rd_record_idx], hash);
4581 dst_rd_info[cur_rd_info_idx].rd_info_array =
4582 rd_records_table[cur_tx_size - TX_8X8][rd_record_idx]
4583 .tx_rd_info[idx];
4584 }
4585
4586 // Update the output quadtree RD info structure.
4587 av1_zero(dst_rd_info[cur_rd_info_idx].children);
Hui Sua4812ac2017-11-30 18:01:51 -08004588 const int this_mi_row = row / MI_SIZE;
4589 const int this_mi_col = col / MI_SIZE;
4590 if (cur_tx_depth > 0) { // Set up child pointers.
4591 const int mi_index = this_mi_row * bw_mi + this_mi_col;
4592 const int child_idx = child_idx_buf[mi_index];
4593 assert(child_idx < 4);
Hui Sucdb89832017-11-30 17:36:21 -08004594 dst_rd_info[parent_idx_buf[mi_index]].children[child_idx] =
Alexander Bokovc5ddf062017-10-17 16:41:46 -07004595 &dst_rd_info[cur_rd_info_idx];
4596 }
Hui Sua4812ac2017-11-30 18:01:51 -08004597 if (cur_tx_depth < MAX_VARTX_DEPTH) { // Set up parent and child idx.
4598 const int tx_bh_mi = cur_tx_bh / MI_SIZE;
4599 const int tx_bw_mi = cur_tx_bw / MI_SIZE;
4600 for (int i = this_mi_row; i < this_mi_row + tx_bh_mi; ++i) {
4601 memset(parent_idx_buf + i * bw_mi + this_mi_col, cur_rd_info_idx,
4602 tx_bw_mi);
4603 }
4604 int child_idx = 0;
4605 const int next_tx_bh_mi = tx_size_wide_unit[next_tx_size];
4606 const int next_tx_bw_mi = tx_size_wide_unit[next_tx_size];
4607 for (int i = this_mi_row; i < this_mi_row + tx_bh_mi;
4608 i += next_tx_bh_mi) {
4609 for (int j = this_mi_col; j < this_mi_col + tx_bw_mi;
4610 j += next_tx_bw_mi) {
4611 assert(child_idx < 4);
4612 child_idx_buf[i * bw_mi + j] = child_idx++;
4613 }
4614 }
Hui Sucdb89832017-11-30 17:36:21 -08004615 }
Alexander Bokovc5ddf062017-10-17 16:41:46 -07004616 ++cur_rd_info_idx;
4617 }
4618 }
Hui Sua4812ac2017-11-30 18:01:51 -08004619 cur_tx_size = next_tx_size;
Alexander Bokovc5ddf062017-10-17 16:41:46 -07004620 ++cur_tx_depth;
4621 }
4622 return 1;
4623}
4624
// Thresholds used by predict_skip_flag(): indexed first by bit depth
// (0: 8-bit, 1: 10-bit, 2: 12-bit) and then by block size. A block is
// predicted to skip only when every quantized DCT coefficient magnitude
// (expressed as a percentage of the quantizer step) stays below the
// threshold for its [bit depth][block size] entry.
static const uint32_t skip_pred_threshold[3][BLOCK_SIZES_ALL] = {
  {
      50, 50, 50, 55, 47, 47, 53, 53, 53, 53, 53, 53, 53,
#if CONFIG_EXT_PARTITION
      53, 53, 53,
#endif
      50, 50, 55, 55, 53, 53,
#if CONFIG_EXT_PARTITION
      53, 53,
#endif
  },
  {
      69, 69, 69, 67, 68, 68, 53, 53, 53, 53, 53, 53, 53,
#if CONFIG_EXT_PARTITION
      53, 53, 53,
#endif
      69, 69, 67, 67, 53, 53,
#if CONFIG_EXT_PARTITION
      53, 53,
#endif
  },
  {
      70, 73, 73, 70, 73, 73, 58, 58, 58, 58, 58, 58, 58,
#if CONFIG_EXT_PARTITION
      58, 58, 58,
#endif
      70, 70, 70, 70, 58, 58,
#if CONFIG_EXT_PARTITION
      58, 58,
#endif
  }
};
4657
// Uses simple features on top of DCT coefficients to quickly predict
// whether optimal RD decision is to skip encoding the residual.
// The sse value is stored in dist.
//
// Two-stage test: (1) if the mean squared residual already exceeds a
// quantizer-derived threshold, predict "don't skip"; (2) otherwise forward
// DCT each max-size sub-block and predict skip only if every quantized
// coefficient magnitude stays below a per-bitdepth/per-blocksize threshold.
// Returns 1 for "predicted to skip", 0 otherwise.
static int predict_skip_flag(MACROBLOCK *x, BLOCK_SIZE bsize, int64_t *dist) {
  // Cap the analysis transform at 16x16 even when the block allows larger.
  int max_tx_size =
      get_max_rect_tx_size(bsize, is_inter_block(&x->e_mbd.mi[0]->mbmi));
  if (tx_size_high[max_tx_size] > 16 || tx_size_wide[max_tx_size] > 16)
    max_tx_size = AOMMIN(max_txsize_lookup[bsize], TX_16X16);
  const int tx_h = tx_size_high[max_tx_size];
  const int tx_w = tx_size_wide[max_tx_size];
  const int bw = block_size_wide[bsize];
  const int bh = block_size_high[bsize];
  const MACROBLOCKD *xd = &x->e_mbd;
  const uint32_t dc_q = (uint32_t)av1_dc_quant_QTX(x->qindex, 0, xd->bd);

  // Whole-block residual energy; also returned to the caller through |dist|.
  *dist = pixel_diff_dist(x, 0, x->plane[0].src_diff, bw, 0, 0, bsize, bsize);
  const int64_t mse = *dist / bw / bh;
  // Normalized quantizer takes the transform upscaling factor (8 for tx size
  // smaller than 32) into account.
  const uint32_t normalized_dc_q = dc_q >> 3;
  const int64_t mse_thresh = (int64_t)normalized_dc_q * normalized_dc_q / 8;
  // Predict not to skip when mse is larger than threshold.
  if (mse > mse_thresh) return 0;

  DECLARE_ALIGNED(32, tran_low_t, DCT_coefs[32 * 32]);
  TxfmParam param;
  param.tx_type = DCT_DCT;
  param.tx_size = max_tx_size;
  param.bd = xd->bd;
  param.is_hbd = get_bitdepth_data_path_index(xd);
  param.lossless = 0;
  const struct macroblockd_plane *const pd = &xd->plane[0];
  const BLOCK_SIZE plane_bsize =
      get_plane_block_size(xd->mi[0]->mbmi.sb_type, pd);
  // TODO(sarahparker) This assumes reduced_tx_set_used == 0. I will do a
  // follow up refactor to make the actual value of reduced_tx_set_used
  // within this function.
  param.tx_set_type = get_ext_tx_set_type(param.tx_size, plane_bsize,
                                          is_inter_block(&xd->mi[0]->mbmi), 0);
  const uint32_t ac_q = (uint32_t)av1_ac_quant_QTX(x->qindex, 0, xd->bd);
  uint32_t max_quantized_coef = 0;
  const int bd_idx = (xd->bd == 8) ? 0 : ((xd->bd == 10) ? 1 : 2);
  const uint32_t max_qcoef_thresh = skip_pred_threshold[bd_idx][bsize];
  const int16_t *src_diff = x->plane[0].src_diff;
  // Transform each tx_w x tx_h tile and track the largest quantized
  // coefficient magnitude (scaled by 100 to keep integer precision).
  for (int row = 0; row < bh; row += tx_h) {
    for (int col = 0; col < bw; col += tx_w) {
      av1_highbd_fwd_txfm(src_diff + col, DCT_coefs, bw, &param);

      // Operating on TX domain, not pixels; we want the QTX quantizers
      for (int i = 0; i < tx_w * tx_h; ++i) {
        uint32_t cur_quantized_coef =
            (100 * (uint32_t)abs(DCT_coefs[i])) / (i ? ac_q : dc_q);
        if (cur_quantized_coef > max_quantized_coef) {
          max_quantized_coef = cur_quantized_coef;
          // Early out as soon as the threshold is crossed.
          if (max_quantized_coef >= max_qcoef_thresh) return 0;
        }
      }
    }
    src_diff += tx_h * bw;
  }
  return max_quantized_coef < max_qcoef_thresh;
}
4720
// Used to set proper context for early termination with skip = 1.
//
// Fills |rd_stats| and the macroblock/mode-info state as if the whole block
// had been coded with skip=1 (all-zero residual) at the maximum rectangular
// TX size: the rate is the cost of signalling the skip/no-split decision
// (plus the tx_type where applicable), and dist/sse come from |dist|.
static void set_skip_flag(const AV1_COMP *cpi, MACROBLOCK *x,
                          RD_STATS *rd_stats, int bsize, int64_t dist) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const int n4 = bsize_to_num_blk(bsize);
  const TX_SIZE tx_size = get_max_rect_tx_size(bsize, is_inter_block(mbmi));
  mbmi->tx_type = DCT_DCT;
#if CONFIG_TXK_SEL
  memset(mbmi->txk_type, DCT_DCT,
         sizeof(mbmi->txk_type[0]) *
             (MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)));
#endif
  // Record the (unsplit) max TX size for every mi unit of the block.
  for (int idy = 0; idy < xd->n8_h; ++idy)
    for (int idx = 0; idx < xd->n8_w; ++idx)
      mbmi->inter_tx_size[idy][idx] = tx_size;
  mbmi->tx_size = tx_size;
  mbmi->min_tx_size = tx_size;
  memset(x->blk_skip[0], 1, sizeof(uint8_t) * n4);
  rd_stats->skip = 1;

  (void)cpi;

  // Rate.
  const int tx_size_ctx = get_txsize_entropy_ctx(tx_size);
  ENTROPY_CONTEXT ctxa[2 * MAX_MIB_SIZE];
  ENTROPY_CONTEXT ctxl[2 * MAX_MIB_SIZE];
  av1_get_entropy_contexts(bsize, 0, &xd->plane[0], ctxa, ctxl);
#if CONFIG_LV_MAP
  TXB_CTX txb_ctx;
  // Because plane is 0, plane_bsize equal to bsize
  get_txb_ctx(bsize, tx_size, 0, ctxa, ctxl, &txb_ctx);
  // Cost of signalling "this transform block is all zero".
  int rate = x->coeff_costs[tx_size_ctx][PLANE_TYPE_Y]
                 .txb_skip_cost[txb_ctx.txb_skip_ctx][1];
#else
  int coeff_ctx = get_entropy_context(tx_size, ctxa, ctxl);
  int rate = x->token_head_costs[tx_size_ctx][PLANE_TYPE_Y][1][0][coeff_ctx][0];
#endif
  if (tx_size > TX_4X4) {
    // Cost of signalling "no further TX split".
    int ctx = txfm_partition_context(
        xd->above_txfm_context, xd->left_txfm_context, mbmi->sb_type, tx_size);
    rate += x->txfm_partition_cost[ctx][0];
  }
#if !CONFIG_TXK_SEL
  // Without per-block tx_type selection, the tx_type itself may need to be
  // signalled when the extended TX set allows more than one choice.
  const AV1_COMMON *cm = &cpi->common;
  const int ext_tx_set =
      get_ext_tx_set(mbmi->min_tx_size, bsize, 1, cm->reduced_tx_set_used);
  if (get_ext_tx_types(mbmi->min_tx_size, bsize, 1, cm->reduced_tx_set_used) >
          1 &&
      !xd->lossless[xd->mi[0]->mbmi.segment_id]) {
    if (ext_tx_set > 0)
      rate +=
          x->inter_tx_type_costs[ext_tx_set][txsize_sqr_map[mbmi->min_tx_size]]
                                [mbmi->tx_type];
  }
#endif  // CONFIG_TXK_SEL
  rd_stats->rate = rate;
  // High bit depth distortion is stored at a finer scale; bring it back to
  // 8-bit units before scaling.
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
    dist = ROUND_POWER_OF_TWO(dist, (xd->bd - 8) * 2);
  // Scaled by 16 — presumably to match the RDCOST distortion precision used
  // elsewhere in this file; NOTE(review): confirm against av1_tx_block_rd_b.
  rd_stats->dist = rd_stats->sse = (dist << 4);
}
4782
Angie Chiangb5dda482016-11-02 16:19:58 -07004783static void select_tx_type_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
Yue Chen25dc0702017-10-18 23:36:06 -07004784 RD_STATS *rd_stats, BLOCK_SIZE bsize, int mi_row,
4785 int mi_col, int64_t ref_best_rd) {
Jingning Han2b0eeb12017-02-23 15:55:37 -08004786 const AV1_COMMON *cm = &cpi->common;
Yaowu Xuc27fc142016-08-22 16:08:15 -07004787 const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
4788 MACROBLOCKD *const xd = &x->e_mbd;
4789 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
4790 int64_t rd = INT64_MAX;
4791 int64_t best_rd = INT64_MAX;
4792 TX_TYPE tx_type, best_tx_type = DCT_DCT;
4793 const int is_inter = is_inter_block(mbmi);
Cheng Chen3c222602018-01-22 19:34:18 -08004794 TX_SIZE best_tx_size[MAX_MIB_SIZE][MAX_MIB_SIZE] = { { 0 } };
Debargha Mukherjeee4e18fc2017-12-06 23:43:24 -08004795 TX_SIZE best_tx = max_txsize_rect_lookup[1][bsize];
Jingning Hane67b38a2016-11-04 10:30:00 -07004796 TX_SIZE best_min_tx_size = TX_SIZES_ALL;
Jingning Han9ca05b72017-01-03 14:41:36 -08004797 uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 8];
Jingning Hane3b81bc2017-06-23 11:43:52 -07004798 TX_TYPE txk_start = DCT_DCT;
4799#if CONFIG_TXK_SEL
4800 TX_TYPE txk_end = DCT_DCT + 1;
4801#else
4802 TX_TYPE txk_end = TX_TYPES;
4803#endif
Angie Chiangf1cb0752017-04-10 16:01:20 -07004804 const int n4 = bsize_to_num_blk(bsize);
Yaowu Xuc27fc142016-08-22 16:08:15 -07004805 int idx, idy;
4806 int prune = 0;
Sarah Parker90024e42017-10-06 16:50:47 -07004807 // Get the tx_size 1 level down
Hui Su4e71fd92018-01-24 15:26:49 -08004808 const TX_SIZE min_tx_size =
4809 sub_tx_size_map[1][max_txsize_rect_lookup[1][bsize]];
Hui Suddbcde22017-09-18 17:22:02 -07004810 const TxSetType tx_set_type = get_ext_tx_set_type(
Sarah Parker90024e42017-10-06 16:50:47 -07004811 min_tx_size, bsize, is_inter, cm->reduced_tx_set_used);
Jingning Han3de53532017-12-07 13:40:32 -08004812 int within_border = mi_row >= xd->tile.mi_row_start &&
4813 (mi_row + mi_size_high[bsize] < xd->tile.mi_row_end) &&
4814 mi_col >= xd->tile.mi_col_start &&
4815 (mi_col + mi_size_wide[bsize] < xd->tile.mi_col_end);
Yaowu Xuc27fc142016-08-22 16:08:15 -07004816
Angie Chiangc0feea82016-11-03 15:36:18 -07004817 av1_invalid_rd_stats(rd_stats);
Yaowu Xuc27fc142016-08-22 16:08:15 -07004818
Hui Su1ddf2312017-08-19 15:21:34 -07004819 const uint32_t hash = get_block_residue_hash(x, bsize);
4820 TX_RD_RECORD *tx_rd_record = &x->tx_rd_record;
4821
Yue Chen25dc0702017-10-18 23:36:06 -07004822 if (ref_best_rd != INT64_MAX && within_border) {
Hui Su1ddf2312017-08-19 15:21:34 -07004823 for (int i = 0; i < tx_rd_record->num; ++i) {
4824 const int index = (tx_rd_record->index_start + i) % RD_RECORD_BUFFER_LEN;
4825 // If there is a match in the tx_rd_record, fetch the RD decision and
4826 // terminate early.
4827 if (tx_rd_record->tx_rd_info[index].hash_value == hash) {
4828 TX_RD_INFO *tx_rd_info = &tx_rd_record->tx_rd_info[index];
4829 fetch_tx_rd_info(n4, tx_rd_info, rd_stats, x);
4830 return;
4831 }
4832 }
4833 }
4834
Alexander Bokov80eedf22017-11-02 12:48:52 -07004835 // If we predict that skip is the optimal RD decision - set the respective
4836 // context and terminate early.
Hui Su3889c6d2017-12-04 17:02:44 -08004837 int64_t dist;
Alexander Bokov80eedf22017-11-02 12:48:52 -07004838 if (is_inter && cpi->sf.tx_type_search.use_skip_flag_prediction &&
Hui Su3889c6d2017-12-04 17:02:44 -08004839 predict_skip_flag(x, bsize, &dist)) {
4840 set_skip_flag(cpi, x, rd_stats, bsize, dist);
Hui Su89ef4932017-11-28 10:54:31 -08004841 // Save the RD search results into tx_rd_record.
4842 if (within_border) save_tx_rd_info(n4, hash, x, rd_stats, tx_rd_record);
Alexander Bokov80eedf22017-11-02 12:48:52 -07004843 return;
Alexander Bokov8829a242017-08-31 18:07:05 -07004844 }
4845
Alexander Bokovc5ddf062017-10-17 16:41:46 -07004846 // Precompute residual hashes and find existing or add new RD records to
4847 // store and reuse rate and distortion values to speed up TX size search.
4848 TX_SIZE_RD_INFO_NODE matched_rd_info[16 + 64 + 256];
4849 int found_rd_info = 0;
4850 if (ref_best_rd != INT64_MAX && within_border) {
4851 found_rd_info =
4852 find_tx_size_rd_records(x, bsize, mi_row, mi_col, matched_rd_info);
4853 }
4854
Alexander Bokov0c7eb102017-09-07 18:49:00 -07004855 if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE &&
4856 !x->use_default_inter_tx_type && !xd->lossless[mbmi->segment_id]) {
Alexander Bokov79a37242017-09-29 11:25:55 -07004857 prune = prune_tx(cpi, bsize, x, xd, tx_set_type,
4858 cpi->sf.tx_type_search.use_tx_size_pruning);
Alexander Bokov0c7eb102017-09-07 18:49:00 -07004859 }
Alexander Bokov8829a242017-08-31 18:07:05 -07004860
Rupert Swarbrickde2ea942017-10-09 15:21:21 +01004861 int found = 0;
4862
Alexander Bokov79a37242017-09-29 11:25:55 -07004863 int tx_split_prune_flag = 0;
4864 if (is_inter && cpi->sf.tx_type_search.prune_mode >= PRUNE_2D_ACCURATE)
4865 tx_split_prune_flag = ((prune >> TX_TYPES) & 1);
4866
Jingning Hane3b81bc2017-06-23 11:43:52 -07004867 for (tx_type = txk_start; tx_type < txk_end; ++tx_type) {
Angie Chiangb5dda482016-11-02 16:19:58 -07004868 RD_STATS this_rd_stats;
Angie Chiangc0feea82016-11-03 15:36:18 -07004869 av1_init_rd_stats(&this_rd_stats);
Hui Suddbcde22017-09-18 17:22:02 -07004870 if (!av1_ext_tx_used[tx_set_type][tx_type]) continue;
Hui Su4e71fd92018-01-24 15:26:49 -08004871#if !CONFIG_TXK_SEL
4872 if (is_inter) {
4873 if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE) {
4874 if (!do_tx_type_search(tx_type, prune,
4875 cpi->sf.tx_type_search.prune_mode))
4876 continue;
4877 }
4878 }
4879#endif // CONFIG_TXK_SEL
Yaowu Xuc27fc142016-08-22 16:08:15 -07004880 if (is_inter && x->use_default_inter_tx_type &&
Luc Trudeau2eb9b842017-12-13 11:19:16 -05004881 tx_type != get_default_tx_type(0, xd, max_tx_size))
Yaowu Xuc27fc142016-08-22 16:08:15 -07004882 continue;
4883
Jingning Hane67b38a2016-11-04 10:30:00 -07004884 if (xd->lossless[mbmi->segment_id])
4885 if (tx_type != DCT_DCT) continue;
4886
Debargha Mukherjee51666862017-10-24 14:29:13 -07004887 rd = select_tx_size_fix_type(cpi, x, &this_rd_stats, bsize, mi_row, mi_col,
Alexander Bokovc5ddf062017-10-17 16:41:46 -07004888 ref_best_rd, tx_type, tx_split_prune_flag,
4889 found_rd_info ? matched_rd_info : NULL);
Hui Su4e71fd92018-01-24 15:26:49 -08004890#if !CONFIG_TXK_SEL
Sarah Parker90024e42017-10-06 16:50:47 -07004891 // If the current tx_type is not included in the tx_set for the smallest
4892 // tx size found, then all vartx partitions were actually transformed with
4893 // DCT_DCT and we should avoid picking it.
4894 const TxSetType min_tx_set_type = get_ext_tx_set_type(
4895 mbmi->min_tx_size, bsize, is_inter, cm->reduced_tx_set_used);
Hui Su4e71fd92018-01-24 15:26:49 -08004896 if (!av1_ext_tx_used[min_tx_set_type][tx_type]) {
4897 mbmi->tx_type = DCT_DCT;
4898 if (this_rd_stats.rate != INT_MAX) {
4899 const int ext_tx_set = get_ext_tx_set(
4900 mbmi->min_tx_size, bsize, is_inter, cm->reduced_tx_set_used);
Yaowu Xu92245c82018-01-27 16:12:51 -08004901 TX_SIZE square_tx_size = txsize_sqr_map[mbmi->min_tx_size];
4902#if CONFIG_TX64X64
4903 // For TX_64X64, use TX_32X32 to lookup cost
4904 if (square_tx_size == TX_64X64) square_tx_size = TX_32X32;
4905#endif // CONFIG_TX64X64
Hui Su4e71fd92018-01-24 15:26:49 -08004906 this_rd_stats.rate +=
4907 x->inter_tx_type_costs[ext_tx_set][square_tx_size][mbmi->tx_type];
4908 this_rd_stats.rate -=
4909 x->inter_tx_type_costs[ext_tx_set][square_tx_size][tx_type];
4910 }
4911 }
4912#endif // CONFIG_TXK_SEL
Sarah Parker90024e42017-10-06 16:50:47 -07004913
Hui Suda816a12017-08-18 14:46:02 -07004914 ref_best_rd = AOMMIN(rd, ref_best_rd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07004915 if (rd < best_rd) {
4916 best_rd = rd;
Angie Chiangb5dda482016-11-02 16:19:58 -07004917 *rd_stats = this_rd_stats;
Yaowu Xuc27fc142016-08-22 16:08:15 -07004918 best_tx_type = mbmi->tx_type;
4919 best_tx = mbmi->tx_size;
Jingning Hane67b38a2016-11-04 10:30:00 -07004920 best_min_tx_size = mbmi->min_tx_size;
Yaowu Xuc27fc142016-08-22 16:08:15 -07004921 memcpy(best_blk_skip, x->blk_skip[0], sizeof(best_blk_skip[0]) * n4);
Rupert Swarbrickde2ea942017-10-09 15:21:21 +01004922 found = 1;
Yaowu Xuc27fc142016-08-22 16:08:15 -07004923 for (idy = 0; idy < xd->n8_h; ++idy)
4924 for (idx = 0; idx < xd->n8_w; ++idx)
4925 best_tx_size[idy][idx] = mbmi->inter_tx_size[idy][idx];
4926 }
Cheng Chen3c222602018-01-22 19:34:18 -08004927
4928#if !CONFIG_TXK_SEL
4929 // stop searching other tx types if skip has better rdcost than DCT for
4930 // all tx blocks.
4931 if (cpi->sf.tx_type_search.skip_tx_search && is_inter && this_rd_stats.skip)
4932 break;
4933#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -07004934 }
4935
Rupert Swarbrickde2ea942017-10-09 15:21:21 +01004936 // We should always find at least one candidate unless ref_best_rd is less
4937 // than INT64_MAX (in which case, all the calls to select_tx_size_fix_type
4938 // might have failed to find something better)
4939 assert(IMPLIES(!found, ref_best_rd != INT64_MAX));
4940 if (!found) return;
4941
4942 // We found a candidate transform to use. Copy our results from the "best"
4943 // array into mbmi.
Yaowu Xuc27fc142016-08-22 16:08:15 -07004944 mbmi->tx_type = best_tx_type;
4945 for (idy = 0; idy < xd->n8_h; ++idy)
4946 for (idx = 0; idx < xd->n8_w; ++idx)
4947 mbmi->inter_tx_size[idy][idx] = best_tx_size[idy][idx];
4948 mbmi->tx_size = best_tx;
Jingning Hane67b38a2016-11-04 10:30:00 -07004949 mbmi->min_tx_size = best_min_tx_size;
Yaowu Xuc27fc142016-08-22 16:08:15 -07004950 memcpy(x->blk_skip[0], best_blk_skip, sizeof(best_blk_skip[0]) * n4);
Hui Su1ddf2312017-08-19 15:21:34 -07004951
4952 // Save the RD search results into tx_rd_record.
Hui Su89ef4932017-11-28 10:54:31 -08004953 if (within_border) save_tx_rd_info(n4, hash, x, rd_stats, tx_rd_record);
Yaowu Xuc27fc142016-08-22 16:08:15 -07004954}
4955
// Recursively accumulate rate-distortion stats for one transform block.
// For luma (plane == 0) the block is split according to the per-unit sizes
// recorded in mbmi->inter_tx_size until the recorded size is reached; for
// chroma (plane != 0) the `|| plane` condition below makes this a single
// non-recursive evaluation at the given tx_size.  Results are merged into
// *rd_stats, and the entropy contexts in above_ctx/left_ctx are updated in
// place as blocks are coded.
static void tx_block_rd(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
                        int blk_col, int plane, int block, TX_SIZE tx_size,
                        BLOCK_SIZE plane_bsize, ENTROPY_CONTEXT *above_ctx,
                        ENTROPY_CONTEXT *left_ctx, RD_STATS *rd_stats,
                        int fast) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  struct macroblockd_plane *const pd = &xd->plane[plane];
  // Map the luma-grid block position to this plane's transform grid.
  const int tx_row = blk_row >> (1 - pd->subsampling_y);
  const int tx_col = blk_col >> (1 - pd->subsampling_x);
  const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
  const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);

  assert(tx_size < TX_SIZES_ALL);

  // Positions outside the visible frame contribute nothing.
  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;

  const TX_SIZE plane_tx_size =
      plane ? av1_get_uv_tx_size(mbmi, pd->subsampling_x, pd->subsampling_y)
            : mbmi->inter_tx_size[tx_row][tx_col];

  if (tx_size == plane_tx_size || plane) {
    // Leaf: evaluate this transform block and record the new entropy context.
    ENTROPY_CONTEXT *ta = above_ctx + blk_col;
    ENTROPY_CONTEXT *tl = left_ctx + blk_row;
    av1_tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
                      plane_bsize, ta, tl, rd_stats, fast, NULL);
    av1_set_txb_context(x, plane, block, tx_size, ta, tl);
  } else {
    // Split into the next smaller transform size and recurse on each child.
    const TX_SIZE sub_txs = sub_tx_size_map[1][tx_size];
    assert(IMPLIES(tx_size <= TX_4X4, sub_txs == tx_size));
    assert(IMPLIES(tx_size > TX_4X4, sub_txs < tx_size));
    const int bsw = tx_size_wide_unit[sub_txs];
    const int bsh = tx_size_high_unit[sub_txs];
    // Coefficient-buffer stride per child block, in 4x4 units.
    const int step = bsh * bsw;
    assert(bsw > 0 && bsh > 0);
    for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) {
      for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) {
        const int offsetr = blk_row + row;
        const int offsetc = blk_col + col;
        if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
        tx_block_rd(cpi, x, offsetr, offsetc, plane, block, sub_txs,
                    plane_bsize, above_ctx, left_ctx, rd_stats, fast);
        // NOTE: block advances even though the child may itself have split;
        // the step matches the child's coefficient footprint.
        block += step;
      }
    }
  }
}
5003
// Compute the rate-distortion cost of the chroma (U and V) planes of an
// inter-coded block, reusing the variable transform sizes already chosen
// for luma (via tx_block_rd).  Terminates early when the accumulated RD
// cost exceeds ref_best_rd.
// Return value 0: early termination triggered, no valid rd cost available;
//              1: rd cost values are valid.
int inter_block_uvrd(const AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_stats,
                     BLOCK_SIZE bsize, int64_t ref_best_rd, int fast) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int plane;
  int is_cost_valid = 1;
  int64_t this_rd = 0;

  // A negative budget means the caller has already exceeded its best RD.
  if (ref_best_rd < 0) is_cost_valid = 0;

  av1_init_rd_stats(rd_stats);

  // Chroma RD was explicitly skipped for this block; zero stats are valid.
  if (x->skip_chroma_rd) return is_cost_valid;
  const BLOCK_SIZE bsizec = scale_chroma_bsize(
      bsize, xd->plane[1].subsampling_x, xd->plane[1].subsampling_y);

  if (is_inter_block(mbmi) && is_cost_valid) {
    // Compute the chroma residuals before measuring their coding cost.
    for (plane = 1; plane < MAX_MB_PLANE; ++plane)
      av1_subtract_plane(x, bsizec, plane);
  }

  if (is_cost_valid) {
    for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
      const struct macroblockd_plane *const pd = &xd->plane[plane];
      const BLOCK_SIZE plane_bsize = get_plane_block_size(bsizec, pd);
      const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
      const int mi_height =
          block_size_high[plane_bsize] >> tx_size_high_log2[0];
      TX_SIZE max_tx_size = get_vartx_max_txsize(
          xd, plane_bsize, pd->subsampling_x || pd->subsampling_y);
      const int bh = tx_size_high_unit[max_tx_size];
      const int bw = tx_size_wide_unit[max_tx_size];
      int idx, idy;
      int block = 0;
      const int step = bh * bw;
      ENTROPY_CONTEXT ta[2 * MAX_MIB_SIZE];
      ENTROPY_CONTEXT tl[2 * MAX_MIB_SIZE];
      RD_STATS pn_rd_stats;
      av1_init_rd_stats(&pn_rd_stats);
      av1_get_entropy_contexts(bsizec, 0, pd, ta, tl);

      // Walk the plane in max-transform-size tiles, accumulating RD stats.
      for (idy = 0; idy < mi_height; idy += bh) {
        for (idx = 0; idx < mi_width; idx += bw) {
          tx_block_rd(cpi, x, idy, idx, plane, block, max_tx_size, plane_bsize,
                      ta, tl, &pn_rd_stats, fast);
          block += step;
        }
      }

      // INT_MAX rate signals that the per-block search aborted.
      if (pn_rd_stats.rate == INT_MAX) {
        is_cost_valid = 0;
        break;
      }

      av1_merge_rd_stats(rd_stats, &pn_rd_stats);

      // Running cost: the cheaper of coding the coefficients vs. skipping
      // them entirely (zero_rate/sse).
      this_rd = AOMMIN(RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist),
                       RDCOST(x->rdmult, rd_stats->zero_rate, rd_stats->sse));

      if (this_rd > ref_best_rd) {
        is_cost_valid = 0;
        break;
      }
    }
  }

  if (!is_cost_valid) {
    // reset cost value
    av1_invalid_rd_stats(rd_stats);
  }

  return is_cost_valid;
}
Yaowu Xuc27fc142016-08-22 16:08:15 -07005079
// Rate-distortion search for the chroma palette mode.  U and V samples are
// clustered jointly (each pixel is a 2-D point) with k-means for every
// candidate palette size from min(colors, PALETTE_MAX_SIZE) down to 2.
// Whenever a candidate beats *best_rd, the outputs (best_mbmi,
// best_palette_color_map, rate/distortion/skippable) are updated.
// On exit the plane-1 color_index_map holds the best color map found, if any.
static void rd_pick_palette_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
                                       int dc_mode_cost,
                                       uint8_t *best_palette_color_map,
                                       MB_MODE_INFO *const best_mbmi,
                                       int64_t *best_rd, int *rate,
                                       int *rate_tokenonly, int64_t *distortion,
                                       int *skippable) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  assert(!is_inter_block(mbmi));
  assert(
      av1_allow_palette(cpi->common.allow_screen_content_tools, mbmi->sb_type));
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
  const BLOCK_SIZE bsize = mbmi->sb_type;
  int this_rate;
  int64_t this_rd;
  int colors_u, colors_v, colors;
  const int src_stride = x->plane[1].src.stride;
  const uint8_t *const src_u = x->plane[1].src.buf;
  const uint8_t *const src_v = x->plane[2].src.buf;
  uint8_t *const color_map = xd->plane[1].color_index_map;
  RD_STATS tokenonly_rd_stats;
  int plane_block_width, plane_block_height, rows, cols;
  av1_get_block_dimensions(bsize, 1, xd, &plane_block_width,
                           &plane_block_height, &rows, &cols);

  // Palette is signaled on top of DC prediction for chroma.
  mbmi->uv_mode = UV_DC_PRED;

  int count_buf[1 << 12];  // Maximum (1 << 12) color levels.
  if (cpi->common.use_highbitdepth) {
    colors_u = av1_count_colors_highbd(src_u, src_stride, rows, cols,
                                       cpi->common.bit_depth, count_buf);
    colors_v = av1_count_colors_highbd(src_v, src_stride, rows, cols,
                                       cpi->common.bit_depth, count_buf);
  } else {
    colors_u = av1_count_colors(src_u, src_stride, rows, cols, count_buf);
    colors_v = av1_count_colors(src_v, src_stride, rows, cols, count_buf);
  }

  // Colors cached from neighboring blocks, used to cheapen palette signaling.
  uint16_t color_cache[2 * PALETTE_MAX_SIZE];
  const int n_cache = av1_get_palette_cache(xd, 1, color_cache);

  colors = colors_u > colors_v ? colors_u : colors_v;
  // Palette only pays off for blocks with few distinct colors.
  if (colors > 1 && colors <= 64) {
    int r, c, n, i, j;
    const int max_itr = 50;
    int lb_u, ub_u, val_u;
    int lb_v, ub_v, val_v;
    int *const data = x->palette_buffer->kmeans_data_buf;
    int centroids[2 * PALETTE_MAX_SIZE];

    uint16_t *src_u16 = CONVERT_TO_SHORTPTR(src_u);
    uint16_t *src_v16 = CONVERT_TO_SHORTPTR(src_v);
    if (cpi->common.use_highbitdepth) {
      lb_u = src_u16[0];
      ub_u = src_u16[0];
      lb_v = src_v16[0];
      ub_v = src_v16[0];
    } else {
      lb_u = src_u[0];
      ub_u = src_u[0];
      lb_v = src_v[0];
      ub_v = src_v[0];
    }

    // Gather interleaved (U, V) samples and track per-channel min/max,
    // which seed the initial k-means centroids below.
    for (r = 0; r < rows; ++r) {
      for (c = 0; c < cols; ++c) {
        if (cpi->common.use_highbitdepth) {
          val_u = src_u16[r * src_stride + c];
          val_v = src_v16[r * src_stride + c];
          data[(r * cols + c) * 2] = val_u;
          data[(r * cols + c) * 2 + 1] = val_v;
        } else {
          val_u = src_u[r * src_stride + c];
          val_v = src_v[r * src_stride + c];
          data[(r * cols + c) * 2] = val_u;
          data[(r * cols + c) * 2 + 1] = val_v;
        }
        if (val_u < lb_u)
          lb_u = val_u;
        else if (val_u > ub_u)
          ub_u = val_u;
        if (val_v < lb_v)
          lb_v = val_v;
        else if (val_v > ub_v)
          ub_v = val_v;
      }
    }

    // Try every palette size from the largest useful one down to 2.
    for (n = colors > PALETTE_MAX_SIZE ? PALETTE_MAX_SIZE : colors; n >= 2;
         --n) {
      // Initial centroids: evenly spaced across each channel's value range.
      for (i = 0; i < n; ++i) {
        centroids[i * 2] = lb_u + (2 * i + 1) * (ub_u - lb_u) / n / 2;
        centroids[i * 2 + 1] = lb_v + (2 * i + 1) * (ub_v - lb_v) / n / 2;
      }
      av1_k_means(data, centroids, color_map, rows * cols, n, 2, max_itr);
      optimize_palette_colors(color_cache, n_cache, n, 2, centroids);
      // Sort the U channel colors in ascending order (selection sort over
      // interleaved (U, V) pairs, keeping each pair together).
      for (i = 0; i < 2 * (n - 1); i += 2) {
        int min_idx = i;
        int min_val = centroids[i];
        for (j = i + 2; j < 2 * n; j += 2)
          if (centroids[j] < min_val) min_val = centroids[j], min_idx = j;
        if (min_idx != i) {
          int temp_u = centroids[i], temp_v = centroids[i + 1];
          centroids[i] = centroids[min_idx];
          centroids[i + 1] = centroids[min_idx + 1];
          centroids[min_idx] = temp_u, centroids[min_idx + 1] = temp_v;
        }
      }
      av1_calc_indices(data, centroids, color_map, rows * cols, n, 2);
      extend_palette_color_map(color_map, cols, rows, plane_block_width,
                               plane_block_height);
      pmi->palette_size[1] = n;
      // Store clipped centroids as the U (i == 1) and V (i == 2) palettes.
      for (i = 1; i < 3; ++i) {
        for (j = 0; j < n; ++j) {
          if (cpi->common.use_highbitdepth)
            pmi->palette_colors[i * PALETTE_MAX_SIZE + j] = clip_pixel_highbd(
                (int)centroids[j * 2 + i - 1], cpi->common.bit_depth);
          else
            pmi->palette_colors[i * PALETTE_MAX_SIZE + j] =
                clip_pixel((int)centroids[j * 2 + i - 1]);
        }
      }

      super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
      if (tokenonly_rd_stats.rate == INT_MAX) continue;
      this_rate = tokenonly_rd_stats.rate +
                  intra_mode_info_cost_uv(cpi, x, mbmi, bsize, dc_mode_cost);
      this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
      if (this_rd < *best_rd) {
        *best_rd = this_rd;
        *best_mbmi = *mbmi;
        memcpy(best_palette_color_map, color_map,
               plane_block_width * plane_block_height *
                   sizeof(best_palette_color_map[0]));
        *rate = this_rate;
        *distortion = tokenonly_rd_stats.dist;
        *rate_tokenonly = tokenonly_rd_stats.rate;
        *skippable = tokenonly_rd_stats.skip;
      }
    }
  }
  // Restore the winning color map into the shared color_index_map, since the
  // search above overwrote it with the last candidate tried.
  if (best_mbmi->palette_mode_info.palette_size[1] > 0) {
    memcpy(color_map, best_palette_color_map,
           plane_block_width * plane_block_height *
               sizeof(best_palette_color_map[0]));
  }
}
5229
hui su45dc5972016-12-08 17:42:50 -08005230// Run RD calculation with given chroma intra prediction angle., and return
5231// the RD cost. Update the best mode info. if the RD cost is the best so far.
5232static int64_t pick_intra_angle_routine_sbuv(
5233 const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
5234 int rate_overhead, int64_t best_rd_in, int *rate, RD_STATS *rd_stats,
5235 int *best_angle_delta, int64_t *best_rd) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07005236 MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
Urvang Joshi330aec82017-05-08 15:37:42 -07005237 assert(!is_inter_block(mbmi));
Angie Chiang284d7772016-11-08 11:06:45 -08005238 int this_rate;
5239 int64_t this_rd;
5240 RD_STATS tokenonly_rd_stats;
Yaowu Xuc27fc142016-08-22 16:08:15 -07005241
hui su45dc5972016-12-08 17:42:50 -08005242 if (!super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd_in))
5243 return INT64_MAX;
Hui Sub6d058d2018-01-18 14:12:36 -08005244 this_rate = tokenonly_rd_stats.rate +
5245 intra_mode_info_cost_uv(cpi, x, mbmi, bsize, rate_overhead);
Urvang Joshi70006e42017-06-14 16:08:55 -07005246 this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
Yaowu Xuc27fc142016-08-22 16:08:15 -07005247 if (this_rd < *best_rd) {
5248 *best_rd = this_rd;
5249 *best_angle_delta = mbmi->angle_delta[1];
5250 *rate = this_rate;
hui su45dc5972016-12-08 17:42:50 -08005251 rd_stats->rate = tokenonly_rd_stats.rate;
5252 rd_stats->dist = tokenonly_rd_stats.dist;
5253 rd_stats->skip = tokenonly_rd_stats.skip;
Yaowu Xuc27fc142016-08-22 16:08:15 -07005254 }
hui su45dc5972016-12-08 17:42:50 -08005255 return this_rd;
Yaowu Xuc27fc142016-08-22 16:08:15 -07005256}
5257
// With given chroma directional intra prediction mode, pick the best angle
// delta. Return true if a RD cost that is smaller than the input one is found.
// Two-pass search: even deltas (both signs) are evaluated first and their
// costs cached in rd_cost[]; odd deltas are then tried only when at least one
// even neighbor was close to the best cost so far.
static int rd_pick_intra_angle_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
                                    BLOCK_SIZE bsize, int rate_overhead,
                                    int64_t best_rd, int *rate,
                                    RD_STATS *rd_stats) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  assert(!is_inter_block(mbmi));
  int i, angle_delta, best_angle_delta = 0;
  // rd_cost is indexed by (2 * |delta| + sign), covering both signs of each
  // delta magnitude, with padding for the |delta|+1 lookups in pass two.
  int64_t this_rd, best_rd_in, rd_cost[2 * (MAX_ANGLE_DELTA + 2)];

  rd_stats->rate = INT_MAX;
  rd_stats->skip = 0;
  rd_stats->dist = INT64_MAX;
  for (i = 0; i < 2 * (MAX_ANGLE_DELTA + 2); ++i) rd_cost[i] = INT64_MAX;

  // Pass 1: even angle deltas, positive (i == 0) and negative (i == 1).
  for (angle_delta = 0; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
    for (i = 0; i < 2; ++i) {
      // Allow slightly over-budget candidates; zero delta gets more slack.
      best_rd_in = (best_rd == INT64_MAX)
                       ? INT64_MAX
                       : (best_rd + (best_rd >> ((angle_delta == 0) ? 3 : 5)));
      mbmi->angle_delta[1] = (1 - 2 * i) * angle_delta;
      this_rd = pick_intra_angle_routine_sbuv(cpi, x, bsize, rate_overhead,
                                              best_rd_in, rate, rd_stats,
                                              &best_angle_delta, &best_rd);
      rd_cost[2 * angle_delta + i] = this_rd;
      if (angle_delta == 0) {
        // Delta 0 has no sign; if even it fails, abandon this mode entirely.
        if (this_rd == INT64_MAX) return 0;
        rd_cost[1] = this_rd;
        break;
      }
    }
  }

  assert(best_rd != INT64_MAX);
  // Pass 2: odd angle deltas, skipped when both even neighbors were already
  // well above the running best cost.
  for (angle_delta = 1; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
    int64_t rd_thresh;
    for (i = 0; i < 2; ++i) {
      int skip_search = 0;
      rd_thresh = best_rd + (best_rd >> 5);
      if (rd_cost[2 * (angle_delta + 1) + i] > rd_thresh &&
          rd_cost[2 * (angle_delta - 1) + i] > rd_thresh)
        skip_search = 1;
      if (!skip_search) {
        mbmi->angle_delta[1] = (1 - 2 * i) * angle_delta;
        pick_intra_angle_routine_sbuv(cpi, x, bsize, rate_overhead, best_rd,
                                      rate, rd_stats, &best_angle_delta,
                                      &best_rd);
      }
    }
  }

  mbmi->angle_delta[1] = best_angle_delta;
  return rd_stats->rate != INT_MAX;
}
Yaowu Xuc27fc142016-08-22 16:08:15 -07005314
#if CONFIG_CFL
// Map a per-plane sign pair to the jointly-coded sign index.  The zero sign
// of the *other* plane is folded in depending on which plane is being varied.
#define PLANE_SIGN_TO_JOINT_SIGN(plane, a, b) \
  (plane == CFL_PRED_U ? a * CFL_SIGNS + b - 1 : b * CFL_SIGNS + a - 1)
// RD search over CfL (chroma-from-luma) alpha magnitudes and joint signs for
// both chroma planes.  On return, mbmi->cfl_alpha_idx / cfl_alpha_signs hold
// the best combination found (or 0/0 when nothing beat best_rd), and the
// return value is the signaling rate of that alpha choice (INT_MAX when no
// combination improved on best_rd).
static int cfl_rd_pick_alpha(MACROBLOCK *const x, const AV1_COMP *const cpi,
                             TX_SIZE tx_size, int64_t best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;

  const BLOCK_SIZE bsize = mbmi->sb_type;
#if CONFIG_DEBUG
  assert(is_cfl_allowed(mbmi));
  const BLOCK_SIZE plane_bsize =
      get_plane_block_size(mbmi->sb_type, &xd->plane[AOM_PLANE_U]);
  assert(plane_bsize < BLOCK_SIZES_ALL);
  if (!xd->lossless[mbmi->segment_id]) {
    assert(block_size_wide[plane_bsize] == tx_size_wide[tx_size]);
    assert(block_size_high[plane_bsize] == tx_size_high[tx_size]);
  }
#endif

  // Cache the DC prediction so repeated candidate evaluations below don't
  // recompute it; cleared again before returning.
  xd->cfl.use_dc_pred_cache = 1;
  const int64_t mode_rd =
      RDCOST(x->rdmult,
             x->intra_uv_mode_cost[CFL_ALLOWED][mbmi->mode][UV_CFL_PRED], 0);
  int64_t best_rd_uv[CFL_JOINT_SIGNS][CFL_PRED_PLANES];
  int best_c[CFL_JOINT_SIGNS][CFL_PRED_PLANES];
#if CONFIG_DEBUG
  int best_rate_uv[CFL_JOINT_SIGNS][CFL_PRED_PLANES];
#endif  // CONFIG_DEBUG

  // Baseline: per-plane cost of alpha == 0 for each feasible joint sign.
  for (int plane = 0; plane < CFL_PRED_PLANES; plane++) {
    RD_STATS rd_stats;
    av1_init_rd_stats(&rd_stats);
    for (int joint_sign = 0; joint_sign < CFL_JOINT_SIGNS; joint_sign++) {
      best_rd_uv[joint_sign][plane] = INT64_MAX;
      best_c[joint_sign][plane] = 0;
    }
    // Collect RD stats for an alpha value of zero in this plane.
    // Skip i == CFL_SIGN_ZERO as (0, 0) is invalid.
    for (int i = CFL_SIGN_NEG; i < CFL_SIGNS; i++) {
      const int joint_sign = PLANE_SIGN_TO_JOINT_SIGN(plane, CFL_SIGN_ZERO, i);
      if (i == CFL_SIGN_NEG) {
        // The transform cost is sign-independent for alpha 0; compute once
        // and reuse the same rd_stats for the other sign.
        mbmi->cfl_alpha_idx = 0;
        mbmi->cfl_alpha_signs = joint_sign;
        txfm_rd_in_plane(x, cpi, &rd_stats, best_rd, plane + 1, bsize, tx_size,
                         cpi->sf.use_fast_coef_costing);
        if (rd_stats.rate == INT_MAX) break;
      }
      const int alpha_rate = x->cfl_cost[joint_sign][plane][0];
      best_rd_uv[joint_sign][plane] =
          RDCOST(x->rdmult, rd_stats.rate + alpha_rate, rd_stats.dist);
#if CONFIG_DEBUG
      best_rate_uv[joint_sign][plane] = rd_stats.rate;
#endif  // CONFIG_DEBUG
    }
  }

  int best_joint_sign = -1;

  // Search non-zero alpha magnitudes per plane and sign; `progress` tracks
  // how recently a magnitude improved, allowing an early break once larger
  // magnitudes stop helping (only after c > 2).
  for (int plane = 0; plane < CFL_PRED_PLANES; plane++) {
    for (int pn_sign = CFL_SIGN_NEG; pn_sign < CFL_SIGNS; pn_sign++) {
      int progress = 0;
      for (int c = 0; c < CFL_ALPHABET_SIZE; c++) {
        int flag = 0;
        RD_STATS rd_stats;
        if (c > 2 && progress < c) break;
        av1_init_rd_stats(&rd_stats);
        for (int i = 0; i < CFL_SIGNS; i++) {
          const int joint_sign = PLANE_SIGN_TO_JOINT_SIGN(plane, pn_sign, i);
          if (i == 0) {
            // Transform cost depends only on this plane's (sign, magnitude);
            // compute once and amortize over the other plane's signs.
            mbmi->cfl_alpha_idx = (c << CFL_ALPHABET_SIZE_LOG2) + c;
            mbmi->cfl_alpha_signs = joint_sign;
            txfm_rd_in_plane(x, cpi, &rd_stats, best_rd, plane + 1, bsize,
                             tx_size, cpi->sf.use_fast_coef_costing);
            if (rd_stats.rate == INT_MAX) break;
          }
          const int alpha_rate = x->cfl_cost[joint_sign][plane][c];
          int64_t this_rd =
              RDCOST(x->rdmult, rd_stats.rate + alpha_rate, rd_stats.dist);
          if (this_rd >= best_rd_uv[joint_sign][plane]) continue;
          best_rd_uv[joint_sign][plane] = this_rd;
          best_c[joint_sign][plane] = c;
#if CONFIG_DEBUG
          best_rate_uv[joint_sign][plane] = rd_stats.rate;
#endif  // CONFIG_DEBUG
          flag = 2;
          if (best_rd_uv[joint_sign][!plane] == INT64_MAX) continue;
          // Combine with the other plane's best for this joint sign to get a
          // full-mode cost, and track the overall winner.
          this_rd += mode_rd + best_rd_uv[joint_sign][!plane];
          if (this_rd >= best_rd) continue;
          best_rd = this_rd;
          best_joint_sign = joint_sign;
        }
        progress += flag;
      }
    }
  }

  int best_rate_overhead = INT_MAX;
  int ind = 0;
  if (best_joint_sign >= 0) {
    const int u = best_c[best_joint_sign][CFL_PRED_U];
    const int v = best_c[best_joint_sign][CFL_PRED_V];
    ind = (u << CFL_ALPHABET_SIZE_LOG2) + v;
    best_rate_overhead = x->cfl_cost[best_joint_sign][CFL_PRED_U][u] +
                         x->cfl_cost[best_joint_sign][CFL_PRED_V][v];
#if CONFIG_DEBUG
    xd->cfl.rate = x->intra_uv_mode_cost[CFL_ALLOWED][mbmi->mode][UV_CFL_PRED] +
                   best_rate_overhead +
                   best_rate_uv[best_joint_sign][CFL_PRED_U] +
                   best_rate_uv[best_joint_sign][CFL_PRED_V];
#endif  // CONFIG_DEBUG
  } else {
    // No combination beat best_rd; fall back to a valid (but unused) sign.
    best_joint_sign = 0;
  }

  mbmi->cfl_alpha_idx = ind;
  mbmi->cfl_alpha_signs = best_joint_sign;
  // Invalidate the DC prediction cache before leaving.
  xd->cfl.use_dc_pred_cache = 0;
  xd->cfl.dc_pred_is_cached[0] = 0;
  xd->cfl.dc_pred_is_cached[1] = 0;
  return best_rate_overhead;
}
#endif  // CONFIG_CFL
5438
hui sueaddeee2017-05-30 12:19:38 -07005439static void init_sbuv_mode(MB_MODE_INFO *const mbmi) {
Luc Trudeaud6d9eee2017-07-12 12:36:50 -04005440 mbmi->uv_mode = UV_DC_PRED;
hui sueaddeee2017-05-30 12:19:38 -07005441 mbmi->palette_mode_info.palette_size[1] = 0;
hui sueaddeee2017-05-30 12:19:38 -07005442}
5443
// Rate-distortion search over all chroma (UV) intra prediction modes for the
// current block. On return, xd->mi[0]->mbmi holds the winning mode (and its
// angle delta / CFL signs / palette, where applicable).
//
// Outputs:
//   *rate           - total rate of the best mode (mode signalling included)
//   *rate_tokenonly - rate excluding mode signalling
//   *distortion     - distortion of the best mode
//   *skippable      - whether the best mode codes no non-zero coefficients
// Returns the best RD cost found (asserted to be < INT64_MAX).
static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
                                       int *rate, int *rate_tokenonly,
                                       int64_t *distortion, int *skippable,
                                       BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  assert(!is_inter_block(mbmi));
  // Snapshot so we can restore the best candidate after the search loop.
  MB_MODE_INFO best_mbmi = *mbmi;
  int64_t best_rd = INT64_MAX, this_rd;

  for (int mode_idx = 0; mode_idx < UV_INTRA_MODES; ++mode_idx) {
    int this_rate;
    RD_STATS tokenonly_rd_stats;
    UV_PREDICTION_MODE mode = uv_rd_search_mode_order[mode_idx];
    const int is_directional_mode =
        av1_is_directional_mode(get_uv_mode(mode), mbmi->sb_type);
    // Speed feature: skip modes masked out for this transform-size class.
    if (!(cpi->sf.intra_uv_mode_mask[txsize_sqr_up_map[max_tx_size]] &
          (1 << mode)))
      continue;

    mbmi->uv_mode = mode;
#if CONFIG_CFL
    int cfl_alpha_rate = 0;
    if (mode == UV_CFL_PRED) {
      if (!is_cfl_allowed(mbmi)) continue;
      assert(!is_directional_mode);
      const TX_SIZE uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
      // Search for the best CFL alpha (sign/index); INT_MAX means no
      // candidate beat best_rd, so CFL is not worth evaluating further.
      cfl_alpha_rate = cfl_rd_pick_alpha(x, cpi, uv_tx_size, best_rd);
      if (cfl_alpha_rate == INT_MAX) continue;
    }
#endif
    mbmi->angle_delta[1] = 0;
    if (is_directional_mode && av1_use_angle_delta(mbmi->sb_type)) {
      // Directional modes additionally search over angle deltas; the mode
      // cost (plus, without EXT_INTRA_MOD, a uniform angle-delta cost) is
      // passed in as the fixed rate overhead.
#if CONFIG_CFL
      const int rate_overhead =
          x->intra_uv_mode_cost[is_cfl_allowed(mbmi)][mbmi->mode][mode] +
#else
      const int rate_overhead = x->intra_uv_mode_cost[mbmi->mode][mode] +
#endif  // CONFIG_CFL
#if CONFIG_EXT_INTRA_MOD
                                0;
#else
                                write_uniform_cost(2 * MAX_ANGLE_DELTA + 1, 0);
#endif  // CONFIG_EXT_INTRA_MOD
      if (!rd_pick_intra_angle_sbuv(cpi, x, bsize, rate_overhead, best_rd,
                                    &this_rate, &tokenonly_rd_stats))
        continue;
    } else {
      if (!super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd)) {
        continue;
      }
    }
    const int mode_cost =
#if CONFIG_CFL
        x->intra_uv_mode_cost[is_cfl_allowed(mbmi)][mbmi->mode][mode] +
        cfl_alpha_rate;
#else
        x->intra_uv_mode_cost[mbmi->mode][mode];
#endif
    this_rate = tokenonly_rd_stats.rate +
                intra_mode_info_cost_uv(cpi, x, mbmi, bsize, mode_cost);
#if CONFIG_CFL
    if (mode == UV_CFL_PRED) {
      assert(is_cfl_allowed(mbmi));
#if CONFIG_DEBUG
      // Cross-check the rate accumulated during the CFL alpha search against
      // the rate computed here (lossless blocks are exempt).
      if (!xd->lossless[mbmi->segment_id])
        assert(xd->cfl.rate == tokenonly_rd_stats.rate + mode_cost);
#endif  // CONFIG_DEBUG
    }
#endif
    this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);

    if (this_rd < best_rd) {
      best_mbmi = *mbmi;
      best_rd = this_rd;
      *rate = this_rate;
      *rate_tokenonly = tokenonly_rd_stats.rate;
      *distortion = tokenonly_rd_stats.dist;
      *skippable = tokenonly_rd_stats.skip;
    }
  }

  // Optionally try a chroma palette (screen-content tool); it updates
  // best_mbmi/best_rd and the output stats in place if it wins.
  const int try_palette =
      av1_allow_palette(cpi->common.allow_screen_content_tools, mbmi->sb_type);
  if (try_palette) {
    uint8_t *best_palette_color_map = x->palette_buffer->best_palette_color_map;
    rd_pick_palette_intra_sbuv(
        cpi, x,
#if CONFIG_CFL
        x->intra_uv_mode_cost[is_cfl_allowed(mbmi)][mbmi->mode][UV_DC_PRED],
#else
        x->intra_uv_mode_cost[mbmi->mode][UV_DC_PRED],
#endif
        best_palette_color_map, &best_mbmi, &best_rd, rate, rate_tokenonly,
        distortion, skippable);
  }

  *mbmi = best_mbmi;
  // Make sure we actually chose a mode
  assert(best_rd < INT64_MAX);
  return best_rd;
}
5546
// Selects the chroma (UV) intra mode for the current block. When chroma RD is
// skipped (x->skip_chroma_rd, e.g. for sub-sampled sub-blocks with no chroma),
// trivially reports a zero-cost UV_DC_PRED; otherwise runs the full UV mode
// search via rd_pick_intra_sbuv_mode(). Results are returned through the
// rate/dist/skip output pointers and *mode_uv.
static void choose_intra_uv_mode(const AV1_COMP *const cpi, MACROBLOCK *const x,
                                 BLOCK_SIZE bsize, TX_SIZE max_tx_size,
                                 int *rate_uv, int *rate_uv_tokenonly,
                                 int64_t *dist_uv, int *skip_uv,
                                 UV_PREDICTION_MODE *mode_uv) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  // Use an estimated rd for uv_intra based on DC_PRED if the
  // appropriate speed flag is set.
  init_sbuv_mode(mbmi);
  if (x->skip_chroma_rd) {
    *rate_uv = 0;
    *rate_uv_tokenonly = 0;
    *dist_uv = 0;
    *skip_uv = 1;
    *mode_uv = UV_DC_PRED;
    return;
  }
  // Promote the block size so chroma planes of sub-sampled small blocks map
  // to a valid chroma block size.
  bsize = scale_chroma_bsize(bsize, xd->plane[AOM_PLANE_U].subsampling_x,
                             xd->plane[AOM_PLANE_U].subsampling_y);
#if CONFIG_CFL
  // Only store reconstructed luma when there's chroma RDO. When there's no
  // chroma RDO, the reconstructed luma will be stored in encode_superblock().
  xd->cfl.store_y = !x->skip_chroma_rd;
  if (xd->cfl.store_y) {
    // Perform one extra call to txfm_rd_in_plane(), with the values chosen
    // during luma RDO, so we can store reconstructed luma values
    RD_STATS this_rd_stats;
    txfm_rd_in_plane(x, cpi, &this_rd_stats, INT64_MAX, AOM_PLANE_Y,
                     mbmi->sb_type, mbmi->tx_size,
                     cpi->sf.use_fast_coef_costing);
    xd->cfl.store_y = 0;
  }
#endif  // CONFIG_CFL
  rd_pick_intra_sbuv_mode(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
                          bsize, max_tx_size);
  *mode_uv = mbmi->uv_mode;
}
5585
Yue Chenb23d00a2017-07-28 17:01:21 -07005586static int cost_mv_ref(const MACROBLOCK *const x, PREDICTION_MODE mode,
Yaowu Xuc27fc142016-08-22 16:08:15 -07005587 int16_t mode_context) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07005588 if (is_inter_compound_mode(mode)) {
Yue Chenb23d00a2017-07-28 17:01:21 -07005589 return x
clang-format55ce9e02017-02-15 22:27:12 -08005590 ->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)];
Yaowu Xuc27fc142016-08-22 16:08:15 -07005591 }
David Barkercb03dc32017-04-07 13:05:09 +01005592
David Barkercb03dc32017-04-07 13:05:09 +01005593 int mode_cost = 0;
5594 int16_t mode_ctx = mode_context & NEWMV_CTX_MASK;
5595 int16_t is_all_zero_mv = mode_context & (1 << ALL_ZERO_FLAG_OFFSET);
5596
5597 assert(is_inter_mode(mode));
5598
5599 if (mode == NEWMV) {
Yue Chenb23d00a2017-07-28 17:01:21 -07005600 mode_cost = x->newmv_mode_cost[mode_ctx][0];
David Barkercb03dc32017-04-07 13:05:09 +01005601 return mode_cost;
5602 } else {
Yue Chenb23d00a2017-07-28 17:01:21 -07005603 mode_cost = x->newmv_mode_cost[mode_ctx][1];
Sarah Parker2b9ec2e2017-10-30 17:34:08 -07005604 mode_ctx = (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
David Barkercb03dc32017-04-07 13:05:09 +01005605
5606 if (is_all_zero_mv) return mode_cost;
5607
Sarah Parker2b9ec2e2017-10-30 17:34:08 -07005608 if (mode == GLOBALMV) {
Yue Chenb23d00a2017-07-28 17:01:21 -07005609 mode_cost += x->zeromv_mode_cost[mode_ctx][0];
David Barkercb03dc32017-04-07 13:05:09 +01005610 return mode_cost;
5611 } else {
Yue Chenb23d00a2017-07-28 17:01:21 -07005612 mode_cost += x->zeromv_mode_cost[mode_ctx][1];
David Barkercb03dc32017-04-07 13:05:09 +01005613 mode_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
5614
5615 if (mode_context & (1 << SKIP_NEARESTMV_OFFSET)) mode_ctx = 6;
5616 if (mode_context & (1 << SKIP_NEARMV_OFFSET)) mode_ctx = 7;
5617 if (mode_context & (1 << SKIP_NEARESTMV_SUB8X8_OFFSET)) mode_ctx = 8;
5618
Yue Chenb23d00a2017-07-28 17:01:21 -07005619 mode_cost += x->refmv_mode_cost[mode_ctx][mode != NEARESTMV];
David Barkercb03dc32017-04-07 13:05:09 +01005620 return mode_cost;
5621 }
5622 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07005623}
5624
Sarah Parker6fdc8532016-11-16 17:47:13 -08005625static int get_interinter_compound_type_bits(BLOCK_SIZE bsize,
5626 COMPOUND_TYPE comp_type) {
Debargha Mukherjeec5f735f2017-04-26 03:25:28 +00005627 (void)bsize;
Sarah Parker6fdc8532016-11-16 17:47:13 -08005628 switch (comp_type) {
5629 case COMPOUND_AVERAGE: return 0;
5630 case COMPOUND_WEDGE: return get_interinter_wedge_bits(bsize);
Sarah Parker569edda2016-12-14 14:57:38 -08005631 case COMPOUND_SEG: return 1;
Sarah Parker6fdc8532016-11-16 17:47:13 -08005632 default: assert(0); return 0;
5633 }
5634}
Sarah Parker6fdc8532016-11-16 17:47:13 -08005635
// Per-candidate RD statistics gathered during (sub-)block motion mode search.
// NOTE(review): the 'b' prefix appears to mean "block"; field semantics below
// are inferred from naming conventions elsewhere in this file -- confirm
// against the call sites before relying on them.
typedef struct {
  int eobs;            // end-of-block coefficient count
  int brate;           // total rate for this candidate
  int byrate;          // luma-only rate -- presumably; verify at call sites
  int64_t bdist;       // distortion
  int64_t bsse;        // sum of squared error
  int64_t brdcost;     // combined RD cost
  int_mv mvs[2];       // selected MVs, one per reference (2nd unused if single)
  int_mv pred_mv[2];   // predicted MVs used as search starting points
  int_mv ref_mv[2];    // reference MVs used for MV cost computation

  // Saved entropy coding contexts (above/left) for this candidate.
  ENTROPY_CONTEXT ta[2];
  ENTROPY_CONTEXT tl[2];
} SEG_RDSTAT;
5650
// Aggregated best-so-far results of a segmented (sub-8x8) mode search:
// the winning mode and RD stats for each of up to 4 sub-blocks.
typedef struct {
  int_mv *ref_mv[2];   // reference MVs for up to two references
  int_mv mvp;          // MV predictor
  int64_t segment_rd;  // best total RD cost across the segment
  int r;               // rate of the best configuration
  int64_t d;           // distortion of the best configuration
  int64_t sse;         // SSE of the best configuration
  int segment_yrate;   // luma rate -- presumably; verify at call sites
  PREDICTION_MODE modes[4];  // winning mode per sub-block
  // Per-sub-block stats for every candidate inter (and compound) mode.
  SEG_RDSTAT rdstat[4][INTER_MODES + INTER_COMPOUND_MODES];
  int mvthresh;        // MV magnitude threshold used by the search
} BEST_SEG_INFO;
5664
Alex Converse0fa0f422017-04-24 12:51:14 -07005665static INLINE int mv_check_bounds(const MvLimits *mv_limits, const MV *mv) {
5666 return (mv->row >> 3) < mv_limits->row_min ||
5667 (mv->row >> 3) > mv_limits->row_max ||
5668 (mv->col >> 3) < mv_limits->col_min ||
5669 (mv->col >> 3) > mv_limits->col_max;
Yaowu Xuc27fc142016-08-22 16:08:15 -07005670}
5671
// Check if NEARESTMV/NEARMV/GLOBALMV is the cheapest way to encode zero
// motion. Returns 0 when `this_mode` is a redundant (more expensive) way of
// coding the same zero/global motion that a cheaper mode already covers, so
// the caller can prune it; returns 1 otherwise.
// TODO(aconverse): Find out if this is still productive then clean up or
// remove
static int check_best_zero_mv(
    const AV1_COMP *const cpi, const MACROBLOCK *const x,
    const int16_t mode_context[TOTAL_REFS_PER_FRAME],
    const int16_t compound_mode_context[TOTAL_REFS_PER_FRAME],
    int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME], int this_mode,
    const MV_REFERENCE_FRAME ref_frames[2], const BLOCK_SIZE bsize, int mi_row,
    int mi_col) {
  int_mv zeromv[2] = { {.as_int = 0 } };
  int comp_pred_mode = ref_frames[1] > INTRA_FRAME;
  (void)mi_row;
  (void)mi_col;
  (void)cpi;
  // For global-motion modes the "zero" MV is the per-reference global MV,
  // not literally (0, 0).
  if (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV) {
    for (int cur_frm = 0; cur_frm < 1 + comp_pred_mode; cur_frm++) {
      zeromv[cur_frm].as_int =
          gm_get_motion_vector(&cpi->common.global_motion[ref_frames[cur_frm]],
                               cpi->common.allow_high_precision_mv, bsize,
                               mi_col, mi_row
#if CONFIG_AMVR
                               ,
                               cpi->common.cur_frame_force_integer_mv
#endif
                               )
              .as_int;
    }
  }

  // Single-reference case: prune NEARMV/NEARESTMV/GLOBALMV when its MV equals
  // the (global) zero MV but a cheaper-to-signal mode expresses the same MV.
  if ((this_mode == NEARMV || this_mode == NEARESTMV ||
       this_mode == GLOBALMV) &&
      frame_mv[this_mode][ref_frames[0]].as_int == zeromv[0].as_int &&
      (ref_frames[1] <= INTRA_FRAME ||
       frame_mv[this_mode][ref_frames[1]].as_int == zeromv[1].as_int)) {
    int16_t rfc = av1_mode_context_analyzer(mode_context, ref_frames);
    int c1 = cost_mv_ref(x, NEARMV, rfc);
    int c2 = cost_mv_ref(x, NEARESTMV, rfc);
    int c3 = cost_mv_ref(x, GLOBALMV, rfc);

    if (this_mode == NEARMV) {
      if (c1 > c3) return 0;
    } else if (this_mode == NEARESTMV) {
      if (c2 > c3) return 0;
    } else {
      assert(this_mode == GLOBALMV);
      // Prune GLOBALMV if NEAREST/NEAR already produce a zero MV at equal or
      // lower cost.
      if (ref_frames[1] <= INTRA_FRAME) {
        if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0) ||
            (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0))
          return 0;
      } else {
        if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0 &&
             frame_mv[NEARESTMV][ref_frames[1]].as_int == 0) ||
            (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0 &&
             frame_mv[NEARMV][ref_frames[1]].as_int == 0))
          return 0;
      }
    }
  } else if ((this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
              this_mode == GLOBAL_GLOBALMV) &&
             frame_mv[this_mode][ref_frames[0]].as_int == zeromv[0].as_int &&
             frame_mv[this_mode][ref_frames[1]].as_int == zeromv[1].as_int) {
    // Compound case: same pruning logic for the paired modes.
    int16_t rfc = compound_mode_context[ref_frames[0]];
    int c2 = cost_mv_ref(x, NEAREST_NEARESTMV, rfc);
    int c3 = cost_mv_ref(x, GLOBAL_GLOBALMV, rfc);
    int c5 = cost_mv_ref(x, NEAR_NEARMV, rfc);

    if (this_mode == NEAREST_NEARESTMV) {
      if (c2 > c3) return 0;
    } else if (this_mode == NEAR_NEARMV) {
      if (c5 > c3) return 0;
    } else {
      assert(this_mode == GLOBAL_GLOBALMV);
      if ((c3 >= c2 && frame_mv[NEAREST_NEARESTMV][ref_frames[0]].as_int == 0 &&
           frame_mv[NEAREST_NEARESTMV][ref_frames[1]].as_int == 0) ||
          (c3 >= c5 && frame_mv[NEAR_NEARMV][ref_frames[0]].as_int == 0 &&
           frame_mv[NEAR_NEARMV][ref_frames[1]].as_int == 0))
        return 0;
    }
  }
  return 1;
}
5753
// Jointly refines the two motion vectors of a compound-predicted block.
// Up to 4 alternating iterations: each iteration fixes one reference's MV,
// builds that reference's prediction into `second_pred`, and re-searches the
// other reference's MV against it (full-pel refinement, then sub-pel unless
// integer-MV is forced). The loop stops early once an iteration fails to
// improve. On return, frame_mv[] holds the refined MVs and *rate_mv the cost
// of signalling both MVs relative to their reference MVs.
static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
                                BLOCK_SIZE bsize, int_mv *frame_mv, int mi_row,
                                int mi_col, int_mv *ref_mv_sub8x8[2],
                                const uint8_t *mask, int mask_stride,
                                int *rate_mv, const int block) {
  const AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  const int pw = block_size_wide[bsize];
  const int ph = block_size_high[bsize];
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  // This function should only ever be called for compound modes
  assert(has_second_ref(mbmi));
  const int refs[2] = { mbmi->ref_frame[0], mbmi->ref_frame[1] };
  int_mv ref_mv[2];
  int ite, ref;
  // ic and ir are the 4x4 coordinates of the sub8x8 at index "block"
  const int ic = block & 1;
  const int ir = (block - ic) >> 1;
  struct macroblockd_plane *const pd = &xd->plane[0];
  const int p_col = ((mi_col * MI_SIZE) >> pd->subsampling_x) + 4 * ic;
  const int p_row = ((mi_row * MI_SIZE) >> pd->subsampling_y) + 4 * ir;
  // Whether each reference uses a global-motion MV (affects warp handling
  // when building the "other" reference's predictor).
  int is_global[2];
  for (ref = 0; ref < 2; ++ref) {
    const WarpedMotionParams *const wm =
        &xd->global_motion[xd->mi[0]->mbmi.ref_frame[ref]];
    is_global[ref] = is_global_mv_block(xd->mi[0], wm->wmtype);
  }

  // Do joint motion search in compound mode to get more accurate mv.
  struct buf_2d backup_yv12[2][MAX_MB_PLANE];
  int last_besterr[2] = { INT_MAX, INT_MAX };
  const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
    av1_get_scaled_ref_frame(cpi, refs[0]),
    av1_get_scaled_ref_frame(cpi, refs[1])
  };

  // Prediction buffer from second frame.
  DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[MAX_SB_SQUARE]);
  uint8_t *second_pred;
  (void)ref_mv_sub8x8;

  for (ref = 0; ref < 2; ++ref) {
    ref_mv[ref] = x->mbmi_ext->ref_mvs[refs[ref]][0];

    if (scaled_ref_frame[ref]) {
      int i;
      // Swap out the reference frame for a version that's been scaled to
      // match the resolution of the current frame, allowing the existing
      // motion search code to be used without additional modifications.
      for (i = 0; i < num_planes; i++)
        backup_yv12[ref][i] = xd->plane[i].pre[ref];
      av1_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col, NULL,
                           num_planes);
    }
  }

  // Any scaled reference plugged in above must already match the current
  // frame's dimensions.
  assert(IMPLIES(scaled_ref_frame[0] != NULL,
                 cm->width == scaled_ref_frame[0]->y_crop_width &&
                     cm->height == scaled_ref_frame[0]->y_crop_height));
  assert(IMPLIES(scaled_ref_frame[1] != NULL,
                 cm->width == scaled_ref_frame[1]->y_crop_width &&
                     cm->height == scaled_ref_frame[1]->y_crop_height));

  // Allow joint search multiple times iteratively for each reference frame
  // and break out of the search loop if it couldn't find a better mv.
  for (ite = 0; ite < 4; ite++) {
    struct buf_2d ref_yv12[2];
    int bestsme = INT_MAX;
    int sadpb = x->sadperbit16;
    MV *const best_mv = &x->best_mv.as_mv;
    int search_range = 3;

    MvLimits tmp_mv_limits = x->mv_limits;
    int id = ite % 2;  // Even iterations search in the first reference frame,
                       // odd iterations search in the second. The predictor
                       // found for the 'other' reference frame is factored in.
    const int plane = 0;
    ConvolveParams conv_params = get_conv_params(!id, 0, plane);
#if CONFIG_JNT_COMP
    conv_params.use_jnt_comp_avg = 0;
#endif
    WarpTypesAllowed warp_types;
    warp_types.global_warp_allowed = is_global[!id];
    warp_types.local_warp_allowed = mbmi->motion_mode == WARPED_CAUSAL;

    // Initialized here because of compiler problem in Visual Studio.
    ref_yv12[0] = xd->plane[plane].pre[0];
    ref_yv12[1] = xd->plane[plane].pre[1];

// Get the prediction block from the 'other' reference frame.
#if CONFIG_JNT_COMP
    InterpFilters interp_filters = EIGHTTAP_REGULAR;
#endif  // CONFIG_JNT_COMP

    // Since we have scaled the reference frames to match the size of the
    // current frame we must use a unit scaling factor during mode selection.
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16);
      av1_highbd_build_inter_predictor(
          ref_yv12[!id].buf, ref_yv12[!id].stride, second_pred, pw,
          &frame_mv[refs[!id]].as_mv,
#if CONFIG_JNT_COMP
          &cm->sf_identity, pw, ph, 0, interp_filters,
#else
          &cm->sf_identity, pw, ph, 0, mbmi->interp_filters,
#endif  // CONFIG_JNT_COMP
          &warp_types, p_col, p_row, plane, MV_PRECISION_Q3, mi_col * MI_SIZE,
          mi_row * MI_SIZE, xd);
    } else {
      second_pred = (uint8_t *)second_pred_alloc_16;
      av1_build_inter_predictor(
          ref_yv12[!id].buf, ref_yv12[!id].stride, second_pred, pw,
          &frame_mv[refs[!id]].as_mv,
#if CONFIG_JNT_COMP
          &cm->sf_identity, pw, ph, &conv_params, interp_filters,
#else
          &cm->sf_identity, pw, ph, &conv_params, mbmi->interp_filters,
#endif  // CONFIG_JNT_COMP
          &warp_types, p_col, p_row, plane, !id, MV_PRECISION_Q3,
          mi_col * MI_SIZE, mi_row * MI_SIZE, xd);
    }

#if CONFIG_JNT_COMP
    const int order_idx = id != 0;
    av1_jnt_comp_weight_assign(cm, mbmi, order_idx, &xd->jcp_param.fwd_offset,
                               &xd->jcp_param.bck_offset,
                               &xd->jcp_param.use_jnt_comp_avg, 1);
#endif  // CONFIG_JNT_COMP

    // Do compound motion search on the current reference frame.
    if (id) xd->plane[plane].pre[0] = ref_yv12[id];
    av1_set_mv_search_range(&x->mv_limits, &ref_mv[id].as_mv);

    // Use the mv result from the single mode as mv predictor.
    *best_mv = frame_mv[refs[id]].as_mv;

    // Convert from 1/8-pel to full-pel units for the full-pixel search.
    best_mv->col >>= 3;
    best_mv->row >>= 3;

    av1_set_mvcost(
        x, refs[id], id,
        mbmi->ref_mv_idx + (have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0));

    // Small-range full-pixel motion search.
    bestsme = av1_refining_search_8p_c(x, sadpb, search_range,
                                       &cpi->fn_ptr[bsize], mask, mask_stride,
                                       id, &ref_mv[id].as_mv, second_pred);
    if (bestsme < INT_MAX) {
      // Re-evaluate the winner with a variance-based metric (masked or
      // plain averaging, depending on whether a compound mask is in use).
      if (mask)
        bestsme = av1_get_mvpred_mask_var(x, best_mv, &ref_mv[id].as_mv,
                                          second_pred, mask, mask_stride, id,
                                          &cpi->fn_ptr[bsize], 1);
      else
        bestsme = av1_get_mvpred_av_var(x, best_mv, &ref_mv[id].as_mv,
                                        second_pred, &cpi->fn_ptr[bsize], 1);
    }

    x->mv_limits = tmp_mv_limits;

#if CONFIG_AMVR
    // With forced integer MV, scale back to 1/8-pel units and skip the
    // sub-pel refinement below.
    if (cpi->common.cur_frame_force_integer_mv) {
      x->best_mv.as_mv.row *= 8;
      x->best_mv.as_mv.col *= 8;
    }
    if (bestsme < INT_MAX && cpi->common.cur_frame_force_integer_mv == 0)
#else
    if (bestsme < INT_MAX)
#endif
    {
      int dis; /* TODO: use dis in distortion calculation later. */
      unsigned int sse;
      bestsme = cpi->find_fractional_mv_step(
          x, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv,
          x->errorperbit, &cpi->fn_ptr[bsize], 0,
          cpi->sf.mv.subpel_iters_per_step, NULL, x->nmvjointcost, x->mvcost,
          &dis, &sse, second_pred, mask, mask_stride, id, pw, ph,
          cpi->sf.use_upsampled_references);
    }

    // Restore the pointer to the first (possibly scaled) prediction buffer.
    if (id) xd->plane[plane].pre[0] = ref_yv12[0];

    if (bestsme < last_besterr[id]) {
      frame_mv[refs[id]].as_mv = *best_mv;
      last_besterr[id] = bestsme;
    } else {
      // No improvement for this reference: stop iterating.
      break;
    }
  }

  *rate_mv = 0;

  for (ref = 0; ref < 2; ++ref) {
    if (scaled_ref_frame[ref]) {
      // Restore the prediction frame pointers to their unscaled versions.
      int i;
      for (i = 0; i < num_planes; i++)
        xd->plane[i].pre[ref] = backup_yv12[ref][i];
    }

    av1_set_mvcost(
        x, refs[ref], ref,
        mbmi->ref_mv_idx + (have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0));

    // Accumulate the signalling cost of both refined MVs.
    *rate_mv += av1_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
                                &x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv,
                                x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
  }
}
5965
Zoe Liuc082bbc2017-05-17 13:31:37 -07005966static void estimate_ref_frame_costs(
Yue Chen170678a2017-10-17 13:43:10 -07005967 const AV1_COMMON *cm, const MACROBLOCKD *xd, const MACROBLOCK *x,
5968 int segment_id, unsigned int *ref_costs_single,
Zoe Liuc082bbc2017-05-17 13:31:37 -07005969#if CONFIG_EXT_COMP_REFS
Hui Su9d0c03d2017-12-27 16:05:23 -08005970 unsigned int (*ref_costs_comp)[TOTAL_REFS_PER_FRAME]
Zoe Liuc082bbc2017-05-17 13:31:37 -07005971#else
Hui Su9d0c03d2017-12-27 16:05:23 -08005972 unsigned int *ref_costs_comp
Zoe Liuc082bbc2017-05-17 13:31:37 -07005973#endif // CONFIG_EXT_COMP_REFS
Hui Su9d0c03d2017-12-27 16:05:23 -08005974 ) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07005975 int seg_ref_active =
5976 segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
5977 if (seg_ref_active) {
5978 memset(ref_costs_single, 0,
5979 TOTAL_REFS_PER_FRAME * sizeof(*ref_costs_single));
Zoe Liuc082bbc2017-05-17 13:31:37 -07005980#if CONFIG_EXT_COMP_REFS
5981 int ref_frame;
5982 for (ref_frame = 0; ref_frame < TOTAL_REFS_PER_FRAME; ++ref_frame)
5983 memset(ref_costs_comp[ref_frame], 0,
5984 TOTAL_REFS_PER_FRAME * sizeof((*ref_costs_comp)[0]));
5985#else
Yaowu Xuc27fc142016-08-22 16:08:15 -07005986 memset(ref_costs_comp, 0, TOTAL_REFS_PER_FRAME * sizeof(*ref_costs_comp));
Zoe Liuc082bbc2017-05-17 13:31:37 -07005987#endif // CONFIG_EXT_COMP_REFS
Yaowu Xuc27fc142016-08-22 16:08:15 -07005988 } else {
Yue Chen170678a2017-10-17 13:43:10 -07005989 int intra_inter_ctx = av1_get_intra_inter_context(xd);
Yue Chen170678a2017-10-17 13:43:10 -07005990 ref_costs_single[INTRA_FRAME] = x->intra_inter_cost[intra_inter_ctx][0];
Zoe Liud4d8b862017-12-06 10:56:01 -08005991 unsigned int base_cost = x->intra_inter_cost[intra_inter_ctx][1];
5992
Sebastien Alaiwan3558a8c2018-01-15 09:55:33 +01005993 ref_costs_single[LAST_FRAME] = ref_costs_single[LAST2_FRAME] =
5994 ref_costs_single[LAST3_FRAME] = ref_costs_single[BWDREF_FRAME] =
5995 ref_costs_single[ALTREF2_FRAME] = ref_costs_single[GOLDEN_FRAME] =
5996 ref_costs_single[ALTREF_FRAME] = base_cost;
5997 const int ctx_p1 = av1_get_pred_context_single_ref_p1(xd);
5998 const int ctx_p2 = av1_get_pred_context_single_ref_p2(xd);
5999 const int ctx_p3 = av1_get_pred_context_single_ref_p3(xd);
6000 const int ctx_p4 = av1_get_pred_context_single_ref_p4(xd);
6001 const int ctx_p5 = av1_get_pred_context_single_ref_p5(xd);
6002 const int ctx_p6 = av1_get_pred_context_single_ref_p6(xd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07006003
Sebastien Alaiwan3558a8c2018-01-15 09:55:33 +01006004 ref_costs_single[LAST_FRAME] += x->single_ref_cost[ctx_p1][0][0];
6005 ref_costs_single[LAST2_FRAME] += x->single_ref_cost[ctx_p1][0][0];
6006 ref_costs_single[LAST3_FRAME] += x->single_ref_cost[ctx_p1][0][0];
6007 ref_costs_single[GOLDEN_FRAME] += x->single_ref_cost[ctx_p1][0][0];
6008 ref_costs_single[BWDREF_FRAME] += x->single_ref_cost[ctx_p1][0][1];
6009 ref_costs_single[ALTREF2_FRAME] += x->single_ref_cost[ctx_p1][0][1];
6010 ref_costs_single[ALTREF_FRAME] += x->single_ref_cost[ctx_p1][0][1];
Yaowu Xuc27fc142016-08-22 16:08:15 -07006011
Sebastien Alaiwan3558a8c2018-01-15 09:55:33 +01006012 ref_costs_single[LAST_FRAME] += x->single_ref_cost[ctx_p3][2][0];
6013 ref_costs_single[LAST2_FRAME] += x->single_ref_cost[ctx_p3][2][0];
6014 ref_costs_single[LAST3_FRAME] += x->single_ref_cost[ctx_p3][2][1];
6015 ref_costs_single[GOLDEN_FRAME] += x->single_ref_cost[ctx_p3][2][1];
Yaowu Xuc27fc142016-08-22 16:08:15 -07006016
Sebastien Alaiwan3558a8c2018-01-15 09:55:33 +01006017 ref_costs_single[BWDREF_FRAME] += x->single_ref_cost[ctx_p2][1][0];
6018 ref_costs_single[ALTREF2_FRAME] += x->single_ref_cost[ctx_p2][1][0];
6019 ref_costs_single[ALTREF_FRAME] += x->single_ref_cost[ctx_p2][1][1];
Yaowu Xuc27fc142016-08-22 16:08:15 -07006020
Sebastien Alaiwan3558a8c2018-01-15 09:55:33 +01006021 ref_costs_single[LAST_FRAME] += x->single_ref_cost[ctx_p4][3][0];
6022 ref_costs_single[LAST2_FRAME] += x->single_ref_cost[ctx_p4][3][1];
Yaowu Xuc27fc142016-08-22 16:08:15 -07006023
Sebastien Alaiwan3558a8c2018-01-15 09:55:33 +01006024 ref_costs_single[LAST3_FRAME] += x->single_ref_cost[ctx_p5][4][0];
6025 ref_costs_single[GOLDEN_FRAME] += x->single_ref_cost[ctx_p5][4][1];
Zoe Liue9b15e22017-07-19 15:53:01 -07006026
Sebastien Alaiwan3558a8c2018-01-15 09:55:33 +01006027 ref_costs_single[BWDREF_FRAME] += x->single_ref_cost[ctx_p6][5][0];
6028 ref_costs_single[ALTREF2_FRAME] += x->single_ref_cost[ctx_p6][5][1];
Yaowu Xuc27fc142016-08-22 16:08:15 -07006029
6030 if (cm->reference_mode != SINGLE_REFERENCE) {
Hui Su0bdf5f52018-01-05 14:54:32 -08006031 const int bwdref_comp_ctx_p = av1_get_pred_context_comp_bwdref_p(cm, xd);
6032 const int bwdref_comp_ctx_p1 =
6033 av1_get_pred_context_comp_bwdref_p1(cm, xd);
6034 const int ref_comp_ctx_p = av1_get_pred_context_comp_ref_p(cm, xd);
6035 const int ref_comp_ctx_p1 = av1_get_pred_context_comp_ref_p1(cm, xd);
6036 const int ref_comp_ctx_p2 = av1_get_pred_context_comp_ref_p2(cm, xd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07006037
Zoe Liuc082bbc2017-05-17 13:31:37 -07006038#if CONFIG_EXT_COMP_REFS
Hui Su6b3d1e32018-01-05 11:25:40 -08006039 const int comp_ref_type_ctx = av1_get_comp_reference_type_context(xd);
Zoe Liuc082bbc2017-05-17 13:31:37 -07006040 unsigned int ref_bicomp_costs[TOTAL_REFS_PER_FRAME] = { 0 };
6041
6042 ref_bicomp_costs[LAST_FRAME] = ref_bicomp_costs[LAST2_FRAME] =
6043 ref_bicomp_costs[LAST3_FRAME] = ref_bicomp_costs[GOLDEN_FRAME] =
Hui Su6b3d1e32018-01-05 11:25:40 -08006044 base_cost + x->comp_ref_type_cost[comp_ref_type_ctx][1];
Zoe Liu3ac20932017-08-30 16:35:55 -07006045 ref_bicomp_costs[BWDREF_FRAME] = ref_bicomp_costs[ALTREF2_FRAME] = 0;
Zoe Liuac889702017-08-23 14:22:58 -07006046 ref_bicomp_costs[ALTREF_FRAME] = 0;
Zoe Liuc082bbc2017-05-17 13:31:37 -07006047
Hui Su0bdf5f52018-01-05 14:54:32 -08006048 ref_bicomp_costs[LAST_FRAME] += x->comp_ref_cost[ref_comp_ctx_p][0][0];
6049 ref_bicomp_costs[LAST2_FRAME] += x->comp_ref_cost[ref_comp_ctx_p][0][0];
6050 ref_bicomp_costs[LAST3_FRAME] += x->comp_ref_cost[ref_comp_ctx_p][0][1];
6051 ref_bicomp_costs[GOLDEN_FRAME] += x->comp_ref_cost[ref_comp_ctx_p][0][1];
Zoe Liuc082bbc2017-05-17 13:31:37 -07006052
Hui Su0bdf5f52018-01-05 14:54:32 -08006053 ref_bicomp_costs[LAST_FRAME] += x->comp_ref_cost[ref_comp_ctx_p1][1][0];
6054 ref_bicomp_costs[LAST2_FRAME] += x->comp_ref_cost[ref_comp_ctx_p1][1][1];
Zoe Liuc082bbc2017-05-17 13:31:37 -07006055
Hui Su0bdf5f52018-01-05 14:54:32 -08006056 ref_bicomp_costs[LAST3_FRAME] += x->comp_ref_cost[ref_comp_ctx_p2][2][0];
6057 ref_bicomp_costs[GOLDEN_FRAME] += x->comp_ref_cost[ref_comp_ctx_p2][2][1];
Zoe Liuc082bbc2017-05-17 13:31:37 -07006058
Hui Su0bdf5f52018-01-05 14:54:32 -08006059 ref_bicomp_costs[BWDREF_FRAME] +=
6060 x->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
6061 ref_bicomp_costs[ALTREF2_FRAME] +=
6062 x->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
6063 ref_bicomp_costs[ALTREF_FRAME] +=
6064 x->comp_bwdref_cost[bwdref_comp_ctx_p][0][1];
Zoe Liuc082bbc2017-05-17 13:31:37 -07006065
Hui Su0bdf5f52018-01-05 14:54:32 -08006066 ref_bicomp_costs[BWDREF_FRAME] +=
6067 x->comp_bwdref_cost[bwdref_comp_ctx_p1][1][0];
6068 ref_bicomp_costs[ALTREF2_FRAME] +=
6069 x->comp_bwdref_cost[bwdref_comp_ctx_p1][1][1];
Zoe Liuac889702017-08-23 14:22:58 -07006070
6071 int ref0, ref1;
Zoe Liuc082bbc2017-05-17 13:31:37 -07006072 for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
Zoe Liuac889702017-08-23 14:22:58 -07006073 for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1) {
6074 ref_costs_comp[ref0][ref1] =
6075 ref_bicomp_costs[ref0] + ref_bicomp_costs[ref1];
6076 }
Zoe Liuc082bbc2017-05-17 13:31:37 -07006077 }
6078
Hui Sua7e3bfe2018-01-05 12:14:48 -08006079 const int uni_comp_ref_ctx_p = av1_get_pred_context_uni_comp_ref_p(xd);
6080 const int uni_comp_ref_ctx_p1 = av1_get_pred_context_uni_comp_ref_p1(xd);
6081 const int uni_comp_ref_ctx_p2 = av1_get_pred_context_uni_comp_ref_p2(xd);
Zoe Liuc082bbc2017-05-17 13:31:37 -07006082 ref_costs_comp[LAST_FRAME][LAST2_FRAME] =
Hui Su6b3d1e32018-01-05 11:25:40 -08006083 base_cost + x->comp_ref_type_cost[comp_ref_type_ctx][0] +
Hui Sua7e3bfe2018-01-05 12:14:48 -08006084 x->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
6085 x->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][0];
Zoe Liufcf5fa22017-06-26 16:00:38 -07006086 ref_costs_comp[LAST_FRAME][LAST3_FRAME] =
Hui Su6b3d1e32018-01-05 11:25:40 -08006087 base_cost + x->comp_ref_type_cost[comp_ref_type_ctx][0] +
Hui Sua7e3bfe2018-01-05 12:14:48 -08006088 x->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
6089 x->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
6090 x->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][0];
Zoe Liuc082bbc2017-05-17 13:31:37 -07006091 ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] =
Hui Su6b3d1e32018-01-05 11:25:40 -08006092 base_cost + x->comp_ref_type_cost[comp_ref_type_ctx][0] +
Hui Sua7e3bfe2018-01-05 12:14:48 -08006093 x->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
6094 x->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
6095 x->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][1];
Zoe Liuc082bbc2017-05-17 13:31:37 -07006096 ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] =
Hui Su6b3d1e32018-01-05 11:25:40 -08006097 base_cost + x->comp_ref_type_cost[comp_ref_type_ctx][0] +
Hui Sua7e3bfe2018-01-05 12:14:48 -08006098 x->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][1];
Sebastien Alaiwan365e6442017-10-16 11:35:00 +02006099#else // !CONFIG_EXT_COMP_REFS
Zoe Liuc082bbc2017-05-17 13:31:37 -07006100
Sebastien Alaiwan365e6442017-10-16 11:35:00 +02006101 ref_costs_comp[LAST_FRAME] = ref_costs_comp[LAST2_FRAME] =
6102 ref_costs_comp[LAST3_FRAME] = ref_costs_comp[GOLDEN_FRAME] =
6103 base_cost;
Yaowu Xuc27fc142016-08-22 16:08:15 -07006104
Zoe Liu3ac20932017-08-30 16:35:55 -07006105 ref_costs_comp[BWDREF_FRAME] = ref_costs_comp[ALTREF2_FRAME] =
6106 ref_costs_comp[ALTREF_FRAME] = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07006107
Hui Su0bdf5f52018-01-05 14:54:32 -08006108 ref_costs_comp[LAST_FRAME] += x->comp_ref_cost[ref_comp_ctx_p][0][0];
6109 ref_costs_comp[LAST2_FRAME] += x->comp_ref_cost[ref_comp_ctx_p][0][0];
6110 ref_costs_comp[LAST3_FRAME] += x->comp_ref_cost[ref_comp_ctx_p][0][1];
6111 ref_costs_comp[GOLDEN_FRAME] += x->comp_ref_cost[ref_comp_ctx_p][0][1];
Yaowu Xuc27fc142016-08-22 16:08:15 -07006112
Hui Su0bdf5f52018-01-05 14:54:32 -08006113 ref_costs_comp[LAST_FRAME] += x->comp_ref_cost[ref_comp_ctx_p1][1][0];
6114 ref_costs_comp[LAST2_FRAME] += x->comp_ref_cost[ref_comp_ctx_p1][1][1];
Yaowu Xuc27fc142016-08-22 16:08:15 -07006115
Hui Su0bdf5f52018-01-05 14:54:32 -08006116 ref_costs_comp[LAST3_FRAME] += x->comp_ref_cost[ref_comp_ctx_p2][2][0];
6117 ref_costs_comp[GOLDEN_FRAME] += x->comp_ref_cost[ref_comp_ctx_p2][2][1];
Yaowu Xuc27fc142016-08-22 16:08:15 -07006118
Zoe Liufcf5fa22017-06-26 16:00:38 -07006119 // NOTE(zoeliu): BWDREF and ALTREF each add an extra cost by coding 1
6120 // more bit.
Hui Su0bdf5f52018-01-05 14:54:32 -08006121 ref_costs_comp[BWDREF_FRAME] +=
6122 x->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
6123 ref_costs_comp[ALTREF2_FRAME] +=
6124 x->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
6125 ref_costs_comp[ALTREF_FRAME] +=
6126 x->comp_bwdref_cost[bwdref_comp_ctx_p][0][1];
Zoe Liue9b15e22017-07-19 15:53:01 -07006127
Hui Su0bdf5f52018-01-05 14:54:32 -08006128 ref_costs_comp[BWDREF_FRAME] +=
6129 x->comp_bwdref_cost[bwdref_comp_ctx_p1][1][0];
6130 ref_costs_comp[ALTREF2_FRAME] +=
6131 x->comp_bwdref_cost[bwdref_comp_ctx_p1][1][1];
Zoe Liuc082bbc2017-05-17 13:31:37 -07006132#endif // CONFIG_EXT_COMP_REFS
Yaowu Xuc27fc142016-08-22 16:08:15 -07006133 } else {
Zoe Liuc082bbc2017-05-17 13:31:37 -07006134#if CONFIG_EXT_COMP_REFS
Zoe Liuac889702017-08-23 14:22:58 -07006135 int ref0, ref1;
Zoe Liuc082bbc2017-05-17 13:31:37 -07006136 for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
Zoe Liuac889702017-08-23 14:22:58 -07006137 for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1)
6138 ref_costs_comp[ref0][ref1] = 512;
Zoe Liuc082bbc2017-05-17 13:31:37 -07006139 }
6140 ref_costs_comp[LAST_FRAME][LAST2_FRAME] = 512;
Zoe Liufcf5fa22017-06-26 16:00:38 -07006141 ref_costs_comp[LAST_FRAME][LAST3_FRAME] = 512;
Zoe Liuc082bbc2017-05-17 13:31:37 -07006142 ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = 512;
6143 ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = 512;
Sebastien Alaiwan365e6442017-10-16 11:35:00 +02006144#else // !CONFIG_EXT_COMP_REFS
Yaowu Xuc27fc142016-08-22 16:08:15 -07006145 ref_costs_comp[LAST_FRAME] = 512;
Yaowu Xuc27fc142016-08-22 16:08:15 -07006146 ref_costs_comp[LAST2_FRAME] = 512;
6147 ref_costs_comp[LAST3_FRAME] = 512;
6148 ref_costs_comp[BWDREF_FRAME] = 512;
Zoe Liue9b15e22017-07-19 15:53:01 -07006149 ref_costs_comp[ALTREF2_FRAME] = 512;
Yaowu Xuc27fc142016-08-22 16:08:15 -07006150 ref_costs_comp[ALTREF_FRAME] = 512;
Yaowu Xuc27fc142016-08-22 16:08:15 -07006151 ref_costs_comp[GOLDEN_FRAME] = 512;
Zoe Liuc082bbc2017-05-17 13:31:37 -07006152#endif // CONFIG_EXT_COMP_REFS
Yaowu Xuc27fc142016-08-22 16:08:15 -07006153 }
6154 }
6155}
6156
6157static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
6158 int mode_index,
6159 int64_t comp_pred_diff[REFERENCE_MODES],
6160 int skippable) {
6161 MACROBLOCKD *const xd = &x->e_mbd;
6162
6163 // Take a snapshot of the coding context so it can be
6164 // restored if we decide to encode this way
6165 ctx->skip = x->skip;
6166 ctx->skippable = skippable;
6167 ctx->best_mode_index = mode_index;
6168 ctx->mic = *xd->mi[0];
6169 ctx->mbmi_ext = *x->mbmi_ext;
6170 ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
6171 ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE];
6172 ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
6173}
6174
clang-format55ce9e02017-02-15 22:27:12 -08006175static void setup_buffer_inter(
6176 const AV1_COMP *const cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
6177 BLOCK_SIZE block_size, int mi_row, int mi_col,
6178 int_mv frame_nearest_mv[TOTAL_REFS_PER_FRAME],
6179 int_mv frame_near_mv[TOTAL_REFS_PER_FRAME],
6180 struct buf_2d yv12_mb[TOTAL_REFS_PER_FRAME][MAX_MB_PLANE]) {
Yaowu Xuf883b422016-08-30 14:01:10 -07006181 const AV1_COMMON *cm = &cpi->common;
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00006182 const int num_planes = av1_num_planes(cm);
Yaowu Xuc27fc142016-08-22 16:08:15 -07006183 const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
6184 MACROBLOCKD *const xd = &x->e_mbd;
6185 MODE_INFO *const mi = xd->mi[0];
6186 int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
6187 const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
6188 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
6189
6190 assert(yv12 != NULL);
6191
6192 // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
6193 // use the UV scaling factors.
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00006194 av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf,
6195 num_planes);
Yaowu Xuc27fc142016-08-22 16:08:15 -07006196
6197 // Gets an initial list of candidate vectors from neighbours and orders them
Sebastien Alaiwane140c502017-04-27 09:52:34 +02006198 av1_find_mv_refs(cm, xd, mi, ref_frame, &mbmi_ext->ref_mv_count[ref_frame],
6199 mbmi_ext->ref_mv_stack[ref_frame],
Sebastien Alaiwan0bdea0d2017-10-02 15:15:05 +02006200 mbmi_ext->compound_mode_context, candidates, mi_row, mi_col,
6201 NULL, NULL, mbmi_ext->mode_context);
Yaowu Xuc27fc142016-08-22 16:08:15 -07006202
RogerZhou3b635242017-09-19 10:06:46 -07006203// Candidate refinement carried out at encoder and decoder
6204#if CONFIG_AMVR
6205 av1_find_best_ref_mvs(cm->allow_high_precision_mv, candidates,
6206 &frame_nearest_mv[ref_frame], &frame_near_mv[ref_frame],
RogerZhou10a03802017-10-26 11:49:48 -07006207 cm->cur_frame_force_integer_mv);
RogerZhou3b635242017-09-19 10:06:46 -07006208#else
Yaowu Xuf883b422016-08-30 14:01:10 -07006209 av1_find_best_ref_mvs(cm->allow_high_precision_mv, candidates,
6210 &frame_nearest_mv[ref_frame],
6211 &frame_near_mv[ref_frame]);
RogerZhou3b635242017-09-19 10:06:46 -07006212#endif
Debargha Mukherjee6ea917e2017-10-19 09:31:29 -07006213 // Further refinement that is encode side only to test the top few candidates
6214 // in full and choose the best as the centre point for subsequent searches.
6215 // The current implementation doesn't support scaling.
Jingning Han271bb2c2016-12-14 12:34:46 -08006216 av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, ref_frame,
6217 block_size);
Yaowu Xuc27fc142016-08-22 16:08:15 -07006218}
6219
Urvang Joshi52648442016-10-13 17:27:51 -07006220static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
6221 BLOCK_SIZE bsize, int mi_row, int mi_col,
Sebastien Alaiwan0bdea0d2017-10-02 15:15:05 +02006222 int ref_idx, int *rate_mv) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07006223 MACROBLOCKD *xd = &x->e_mbd;
Yaowu Xuf883b422016-08-30 14:01:10 -07006224 const AV1_COMMON *cm = &cpi->common;
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00006225 const int num_planes = av1_num_planes(cm);
Yaowu Xuc27fc142016-08-22 16:08:15 -07006226 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
6227 struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0, 0, 0, 0 } };
6228 int bestsme = INT_MAX;
6229 int step_param;
6230 int sadpb = x->sadperbit16;
6231 MV mvp_full;
Yaowu Xuc27fc142016-08-22 16:08:15 -07006232 int ref = mbmi->ref_frame[ref_idx];
Zoe Liu7f24e1b2017-03-17 17:42:05 -07006233 MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
Yaowu Xuc27fc142016-08-22 16:08:15 -07006234
Alex Converse0fa0f422017-04-24 12:51:14 -07006235 MvLimits tmp_mv_limits = x->mv_limits;
Yaowu Xuc27fc142016-08-22 16:08:15 -07006236 int cost_list[5];
6237
6238 const YV12_BUFFER_CONFIG *scaled_ref_frame =
Yaowu Xuf883b422016-08-30 14:01:10 -07006239 av1_get_scaled_ref_frame(cpi, ref);
Yaowu Xuc27fc142016-08-22 16:08:15 -07006240
6241 MV pred_mv[3];
6242 pred_mv[0] = x->mbmi_ext->ref_mvs[ref][0].as_mv;
6243 pred_mv[1] = x->mbmi_ext->ref_mvs[ref][1].as_mv;
6244 pred_mv[2] = x->pred_mv[ref];
6245
Yaowu Xuc27fc142016-08-22 16:08:15 -07006246 if (scaled_ref_frame) {
6247 int i;
6248 // Swap out the reference frame for a version that's been scaled to
6249 // match the resolution of the current frame, allowing the existing
6250 // motion search code to be used without additional modifications.
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00006251 for (i = 0; i < num_planes; i++) backup_yv12[i] = xd->plane[i].pre[ref_idx];
Yaowu Xuc27fc142016-08-22 16:08:15 -07006252
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00006253 av1_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL,
6254 num_planes);
Yaowu Xuc27fc142016-08-22 16:08:15 -07006255 }
6256
Imdad Sardharwallac23ad632017-11-28 14:12:38 +00006257 av1_set_mvcost(
6258 x, ref, ref_idx,
6259 mbmi->ref_mv_idx + (have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0));
Yaowu Xu4306b6e2016-09-27 12:55:32 -07006260
Yaowu Xuc27fc142016-08-22 16:08:15 -07006261 // Work out the size of the first step in the mv step search.
Yaowu Xuf883b422016-08-30 14:01:10 -07006262 // 0 here is maximum length first step. 1 is AOMMAX >> 1 etc.
Yaowu Xuc27fc142016-08-22 16:08:15 -07006263 if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
James Zern89a015b2017-08-08 12:39:00 -04006264 // Take the weighted average of the step_params based on the last frame's
Yaowu Xuc27fc142016-08-22 16:08:15 -07006265 // max mv magnitude and that based on the best ref mvs of the current
6266 // block for the given reference.
6267 step_param =
Yaowu Xuf883b422016-08-30 14:01:10 -07006268 (av1_init_search_range(x->max_mv_context[ref]) + cpi->mv_step_param) /
Yaowu Xuc27fc142016-08-22 16:08:15 -07006269 2;
6270 } else {
6271 step_param = cpi->mv_step_param;
6272 }
6273
6274 if (cpi->sf.adaptive_motion_search && bsize < cm->sb_size) {
6275 int boffset =
6276 2 * (b_width_log2_lookup[cm->sb_size] -
Yaowu Xuf883b422016-08-30 14:01:10 -07006277 AOMMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
6278 step_param = AOMMAX(step_param, boffset);
Yaowu Xuc27fc142016-08-22 16:08:15 -07006279 }
6280
6281 if (cpi->sf.adaptive_motion_search) {
6282 int bwl = b_width_log2_lookup[bsize];
6283 int bhl = b_height_log2_lookup[bsize];
6284 int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);
6285
Debargha Mukherjee27be8742017-10-07 23:51:10 -07006286 if (tlevel < 5) {
6287 step_param += 2;
6288 step_param = AOMMIN(step_param, MAX_MVSEARCH_STEPS - 1);
6289 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07006290
6291 // prev_mv_sad is not setup for dynamically scaled frames.
Debargha Mukherjee7166f222017-09-05 21:32:42 -07006292 if (cpi->oxcf.resize_mode != RESIZE_RANDOM) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07006293 int i;
6294 for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
6295 if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
6296 x->pred_mv[ref].row = 0;
6297 x->pred_mv[ref].col = 0;
6298 x->best_mv.as_int = INVALID_MV;
6299
6300 if (scaled_ref_frame) {
Urvang Joshi454280d2016-10-14 16:51:44 -07006301 int j;
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00006302 for (j = 0; j < num_planes; ++j)
Urvang Joshi454280d2016-10-14 16:51:44 -07006303 xd->plane[j].pre[ref_idx] = backup_yv12[j];
Yaowu Xuc27fc142016-08-22 16:08:15 -07006304 }
6305 return;
6306 }
6307 }
6308 }
6309 }
6310
Yunqing Wang5f4f7382018-01-09 10:33:09 -08006311 // Note: MV limits are modified here. Always restore the original values
6312 // after full-pixel motion search.
Alex Converse0fa0f422017-04-24 12:51:14 -07006313 av1_set_mv_search_range(&x->mv_limits, &ref_mv);
Yaowu Xuc27fc142016-08-22 16:08:15 -07006314
Yue Chene9638cc2016-10-10 12:37:54 -07006315 if (mbmi->motion_mode != SIMPLE_TRANSLATION)
6316 mvp_full = mbmi->mv[0].as_mv;
6317 else
Yue Chene9638cc2016-10-10 12:37:54 -07006318 mvp_full = pred_mv[x->mv_best_ref_index[ref]];
Yaowu Xuc27fc142016-08-22 16:08:15 -07006319
6320 mvp_full.col >>= 3;
6321 mvp_full.row >>= 3;
6322
6323 x->best_mv.as_int = x->second_best_mv.as_int = INVALID_MV;
6324
Yue Chene9638cc2016-10-10 12:37:54 -07006325 switch (mbmi->motion_mode) {
6326 case SIMPLE_TRANSLATION:
RogerZhoucc5d35d2017-08-07 22:20:15 -07006327#if CONFIG_HASH_ME
Yue Chene9638cc2016-10-10 12:37:54 -07006328 bestsme = av1_full_pixel_search(cpi, x, bsize, &mvp_full, step_param,
6329 sadpb, cond_cost_list(cpi, cost_list),
RogerZhoucc5d35d2017-08-07 22:20:15 -07006330 &ref_mv, INT_MAX, 1, (MI_SIZE * mi_col),
RogerZhoud15e7c12017-09-26 08:49:28 -07006331 (MI_SIZE * mi_row), 0);
RogerZhoucc5d35d2017-08-07 22:20:15 -07006332#else
Sebastien Alaiwan1bc94fc2017-10-31 10:25:17 +01006333 bestsme = av1_full_pixel_search(cpi, x, bsize, &mvp_full, step_param,
6334 sadpb, cond_cost_list(cpi, cost_list),
6335 &ref_mv, INT_MAX, 1);
RogerZhoucc5d35d2017-08-07 22:20:15 -07006336#endif
Yue Chene9638cc2016-10-10 12:37:54 -07006337 break;
6338 case OBMC_CAUSAL:
6339 bestsme = av1_obmc_full_pixel_diamond(
6340 cpi, x, &mvp_full, step_param, sadpb,
6341 MAX_MVSEARCH_STEPS - 1 - step_param, 1, &cpi->fn_ptr[bsize], &ref_mv,
6342 &(x->best_mv.as_mv), 0);
6343 break;
James Zern88896732017-06-23 15:55:09 -07006344 default: assert(0 && "Invalid motion mode!\n");
Yue Chene9638cc2016-10-10 12:37:54 -07006345 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07006346
Alex Converse0fa0f422017-04-24 12:51:14 -07006347 x->mv_limits = tmp_mv_limits;
Yaowu Xuc27fc142016-08-22 16:08:15 -07006348
RogerZhou3b635242017-09-19 10:06:46 -07006349#if CONFIG_AMVR
RogerZhou10a03802017-10-26 11:49:48 -07006350 if (cpi->common.cur_frame_force_integer_mv) {
RogerZhou3b635242017-09-19 10:06:46 -07006351 x->best_mv.as_mv.row *= 8;
6352 x->best_mv.as_mv.col *= 8;
6353 }
Rupert Swarbrickd16d8ee2017-12-12 11:55:39 +00006354 const int use_fractional_mv =
6355 bestsme < INT_MAX && cpi->common.cur_frame_force_integer_mv == 0;
RogerZhou3b635242017-09-19 10:06:46 -07006356#else
Rupert Swarbrickd16d8ee2017-12-12 11:55:39 +00006357 const int use_fractional_mv = bestsme < INT_MAX;
RogerZhou3b635242017-09-19 10:06:46 -07006358#endif
Rupert Swarbrickd16d8ee2017-12-12 11:55:39 +00006359 if (use_fractional_mv) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07006360 int dis; /* TODO: use dis in distortion calculation later. */
Yue Chene9638cc2016-10-10 12:37:54 -07006361 switch (mbmi->motion_mode) {
6362 case SIMPLE_TRANSLATION:
Yue Chene9638cc2016-10-10 12:37:54 -07006363 if (cpi->sf.use_upsampled_references) {
6364 int best_mv_var;
6365 const int try_second = x->second_best_mv.as_int != INVALID_MV &&
6366 x->second_best_mv.as_int != x->best_mv.as_int;
Jingning Hanae5cfde2016-11-30 12:01:44 -08006367 const int pw = block_size_wide[bsize];
6368 const int ph = block_size_high[bsize];
Yaowu Xuc27fc142016-08-22 16:08:15 -07006369
Yue Chene9638cc2016-10-10 12:37:54 -07006370 best_mv_var = cpi->find_fractional_mv_step(
Yaowu Xuc27fc142016-08-22 16:08:15 -07006371 x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
6372 &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
6373 cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
Sebastien Alaiwan0bdea0d2017-10-02 15:15:05 +02006374 x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, NULL,
6375 0, 0, pw, ph, 1);
Yaowu Xuc27fc142016-08-22 16:08:15 -07006376
Yue Chene9638cc2016-10-10 12:37:54 -07006377 if (try_second) {
Alex Converse0fa0f422017-04-24 12:51:14 -07006378 const int minc =
6379 AOMMAX(x->mv_limits.col_min * 8, ref_mv.col - MV_MAX);
6380 const int maxc =
6381 AOMMIN(x->mv_limits.col_max * 8, ref_mv.col + MV_MAX);
6382 const int minr =
6383 AOMMAX(x->mv_limits.row_min * 8, ref_mv.row - MV_MAX);
6384 const int maxr =
6385 AOMMIN(x->mv_limits.row_max * 8, ref_mv.row + MV_MAX);
Yue Chene9638cc2016-10-10 12:37:54 -07006386 int this_var;
6387 MV best_mv = x->best_mv.as_mv;
6388
6389 x->best_mv = x->second_best_mv;
6390 if (x->best_mv.as_mv.row * 8 <= maxr &&
6391 x->best_mv.as_mv.row * 8 >= minr &&
6392 x->best_mv.as_mv.col * 8 <= maxc &&
6393 x->best_mv.as_mv.col * 8 >= minc) {
6394 this_var = cpi->find_fractional_mv_step(
6395 x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
6396 &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
6397 cpi->sf.mv.subpel_iters_per_step,
6398 cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost,
Sebastien Alaiwan0bdea0d2017-10-02 15:15:05 +02006399 &dis, &x->pred_sse[ref], NULL, NULL, 0, 0, pw, ph, 1);
Yue Chene9638cc2016-10-10 12:37:54 -07006400 if (this_var < best_mv_var) best_mv = x->best_mv.as_mv;
6401 x->best_mv.as_mv = best_mv;
6402 }
6403 }
Yue Chene9638cc2016-10-10 12:37:54 -07006404 } else {
6405 cpi->find_fractional_mv_step(
6406 x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
6407 &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
6408 cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
Sebastien Alaiwan0bdea0d2017-10-02 15:15:05 +02006409 x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, NULL,
6410 0, 0, 0, 0, 0);
Yue Chene9638cc2016-10-10 12:37:54 -07006411 }
Yue Chene9638cc2016-10-10 12:37:54 -07006412 break;
6413 case OBMC_CAUSAL:
6414 av1_find_best_obmc_sub_pixel_tree_up(
Timothy B. Terriberry5d24b6f2017-06-15 13:39:35 -07006415 x, &x->best_mv.as_mv, &ref_mv, cm->allow_high_precision_mv,
6416 x->errorperbit, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
6417 cpi->sf.mv.subpel_iters_per_step, x->nmvjointcost, x->mvcost, &dis,
6418 &x->pred_sse[ref], 0, cpi->sf.use_upsampled_references);
Yue Chene9638cc2016-10-10 12:37:54 -07006419 break;
James Zern88896732017-06-23 15:55:09 -07006420 default: assert(0 && "Invalid motion mode!\n");
Yaowu Xuc27fc142016-08-22 16:08:15 -07006421 }
6422 }
Yaowu Xuf883b422016-08-30 14:01:10 -07006423 *rate_mv = av1_mv_bit_cost(&x->best_mv.as_mv, &ref_mv, x->nmvjointcost,
6424 x->mvcost, MV_COST_WEIGHT);
Yaowu Xuc27fc142016-08-22 16:08:15 -07006425
Yue Chene9638cc2016-10-10 12:37:54 -07006426 if (cpi->sf.adaptive_motion_search && mbmi->motion_mode == SIMPLE_TRANSLATION)
Yue Chene9638cc2016-10-10 12:37:54 -07006427 x->pred_mv[ref] = x->best_mv.as_mv;
Yaowu Xuc27fc142016-08-22 16:08:15 -07006428
6429 if (scaled_ref_frame) {
6430 int i;
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00006431 for (i = 0; i < num_planes; i++) xd->plane[i].pre[ref_idx] = backup_yv12[i];
Yaowu Xuc27fc142016-08-22 16:08:15 -07006432 }
6433}
6434
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00006435static INLINE void restore_dst_buf(MACROBLOCKD *xd, BUFFER_SET dst,
6436 const int num_planes) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07006437 int i;
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00006438 for (i = 0; i < num_planes; i++) {
David Barkerac37fa32016-12-02 12:30:21 +00006439 xd->plane[i].dst.buf = dst.plane[i];
6440 xd->plane[i].dst.stride = dst.stride[i];
Yaowu Xuc27fc142016-08-22 16:08:15 -07006441 }
6442}
6443
David Barker8dd9b572017-05-12 16:31:38 +01006444static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x,
David Barkerf19f35f2017-05-22 16:33:22 +01006445 BLOCK_SIZE bsize, const MV *other_mv,
David Barker8dd9b572017-05-12 16:31:38 +01006446 int mi_row, int mi_col, const int block,
6447 int ref_idx, uint8_t *second_pred) {
6448 const AV1_COMMON *const cm = &cpi->common;
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00006449 const int num_planes = av1_num_planes(cm);
David Barker8dd9b572017-05-12 16:31:38 +01006450 const int pw = block_size_wide[bsize];
6451 const int ph = block_size_high[bsize];
Yaowu Xuc27fc142016-08-22 16:08:15 -07006452 MACROBLOCKD *xd = &x->e_mbd;
Yaowu Xuc27fc142016-08-22 16:08:15 -07006453 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
David Barker8dd9b572017-05-12 16:31:38 +01006454 const int other_ref = mbmi->ref_frame[!ref_idx];
David Barker8dd9b572017-05-12 16:31:38 +01006455 struct scale_factors sf;
David Barker8dd9b572017-05-12 16:31:38 +01006456 struct macroblockd_plane *const pd = &xd->plane[0];
James Zern89a015b2017-08-08 12:39:00 -04006457 // ic and ir are the 4x4 coordinates of the sub8x8 at index "block"
David Barker8dd9b572017-05-12 16:31:38 +01006458 const int ic = block & 1;
6459 const int ir = (block - ic) >> 1;
6460 const int p_col = ((mi_col * MI_SIZE) >> pd->subsampling_x) + 4 * ic;
6461 const int p_row = ((mi_row * MI_SIZE) >> pd->subsampling_y) + 4 * ir;
Luc Trudeauf3bf8b12017-12-08 14:38:41 -05006462 const WarpedMotionParams *const wm = &xd->global_motion[other_ref];
Luc Trudeau2eb9b842017-12-13 11:19:16 -05006463 int is_global = is_global_mv_block(xd->mi[0], wm->wmtype);
Yaowu Xuc27fc142016-08-22 16:08:15 -07006464
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01006465 // This function should only ever be called for compound modes
David Barker8dd9b572017-05-12 16:31:38 +01006466 assert(has_second_ref(mbmi));
Yaowu Xuc27fc142016-08-22 16:08:15 -07006467
David Barker8dd9b572017-05-12 16:31:38 +01006468 struct buf_2d backup_yv12[MAX_MB_PLANE];
6469 const YV12_BUFFER_CONFIG *const scaled_ref_frame =
6470 av1_get_scaled_ref_frame(cpi, other_ref);
Yaowu Xuc27fc142016-08-22 16:08:15 -07006471
6472 if (scaled_ref_frame) {
David Barker8dd9b572017-05-12 16:31:38 +01006473 int i;
6474 // Swap out the reference frame for a version that's been scaled to
6475 // match the resolution of the current frame, allowing the existing
6476 // motion search code to be used without additional modifications.
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00006477 for (i = 0; i < num_planes; i++)
David Barker8dd9b572017-05-12 16:31:38 +01006478 backup_yv12[i] = xd->plane[i].pre[!ref_idx];
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00006479 av1_setup_pre_planes(xd, !ref_idx, scaled_ref_frame, mi_row, mi_col, NULL,
6480 num_planes);
David Barker8dd9b572017-05-12 16:31:38 +01006481 }
6482
Yaowu Xud3e7c682017-12-21 14:08:25 -08006483 // Since we have scaled the reference frames to match the size of the current
6484 // frame we must use a unit scaling factor during mode selection.
David Barker8dd9b572017-05-12 16:31:38 +01006485 av1_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
6486 cm->height, cm->use_highbitdepth);
David Barker8dd9b572017-05-12 16:31:38 +01006487
6488 struct buf_2d ref_yv12;
6489
6490 const int plane = 0;
David Barkere64d51a2017-06-09 14:52:42 +01006491 ConvolveParams conv_params = get_conv_params(!ref_idx, 0, plane);
David Barker8dd9b572017-05-12 16:31:38 +01006492 WarpTypesAllowed warp_types;
David Barker8dd9b572017-05-12 16:31:38 +01006493 warp_types.global_warp_allowed = is_global;
David Barker8dd9b572017-05-12 16:31:38 +01006494 warp_types.local_warp_allowed = mbmi->motion_mode == WARPED_CAUSAL;
David Barker8dd9b572017-05-12 16:31:38 +01006495
6496 // Initialized here because of compiler problem in Visual Studio.
6497 ref_yv12 = xd->plane[plane].pre[!ref_idx];
6498
Yaowu Xud3e7c682017-12-21 14:08:25 -08006499 // Get the prediction block from the 'other' reference frame.
David Barker8dd9b572017-05-12 16:31:38 +01006500 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
6501 av1_highbd_build_inter_predictor(
David Barkerf19f35f2017-05-22 16:33:22 +01006502 ref_yv12.buf, ref_yv12.stride, second_pred, pw, other_mv, &sf, pw, ph,
Sebastien Alaiwan48795802017-10-30 12:07:13 +01006503 0, mbmi->interp_filters, &warp_types, p_col, p_row, plane,
6504 MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd);
David Barker8dd9b572017-05-12 16:31:38 +01006505 } else {
David Barker8dd9b572017-05-12 16:31:38 +01006506 av1_build_inter_predictor(
David Barkerf19f35f2017-05-22 16:33:22 +01006507 ref_yv12.buf, ref_yv12.stride, second_pred, pw, other_mv, &sf, pw, ph,
Sebastien Alaiwan48795802017-10-30 12:07:13 +01006508 &conv_params, mbmi->interp_filters, &warp_types, p_col, p_row, plane,
6509 !ref_idx, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd);
David Barker8dd9b572017-05-12 16:31:38 +01006510 }
David Barker8dd9b572017-05-12 16:31:38 +01006511
Cheng Chenefc55fd2017-10-10 12:08:28 -07006512#if CONFIG_JNT_COMP
Cheng Chenf78632e2017-10-20 15:30:51 -07006513 av1_jnt_comp_weight_assign(cm, mbmi, 0, &xd->jcp_param.fwd_offset,
Cheng Chen8263f802017-11-14 15:50:00 -08006514 &xd->jcp_param.bck_offset,
6515 &xd->jcp_param.use_jnt_comp_avg, 1);
Cheng Chenefc55fd2017-10-10 12:08:28 -07006516#endif // CONFIG_JNT_COMP
6517
David Barker8dd9b572017-05-12 16:31:38 +01006518 if (scaled_ref_frame) {
6519 // Restore the prediction frame pointers to their unscaled versions.
6520 int i;
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00006521 for (i = 0; i < num_planes; i++)
David Barker8dd9b572017-05-12 16:31:38 +01006522 xd->plane[i].pre[!ref_idx] = backup_yv12[i];
6523 }
6524}
6525
// Search for the best mv for one component of a compound,
// given that the other component is fixed.
//
// On input, *this_mv holds the current MV (1/8-pel units) for the component
// selected by ref_idx; on output it holds the refined MV.  second_pred is the
// already-built prediction for the fixed component; mask/mask_stride select a
// masked (wedge/seg) distortion if non-NULL.  *rate_mv receives the bit cost
// of signalling the refined MV.
static void compound_single_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
                                          BLOCK_SIZE bsize, MV *this_mv,
                                          int mi_row, int mi_col,
                                          const uint8_t *second_pred,
                                          const uint8_t *mask, int mask_stride,
                                          int *rate_mv, int ref_idx) {
  const AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  const int pw = block_size_wide[bsize];
  const int ph = block_size_high[bsize];
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const int ref = mbmi->ref_frame[ref_idx];
  // Predictor for MV cost: the first (nearest) ref MV for this reference.
  int_mv ref_mv = x->mbmi_ext->ref_mvs[ref][0];
  struct macroblockd_plane *const pd = &xd->plane[0];

  struct buf_2d backup_yv12[MAX_MB_PLANE];
  const YV12_BUFFER_CONFIG *const scaled_ref_frame =
      av1_get_scaled_ref_frame(cpi, ref);

  // Check that this is either an interinter or an interintra block
  assert(has_second_ref(mbmi) ||
         (ref_idx == 0 && mbmi->ref_frame[1] == INTRA_FRAME));

  if (scaled_ref_frame) {
    int i;
    // Swap out the reference frame for a version that's been scaled to
    // match the resolution of the current frame, allowing the existing
    // motion search code to be used without additional modifications.
    for (i = 0; i < num_planes; i++) backup_yv12[i] = xd->plane[i].pre[ref_idx];
    av1_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL,
                         num_planes);
  }

  struct buf_2d orig_yv12;
  int bestsme = INT_MAX;
  int sadpb = x->sadperbit16;
  MV *const best_mv = &x->best_mv.as_mv;
  int search_range = 3;

  // Save the MV limits; the search below narrows them and they must be
  // restored before returning.
  MvLimits tmp_mv_limits = x->mv_limits;

  // Initialized here because of compiler problem in Visual Studio.
  // The full-pel search below always reads pd->pre[0], so temporarily
  // point it at the buffer for the component being refined.
  if (ref_idx) {
    orig_yv12 = pd->pre[0];
    pd->pre[0] = pd->pre[ref_idx];
  }

  // Do compound motion search on the current reference frame.
  av1_set_mv_search_range(&x->mv_limits, &ref_mv.as_mv);

  // Use the mv result from the single mode as mv predictor.
  *best_mv = *this_mv;

  // Convert from 1/8-pel to full-pel units for the integer search.
  best_mv->col >>= 3;
  best_mv->row >>= 3;

  av1_set_mvcost(
      x, ref, ref_idx,
      mbmi->ref_mv_idx + (have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0));

  // Small-range full-pixel motion search.
  bestsme = av1_refining_search_8p_c(x, sadpb, search_range,
                                     &cpi->fn_ptr[bsize], mask, mask_stride,
                                     ref_idx, &ref_mv.as_mv, second_pred);
  if (bestsme < INT_MAX) {
    // Re-evaluate the best full-pel MV with the (masked) compound-average
    // variance to get a cost comparable with the fractional search.
    if (mask)
      bestsme =
          av1_get_mvpred_mask_var(x, best_mv, &ref_mv.as_mv, second_pred, mask,
                                  mask_stride, ref_idx, &cpi->fn_ptr[bsize], 1);
    else
      bestsme = av1_get_mvpred_av_var(x, best_mv, &ref_mv.as_mv, second_pred,
                                      &cpi->fn_ptr[bsize], 1);
  }

  x->mv_limits = tmp_mv_limits;

#if CONFIG_AMVR
  // In forced integer-MV mode, skip sub-pel refinement and scale the
  // full-pel result back to 1/8-pel units.
  if (cpi->common.cur_frame_force_integer_mv) {
    x->best_mv.as_mv.row *= 8;
    x->best_mv.as_mv.col *= 8;
  }
  const int use_fractional_mv =
      bestsme < INT_MAX && cpi->common.cur_frame_force_integer_mv == 0;
#else
  const int use_fractional_mv = bestsme < INT_MAX;
#endif
  if (use_fractional_mv) {
    int dis; /* TODO: use dis in distortion calculation later. */
    unsigned int sse;
    bestsme = cpi->find_fractional_mv_step(
        x, &ref_mv.as_mv, cpi->common.allow_high_precision_mv, x->errorperbit,
        &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL,
        x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, mask, mask_stride,
        ref_idx, pw, ph, cpi->sf.use_upsampled_references);
  }

  // Restore the pointer to the first (possibly scaled) prediction buffer.
  if (ref_idx) pd->pre[0] = orig_yv12;

  if (bestsme < INT_MAX) *this_mv = *best_mv;

  *rate_mv = 0;

  if (scaled_ref_frame) {
    // Restore the prediction frame pointers to their unscaled versions.
    int i;
    for (i = 0; i < num_planes; i++) xd->plane[i].pre[ref_idx] = backup_yv12[i];
  }

  // NOTE(review): mvcost is set again here before costing; presumably to
  // guard against state changed by the search above — confirm if redundant.
  av1_set_mvcost(
      x, ref, ref_idx,
      mbmi->ref_mv_idx + (have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0));
  *rate_mv += av1_mv_bit_cost(this_mv, &ref_mv.as_mv, x->nmvjointcost,
                              x->mvcost, MV_COST_WEIGHT);
}
6644
David Barker8dd9b572017-05-12 16:31:38 +01006645// Wrapper for compound_single_motion_search, for the common case
6646// where the second prediction is also an inter mode.
6647static void compound_single_motion_search_interinter(
6648 const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int_mv *frame_mv,
David Barkerf19f35f2017-05-22 16:33:22 +01006649 int mi_row, int mi_col, const uint8_t *mask, int mask_stride, int *rate_mv,
6650 const int block, int ref_idx) {
6651 MACROBLOCKD *xd = &x->e_mbd;
6652 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
6653
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01006654 // This function should only ever be called for compound modes
David Barkerf19f35f2017-05-22 16:33:22 +01006655 assert(has_second_ref(mbmi));
David Barker8dd9b572017-05-12 16:31:38 +01006656
Yaowu Xud3e7c682017-12-21 14:08:25 -08006657 // Prediction buffer from second frame.
David Barker8dd9b572017-05-12 16:31:38 +01006658 DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[MAX_SB_SQUARE]);
6659 uint8_t *second_pred;
6660 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
6661 second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16);
6662 else
6663 second_pred = (uint8_t *)second_pred_alloc_16;
David Barker8dd9b572017-05-12 16:31:38 +01006664
David Barkerf19f35f2017-05-22 16:33:22 +01006665 MV *this_mv = &frame_mv[mbmi->ref_frame[ref_idx]].as_mv;
6666 const MV *other_mv = &frame_mv[mbmi->ref_frame[!ref_idx]].as_mv;
6667
6668 build_second_inter_pred(cpi, x, bsize, other_mv, mi_row, mi_col, block,
David Barker8dd9b572017-05-12 16:31:38 +01006669 ref_idx, second_pred);
6670
David Barkerf19f35f2017-05-22 16:33:22 +01006671 compound_single_motion_search(cpi, x, bsize, this_mv, mi_row, mi_col,
Timothy B. Terriberry5d24b6f2017-06-15 13:39:35 -07006672 second_pred, mask, mask_stride, rate_mv,
David Barkerf19f35f2017-05-22 16:33:22 +01006673 ref_idx);
David Barker8dd9b572017-05-12 16:31:38 +01006674}
6675
Sarah Parker6fdc8532016-11-16 17:47:13 -08006676static void do_masked_motion_search_indexed(
David Barkerc155e012017-05-11 13:54:54 +01006677 const AV1_COMP *const cpi, MACROBLOCK *x, const int_mv *const cur_mv,
Sarah Parkerb9f757c2017-01-06 17:12:24 -08006678 const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE bsize,
Zoe Liu7f24e1b2017-03-17 17:42:05 -07006679 int mi_row, int mi_col, int_mv *tmp_mv, int *rate_mv, int which) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07006680 // NOTE: which values: 0 - 0 only, 1 - 1 only, 2 - both
6681 MACROBLOCKD *xd = &x->e_mbd;
6682 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
6683 BLOCK_SIZE sb_type = mbmi->sb_type;
6684 const uint8_t *mask;
Jingning Hanae5cfde2016-11-30 12:01:44 -08006685 const int mask_stride = block_size_wide[bsize];
Sarah Parker569edda2016-12-14 14:57:38 -08006686
Sarah Parkerb9f757c2017-01-06 17:12:24 -08006687 mask = av1_get_compound_type_mask(comp_data, sb_type);
Yaowu Xuc27fc142016-08-22 16:08:15 -07006688
David Barker8dd9b572017-05-12 16:31:38 +01006689 int_mv frame_mv[TOTAL_REFS_PER_FRAME];
6690 MV_REFERENCE_FRAME rf[2] = { mbmi->ref_frame[0], mbmi->ref_frame[1] };
Yaowu Xuc27fc142016-08-22 16:08:15 -07006691
David Barker8dd9b572017-05-12 16:31:38 +01006692 frame_mv[rf[0]].as_int = cur_mv[0].as_int;
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01006693 frame_mv[rf[1]].as_int = cur_mv[1].as_int;
David Barkerf19f35f2017-05-22 16:33:22 +01006694 if (which == 0 || which == 1) {
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01006695 compound_single_motion_search_interinter(cpi, x, bsize, frame_mv, mi_row,
6696 mi_col, mask, mask_stride, rate_mv,
6697 0, which);
David Barkerf19f35f2017-05-22 16:33:22 +01006698 } else if (which == 2) {
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01006699 joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, mask,
6700 mask_stride, rate_mv, 0);
Yaowu Xuc27fc142016-08-22 16:08:15 -07006701 }
David Barker8dd9b572017-05-12 16:31:38 +01006702 tmp_mv[0].as_int = frame_mv[rf[0]].as_int;
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01006703 tmp_mv[1].as_int = frame_mv[rf[1]].as_int;
Yaowu Xuc27fc142016-08-22 16:08:15 -07006704}
Yaowu Xuc27fc142016-08-22 16:08:15 -07006705
James Zern89a015b2017-08-08 12:39:00 -04006706// In some situations we want to discount the apparent cost of a new motion
Yaowu Xuc27fc142016-08-22 16:08:15 -07006707// vector. Where there is a subtle motion field and especially where there is
6708// low spatial complexity then it can be hard to cover the cost of a new motion
6709// vector in a single block, even if that motion vector reduces distortion.
6710// However, once established that vector may be usable through the nearest and
6711// near mv modes to reduce distortion in subsequent blocks and also improve
6712// visual quality.
Urvang Joshi52648442016-10-13 17:27:51 -07006713static int discount_newmv_test(const AV1_COMP *const cpi, int this_mode,
Yaowu Xuc27fc142016-08-22 16:08:15 -07006714 int_mv this_mv,
6715 int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME],
6716 int ref_frame) {
6717 return (!cpi->rc.is_src_frame_alt_ref && (this_mode == NEWMV) &&
6718 (this_mv.as_int != 0) &&
6719 ((mode_mv[NEARESTMV][ref_frame].as_int == 0) ||
6720 (mode_mv[NEARESTMV][ref_frame].as_int == INVALID_MV)) &&
6721 ((mode_mv[NEARMV][ref_frame].as_int == 0) ||
6722 (mode_mv[NEARMV][ref_frame].as_int == INVALID_MV)));
6723}
6724
Yaowu Xu671f2bd2016-09-30 15:07:57 -07006725#define LEFT_TOP_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
6726#define RIGHT_BOTTOM_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
Yaowu Xuc27fc142016-08-22 16:08:15 -07006727
6728// TODO(jingning): this mv clamping function should be block size dependent.
6729static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
6730 clamp_mv(mv, xd->mb_to_left_edge - LEFT_TOP_MARGIN,
6731 xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
6732 xd->mb_to_top_edge - LEFT_TOP_MARGIN,
6733 xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
6734}
6735
Yaowu Xuf883b422016-08-30 14:01:10 -07006736static int estimate_wedge_sign(const AV1_COMP *cpi, const MACROBLOCK *x,
Yaowu Xuc27fc142016-08-22 16:08:15 -07006737 const BLOCK_SIZE bsize, const uint8_t *pred0,
6738 int stride0, const uint8_t *pred1, int stride1) {
6739 const struct macroblock_plane *const p = &x->plane[0];
6740 const uint8_t *src = p->src.buf;
6741 int src_stride = p->src.stride;
6742 const int f_index = bsize - BLOCK_8X8;
Jingning Han61418bb2017-01-23 17:12:48 -08006743 const int bw = block_size_wide[bsize];
6744 const int bh = block_size_high[bsize];
Yue Chenf03907a2017-05-31 12:04:04 -07006745 uint32_t esq[2][4];
Yaowu Xuc27fc142016-08-22 16:08:15 -07006746 int64_t tl, br;
6747
Yaowu Xuc27fc142016-08-22 16:08:15 -07006748 if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
6749 pred0 = CONVERT_TO_BYTEPTR(pred0);
6750 pred1 = CONVERT_TO_BYTEPTR(pred1);
6751 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07006752
Yue Chenf03907a2017-05-31 12:04:04 -07006753 cpi->fn_ptr[f_index].vf(src, src_stride, pred0, stride0, &esq[0][0]);
6754 cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, pred0 + bw / 2, stride0,
6755 &esq[0][1]);
6756 cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride, src_stride,
6757 pred0 + bh / 2 * stride0, stride0, &esq[0][2]);
6758 cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride + bw / 2, src_stride,
6759 pred0 + bh / 2 * stride0 + bw / 2, stride0,
6760 &esq[0][3]);
6761 cpi->fn_ptr[f_index].vf(src, src_stride, pred1, stride1, &esq[1][0]);
6762 cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, pred1 + bw / 2, stride1,
6763 &esq[1][1]);
6764 cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride, src_stride,
6765 pred1 + bh / 2 * stride1, stride0, &esq[1][2]);
6766 cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride + bw / 2, src_stride,
6767 pred1 + bh / 2 * stride1 + bw / 2, stride0,
6768 &esq[1][3]);
Yaowu Xuc27fc142016-08-22 16:08:15 -07006769
6770 tl = (int64_t)(esq[0][0] + esq[0][1] + esq[0][2]) -
6771 (int64_t)(esq[1][0] + esq[1][1] + esq[1][2]);
6772 br = (int64_t)(esq[1][3] + esq[1][1] + esq[1][2]) -
6773 (int64_t)(esq[0][3] + esq[0][1] + esq[0][2]);
6774 return (tl + br > 0);
6775}
Yaowu Xuc27fc142016-08-22 16:08:15 -07006776
#if !CONFIG_DUAL_FILTER
// Predict a likely interpolation filter for the current block without a full
// search.  Uses the filters of the above/left neighbours (on a chessboard
// pattern) and, for compound modes, the filters chosen earlier by the
// corresponding single-reference modes.  Returns SWITCHABLE when no
// confident prediction can be made.
static InterpFilter predict_interp_filter(
    const AV1_COMP *cpi, const MACROBLOCK *x, const BLOCK_SIZE bsize,
    const int mi_row, const int mi_col,
    InterpFilter (*single_filter)[TOTAL_REFS_PER_FRAME]) {
  InterpFilter best_filter = SWITCHABLE;
  const AV1_COMMON *cm = &cpi->common;
  const MACROBLOCKD *xd = &x->e_mbd;
  int bsl = mi_width_log2_lookup[bsize];
  // Chessboard pattern: only predict from neighbours on alternating blocks,
  // alternating per frame, when the speed feature is enabled.
  int pred_filter_search =
      cpi->sf.cb_pred_filter_search
          ? (((mi_row + mi_col) >> bsl) +
             get_chessboard_index(cm->current_video_frame)) &
                0x1
          : 0;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const int is_comp_pred = has_second_ref(mbmi);
  const int this_mode = mbmi->mode;
  int refs[2] = { mbmi->ref_frame[0],
                  (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
  if (pred_filter_search) {
    // Filters used by the above (af) and left (lf) neighbours.
    InterpFilter af = SWITCHABLE, lf = SWITCHABLE;
    if (xd->up_available)
      af = av1_extract_interp_filter(
          xd->mi[-xd->mi_stride]->mbmi.interp_filters, 0);
    if (xd->left_available)
      lf = av1_extract_interp_filter(xd->mi[-1]->mbmi.interp_filters, 0);

    // For non-NEWMV modes, or when both neighbours agree, adopt the
    // above neighbour's filter.
    if ((this_mode != NEWMV && this_mode != NEW_NEWMV) || (af == lf))
      best_filter = af;
  }
  if (is_comp_pred) {
    if (cpi->sf.adaptive_mode_search) {
      // Reuse a single-reference mode's filter when both references of the
      // compound mode agreed on it.  Each compound mode maps to the
      // single-reference mode(s) of its components.
      switch (this_mode) {
        case NEAREST_NEARESTMV:
          if (single_filter[NEARESTMV][refs[0]] ==
              single_filter[NEARESTMV][refs[1]])
            best_filter = single_filter[NEARESTMV][refs[0]];
          break;
        case NEAR_NEARMV:
          if (single_filter[NEARMV][refs[0]] == single_filter[NEARMV][refs[1]])
            best_filter = single_filter[NEARMV][refs[0]];
          break;
        case GLOBAL_GLOBALMV:
          if (single_filter[GLOBALMV][refs[0]] ==
              single_filter[GLOBALMV][refs[1]])
            best_filter = single_filter[GLOBALMV][refs[0]];
          break;
        case NEW_NEWMV:
          if (single_filter[NEWMV][refs[0]] == single_filter[NEWMV][refs[1]])
            best_filter = single_filter[NEWMV][refs[0]];
          break;
        case NEAREST_NEWMV:
          if (single_filter[NEARESTMV][refs[0]] ==
              single_filter[NEWMV][refs[1]])
            best_filter = single_filter[NEARESTMV][refs[0]];
          break;
        case NEAR_NEWMV:
          if (single_filter[NEARMV][refs[0]] == single_filter[NEWMV][refs[1]])
            best_filter = single_filter[NEARMV][refs[0]];
          break;
        case NEW_NEARESTMV:
          if (single_filter[NEWMV][refs[0]] ==
              single_filter[NEARESTMV][refs[1]])
            best_filter = single_filter[NEWMV][refs[0]];
          break;
        case NEW_NEARMV:
          if (single_filter[NEWMV][refs[0]] == single_filter[NEARMV][refs[1]])
            best_filter = single_filter[NEWMV][refs[0]];
          break;
        default:
          if (single_filter[this_mode][refs[0]] ==
              single_filter[this_mode][refs[1]])
            best_filter = single_filter[this_mode][refs[0]];
          break;
      }
    }
  }
  // Flat, low-variance sources gain little from filter search: force the
  // regular 8-tap filter.
  if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
    best_filter = EIGHTTAP_REGULAR;
  }
  return best_filter;
}
#endif  // !CONFIG_DUAL_FILTER
Yaowu Xuc27fc142016-08-22 16:08:15 -07006861
// Choose the best wedge index and sign
//
// p0 and p1 are the two single-reference predictions (both with stride bw).
// All wedge shapes for this block size are tried; for each shape the sign
// (mask polarity) is chosen analytically from the residuals, then a
// model-based RD cost is computed.  Outputs *best_wedge_sign and
// *best_wedge_index; returns the best RD cost found.
static int64_t pick_wedge(const AV1_COMP *const cpi, const MACROBLOCK *const x,
                          const BLOCK_SIZE bsize, const uint8_t *const p0,
                          const uint8_t *const p1, int *const best_wedge_sign,
                          int *const best_wedge_index) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const struct buf_2d *const src = &x->plane[0].src;
  const int bw = block_size_wide[bsize];
  const int bh = block_size_high[bsize];
  const int N = bw * bh;
  int rate;
  int64_t dist;
  int64_t rd, best_rd = INT64_MAX;
  int wedge_index;
  int wedge_sign;
  int wedge_types = (1 << get_wedge_bits_lookup(bsize));
  const uint8_t *mask;
  uint64_t sse;
  const int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
  // Scale SSE back to an 8-bit-equivalent range for high bit depth.
  const int bd_round = hbd ? (xd->bd - 8) * 2 : 0;

  // r0 = src - p0, r1 = src - p1, d10 = p1 - p0,
  // ds = per-pixel difference of squared residuals (for sign selection).
  DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(32, int16_t, d10[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(32, int16_t, ds[MAX_SB_SQUARE]);

  int64_t sign_limit;

  if (hbd) {
    aom_highbd_subtract_block(bh, bw, r0, bw, src->buf, src->stride,
                              CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
    aom_highbd_subtract_block(bh, bw, r1, bw, src->buf, src->stride,
                              CONVERT_TO_BYTEPTR(p1), bw, xd->bd);
    aom_highbd_subtract_block(bh, bw, d10, bw, CONVERT_TO_BYTEPTR(p1), bw,
                              CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
  } else {
    aom_subtract_block(bh, bw, r0, bw, src->buf, src->stride, p0, bw);
    aom_subtract_block(bh, bw, r1, bw, src->buf, src->stride, p1, bw);
    aom_subtract_block(bh, bw, d10, bw, p1, bw, p0, bw);
  }

  // Threshold used by av1_wedge_sign_from_residuals to decide which
  // polarity gives the smaller expected error.
  sign_limit = ((int64_t)aom_sum_squares_i16(r0, N) -
                (int64_t)aom_sum_squares_i16(r1, N)) *
               (1 << WEDGE_WEIGHT_BITS) / 2;

  // The _c variants are used when N is too small for the SIMD kernels.
  if (N < 64)
    av1_wedge_compute_delta_squares_c(ds, r0, r1, N);
  else
    av1_wedge_compute_delta_squares(ds, r0, r1, N);

  for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
    mask = av1_get_contiguous_soft_mask(wedge_index, 0, bsize);

    // TODO(jingning): Make sse2 functions support N = 16 case
    if (N < 64)
      wedge_sign = av1_wedge_sign_from_residuals_c(ds, mask, N, sign_limit);
    else
      wedge_sign = av1_wedge_sign_from_residuals(ds, mask, N, sign_limit);

    // Re-fetch the mask with the chosen sign and evaluate its SSE.
    mask = av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
    if (N < 64)
      sse = av1_wedge_sse_from_residuals_c(r1, d10, mask, N);
    else
      sse = av1_wedge_sse_from_residuals(r1, d10, mask, N);
    sse = ROUND_POWER_OF_TWO(sse, bd_round);

    model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist);
    rd = RDCOST(x->rdmult, rate, dist);

    if (rd < best_rd) {
      *best_wedge_index = wedge_index;
      *best_wedge_sign = wedge_sign;
      best_rd = rd;
    }
  }

  return best_rd;
}
6940
6941// Choose the best wedge index the specified sign
6942static int64_t pick_wedge_fixed_sign(
Yaowu Xuf883b422016-08-30 14:01:10 -07006943 const AV1_COMP *const cpi, const MACROBLOCK *const x,
Yaowu Xuc27fc142016-08-22 16:08:15 -07006944 const BLOCK_SIZE bsize, const uint8_t *const p0, const uint8_t *const p1,
6945 const int wedge_sign, int *const best_wedge_index) {
6946 const MACROBLOCKD *const xd = &x->e_mbd;
6947 const struct buf_2d *const src = &x->plane[0].src;
Jingning Hanae5cfde2016-11-30 12:01:44 -08006948 const int bw = block_size_wide[bsize];
6949 const int bh = block_size_high[bsize];
Yaowu Xuc27fc142016-08-22 16:08:15 -07006950 const int N = bw * bh;
6951 int rate;
6952 int64_t dist;
6953 int64_t rd, best_rd = INT64_MAX;
6954 int wedge_index;
6955 int wedge_types = (1 << get_wedge_bits_lookup(bsize));
6956 const uint8_t *mask;
6957 uint64_t sse;
Yaowu Xuc27fc142016-08-22 16:08:15 -07006958 const int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
6959 const int bd_round = hbd ? (xd->bd - 8) * 2 : 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07006960
6961 DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
6962 DECLARE_ALIGNED(32, int16_t, d10[MAX_SB_SQUARE]);
6963
Yaowu Xuc27fc142016-08-22 16:08:15 -07006964 if (hbd) {
Yaowu Xuf883b422016-08-30 14:01:10 -07006965 aom_highbd_subtract_block(bh, bw, r1, bw, src->buf, src->stride,
Yaowu Xuc27fc142016-08-22 16:08:15 -07006966 CONVERT_TO_BYTEPTR(p1), bw, xd->bd);
Yaowu Xuf883b422016-08-30 14:01:10 -07006967 aom_highbd_subtract_block(bh, bw, d10, bw, CONVERT_TO_BYTEPTR(p1), bw,
Yaowu Xuc27fc142016-08-22 16:08:15 -07006968 CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
Yaowu Xud3e7c682017-12-21 14:08:25 -08006969 } else {
Yaowu Xuf883b422016-08-30 14:01:10 -07006970 aom_subtract_block(bh, bw, r1, bw, src->buf, src->stride, p1, bw);
6971 aom_subtract_block(bh, bw, d10, bw, p1, bw, p0, bw);
Yaowu Xuc27fc142016-08-22 16:08:15 -07006972 }
6973
6974 for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
Yaowu Xuf883b422016-08-30 14:01:10 -07006975 mask = av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
Jingning Han61418bb2017-01-23 17:12:48 -08006976 if (N < 64)
6977 sse = av1_wedge_sse_from_residuals_c(r1, d10, mask, N);
6978 else
6979 sse = av1_wedge_sse_from_residuals(r1, d10, mask, N);
Yaowu Xuc27fc142016-08-22 16:08:15 -07006980 sse = ROUND_POWER_OF_TWO(sse, bd_round);
6981
6982 model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist);
Urvang Joshi70006e42017-06-14 16:08:55 -07006983 rd = RDCOST(x->rdmult, rate, dist);
Yaowu Xuc27fc142016-08-22 16:08:15 -07006984
6985 if (rd < best_rd) {
6986 *best_wedge_index = wedge_index;
6987 best_rd = rd;
6988 }
6989 }
6990
6991 return best_rd;
6992}
6993
Yaowu Xuf883b422016-08-30 14:01:10 -07006994static int64_t pick_interinter_wedge(const AV1_COMP *const cpi,
Sarah Parker2d0e9b72017-05-04 01:34:16 +00006995 MACROBLOCK *const x,
Yaowu Xuc27fc142016-08-22 16:08:15 -07006996 const BLOCK_SIZE bsize,
6997 const uint8_t *const p0,
6998 const uint8_t *const p1) {
Sarah Parker2d0e9b72017-05-04 01:34:16 +00006999 MACROBLOCKD *const xd = &x->e_mbd;
Yaowu Xuc27fc142016-08-22 16:08:15 -07007000 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
Jingning Hanae5cfde2016-11-30 12:01:44 -08007001 const int bw = block_size_wide[bsize];
Yaowu Xuc27fc142016-08-22 16:08:15 -07007002
7003 int64_t rd;
7004 int wedge_index = -1;
7005 int wedge_sign = 0;
7006
Sarah Parker42d96102017-01-31 21:05:27 -08007007 assert(is_interinter_compound_used(COMPOUND_WEDGE, bsize));
Debargha Mukherjee9e2c7a62017-05-23 21:18:42 -07007008 assert(cpi->common.allow_masked_compound);
Yaowu Xuc27fc142016-08-22 16:08:15 -07007009
7010 if (cpi->sf.fast_wedge_sign_estimate) {
7011 wedge_sign = estimate_wedge_sign(cpi, x, bsize, p0, bw, p1, bw);
7012 rd = pick_wedge_fixed_sign(cpi, x, bsize, p0, p1, wedge_sign, &wedge_index);
7013 } else {
7014 rd = pick_wedge(cpi, x, bsize, p0, p1, &wedge_sign, &wedge_index);
7015 }
7016
Sarah Parker2d0e9b72017-05-04 01:34:16 +00007017 mbmi->wedge_sign = wedge_sign;
7018 mbmi->wedge_index = wedge_index;
Yaowu Xuc27fc142016-08-22 16:08:15 -07007019 return rd;
7020}
7021
// Pick the best segmentation-based compound mask for an inter-inter block.
// Tries every SEG_MASK_TYPE (each type and its inverse), scores each with a
// model-based RD cost, records the winner in mbmi->mask_type, rebuilds the
// winning mask into xd->seg_mask, and returns the best RD cost.
static int64_t pick_interinter_seg(const AV1_COMP *const cpi,
                                   MACROBLOCK *const x, const BLOCK_SIZE bsize,
                                   const uint8_t *const p0,
                                   const uint8_t *const p1) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const struct buf_2d *const src = &x->plane[0].src;
  const int bw = block_size_wide[bsize];
  const int bh = block_size_high[bsize];
  const int N = bw * bh;
  int rate;
  uint64_t sse;
  int64_t dist;
  int64_t rd0;
  SEG_MASK_TYPE cur_mask_type;
  int64_t best_rd = INT64_MAX;
  SEG_MASK_TYPE best_mask_type = 0;
  const int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
  // Scale SSE back to an 8-bit-equivalent range for high bit depth.
  const int bd_round = hbd ? (xd->bd - 8) * 2 : 0;
  // r0 = src - p0, r1 = src - p1, d10 = p1 - p0.
  DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(32, int16_t, d10[MAX_SB_SQUARE]);

  if (hbd) {
    aom_highbd_subtract_block(bh, bw, r0, bw, src->buf, src->stride,
                              CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
    aom_highbd_subtract_block(bh, bw, r1, bw, src->buf, src->stride,
                              CONVERT_TO_BYTEPTR(p1), bw, xd->bd);
    aom_highbd_subtract_block(bh, bw, d10, bw, CONVERT_TO_BYTEPTR(p1), bw,
                              CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
  } else {
    aom_subtract_block(bh, bw, r0, bw, src->buf, src->stride, p0, bw);
    aom_subtract_block(bh, bw, r1, bw, src->buf, src->stride, p1, bw);
    aom_subtract_block(bh, bw, d10, bw, p1, bw, p0, bw);
  }

  // try each mask type and its inverse
  for (cur_mask_type = 0; cur_mask_type < SEG_MASK_TYPES; cur_mask_type++) {
    // build mask and inverse
    if (hbd)
      build_compound_seg_mask_highbd(
          xd->seg_mask, cur_mask_type, CONVERT_TO_BYTEPTR(p0), bw,
          CONVERT_TO_BYTEPTR(p1), bw, bsize, bh, bw, xd->bd);
    else
      build_compound_seg_mask(xd->seg_mask, cur_mask_type, p0, bw, p1, bw,
                              bsize, bh, bw);

    // compute rd for mask
    sse = av1_wedge_sse_from_residuals(r1, d10, xd->seg_mask, N);
    sse = ROUND_POWER_OF_TWO(sse, bd_round);

    model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist);
    rd0 = RDCOST(x->rdmult, rate, dist);

    if (rd0 < best_rd) {
      best_mask_type = cur_mask_type;
      best_rd = rd0;
    }
  }

  // make final mask
  // (xd->seg_mask currently holds the last type tried; rebuild the winner.)
  mbmi->mask_type = best_mask_type;
  if (hbd)
    build_compound_seg_mask_highbd(
        xd->seg_mask, mbmi->mask_type, CONVERT_TO_BYTEPTR(p0), bw,
        CONVERT_TO_BYTEPTR(p1), bw, bsize, bh, bw, xd->bd);
  else
    build_compound_seg_mask(xd->seg_mask, mbmi->mask_type, p0, bw, p1, bw,
                            bsize, bh, bw);

  return best_rd;
}
Sarah Parker569edda2016-12-14 14:57:38 -08007094
Yaowu Xuf883b422016-08-30 14:01:10 -07007095static int64_t pick_interintra_wedge(const AV1_COMP *const cpi,
Yaowu Xuc27fc142016-08-22 16:08:15 -07007096 const MACROBLOCK *const x,
7097 const BLOCK_SIZE bsize,
7098 const uint8_t *const p0,
7099 const uint8_t *const p1) {
7100 const MACROBLOCKD *const xd = &x->e_mbd;
7101 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
7102
7103 int64_t rd;
7104 int wedge_index = -1;
7105
7106 assert(is_interintra_wedge_used(bsize));
Debargha Mukherjee9e2c7a62017-05-23 21:18:42 -07007107 assert(cpi->common.allow_interintra_compound);
Yaowu Xuc27fc142016-08-22 16:08:15 -07007108
7109 rd = pick_wedge_fixed_sign(cpi, x, bsize, p0, p1, 0, &wedge_index);
7110
7111 mbmi->interintra_wedge_sign = 0;
7112 mbmi->interintra_wedge_index = wedge_index;
7113 return rd;
7114}
Sarah Parker6fdc8532016-11-16 17:47:13 -08007115
Sarah Parker2d0e9b72017-05-04 01:34:16 +00007116static int64_t pick_interinter_mask(const AV1_COMP *const cpi, MACROBLOCK *x,
Sarah Parkerddcea392017-04-25 15:57:22 -07007117 const BLOCK_SIZE bsize,
7118 const uint8_t *const p0,
7119 const uint8_t *const p1) {
7120 const COMPOUND_TYPE compound_type =
Sarah Parker2d0e9b72017-05-04 01:34:16 +00007121 x->e_mbd.mi[0]->mbmi.interinter_compound_type;
Sarah Parkerddcea392017-04-25 15:57:22 -07007122 switch (compound_type) {
Sarah Parkerddcea392017-04-25 15:57:22 -07007123 case COMPOUND_WEDGE: return pick_interinter_wedge(cpi, x, bsize, p0, p1);
Sarah Parkerddcea392017-04-25 15:57:22 -07007124 case COMPOUND_SEG: return pick_interinter_seg(cpi, x, bsize, p0, p1);
Sarah Parkerddcea392017-04-25 15:57:22 -07007125 default: assert(0); return 0;
7126 }
7127}
7128
David Barkerc155e012017-05-11 13:54:54 +01007129static int interinter_compound_motion_search(
7130 const AV1_COMP *const cpi, MACROBLOCK *x, const int_mv *const cur_mv,
7131 const BLOCK_SIZE bsize, const int this_mode, int mi_row, int mi_col) {
Sarah Parker2d0e9b72017-05-04 01:34:16 +00007132 MACROBLOCKD *const xd = &x->e_mbd;
Sarah Parker6fdc8532016-11-16 17:47:13 -08007133 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
7134 int_mv tmp_mv[2];
David Barkerc155e012017-05-11 13:54:54 +01007135 int tmp_rate_mv = 0;
Sarah Parker2d0e9b72017-05-04 01:34:16 +00007136 const INTERINTER_COMPOUND_DATA compound_data = {
Debargha Mukherjee371968c2017-10-29 12:30:04 -07007137 mbmi->wedge_index, mbmi->wedge_sign, mbmi->mask_type, xd->seg_mask,
Sarah Parker2d0e9b72017-05-04 01:34:16 +00007138 mbmi->interinter_compound_type
7139 };
Zoe Liu85b66462017-04-20 14:28:19 -07007140
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01007141 if (this_mode == NEW_NEWMV) {
David Barkerc155e012017-05-11 13:54:54 +01007142 do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize,
7143 mi_row, mi_col, tmp_mv, &tmp_rate_mv, 2);
Sarah Parker6fdc8532016-11-16 17:47:13 -08007144 mbmi->mv[0].as_int = tmp_mv[0].as_int;
7145 mbmi->mv[1].as_int = tmp_mv[1].as_int;
7146 } else if (this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV) {
David Barkerc155e012017-05-11 13:54:54 +01007147 do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize,
7148 mi_row, mi_col, tmp_mv, &tmp_rate_mv, 0);
Sarah Parker6fdc8532016-11-16 17:47:13 -08007149 mbmi->mv[0].as_int = tmp_mv[0].as_int;
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01007150 } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
David Barkerc155e012017-05-11 13:54:54 +01007151 do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize,
7152 mi_row, mi_col, tmp_mv, &tmp_rate_mv, 1);
Sarah Parker6fdc8532016-11-16 17:47:13 -08007153 mbmi->mv[1].as_int = tmp_mv[1].as_int;
7154 }
7155 return tmp_rate_mv;
7156}
7157
// Search the best compound mask for the current inter-inter mode, optionally
// refine the NEWMV component(s) with a masked motion search, rebuild the luma
// predictor, and return the estimated RD cost of the result.
//
// rs2 is the mode/mask signaling rate, rate_mv the unrefined MV rate.
// *out_rate_mv receives the MV rate actually used. preds0/preds1 are the
// cached single-reference predictions (with strides) used both for the mask
// search and for rebuilding the blended predictor.
static int64_t build_and_cost_compound_type(
    const AV1_COMP *const cpi, MACROBLOCK *x, const int_mv *const cur_mv,
    const BLOCK_SIZE bsize, const int this_mode, int rs2, int rate_mv,
    BUFFER_SET *ctx, int *out_rate_mv, uint8_t **preds0, uint8_t **preds1,
    int *strides, int mi_row, int mi_col) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int rate_sum;
  int64_t dist_sum;
  int64_t best_rd_cur = INT64_MAX;
  int64_t rd = INT64_MAX;
  int tmp_skip_txfm_sb;
  int64_t tmp_skip_sse_sb;
  const COMPOUND_TYPE compound_type = mbmi->interinter_compound_type;

  // Pick the best mask (wedge index/sign or seg mask type) from the two
  // single-reference predictions, then add the signaling + MV rate cost.
  best_rd_cur = pick_interinter_mask(cpi, x, bsize, *preds0, *preds1);
  best_rd_cur += RDCOST(x->rdmult, rs2 + rate_mv, 0);

  if (have_newmv_in_inter_mode(this_mode) &&
      use_masked_motion_search(compound_type)) {
    // Refine the NEWMV component(s) against the chosen mask and rebuild the
    // luma prediction with the refined vectors.
    *out_rate_mv = interinter_compound_motion_search(cpi, x, cur_mv, bsize,
                                                     this_mode, mi_row, mi_col);
    av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, ctx, bsize);
    model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
                    &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
    rd = RDCOST(x->rdmult, rs2 + *out_rate_mv + rate_sum, dist_sum);
    if (rd >= best_rd_cur) {
      // Refinement did not improve on the unrefined cost: restore the input
      // MVs and rebuild the blend from the cached single-ref predictions.
      mbmi->mv[0].as_int = cur_mv[0].as_int;
      mbmi->mv[1].as_int = cur_mv[1].as_int;
      *out_rate_mv = rate_mv;
      av1_build_wedge_inter_predictor_from_buf(xd, bsize, 0, 0, preds0, strides,
                                               preds1, strides);
    }
    // Re-estimate the luma RD cost with a transform-domain estimate.
    av1_subtract_plane(x, bsize, 0);
    rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
                             &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
    if (rd != INT64_MAX)
      rd = RDCOST(x->rdmult, rs2 + *out_rate_mv + rate_sum, dist_sum);
    best_rd_cur = rd;

  } else {
    // No MV refinement: blend the cached predictions with the chosen mask
    // and estimate the luma RD cost directly.
    av1_build_wedge_inter_predictor_from_buf(xd, bsize, 0, 0, preds0, strides,
                                             preds1, strides);
    av1_subtract_plane(x, bsize, 0);
    rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
                             &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
    if (rd != INT64_MAX)
      rd = RDCOST(x->rdmult, rs2 + rate_mv + rate_sum, dist_sum);
    best_rd_cur = rd;
  }
  return best_rd_cur;
}
Yaowu Xuc27fc142016-08-22 16:08:15 -07007211
// Shared state threaded through the inter-mode handlers (e.g. handle_newmv,
// motion_mode_rd) to avoid very long parameter lists and to cache
// per-reference results across mode evaluations.
typedef struct {
  // Inter prediction buffers and respective strides
  uint8_t *above_pred_buf[MAX_MB_PLANE];
  int above_pred_stride[MAX_MB_PLANE];
  uint8_t *left_pred_buf[MAX_MB_PLANE];
  int left_pred_stride[MAX_MB_PLANE];
  // Cached single-reference NEWMV result per reference frame.
  int_mv *single_newmv;
  // Pointer to array of motion vectors to use for each ref and their rates
  // Should point to first of 2 arrays in 2D array
  int *single_newmv_rate;
  // Per-reference flag: nonzero once single_newmv/single_newmv_rate for that
  // reference have been filled in and may be reused.
  int *single_newmv_valid;
  // Pointer to array of predicted rate-distortion
  // Should point to first of 2 arrays in 2D array
  int64_t (*modelled_rd)[TOTAL_REFS_PER_FRAME];
  // Best interpolation filter found per (mode, reference) pair.
  InterpFilter single_filter[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
} HandleInterModeArgs;
7228
// Compute the NEWMV component(s) for the current prediction mode and their
// signaling rate.
//
// For compound modes, seeds frame_mv from the cached single-reference NEWMV
// results and either runs a joint/compound motion search (when the block size
// reaches cpi->sf.comp_inter_joint_search_thresh) or just prices the seeded
// vectors. For single-reference modes, runs (or reuses a cached) single
// motion search. Writes the result into mode_mv[this_mode] and *rate_mv.
//
// Returns 0 on success, or INT64_MAX when the single motion search fails to
// produce a valid MV.
static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
                            const BLOCK_SIZE bsize,
                            int_mv (*const mode_mv)[TOTAL_REFS_PER_FRAME],
                            const int mi_row, const int mi_col,
                            int *const rate_mv, int_mv *const single_newmv,
                            HandleInterModeArgs *const args) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;
  const int is_comp_interintra_pred = (mbmi->ref_frame[1] == INTRA_FRAME);
  int_mv *const frame_mv = mode_mv[this_mode];
  // refs[1] is clamped to 0 for single-reference prediction.
  const int refs[2] = { mbmi->ref_frame[0],
                        mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
  int i;

  (void)args;

  if (is_comp_pred) {
    // Seed from the cached single-reference NEWMV results.
    for (i = 0; i < 2; ++i) {
      single_newmv[refs[i]].as_int = args->single_newmv[refs[i]].as_int;
    }

    if (this_mode == NEW_NEWMV) {
      frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
      frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;

      if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
        // Jointly refine both MVs; rate_mv is produced by the search.
        joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, NULL,
                            0, rate_mv, 0);
      } else {
        // No joint search: just price the two seeded vectors.
        *rate_mv = 0;
        for (i = 0; i < 2; ++i) {
          av1_set_mvcost(x, refs[i], i, mbmi->ref_mv_idx);
          *rate_mv += av1_mv_bit_cost(
              &frame_mv[refs[i]].as_mv, &mbmi_ext->ref_mvs[refs[i]][0].as_mv,
              x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
        }
      }
    } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
      // Only reference 1 carries a NEWMV.
      frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
      if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
        frame_mv[refs[0]].as_int =
            mode_mv[compound_ref0_mode(this_mode)][refs[0]].as_int;
        compound_single_motion_search_interinter(
            cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, 0, rate_mv, 0, 1);
      } else {
        // NEAR_NEWMV uses the next ref-MV index for costing.
        av1_set_mvcost(x, refs[1], 1,
                       mbmi->ref_mv_idx + (this_mode == NEAR_NEWMV ? 1 : 0));
        *rate_mv = av1_mv_bit_cost(&frame_mv[refs[1]].as_mv,
                                   &mbmi_ext->ref_mvs[refs[1]][0].as_mv,
                                   x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
      }
    } else {
      // Only reference 0 carries a NEWMV.
      assert(this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV);
      frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
      if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
        frame_mv[refs[1]].as_int =
            mode_mv[compound_ref1_mode(this_mode)][refs[1]].as_int;
        compound_single_motion_search_interinter(
            cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, 0, rate_mv, 0, 0);
      } else {
        av1_set_mvcost(x, refs[0], 0,
                       mbmi->ref_mv_idx + (this_mode == NEW_NEARMV ? 1 : 0));
        *rate_mv = av1_mv_bit_cost(&frame_mv[refs[0]].as_mv,
                                   &mbmi_ext->ref_mvs[refs[0]][0].as_mv,
                                   x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
      }
    }
  } else {
    // Single-reference prediction: reuse a cached search result for
    // inter-intra when available, otherwise search and cache.
    if (is_comp_interintra_pred && args->single_newmv_valid[refs[0]]) {
      x->best_mv = args->single_newmv[refs[0]];
      *rate_mv = args->single_newmv_rate[refs[0]];
    } else {
      single_motion_search(cpi, x, bsize, mi_row, mi_col, 0, rate_mv);
      if (x->best_mv.as_int == INVALID_MV) return INT64_MAX;

      args->single_newmv[refs[0]] = x->best_mv;
      args->single_newmv_rate[refs[0]] = *rate_mv;
      args->single_newmv_valid[refs[0]] = 1;
    }

    frame_mv[refs[0]] = x->best_mv;

    // Estimate the rate implications of a new mv but discount this
    // under certain circumstances where we want to help initiate a weak
    // motion field, where the distortion gain for a single block may not
    // be enough to overcome the cost of a new mv.
    if (discount_newmv_test(cpi, this_mode, x->best_mv, mode_mv, refs[0])) {
      *rate_mv = AOMMAX(*rate_mv / NEW_MV_DISCOUNT_FACTOR, 1);
    }
  }

  return 0;
}
7325
// Select the interpolation filter(s) for the current block by RD search.
//
// First evaluates the default filter assignment and records its cost in *rd.
// If the frame-level filter is SWITCHABLE and this block actually needs
// interpolation, the remaining filter candidates are searched; the best is
// left in mbmi->interp_filters with *rd, *switchable_rate, *skip_txfm_sb and
// *skip_sse_sb updated to match. Predictions for candidates alternate
// between orig_dst and tmp_dst; on return the best prediction is in orig_dst.
// Always returns 0.
static int64_t interpolation_filter_search(
    MACROBLOCK *const x, const AV1_COMP *const cpi, BLOCK_SIZE bsize,
    int mi_row, int mi_col, const BUFFER_SET *const tmp_dst,
    BUFFER_SET *const orig_dst,
    InterpFilter (*const single_filter)[TOTAL_REFS_PER_FRAME],
    int64_t *const rd, int *const switchable_rate, int *const skip_txfm_sb,
    int64_t *const skip_sse_sb) {
  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int i;
  int tmp_rate;
  int64_t tmp_dist;

  (void)single_filter;

  InterpFilter assign_filter = SWITCHABLE;

  if (cm->interp_filter == SWITCHABLE) {
#if !CONFIG_DUAL_FILTER
    // Without dual filters, predict a likely-good filter; when no
    // interpolation is needed, keep the frame-level setting.
    assign_filter = av1_is_interp_needed(xd)
                        ? predict_interp_filter(cpi, x, bsize, mi_row, mi_col,
                                                single_filter)
                        : cm->interp_filter;
#endif  // !CONFIG_DUAL_FILTER
  } else {
    assign_filter = cm->interp_filter;
  }

  set_default_interp_filters(mbmi, assign_filter);

  // Baseline RD cost with the default filter assignment.
  *switchable_rate = av1_get_switchable_rate(cm, x, xd);
  av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
  model_rd_for_sb(cpi, bsize, x, xd, 0, num_planes - 1, &tmp_rate, &tmp_dist,
                  skip_txfm_sb, skip_sse_sb);
  *rd = RDCOST(x->rdmult, *switchable_rate + tmp_rate, tmp_dist);

  if (assign_filter == SWITCHABLE) {
    // do interp_filter search
    if (av1_is_interp_needed(xd) && av1_is_interp_search_needed(xd)) {
#if CONFIG_DUAL_FILTER
      const int filter_set_size = DUAL_FILTER_SET_SIZE;
#else
      const int filter_set_size = SWITCHABLE_FILTERS;
#endif  // CONFIG_DUAL_FILTER
      int best_in_temp = 0;
      InterpFilters best_filters = mbmi->interp_filters;
      restore_dst_buf(xd, *tmp_dst, num_planes);

#if CONFIG_DUAL_FILTER  // Speed feature use_fast_interpolation_filter_search
      if (cpi->sf.use_fast_interpolation_filter_search) {
        int tmp_skip_sb = 0;
        int64_t tmp_skip_sse = INT64_MAX;
        int tmp_rs;
        int64_t tmp_rd;

        // default to (R,R): EIGHTTAP_REGULARxEIGHTTAP_REGULAR
        int best_dual_mode = 0;
        // Fast search: first find best of {R}x{R,Sm,Sh}.
        // EIGHTTAP_REGULAR mode is calculated beforehand
        for (i = 1; i < SWITCHABLE_FILTERS; ++i) {
          tmp_skip_sb = 0;
          tmp_skip_sse = INT64_MAX;

          mbmi->interp_filters =
              av1_make_interp_filters(filter_sets[i][0], filter_sets[i][1]);

          tmp_rs = av1_get_switchable_rate(cm, x, xd);
          av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst,
                                        bsize);
          model_rd_for_sb(cpi, bsize, x, xd, 0, num_planes - 1, &tmp_rate,
                          &tmp_dist, &tmp_skip_sb, &tmp_skip_sse);
          tmp_rd = RDCOST(x->rdmult, tmp_rs + tmp_rate, tmp_dist);

          if (tmp_rd < *rd) {
            best_dual_mode = i;

            *rd = tmp_rd;
            *switchable_rate = av1_get_switchable_rate(cm, x, xd);
            best_filters = mbmi->interp_filters;
            *skip_txfm_sb = tmp_skip_sb;
            *skip_sse_sb = tmp_skip_sse;
            // Ping-pong the destination so the best prediction so far is
            // preserved while the next candidate is built into the other.
            best_in_temp = !best_in_temp;
            if (best_in_temp) {
              restore_dst_buf(xd, *orig_dst, num_planes);
            } else {
              restore_dst_buf(xd, *tmp_dst, num_planes);
            }
          }
        }

        // From best of horizontal EIGHTTAP_REGULAR modes, check vertical modes
        for (i = best_dual_mode + SWITCHABLE_FILTERS; i < filter_set_size;
             i += SWITCHABLE_FILTERS) {
          tmp_skip_sb = 0;
          tmp_skip_sse = INT64_MAX;

          mbmi->interp_filters =
              av1_make_interp_filters(filter_sets[i][0], filter_sets[i][1]);

          tmp_rs = av1_get_switchable_rate(cm, x, xd);
          av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst,
                                        bsize);
          model_rd_for_sb(cpi, bsize, x, xd, 0, num_planes - 1, &tmp_rate,
                          &tmp_dist, &tmp_skip_sb, &tmp_skip_sse);
          tmp_rd = RDCOST(x->rdmult, tmp_rs + tmp_rate, tmp_dist);

          if (tmp_rd < *rd) {
            *rd = tmp_rd;
            *switchable_rate = av1_get_switchable_rate(cm, x, xd);
            best_filters = mbmi->interp_filters;
            *skip_txfm_sb = tmp_skip_sb;
            *skip_sse_sb = tmp_skip_sse;
            best_in_temp = !best_in_temp;
            if (best_in_temp) {
              restore_dst_buf(xd, *orig_dst, num_planes);
            } else {
              restore_dst_buf(xd, *tmp_dst, num_planes);
            }
          }
        }
      } else {
#endif  // CONFIG_DUAL_FILTER (speed feature use_fast_interpolation_filter_search)
        // Exhaustive search over the full filter set.
        // EIGHTTAP_REGULAR mode is calculated beforehand
        for (i = 1; i < filter_set_size; ++i) {
          int tmp_skip_sb = 0;
          int64_t tmp_skip_sse = INT64_MAX;
          int tmp_rs;
          int64_t tmp_rd;
#if CONFIG_DUAL_FILTER
          mbmi->interp_filters =
              av1_make_interp_filters(filter_sets[i][0], filter_sets[i][1]);
#else
          mbmi->interp_filters = av1_broadcast_interp_filter((InterpFilter)i);
#endif  // CONFIG_DUAL_FILTER
          tmp_rs = av1_get_switchable_rate(cm, x, xd);
          av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst,
                                        bsize);
          model_rd_for_sb(cpi, bsize, x, xd, 0, num_planes - 1, &tmp_rate,
                          &tmp_dist, &tmp_skip_sb, &tmp_skip_sse);
          tmp_rd = RDCOST(x->rdmult, tmp_rs + tmp_rate, tmp_dist);

          if (tmp_rd < *rd) {
            *rd = tmp_rd;
            *switchable_rate = av1_get_switchable_rate(cm, x, xd);
            best_filters = mbmi->interp_filters;
            *skip_txfm_sb = tmp_skip_sb;
            *skip_sse_sb = tmp_skip_sse;
            best_in_temp = !best_in_temp;
            if (best_in_temp) {
              restore_dst_buf(xd, *orig_dst, num_planes);
            } else {
              restore_dst_buf(xd, *tmp_dst, num_planes);
            }
          }
        }
#if CONFIG_DUAL_FILTER  // Speed feature use_fast_interpolation_filter_search
      }
#endif  // CONFIG_DUAL_FILTER (speed feature use_fast_interpolation_filter_search)

      // Ensure the best candidate's prediction ends up in orig_dst.
      if (best_in_temp) {
        restore_dst_buf(xd, *tmp_dst, num_planes);
      } else {
        restore_dst_buf(xd, *orig_dst, num_planes);
      }
      mbmi->interp_filters = best_filters;
    } else {
      assert(mbmi->interp_filters ==
             av1_broadcast_interp_filter(EIGHTTAP_REGULAR));
    }
  }

  return 0;
}
7501
#if CONFIG_DUAL_FILTER
// For each filter direction, keep the given interpolation filter only when
// the corresponding motion-vector component actually has a subpel part;
// otherwise fall back to EIGHTTAP_REGULAR (no interpolation is performed
// for full-pel motion in that direction).
static InterpFilters condition_interp_filters_on_mv(
    InterpFilters interp_filters, const MACROBLOCKD *xd) {
  const InterpFilter f0 = has_subpel_mv_component(xd->mi[0], xd, 0)
                              ? av1_extract_interp_filter(interp_filters, 0)
                              : EIGHTTAP_REGULAR;
  const InterpFilter f1 = has_subpel_mv_component(xd->mi[0], xd, 1)
                              ? av1_extract_interp_filter(interp_filters, 1)
                              : EIGHTTAP_REGULAR;
  return av1_make_interp_filters(f0, f1);
}
#endif
7514
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007515// TODO(afergs): Refactor the MBMI references in here - there's four
7516// TODO(afergs): Refactor optional args - add them to a struct or remove
7517static int64_t motion_mode_rd(
7518 const AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize,
7519 RD_STATS *rd_stats, RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv,
7520 int *disable_skip, int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME], int mi_row,
7521 int mi_col, HandleInterModeArgs *const args, const int64_t ref_best_rd,
Yue Chenc5024212018-01-23 17:28:37 -08007522 const int *refs, int rate_mv, int *skip_txfm_sb, int64_t *skip_sse_sb,
7523 BUFFER_SET *orig_dst) {
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007524 const AV1_COMMON *const cm = &cpi->common;
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00007525 const int num_planes = av1_num_planes(cm);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007526 MACROBLOCKD *xd = &x->e_mbd;
7527 MODE_INFO *mi = xd->mi[0];
7528 MB_MODE_INFO *mbmi = &mi->mbmi;
7529 const int is_comp_pred = has_second_ref(mbmi);
7530 const PREDICTION_MODE this_mode = mbmi->mode;
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007531 int rate2_nocoeff = 0, best_xskip, best_disable_skip = 0;
7532 RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
7533 MB_MODE_INFO base_mbmi, best_mbmi;
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007534 uint8_t best_blk_skip[MAX_MB_PLANE][MAX_MIB_SIZE * MAX_MIB_SIZE * 4];
Yue Chenb8aa3992017-12-13 15:27:52 -08007535 int interintra_allowed =
7536 cm->allow_interintra_compound && is_interintra_allowed(mbmi);
Yunqing Wang97d6a372017-10-09 14:15:15 -07007537#if CONFIG_EXT_WARPED_MOTION
Yunqing Wang1bc82862017-06-28 15:49:48 -07007538 int pts0[SAMPLES_ARRAY_SIZE], pts_inref0[SAMPLES_ARRAY_SIZE];
Yunqing Wang1bc82862017-06-28 15:49:48 -07007539 int total_samples;
7540#else
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007541 int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
Yunqing Wang97d6a372017-10-09 14:15:15 -07007542#endif // CONFIG_EXT_WARPED_MOTION
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007543
Yue Chenb8aa3992017-12-13 15:27:52 -08007544 (void)rate_mv;
7545
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007546 av1_invalid_rd_stats(&best_rd_stats);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007547
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007548 aom_clear_system_state();
Yunqing Wang97d6a372017-10-09 14:15:15 -07007549#if CONFIG_EXT_WARPED_MOTION
Yunqing Wangd3c13e82018-01-02 17:52:56 -08007550 mbmi->num_proj_ref[0] = findSamples(cm, xd, mi_row, mi_col, pts0, pts_inref0);
Yunqing Wang1bc82862017-06-28 15:49:48 -07007551 total_samples = mbmi->num_proj_ref[0];
7552#else
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007553 mbmi->num_proj_ref[0] = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref);
Yunqing Wang97d6a372017-10-09 14:15:15 -07007554#endif // CONFIG_EXT_WARPED_MOTION
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007555 rate2_nocoeff = rd_stats->rate;
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007556 base_mbmi = *mbmi;
Yue Chenb8aa3992017-12-13 15:27:52 -08007557 MOTION_MODE last_motion_mode_allowed =
7558 motion_mode_allowed(xd->global_motion, xd, mi);
7559 assert(mbmi->ref_frame[1] != INTRA_FRAME);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007560
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007561 int64_t best_rd = INT64_MAX;
Yue Chenb8aa3992017-12-13 15:27:52 -08007562 for (int mode_index = (int)SIMPLE_TRANSLATION;
7563 mode_index <= (int)last_motion_mode_allowed + interintra_allowed;
7564 mode_index++) {
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007565 int64_t tmp_rd = INT64_MAX;
7566 int tmp_rate;
7567 int64_t tmp_dist;
Yue Chenc5024212018-01-23 17:28:37 -08007568 int tmp_rate2 = rate2_nocoeff;
Yue Chenb8aa3992017-12-13 15:27:52 -08007569 int is_interintra_mode = mode_index > (int)last_motion_mode_allowed;
7570
Hui Su5ef90ce2018-01-03 11:31:46 -08007571 *skip_txfm_sb = 0;
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007572 *mbmi = base_mbmi;
Yue Chenb8aa3992017-12-13 15:27:52 -08007573 if (is_interintra_mode) {
7574 mbmi->motion_mode = SIMPLE_TRANSLATION;
7575 } else {
7576 mbmi->motion_mode = (MOTION_MODE)mode_index;
7577 assert(mbmi->ref_frame[1] != INTRA_FRAME);
7578 }
7579
7580 // OBMC mode
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007581 if (mbmi->motion_mode == OBMC_CAUSAL) {
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007582 mbmi->motion_mode = OBMC_CAUSAL;
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01007583 if (!is_comp_pred && have_newmv_in_inter_mode(this_mode)) {
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007584 int tmp_rate_mv = 0;
7585
Sebastien Alaiwan0bdea0d2017-10-02 15:15:05 +02007586 single_motion_search(cpi, x, bsize, mi_row, mi_col, 0, &tmp_rate_mv);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007587 mbmi->mv[0].as_int = x->best_mv.as_int;
7588 if (discount_newmv_test(cpi, this_mode, mbmi->mv[0], mode_mv,
7589 refs[0])) {
7590 tmp_rate_mv = AOMMAX((tmp_rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
7591 }
Yue Chenc5024212018-01-23 17:28:37 -08007592 tmp_rate2 = rate2_nocoeff - rate_mv + tmp_rate_mv;
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007593#if CONFIG_DUAL_FILTER
Rupert Swarbrick27e90292017-09-28 17:46:50 +01007594 mbmi->interp_filters =
7595 condition_interp_filters_on_mv(mbmi->interp_filters, xd);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007596#endif // CONFIG_DUAL_FILTER
Jingning Hanc44009c2017-05-06 11:36:49 -07007597 av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007598 } else {
Jingning Hanc44009c2017-05-06 11:36:49 -07007599 av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007600 }
7601 av1_build_obmc_inter_prediction(
7602 cm, xd, mi_row, mi_col, args->above_pred_buf, args->above_pred_stride,
7603 args->left_pred_buf, args->left_pred_stride);
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00007604 model_rd_for_sb(cpi, bsize, x, xd, 0, num_planes - 1, &tmp_rate,
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007605 &tmp_dist, skip_txfm_sb, skip_sse_sb);
7606 }
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007607
Yue Chenb8aa3992017-12-13 15:27:52 -08007608 // Local warped motion mode
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007609 if (mbmi->motion_mode == WARPED_CAUSAL) {
Yunqing Wang97d6a372017-10-09 14:15:15 -07007610#if CONFIG_EXT_WARPED_MOTION
Yunqing Wang1bc82862017-06-28 15:49:48 -07007611 int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
Yunqing Wang97d6a372017-10-09 14:15:15 -07007612#endif // CONFIG_EXT_WARPED_MOTION
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007613 mbmi->motion_mode = WARPED_CAUSAL;
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007614 mbmi->wm_params[0].wmtype = DEFAULT_WMTYPE;
Rupert Swarbrick27e90292017-09-28 17:46:50 +01007615 mbmi->interp_filters = av1_broadcast_interp_filter(
7616 av1_unswitchable_filter(cm->interp_filter));
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007617
Yunqing Wang97d6a372017-10-09 14:15:15 -07007618#if CONFIG_EXT_WARPED_MOTION
Yunqing Wang62fe78e2017-12-22 16:03:43 -08007619 memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
7620 memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
Yunqing Wangd3c13e82018-01-02 17:52:56 -08007621 // Select the samples according to motion vector difference
Yunqing Wang62fe78e2017-12-22 16:03:43 -08007622 if (mbmi->num_proj_ref[0] > 1) {
Yunqing Wangd3c13e82018-01-02 17:52:56 -08007623 mbmi->num_proj_ref[0] = selectSamples(
7624 &mbmi->mv[0].as_mv, pts, pts_inref, mbmi->num_proj_ref[0], bsize);
Yunqing Wang62fe78e2017-12-22 16:03:43 -08007625 }
Yunqing Wang876a8b02017-11-13 17:13:27 -08007626#endif // CONFIG_EXT_WARPED_MOTION
7627
Yunqing Wang62fe78e2017-12-22 16:03:43 -08007628 if (!find_projection(mbmi->num_proj_ref[0], pts, pts_inref, bsize,
7629 mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col,
7630 &mbmi->wm_params[0], mi_row, mi_col)) {
7631 // Refine MV for NEWMV mode
7632 if (!is_comp_pred && have_newmv_in_inter_mode(this_mode)) {
7633 int tmp_rate_mv = 0;
7634 const int_mv mv0 = mbmi->mv[0];
7635 const WarpedMotionParams wm_params0 = mbmi->wm_params[0];
Yunqing Wang876a8b02017-11-13 17:13:27 -08007636#if CONFIG_EXT_WARPED_MOTION
Yunqing Wang62fe78e2017-12-22 16:03:43 -08007637 int num_proj_ref0 = mbmi->num_proj_ref[0];
Yunqing Wang876a8b02017-11-13 17:13:27 -08007638
Yunqing Wang62fe78e2017-12-22 16:03:43 -08007639 // Refine MV in a small range.
7640 av1_refine_warped_mv(cpi, x, bsize, mi_row, mi_col, pts0, pts_inref0,
Yunqing Wangd3c13e82018-01-02 17:52:56 -08007641 total_samples);
Yunqing Wang876a8b02017-11-13 17:13:27 -08007642#else
7643 // Refine MV in a small range.
7644 av1_refine_warped_mv(cpi, x, bsize, mi_row, mi_col, pts, pts_inref);
7645#endif // CONFIG_EXT_WARPED_MOTION
7646
Yunqing Wang62fe78e2017-12-22 16:03:43 -08007647 // Keep the refined MV and WM parameters.
7648 if (mv0.as_int != mbmi->mv[0].as_int) {
7649 const int ref = refs[0];
7650 const MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
Yunqing Wang876a8b02017-11-13 17:13:27 -08007651
Yunqing Wang62fe78e2017-12-22 16:03:43 -08007652 tmp_rate_mv =
7653 av1_mv_bit_cost(&mbmi->mv[0].as_mv, &ref_mv, x->nmvjointcost,
7654 x->mvcost, MV_COST_WEIGHT);
Yunqing Wang876a8b02017-11-13 17:13:27 -08007655
Yunqing Wang62fe78e2017-12-22 16:03:43 -08007656 if (cpi->sf.adaptive_motion_search)
7657 x->pred_mv[ref] = mbmi->mv[0].as_mv;
Yunqing Wang876a8b02017-11-13 17:13:27 -08007658
Yunqing Wang62fe78e2017-12-22 16:03:43 -08007659 if (discount_newmv_test(cpi, this_mode, mbmi->mv[0], mode_mv,
7660 refs[0])) {
7661 tmp_rate_mv = AOMMAX((tmp_rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
Yunqing Wang876a8b02017-11-13 17:13:27 -08007662 }
Yue Chenc5024212018-01-23 17:28:37 -08007663 tmp_rate2 = rate2_nocoeff - rate_mv + tmp_rate_mv;
Yunqing Wang62fe78e2017-12-22 16:03:43 -08007664#if CONFIG_DUAL_FILTER
7665 mbmi->interp_filters =
7666 condition_interp_filters_on_mv(mbmi->interp_filters, xd);
7667#endif // CONFIG_DUAL_FILTER
7668 } else {
7669 // Restore the old MV and WM parameters.
7670 mbmi->mv[0] = mv0;
7671 mbmi->wm_params[0] = wm_params0;
7672#if CONFIG_EXT_WARPED_MOTION
7673 mbmi->num_proj_ref[0] = num_proj_ref0;
7674#endif // CONFIG_EXT_WARPED_MOTION
Yunqing Wang876a8b02017-11-13 17:13:27 -08007675 }
Yunqing Wang876a8b02017-11-13 17:13:27 -08007676 }
Yunqing Wang62fe78e2017-12-22 16:03:43 -08007677
7678 av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00007679 model_rd_for_sb(cpi, bsize, x, xd, 0, num_planes - 1, &tmp_rate,
Yunqing Wang62fe78e2017-12-22 16:03:43 -08007680 &tmp_dist, skip_txfm_sb, skip_sse_sb);
7681 } else {
7682 continue;
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007683 }
7684 }
Yue Chenb8aa3992017-12-13 15:27:52 -08007685
7686 // Interintra mode
7687 if (is_interintra_mode) {
7688 INTERINTRA_MODE best_interintra_mode = II_DC_PRED;
7689 int64_t rd, best_interintra_rd = INT64_MAX;
7690 int rmode, rate_sum;
7691 int64_t dist_sum;
7692 int j;
7693 int tmp_rate_mv = 0;
7694 int tmp_skip_txfm_sb;
7695 int bw = block_size_wide[bsize];
7696 int64_t tmp_skip_sse_sb;
7697 DECLARE_ALIGNED(16, uint8_t, intrapred_[2 * MAX_INTERINTRA_SB_SQUARE]);
7698 DECLARE_ALIGNED(16, uint8_t, tmp_buf_[2 * MAX_INTERINTRA_SB_SQUARE]);
7699 uint8_t *tmp_buf, *intrapred;
7700 const int *const interintra_mode_cost =
7701 x->interintra_mode_cost[size_group_lookup[bsize]];
7702
7703 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
7704 tmp_buf = CONVERT_TO_BYTEPTR(tmp_buf_);
7705 intrapred = CONVERT_TO_BYTEPTR(intrapred_);
7706 } else {
7707 tmp_buf = tmp_buf_;
7708 intrapred = intrapred_;
7709 }
7710 const int_mv mv0 = mbmi->mv[0];
7711
7712 mbmi->ref_frame[1] = NONE_FRAME;
7713 xd->plane[0].dst.buf = tmp_buf;
7714 xd->plane[0].dst.stride = bw;
7715 av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, NULL, bsize);
7716
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00007717 restore_dst_buf(xd, *orig_dst, num_planes);
Yue Chenb8aa3992017-12-13 15:27:52 -08007718 mbmi->ref_frame[1] = INTRA_FRAME;
7719 mbmi->use_wedge_interintra = 0;
7720 for (j = 0; j < INTERINTRA_MODES; ++j) {
7721 mbmi->interintra_mode = (INTERINTRA_MODE)j;
7722 rmode = interintra_mode_cost[mbmi->interintra_mode];
7723 av1_build_intra_predictors_for_interintra(cm, xd, bsize, 0, orig_dst,
7724 intrapred, bw);
7725 av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
7726 model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
7727 &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
7728 rd = RDCOST(x->rdmult, tmp_rate_mv + rate_sum + rmode, dist_sum);
7729 if (rd < best_interintra_rd) {
7730 best_interintra_rd = rd;
7731 best_interintra_mode = mbmi->interintra_mode;
7732 }
7733 }
7734 mbmi->interintra_mode = best_interintra_mode;
7735 rmode = interintra_mode_cost[mbmi->interintra_mode];
7736 av1_build_intra_predictors_for_interintra(cm, xd, bsize, 0, orig_dst,
7737 intrapred, bw);
7738 av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
7739 av1_subtract_plane(x, bsize, 0);
7740 rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
7741 &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
7742 if (rd != INT64_MAX)
7743 rd = RDCOST(x->rdmult, rate_mv + rmode + rate_sum, dist_sum);
7744 best_interintra_rd = rd;
7745
Yue Chen7ebfaf42018-01-16 10:40:38 -08007746 if (ref_best_rd < INT64_MAX && (best_interintra_rd >> 1) > ref_best_rd)
Yue Chenb8aa3992017-12-13 15:27:52 -08007747 continue;
7748
7749 if (is_interintra_wedge_used(bsize)) {
7750 int64_t best_interintra_rd_nowedge = INT64_MAX;
7751 int64_t best_interintra_rd_wedge = INT64_MAX;
7752 int_mv tmp_mv;
7753#if CONFIG_DUAL_FILTER
7754 InterpFilters backup_interp_filters = mbmi->interp_filters;
7755#endif // CONFIG_DUAL_FILTER
7756 int rwedge = x->wedge_interintra_cost[bsize][0];
7757 if (rd != INT64_MAX)
7758 rd = RDCOST(x->rdmult, rate_mv + rmode + rate_sum + rwedge, dist_sum);
7759 best_interintra_rd_nowedge = rd;
7760
7761 // Disable wedge search if source variance is small
7762 if (x->source_variance > cpi->sf.disable_wedge_search_var_thresh) {
7763 mbmi->use_wedge_interintra = 1;
7764
7765 rwedge = av1_cost_literal(get_interintra_wedge_bits(bsize)) +
7766 x->wedge_interintra_cost[bsize][1];
7767
7768 best_interintra_rd_wedge =
7769 pick_interintra_wedge(cpi, x, bsize, intrapred_, tmp_buf_);
7770
7771 best_interintra_rd_wedge +=
7772 RDCOST(x->rdmult, rmode + rate_mv + rwedge, 0);
7773 // Refine motion vector.
7774 if (have_newmv_in_inter_mode(mbmi->mode)) {
7775 // get negative of mask
7776 const uint8_t *mask = av1_get_contiguous_soft_mask(
7777 mbmi->interintra_wedge_index, 1, bsize);
7778 tmp_mv.as_int = x->mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0].as_int;
7779 compound_single_motion_search(cpi, x, bsize, &tmp_mv.as_mv, mi_row,
7780 mi_col, intrapred, mask, bw,
7781 &tmp_rate_mv, 0);
7782#if CONFIG_DUAL_FILTER
7783 mbmi->interp_filters =
7784 condition_interp_filters_on_mv(mbmi->interp_filters, xd);
7785#endif // CONFIG_DUAL_FILTER
7786 mbmi->mv[0].as_int = tmp_mv.as_int;
7787 av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, orig_dst,
7788 bsize);
7789 model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
7790 &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
7791 rd = RDCOST(x->rdmult, tmp_rate_mv + rmode + rate_sum + rwedge,
7792 dist_sum);
7793 if (rd >= best_interintra_rd_wedge) {
7794 tmp_mv.as_int = mv0.as_int;
7795 tmp_rate_mv = rate_mv;
7796#if CONFIG_DUAL_FILTER
7797 mbmi->interp_filters = backup_interp_filters;
7798#endif
7799 av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
7800 }
7801 } else {
7802 tmp_mv.as_int = mv0.as_int;
7803 tmp_rate_mv = rate_mv;
7804 av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
7805 }
7806 // Evaluate closer to true rd
7807 av1_subtract_plane(x, bsize, 0);
7808 rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
7809 &tmp_skip_txfm_sb, &tmp_skip_sse_sb,
7810 INT64_MAX);
7811 if (rd != INT64_MAX)
7812 rd = RDCOST(x->rdmult, rmode + tmp_rate_mv + rwedge + rate_sum,
7813 dist_sum);
7814 best_interintra_rd_wedge = rd;
7815 if (best_interintra_rd_wedge < best_interintra_rd_nowedge) {
7816 mbmi->use_wedge_interintra = 1;
7817 mbmi->mv[0].as_int = tmp_mv.as_int;
Yue Chen7ebfaf42018-01-16 10:40:38 -08007818 tmp_rate2 += tmp_rate_mv - rate_mv;
Yue Chenb8aa3992017-12-13 15:27:52 -08007819 } else {
7820 mbmi->use_wedge_interintra = 0;
7821 mbmi->mv[0].as_int = mv0.as_int;
7822#if CONFIG_DUAL_FILTER
7823 mbmi->interp_filters = backup_interp_filters;
7824#endif // CONFIG_DUAL_FILTER
7825 }
7826 } else {
7827 mbmi->use_wedge_interintra = 0;
7828 }
7829 } // if (is_interintra_wedge_used(bsize))
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00007830 restore_dst_buf(xd, *orig_dst, num_planes);
Yue Chenb8aa3992017-12-13 15:27:52 -08007831 av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00007832 model_rd_for_sb(cpi, bsize, x, xd, 0, num_planes - 1, &tmp_rate,
Yue Chenb8aa3992017-12-13 15:27:52 -08007833 &tmp_dist, skip_txfm_sb, skip_sse_sb);
7834 }
7835
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007836 x->skip = 0;
7837
7838 rd_stats->dist = 0;
7839 rd_stats->sse = 0;
7840 rd_stats->skip = 1;
7841 rd_stats->rate = tmp_rate2;
Yue Chenc5024212018-01-23 17:28:37 -08007842 if (av1_is_interp_needed(xd))
7843 rd_stats->rate += av1_get_switchable_rate(cm, x, xd);
Yue Chenb8aa3992017-12-13 15:27:52 -08007844 if (interintra_allowed) {
7845 rd_stats->rate += x->interintra_cost[size_group_lookup[bsize]]
7846 [mbmi->ref_frame[1] == INTRA_FRAME];
7847 if (mbmi->ref_frame[1] == INTRA_FRAME) {
7848 rd_stats->rate += x->interintra_mode_cost[size_group_lookup[bsize]]
7849 [mbmi->interintra_mode];
7850 if (is_interintra_wedge_used(bsize)) {
7851 rd_stats->rate +=
7852 x->wedge_interintra_cost[bsize][mbmi->use_wedge_interintra];
7853 if (mbmi->use_wedge_interintra) {
7854 rd_stats->rate +=
7855 av1_cost_literal(get_interintra_wedge_bits(bsize));
7856 }
7857 }
7858 }
7859 }
7860 if ((last_motion_mode_allowed > SIMPLE_TRANSLATION) &&
7861 (mbmi->ref_frame[1] != INTRA_FRAME)) {
Yunqing Wang3afbf3f2017-11-21 20:16:18 -08007862 if (last_motion_mode_allowed == WARPED_CAUSAL) {
Yue Chenbdc8dab2017-07-26 12:05:47 -07007863 rd_stats->rate += x->motion_mode_cost[bsize][mbmi->motion_mode];
Yunqing Wang3afbf3f2017-11-21 20:16:18 -08007864 } else {
Yue Chenb23d00a2017-07-28 17:01:21 -07007865 rd_stats->rate += x->motion_mode_cost1[bsize][mbmi->motion_mode];
Yunqing Wang3afbf3f2017-11-21 20:16:18 -08007866 }
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007867 }
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007868 if (!*skip_txfm_sb) {
7869 int64_t rdcosty = INT64_MAX;
7870 int is_cost_valid_uv = 0;
7871
7872 // cost and distortion
7873 av1_subtract_plane(x, bsize, 0);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007874 if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) {
Debargha Mukherjee51666862017-10-24 14:29:13 -07007875 // Motion mode
Yue Chen25dc0702017-10-18 23:36:06 -07007876 select_tx_type_yrd(cpi, x, rd_stats_y, bsize, mi_row, mi_col,
7877 ref_best_rd);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007878 } else {
7879 int idx, idy;
7880 super_block_yrd(cpi, x, rd_stats_y, bsize, ref_best_rd);
7881 for (idy = 0; idy < xd->n8_h; ++idy)
7882 for (idx = 0; idx < xd->n8_w; ++idx)
7883 mbmi->inter_tx_size[idy][idx] = mbmi->tx_size;
7884 memset(x->blk_skip[0], rd_stats_y->skip,
7885 sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
7886 }
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007887
7888 if (rd_stats_y->rate == INT_MAX) {
7889 av1_invalid_rd_stats(rd_stats);
Yue Chenb8aa3992017-12-13 15:27:52 -08007890 if (mbmi->motion_mode != SIMPLE_TRANSLATION ||
7891 mbmi->ref_frame[1] == INTRA_FRAME) {
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007892 continue;
7893 } else {
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00007894 restore_dst_buf(xd, *orig_dst, num_planes);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007895 return INT64_MAX;
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007896 }
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007897 }
7898
7899 av1_merge_rd_stats(rd_stats, rd_stats_y);
7900
Urvang Joshi70006e42017-06-14 16:08:55 -07007901 rdcosty = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
7902 rdcosty = AOMMIN(rdcosty, RDCOST(x->rdmult, 0, rd_stats->sse));
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00007903 if (num_planes > 1) {
7904 /* clang-format off */
7905 is_cost_valid_uv =
7906 inter_block_uvrd(cpi, x, rd_stats_uv, bsize, ref_best_rd - rdcosty,
7907 0);
7908 if (!is_cost_valid_uv) {
7909 continue;
7910 }
7911 /* clang-format on */
7912 av1_merge_rd_stats(rd_stats, rd_stats_uv);
7913 } else {
7914 av1_init_rd_stats(rd_stats_uv);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007915 }
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007916#if CONFIG_RD_DEBUG
7917 // record transform block coefficient cost
7918 // TODO(angiebird): So far rd_debug tool only detects discrepancy of
7919 // coefficient cost. Therefore, it is fine to copy rd_stats into mbmi
7920 // here because we already collect the coefficient cost. Move this part to
7921 // other place when we need to compare non-coefficient cost.
7922 mbmi->rd_stats = *rd_stats;
7923#endif // CONFIG_RD_DEBUG
Zoe Liu1eed2df2017-10-16 17:13:15 -07007924 const int skip_ctx = av1_get_skip_context(xd);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007925 if (rd_stats->skip) {
7926 rd_stats->rate -= rd_stats_uv->rate + rd_stats_y->rate;
7927 rd_stats_y->rate = 0;
7928 rd_stats_uv->rate = 0;
Zoe Liu1eed2df2017-10-16 17:13:15 -07007929 rd_stats->rate += x->skip_cost[skip_ctx][1];
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007930 mbmi->skip = 0;
7931 // here mbmi->skip temporarily plays a role as what this_skip2 does
7932 } else if (!xd->lossless[mbmi->segment_id] &&
Urvang Joshi70006e42017-06-14 16:08:55 -07007933 (RDCOST(x->rdmult,
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007934 rd_stats_y->rate + rd_stats_uv->rate +
Zoe Liu1eed2df2017-10-16 17:13:15 -07007935 x->skip_cost[skip_ctx][0],
7936 rd_stats->dist) >= RDCOST(x->rdmult,
7937 x->skip_cost[skip_ctx][1],
7938 rd_stats->sse))) {
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007939 rd_stats->rate -= rd_stats_uv->rate + rd_stats_y->rate;
Zoe Liu1eed2df2017-10-16 17:13:15 -07007940 rd_stats->rate += x->skip_cost[skip_ctx][1];
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007941 rd_stats->dist = rd_stats->sse;
7942 rd_stats_y->rate = 0;
7943 rd_stats_uv->rate = 0;
7944 mbmi->skip = 1;
7945 } else {
Zoe Liu1eed2df2017-10-16 17:13:15 -07007946 rd_stats->rate += x->skip_cost[skip_ctx][0];
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007947 mbmi->skip = 0;
7948 }
7949 *disable_skip = 0;
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007950 } else {
7951 x->skip = 1;
7952 *disable_skip = 1;
7953 mbmi->tx_size = tx_size_from_tx_mode(bsize, cm->tx_mode, 1);
7954
Sebastien Alaiwan1bc94fc2017-10-31 10:25:17 +01007955 // The cost of skip bit needs to be added.
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007956 mbmi->skip = 0;
Zoe Liu1eed2df2017-10-16 17:13:15 -07007957 rd_stats->rate += x->skip_cost[av1_get_skip_context(xd)][1];
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007958
7959 rd_stats->dist = *skip_sse_sb;
7960 rd_stats->sse = *skip_sse_sb;
7961 rd_stats_y->rate = 0;
7962 rd_stats_uv->rate = 0;
7963 rd_stats->skip = 1;
7964 }
7965
Sarah Parker2b9ec2e2017-10-30 17:34:08 -07007966 if (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV) {
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007967 if (is_nontrans_global_motion(xd)) {
Rupert Swarbrick27e90292017-09-28 17:46:50 +01007968 mbmi->interp_filters = av1_broadcast_interp_filter(
7969 av1_unswitchable_filter(cm->interp_filter));
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007970 }
7971 }
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007972
Urvang Joshi70006e42017-06-14 16:08:55 -07007973 tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
Yue Chenb8aa3992017-12-13 15:27:52 -08007974 if ((mbmi->motion_mode == SIMPLE_TRANSLATION &&
7975 mbmi->ref_frame[1] != INTRA_FRAME) ||
7976 (tmp_rd < best_rd)) {
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007977 best_mbmi = *mbmi;
7978 best_rd = tmp_rd;
7979 best_rd_stats = *rd_stats;
7980 best_rd_stats_y = *rd_stats_y;
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00007981 if (num_planes > 1) best_rd_stats_uv = *rd_stats_uv;
7982 for (int i = 0; i < num_planes; ++i)
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007983 memcpy(best_blk_skip[i], x->blk_skip[i],
Angie Chiangdf682312018-01-08 11:19:55 -08007984 sizeof(best_blk_skip[i][0]) * xd->n8_h * xd->n8_w * 4);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007985 best_xskip = x->skip;
7986 best_disable_skip = *disable_skip;
7987 }
7988 }
7989
7990 if (best_rd == INT64_MAX) {
7991 av1_invalid_rd_stats(rd_stats);
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00007992 restore_dst_buf(xd, *orig_dst, num_planes);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007993 return INT64_MAX;
7994 }
7995 *mbmi = best_mbmi;
7996 *rd_stats = best_rd_stats;
7997 *rd_stats_y = best_rd_stats_y;
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00007998 if (num_planes > 1) *rd_stats_uv = best_rd_stats_uv;
7999 for (int i = 0; i < num_planes; ++i)
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08008000 memcpy(x->blk_skip[i], best_blk_skip[i],
Angie Chiangdf682312018-01-08 11:19:55 -08008001 sizeof(x->blk_skip[i][0]) * xd->n8_h * xd->n8_w * 4);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08008002 x->skip = best_xskip;
8003 *disable_skip = best_disable_skip;
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08008004
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00008005 restore_dst_buf(xd, *orig_dst, num_planes);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08008006 return 0;
8007}
8008
#if CONFIG_EXT_SKIP
// Evaluate the RD cost of coding this block in skip mode: the inter
// prediction is used as-is with no residual, so the rate contribution is
// zero and the distortion is the (x16-scaled) sum of squared prediction
// errors over all coded planes.  The computed stats, the reference
// frames / motion vectors, and the candidate mode index are cached on the
// MACROBLOCK so the caller can compare skip mode against regular modes.
// Always returns 0; the dst pointers in xd are restored before returning.
static int64_t skip_mode_rd(const AV1_COMP *const cpi, MACROBLOCK *const x,
                            BLOCK_SIZE bsize, int mi_row, int mi_col,
                            BUFFER_SET *const orig_dst) {
  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;

  av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);

  // Accumulate the prediction SSE across every coded plane.
  int64_t sse_total = 0;
  for (int plane = 0; plane < num_planes; ++plane) {
    const struct macroblock_plane *const p = &x->plane[plane];
    const struct macroblockd_plane *const pd = &xd->plane[plane];
    const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
    const int bw = block_size_wide[plane_bsize];
    const int bh = block_size_high[plane_bsize];

    av1_subtract_plane(x, bsize, plane);
    int64_t plane_sse = aom_sum_squares_2d_i16(p->src_diff, bw, bw, bh);
    // Scale by 16 to match the distortion scale used by the rest of the
    // RD code.
    plane_sse = plane_sse << 4;
    sse_total += plane_sse;
  }

  x->skip_mode_dist = x->skip_mode_sse = sse_total;
  x->skip_mode_rate = 0;
  x->skip_mode_rdcost = RDCOST(x->rdmult, x->skip_mode_rate, x->skip_mode_dist);

  // Cache the reference frames and motion vectors of this candidate.
  for (int ref = 0; ref < 2; ++ref) {
    x->skip_mode_ref_frame[ref] = mbmi->ref_frame[ref];
    x->skip_mode_mv[ref].as_int = mbmi->mv[ref].as_int;
  }

  // Cache the mode index of this candidate.
  x->skip_mode_index = x->skip_mode_index_candidate;

  restore_dst_buf(xd, *orig_dst, num_planes);
  return 0;
}
#endif  // CONFIG_EXT_SKIP
8050
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01008051static int64_t handle_inter_mode(
8052 const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
8053 RD_STATS *rd_stats, RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv,
8054 int *disable_skip, int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME], int mi_row,
8055 int mi_col, HandleInterModeArgs *args, const int64_t ref_best_rd) {
Urvang Joshi52648442016-10-13 17:27:51 -07008056 const AV1_COMMON *cm = &cpi->common;
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00008057 const int num_planes = av1_num_planes(cm);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008058 MACROBLOCKD *xd = &x->e_mbd;
Sarah Parker19234cc2017-03-10 16:43:25 -08008059 MODE_INFO *mi = xd->mi[0];
8060 MB_MODE_INFO *mbmi = &mi->mbmi;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008061 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
8062 const int is_comp_pred = has_second_ref(mbmi);
8063 const int this_mode = mbmi->mode;
8064 int_mv *frame_mv = mode_mv[this_mode];
8065 int i;
8066 int refs[2] = { mbmi->ref_frame[0],
8067 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
8068 int_mv cur_mv[2];
8069 int rate_mv = 0;
Angie Chiang75c22092016-10-25 12:19:16 -07008070 int pred_exists = 1;
Jingning Hanae5cfde2016-11-30 12:01:44 -08008071 const int bw = block_size_wide[bsize];
Yaowu Xuc27fc142016-08-22 16:08:15 -07008072 int_mv single_newmv[TOTAL_REFS_PER_FRAME];
Yaowu Xuf883b422016-08-30 14:01:10 -07008073 uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008074 DECLARE_ALIGNED(16, uint8_t, tmp_buf_[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008075 uint8_t *tmp_buf;
Angie Chiang75c22092016-10-25 12:19:16 -07008076 int64_t rd = INT64_MAX;
David Barkerac37fa32016-12-02 12:30:21 +00008077 BUFFER_SET orig_dst, tmp_dst;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008078 int rs = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008079
8080 int skip_txfm_sb = 0;
8081 int64_t skip_sse_sb = INT64_MAX;
Yaowu Xub0d0d002016-11-22 09:26:43 -08008082 int16_t mode_ctx;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008083
Yue Chen5e606542017-05-24 17:03:17 -07008084 int compmode_interinter_cost = 0;
Sarah Parker2d0e9b72017-05-04 01:34:16 +00008085 mbmi->interinter_compound_type = COMPOUND_AVERAGE;
Cheng Chen4a6977a2017-12-27 12:27:39 -08008086#if CONFIG_JNT_COMP
8087 mbmi->comp_group_idx = 0;
8088 mbmi->compound_idx = 1;
8089#endif
Yue Chenb8aa3992017-12-13 15:27:52 -08008090 if (mbmi->ref_frame[1] == INTRA_FRAME) mbmi->ref_frame[1] = NONE_FRAME;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008091
Yaowu Xuc27fc142016-08-22 16:08:15 -07008092 if (is_comp_pred)
8093 mode_ctx = mbmi_ext->compound_mode_context[refs[0]];
8094 else
Luc Trudeau15a18e32017-12-13 14:15:25 -05008095 mode_ctx =
8096 av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008097
Yaowu Xuc27fc142016-08-22 16:08:15 -07008098 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
8099 tmp_buf = CONVERT_TO_BYTEPTR(tmp_buf_);
8100 else
Yaowu Xuc27fc142016-08-22 16:08:15 -07008101 tmp_buf = tmp_buf_;
David Barkerb8069f92016-11-18 14:49:56 +00008102 // Make sure that we didn't leave the plane destination buffers set
8103 // to tmp_buf at the end of the last iteration
8104 assert(xd->plane[0].dst.buf != tmp_buf);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008105
Yue Chen69f18e12016-09-08 14:48:15 -07008106 mbmi->num_proj_ref[0] = 0;
8107 mbmi->num_proj_ref[1] = 0;
Yue Chen69f18e12016-09-08 14:48:15 -07008108
Yaowu Xuc27fc142016-08-22 16:08:15 -07008109 if (is_comp_pred) {
8110 if (frame_mv[refs[0]].as_int == INVALID_MV ||
8111 frame_mv[refs[1]].as_int == INVALID_MV)
8112 return INT64_MAX;
8113 }
8114
Yue Chene9638cc2016-10-10 12:37:54 -07008115 mbmi->motion_mode = SIMPLE_TRANSLATION;
Cheng Chen4a6977a2017-12-27 12:27:39 -08008116 const int masked_compound_used =
8117 is_any_masked_compound_used(bsize) && cm->allow_masked_compound;
8118 int64_t ret_val = INT64_MAX;
8119#if CONFIG_JNT_COMP
8120 const RD_STATS backup_rd_stats = *rd_stats;
8121 const RD_STATS backup_rd_stats_y = *rd_stats_y;
8122 const RD_STATS backup_rd_stats_uv = *rd_stats_uv;
8123 RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
8124 int64_t best_rd = INT64_MAX;
8125 int best_compound_idx = 1;
8126 int64_t best_ret_val = INT64_MAX;
8127 uint8_t best_blk_skip[MAX_MB_PLANE][MAX_MIB_SIZE * MAX_MIB_SIZE * 4];
8128 const MB_MODE_INFO backup_mbmi = *mbmi;
8129 MB_MODE_INFO best_mbmi = *mbmi;
8130 int64_t early_terminate = 0;
8131
8132 int comp_idx;
8133 for (comp_idx = 0; comp_idx < 1 + is_comp_pred; ++comp_idx) {
8134 early_terminate = 0;
8135 *rd_stats = backup_rd_stats;
8136 *rd_stats_y = backup_rd_stats_y;
8137 *rd_stats_uv = backup_rd_stats_uv;
8138 *mbmi = backup_mbmi;
8139 mbmi->compound_idx = comp_idx;
8140
8141 if (is_comp_pred && comp_idx == 0) {
8142 mbmi->comp_group_idx = 0;
8143 mbmi->compound_idx = 0;
8144
8145 const int comp_group_idx_ctx = get_comp_group_idx_context(xd);
8146 const int comp_index_ctx = get_comp_index_context(cm, xd);
8147 if (masked_compound_used)
8148 rd_stats->rate += x->comp_group_idx_cost[comp_group_idx_ctx][0];
8149 rd_stats->rate += x->comp_idx_cost[comp_index_ctx][0];
8150 }
8151#endif // CONFIG_JNT_COMP
8152
8153 if (have_newmv_in_inter_mode(this_mode)) {
8154 ret_val = handle_newmv(cpi, x, bsize, mode_mv, mi_row, mi_col, &rate_mv,
8155 single_newmv, args);
8156#if CONFIG_JNT_COMP
8157 if (ret_val != 0) {
8158 early_terminate = INT64_MAX;
8159 continue;
8160 } else {
8161 rd_stats->rate += rate_mv;
8162 }
8163#else
Fergus Simpson45509632017-02-22 15:30:50 -08008164 if (ret_val != 0)
8165 return ret_val;
8166 else
8167 rd_stats->rate += rate_mv;
Cheng Chen4a6977a2017-12-27 12:27:39 -08008168#endif // CONFIG_JNT_COMP
8169 }
8170 for (i = 0; i < is_comp_pred + 1; ++i) {
8171 cur_mv[i] = frame_mv[refs[i]];
8172 // Clip "next_nearest" so that it does not extend to far out of image
8173 if (this_mode != NEWMV) clamp_mv2(&cur_mv[i].as_mv, xd);
8174#if CONFIG_JNT_COMP
8175 if (mv_check_bounds(&x->mv_limits, &cur_mv[i].as_mv)) {
8176 early_terminate = INT64_MAX;
8177 continue;
8178 }
8179#else
Alex Converse0fa0f422017-04-24 12:51:14 -07008180 if (mv_check_bounds(&x->mv_limits, &cur_mv[i].as_mv)) return INT64_MAX;
Cheng Chen4a6977a2017-12-27 12:27:39 -08008181#endif // CONFIG_JNT_COMP
8182 mbmi->mv[i].as_int = cur_mv[i].as_int;
8183 }
8184
8185 if (this_mode == NEAREST_NEARESTMV) {
8186 if (mbmi_ext->ref_mv_count[ref_frame_type] > 0) {
8187 cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv;
8188 cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
8189
8190 for (i = 0; i < 2; ++i) {
8191 clamp_mv2(&cur_mv[i].as_mv, xd);
8192#if CONFIG_JNT_COMP
8193 if (mv_check_bounds(&x->mv_limits, &cur_mv[i].as_mv)) {
8194 early_terminate = INT64_MAX;
8195 continue;
8196 }
8197#else
8198 if (mv_check_bounds(&x->mv_limits, &cur_mv[i].as_mv)) return INT64_MAX;
8199#endif // CONFIG_JNT_COMP
8200 mbmi->mv[i].as_int = cur_mv[i].as_int;
8201 }
8202 }
8203 }
8204
8205 if (mbmi_ext->ref_mv_count[ref_frame_type] > 0) {
8206 if (this_mode == NEAREST_NEWMV) {
8207 cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv;
8208
8209#if CONFIG_AMVR
8210 lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv,
8211 cm->cur_frame_force_integer_mv);
8212#else
8213 lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv);
8214#endif
8215 clamp_mv2(&cur_mv[0].as_mv, xd);
8216#if CONFIG_JNT_COMP
8217 if (mv_check_bounds(&x->mv_limits, &cur_mv[0].as_mv)) {
8218 early_terminate = INT64_MAX;
8219 continue;
8220 }
8221#else
8222 if (mv_check_bounds(&x->mv_limits, &cur_mv[0].as_mv)) return INT64_MAX;
8223#endif // CONFIG_JNT_COMP
8224 mbmi->mv[0].as_int = cur_mv[0].as_int;
8225 }
8226
8227 if (this_mode == NEW_NEARESTMV) {
8228 cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
8229
8230#if CONFIG_AMVR
8231 lower_mv_precision(&cur_mv[1].as_mv, cm->allow_high_precision_mv,
8232 cm->cur_frame_force_integer_mv);
8233#else
8234 lower_mv_precision(&cur_mv[1].as_mv, cm->allow_high_precision_mv);
8235#endif
8236 clamp_mv2(&cur_mv[1].as_mv, xd);
8237#if CONFIG_JNT_COMP
8238 if (mv_check_bounds(&x->mv_limits, &cur_mv[1].as_mv)) {
8239 early_terminate = INT64_MAX;
8240 continue;
8241 }
8242#else
8243 if (mv_check_bounds(&x->mv_limits, &cur_mv[1].as_mv)) return INT64_MAX;
8244#endif // CONFIG_JNT_COMP
8245 mbmi->mv[1].as_int = cur_mv[1].as_int;
8246 }
8247 }
8248
8249 if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
8250 int ref_mv_idx = mbmi->ref_mv_idx + 1;
8251 if (this_mode == NEAR_NEWMV || this_mode == NEAR_NEARMV) {
8252 cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
8253
8254#if CONFIG_AMVR
8255 lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv,
8256 cm->cur_frame_force_integer_mv);
8257#else
8258 lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv);
8259#endif
8260 clamp_mv2(&cur_mv[0].as_mv, xd);
8261#if CONFIG_JNT_COMP
8262 if (mv_check_bounds(&x->mv_limits, &cur_mv[0].as_mv)) {
8263 early_terminate = INT64_MAX;
8264 continue;
8265 }
8266#else
8267 if (mv_check_bounds(&x->mv_limits, &cur_mv[0].as_mv)) return INT64_MAX;
8268#endif // CONFIG_JNT_COMP
8269 mbmi->mv[0].as_int = cur_mv[0].as_int;
8270 }
8271
8272 if (this_mode == NEW_NEARMV || this_mode == NEAR_NEARMV) {
8273 cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
8274
8275#if CONFIG_AMVR
8276 lower_mv_precision(&cur_mv[1].as_mv, cm->allow_high_precision_mv,
8277 cm->cur_frame_force_integer_mv);
8278#else
8279 lower_mv_precision(&cur_mv[1].as_mv, cm->allow_high_precision_mv);
8280#endif
8281 clamp_mv2(&cur_mv[1].as_mv, xd);
8282#if CONFIG_JNT_COMP
8283 if (mv_check_bounds(&x->mv_limits, &cur_mv[1].as_mv)) {
8284 early_terminate = INT64_MAX;
8285 continue;
8286 }
8287#else
8288 if (mv_check_bounds(&x->mv_limits, &cur_mv[1].as_mv)) return INT64_MAX;
8289#endif // CONFIG_JNT_COMP
8290 mbmi->mv[1].as_int = cur_mv[1].as_int;
8291 }
8292 }
8293
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00008294 // Initialise tmp_dst and orig_dst buffers to prevent "may be used
8295 // uninitialized" warnings in GCC when the stream is monochrome.
8296 memset(tmp_dst.plane, 0, sizeof(tmp_dst.plane));
8297 memset(tmp_dst.stride, 0, sizeof(tmp_dst.stride));
8298 memset(orig_dst.plane, 0, sizeof(tmp_dst.plane));
8299 memset(orig_dst.stride, 0, sizeof(tmp_dst.stride));
8300
Cheng Chen4a6977a2017-12-27 12:27:39 -08008301 // do first prediction into the destination buffer. Do the next
8302 // prediction into a temporary buffer. Then keep track of which one
8303 // of these currently holds the best predictor, and use the other
8304 // one for future predictions. In the end, copy from tmp_buf to
8305 // dst if necessary.
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00008306 for (i = 0; i < num_planes; i++) {
Cheng Chen4a6977a2017-12-27 12:27:39 -08008307 tmp_dst.plane[i] = tmp_buf + i * MAX_SB_SQUARE;
8308 tmp_dst.stride[i] = MAX_SB_SIZE;
8309 }
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00008310 for (i = 0; i < num_planes; i++) {
Cheng Chen4a6977a2017-12-27 12:27:39 -08008311 orig_dst.plane[i] = xd->plane[i].dst.buf;
8312 orig_dst.stride[i] = xd->plane[i].dst.stride;
8313 }
8314
8315 // We don't include the cost of the second reference here, because there
8316 // are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
8317 // words if you present them in that order, the second one is always known
8318 // if the first is known.
8319 //
8320 // Under some circumstances we discount the cost of new mv mode to encourage
8321 // initiation of a motion field.
8322 if (discount_newmv_test(cpi, this_mode, frame_mv[refs[0]], mode_mv,
8323 refs[0])) {
8324 rd_stats->rate +=
8325 AOMMIN(cost_mv_ref(x, this_mode, mode_ctx),
8326 cost_mv_ref(x, is_comp_pred ? NEAREST_NEARESTMV : NEARESTMV,
8327 mode_ctx));
8328 } else {
8329 rd_stats->rate += cost_mv_ref(x, this_mode, mode_ctx);
8330 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07008331
Cheng Chen0a7f2f52017-10-10 15:16:09 -07008332#if CONFIG_JNT_COMP
Cheng Chen4a6977a2017-12-27 12:27:39 -08008333 if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd &&
8334 mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
8335 early_terminate = INT64_MAX;
8336 continue;
Cheng Chen2ef24ea2017-11-29 12:22:24 -08008337 }
RogerZhou3b635242017-09-19 10:06:46 -07008338#else
Urvang Joshi70006e42017-06-14 16:08:55 -07008339 if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd &&
Sebastien Alaiwan0bdea0d2017-10-02 15:15:05 +02008340 mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV)
Yaowu Xuc27fc142016-08-22 16:08:15 -07008341 return INT64_MAX;
Cheng Chen4a6977a2017-12-27 12:27:39 -08008342#endif // CONFIG_JNT_COMP
Yaowu Xuc27fc142016-08-22 16:08:15 -07008343
Cheng Chen4a6977a2017-12-27 12:27:39 -08008344 ret_val = interpolation_filter_search(
8345 x, cpi, bsize, mi_row, mi_col, &tmp_dst, &orig_dst, args->single_filter,
8346 &rd, &rs, &skip_txfm_sb, &skip_sse_sb);
8347#if CONFIG_JNT_COMP
8348 if (ret_val != 0) {
8349 early_terminate = INT64_MAX;
8350 continue;
8351 }
8352#else
Fergus Simpsonde18e2b2017-03-01 20:12:34 -08008353 if (ret_val != 0) return ret_val;
Cheng Chen4a6977a2017-12-27 12:27:39 -08008354#endif // CONFIG_JNT_COMP
Yaowu Xuc27fc142016-08-22 16:08:15 -07008355
Cheng Chen3392c432017-10-23 15:58:23 -07008356#if CONFIG_JNT_COMP
Cheng Chen4a6977a2017-12-27 12:27:39 -08008357 if (is_comp_pred && comp_idx)
Cheng Chen3392c432017-10-23 15:58:23 -07008358#else
Yushin Cho67dda512017-07-25 14:58:22 -07008359 if (is_comp_pred)
Cheng Chen4a6977a2017-12-27 12:27:39 -08008360#endif
8361 {
8362 int rate_sum, rs2;
8363 int64_t dist_sum;
8364 int64_t best_rd_compound = INT64_MAX, best_rd_cur = INT64_MAX;
8365 INTERINTER_COMPOUND_DATA best_compound_data;
8366 int_mv best_mv[2];
8367 int best_tmp_rate_mv = rate_mv;
8368 int tmp_skip_txfm_sb;
8369 int64_t tmp_skip_sse_sb;
8370 DECLARE_ALIGNED(16, uint8_t, pred0[2 * MAX_SB_SQUARE]);
8371 DECLARE_ALIGNED(16, uint8_t, pred1[2 * MAX_SB_SQUARE]);
8372 uint8_t *preds0[1] = { pred0 };
8373 uint8_t *preds1[1] = { pred1 };
8374 int strides[1] = { bw };
8375 int tmp_rate_mv;
8376 COMPOUND_TYPE cur_type;
8377 int best_compmode_interinter_cost = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008378
Cheng Chen4a6977a2017-12-27 12:27:39 -08008379 best_mv[0].as_int = cur_mv[0].as_int;
8380 best_mv[1].as_int = cur_mv[1].as_int;
8381 memset(&best_compound_data, 0, sizeof(best_compound_data));
8382 uint8_t tmp_mask_buf[2 * MAX_SB_SQUARE];
8383 best_compound_data.seg_mask = tmp_mask_buf;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008384
Cheng Chen5a881722017-11-30 17:05:10 -08008385 if (masked_compound_used) {
Cheng Chen4a6977a2017-12-27 12:27:39 -08008386 // get inter predictors to use for masked compound modes
8387 av1_build_inter_predictors_for_planes_single_buf(
8388 xd, bsize, 0, 0, mi_row, mi_col, 0, preds0, strides);
8389 av1_build_inter_predictors_for_planes_single_buf(
8390 xd, bsize, 0, 0, mi_row, mi_col, 1, preds1, strides);
Cheng Chen2ef24ea2017-11-29 12:22:24 -08008391 }
Cheng Chen4a6977a2017-12-27 12:27:39 -08008392
8393 for (cur_type = COMPOUND_AVERAGE; cur_type < COMPOUND_TYPES; cur_type++) {
8394 if (cur_type != COMPOUND_AVERAGE && !masked_compound_used) break;
8395 if (!is_interinter_compound_used(cur_type, bsize)) continue;
8396 tmp_rate_mv = rate_mv;
8397 best_rd_cur = INT64_MAX;
8398 mbmi->interinter_compound_type = cur_type;
8399#if CONFIG_JNT_COMP
8400 int masked_type_cost = 0;
8401
8402 const int comp_group_idx_ctx = get_comp_group_idx_context(xd);
8403 const int comp_index_ctx = get_comp_index_context(cm, xd);
8404 if (masked_compound_used) {
8405 if (cur_type == COMPOUND_AVERAGE) {
8406 mbmi->comp_group_idx = 0;
8407 mbmi->compound_idx = 1;
8408
8409 masked_type_cost += x->comp_group_idx_cost[comp_group_idx_ctx][0];
8410 masked_type_cost += x->comp_idx_cost[comp_index_ctx][1];
8411 } else {
8412 mbmi->comp_group_idx = 1;
8413 mbmi->compound_idx = 1;
8414
8415 masked_type_cost += x->comp_group_idx_cost[comp_group_idx_ctx][1];
8416 masked_type_cost +=
8417 x->compound_type_cost[bsize]
8418 [mbmi->interinter_compound_type - 1];
8419 }
8420 } else {
8421 mbmi->comp_group_idx = 0;
8422 mbmi->compound_idx = 1;
8423
8424 masked_type_cost += x->comp_idx_cost[comp_index_ctx][1];
8425 }
8426
8427 rs2 = av1_cost_literal(get_interinter_compound_type_bits(
8428 bsize, mbmi->interinter_compound_type)) +
8429 masked_type_cost;
Cheng Chen2ef24ea2017-11-29 12:22:24 -08008430#else
Sarah Parker680b9b12017-08-16 18:55:34 -07008431 int masked_type_cost = 0;
8432 if (masked_compound_used) {
Sarah Parker680b9b12017-08-16 18:55:34 -07008433 if (!is_interinter_compound_used(COMPOUND_WEDGE, bsize))
8434 masked_type_cost += av1_cost_literal(1);
8435 else
Sarah Parker680b9b12017-08-16 18:55:34 -07008436 masked_type_cost +=
Yue Chena4245512017-08-31 11:58:08 -07008437 x->compound_type_cost[bsize][mbmi->interinter_compound_type];
Sarah Parker680b9b12017-08-16 18:55:34 -07008438 }
Sarah Parker6fdc8532016-11-16 17:47:13 -08008439 rs2 = av1_cost_literal(get_interinter_compound_type_bits(
Sarah Parker2d0e9b72017-05-04 01:34:16 +00008440 bsize, mbmi->interinter_compound_type)) +
Sarah Parker680b9b12017-08-16 18:55:34 -07008441 masked_type_cost;
Cheng Chen2ef24ea2017-11-29 12:22:24 -08008442#endif // CONFIG_JNT_COMP
Yaowu Xuc27fc142016-08-22 16:08:15 -07008443
Cheng Chen4a6977a2017-12-27 12:27:39 -08008444 switch (cur_type) {
8445 case COMPOUND_AVERAGE:
8446 av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, &orig_dst,
8447 bsize);
8448 av1_subtract_plane(x, bsize, 0);
8449 rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
8450 &tmp_skip_txfm_sb, &tmp_skip_sse_sb,
8451 INT64_MAX);
8452 if (rd != INT64_MAX)
8453 best_rd_cur =
8454 RDCOST(x->rdmult, rs2 + rate_mv + rate_sum, dist_sum);
8455 best_rd_compound = best_rd_cur;
8456 break;
8457 case COMPOUND_WEDGE:
8458 if (x->source_variance > cpi->sf.disable_wedge_search_var_thresh &&
8459 best_rd_compound / 3 < ref_best_rd) {
8460 best_rd_cur = build_and_cost_compound_type(
8461 cpi, x, cur_mv, bsize, this_mode, rs2, rate_mv, &orig_dst,
8462 &tmp_rate_mv, preds0, preds1, strides, mi_row, mi_col);
8463 }
8464 break;
8465 case COMPOUND_SEG:
8466 if (x->source_variance > cpi->sf.disable_wedge_search_var_thresh &&
8467 best_rd_compound / 3 < ref_best_rd) {
8468 best_rd_cur = build_and_cost_compound_type(
8469 cpi, x, cur_mv, bsize, this_mode, rs2, rate_mv, &orig_dst,
8470 &tmp_rate_mv, preds0, preds1, strides, mi_row, mi_col);
8471 }
8472 break;
8473 default: assert(0); return 0;
8474 }
8475
8476 if (best_rd_cur < best_rd_compound) {
8477 best_rd_compound = best_rd_cur;
8478 best_compound_data.wedge_index = mbmi->wedge_index;
8479 best_compound_data.wedge_sign = mbmi->wedge_sign;
8480 best_compound_data.mask_type = mbmi->mask_type;
8481 memcpy(best_compound_data.seg_mask, xd->seg_mask,
8482 2 * MAX_SB_SQUARE * sizeof(uint8_t));
8483 best_compound_data.interinter_compound_type =
8484 mbmi->interinter_compound_type;
8485 best_compmode_interinter_cost = rs2;
8486 if (have_newmv_in_inter_mode(this_mode)) {
8487 if (use_masked_motion_search(cur_type)) {
8488 best_tmp_rate_mv = tmp_rate_mv;
8489 best_mv[0].as_int = mbmi->mv[0].as_int;
8490 best_mv[1].as_int = mbmi->mv[1].as_int;
8491 } else {
8492 best_mv[0].as_int = cur_mv[0].as_int;
8493 best_mv[1].as_int = cur_mv[1].as_int;
8494 }
8495 }
8496 }
8497 // reset to original mvs for next iteration
8498 mbmi->mv[0].as_int = cur_mv[0].as_int;
8499 mbmi->mv[1].as_int = cur_mv[1].as_int;
8500 }
8501 mbmi->wedge_index = best_compound_data.wedge_index;
8502 mbmi->wedge_sign = best_compound_data.wedge_sign;
8503 mbmi->mask_type = best_compound_data.mask_type;
8504 memcpy(xd->seg_mask, best_compound_data.seg_mask,
8505 2 * MAX_SB_SQUARE * sizeof(uint8_t));
8506 mbmi->interinter_compound_type =
8507 best_compound_data.interinter_compound_type;
8508 if (have_newmv_in_inter_mode(this_mode)) {
8509 mbmi->mv[0].as_int = best_mv[0].as_int;
8510 mbmi->mv[1].as_int = best_mv[1].as_int;
8511 if (use_masked_motion_search(mbmi->interinter_compound_type)) {
8512 rd_stats->rate += best_tmp_rate_mv - rate_mv;
8513 rate_mv = best_tmp_rate_mv;
8514 }
8515 }
8516
8517 if (ref_best_rd < INT64_MAX && best_rd_compound / 3 > ref_best_rd) {
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00008518 restore_dst_buf(xd, orig_dst, num_planes);
Cheng Chen4a6977a2017-12-27 12:27:39 -08008519#if CONFIG_JNT_COMP
8520 early_terminate = INT64_MAX;
8521 continue;
8522#else
8523 return INT64_MAX;
8524#endif // CONFIG_JNT_COMP
8525 }
8526
8527 pred_exists = 0;
8528
8529 compmode_interinter_cost = best_compmode_interinter_cost;
8530 }
8531
Cheng Chen4a6977a2017-12-27 12:27:39 -08008532 if (pred_exists == 0) {
8533 int tmp_rate;
8534 int64_t tmp_dist;
8535 av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, &orig_dst, bsize);
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00008536 model_rd_for_sb(cpi, bsize, x, xd, 0, num_planes - 1, &tmp_rate,
Cheng Chen4a6977a2017-12-27 12:27:39 -08008537 &tmp_dist, &skip_txfm_sb, &skip_sse_sb);
8538 rd = RDCOST(x->rdmult, rs + tmp_rate, tmp_dist);
8539 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07008540
Cheng Chen4a6977a2017-12-27 12:27:39 -08008541 if (!is_comp_pred)
8542 args->single_filter[this_mode][refs[0]] =
8543 av1_extract_interp_filter(mbmi->interp_filters, 0);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008544
Cheng Chen4a6977a2017-12-27 12:27:39 -08008545 if (args->modelled_rd != NULL) {
8546 if (is_comp_pred) {
8547 const int mode0 = compound_ref0_mode(this_mode);
8548 const int mode1 = compound_ref1_mode(this_mode);
8549 const int64_t mrd = AOMMIN(args->modelled_rd[mode0][refs[0]],
8550 args->modelled_rd[mode1][refs[1]]);
8551 if (rd / 4 * 3 > mrd && ref_best_rd < INT64_MAX) {
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00008552 restore_dst_buf(xd, orig_dst, num_planes);
Cheng Chen4a6977a2017-12-27 12:27:39 -08008553#if CONFIG_JNT_COMP
8554 early_terminate = INT64_MAX;
8555 continue;
8556#else
Yaowu Xuc27fc142016-08-22 16:08:15 -07008557 return INT64_MAX;
Cheng Chen4a6977a2017-12-27 12:27:39 -08008558#endif // CONFIG_JNT_COMP
8559 }
Yue Chenb8aa3992017-12-13 15:27:52 -08008560 } else {
Cheng Chen4a6977a2017-12-27 12:27:39 -08008561 args->modelled_rd[this_mode][refs[0]] = rd;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008562 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07008563 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07008564
Cheng Chen4a6977a2017-12-27 12:27:39 -08008565 if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
8566 // if current pred_error modeled rd is substantially more than the best
8567 // so far, do not bother doing full rd
8568 if (rd / 2 > ref_best_rd) {
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00008569 restore_dst_buf(xd, orig_dst, num_planes);
Cheng Chen4a6977a2017-12-27 12:27:39 -08008570#if CONFIG_JNT_COMP
8571 early_terminate = INT64_MAX;
8572 continue;
8573#else
Yaowu Xuc27fc142016-08-22 16:08:15 -07008574 return INT64_MAX;
Cheng Chen4a6977a2017-12-27 12:27:39 -08008575#endif // CONFIG_JNT_COMP
8576 }
8577 }
8578
Cheng Chen4a6977a2017-12-27 12:27:39 -08008579 rd_stats->rate += compmode_interinter_cost;
8580
Yunqing Wang5f4f7382018-01-09 10:33:09 -08008581 ret_val =
8582 motion_mode_rd(cpi, x, bsize, rd_stats, rd_stats_y, rd_stats_uv,
8583 disable_skip, mode_mv, mi_row, mi_col, args, ref_best_rd,
Yue Chenc5024212018-01-23 17:28:37 -08008584 refs, rate_mv, &skip_txfm_sb, &skip_sse_sb, &orig_dst);
Cheng Chen4a6977a2017-12-27 12:27:39 -08008585#if CONFIG_JNT_COMP
8586 if (is_comp_pred && ret_val != INT64_MAX) {
8587 int64_t tmp_rd;
8588 const int skip_ctx = av1_get_skip_context(xd);
8589 if (RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) <
8590 RDCOST(x->rdmult, 0, rd_stats->sse))
8591 tmp_rd = RDCOST(x->rdmult, rd_stats->rate + x->skip_cost[skip_ctx][0],
8592 rd_stats->dist);
8593 else
8594 tmp_rd = RDCOST(x->rdmult,
8595 rd_stats->rate + x->skip_cost[skip_ctx][1] -
8596 rd_stats_y->rate - rd_stats_uv->rate,
8597 rd_stats->sse);
8598
8599 if (tmp_rd < best_rd) {
8600 best_rd_stats = *rd_stats;
8601 best_rd_stats_y = *rd_stats_y;
8602 best_rd_stats_uv = *rd_stats_uv;
8603 best_compound_idx = mbmi->compound_idx;
8604 best_ret_val = ret_val;
8605 best_rd = tmp_rd;
8606 best_mbmi = *mbmi;
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00008607 for (i = 0; i < num_planes; ++i)
Cheng Chen4a6977a2017-12-27 12:27:39 -08008608 memcpy(best_blk_skip[i], x->blk_skip[i],
8609 sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
8610 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07008611 }
8612 }
Cheng Chen4a6977a2017-12-27 12:27:39 -08008613 // re-instate status of the best choice
8614 if (is_comp_pred && best_ret_val != INT64_MAX) {
8615 *rd_stats = best_rd_stats;
8616 *rd_stats_y = best_rd_stats_y;
8617 *rd_stats_uv = best_rd_stats_uv;
8618 mbmi->compound_idx = best_compound_idx;
8619 ret_val = best_ret_val;
8620 *mbmi = best_mbmi;
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00008621 for (i = 0; i < num_planes; ++i)
Cheng Chen4a6977a2017-12-27 12:27:39 -08008622 memcpy(x->blk_skip[i], best_blk_skip[i],
8623 sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
8624 }
8625 if (early_terminate == INT64_MAX) return INT64_MAX;
8626#endif // CONFIG_JNT_COMP
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08008627 if (ret_val != 0) return ret_val;
Angie Chiang76159122016-11-09 12:13:22 -08008628
Yaowu Xuc27fc142016-08-22 16:08:15 -07008629 return 0; // The rate-distortion cost will be re-calculated by caller.
8630}
8631
#if CONFIG_INTRABC
// RD search for the IntraBC (intra block copy) mode of the current block.
// Searches for a displacement vector (DV) pointing into the already-coded
// region of the current frame (first the area above the current superblock,
// then the area to its left), evaluates the resulting prediction, and when
// it beats best_rd updates *rd_cost, *mbmi, x->skip and x->blk_skip[0].
// Returns the best RD cost found (returns best_rd unchanged when IntraBC
// never wins; returns INT64_MAX immediately when IntraBC is disallowed).
static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
                                       RD_STATS *rd_cost, BLOCK_SIZE bsize,
                                       int64_t best_rd) {
  const AV1_COMMON *const cm = &cpi->common;
  if (!av1_allow_intrabc(cm)) return INT64_MAX;
  const int num_planes = av1_num_planes(cm);

  MACROBLOCKD *const xd = &x->e_mbd;
  const TileInfo *tile = &xd->tile;
  MODE_INFO *const mi = xd->mi[0];
  // Recover mi_row/mi_col from the precomputed distances to the frame edges
  // (mb_to_*_edge is in 1/8-pel units, hence the 8 * MI_SIZE divisor).
  const int mi_row = -xd->mb_to_top_edge / (8 * MI_SIZE);
  const int mi_col = -xd->mb_to_left_edge / (8 * MI_SIZE);
  const int w = block_size_wide[bsize];
  const int h = block_size_high[bsize];
  const int sb_row = mi_row >> cm->mib_size_log2;
  const int sb_col = mi_col >> cm->mib_size_log2;

  // Build the DV reference list exactly like a regular MV reference list,
  // but with INTRA_FRAME as the "reference frame".
  MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  MV_REFERENCE_FRAME ref_frame = INTRA_FRAME;
  int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
  av1_find_mv_refs(cm, xd, mi, ref_frame, &mbmi_ext->ref_mv_count[ref_frame],
                   mbmi_ext->ref_mv_stack[ref_frame],
                   mbmi_ext->compound_mode_context, candidates, mi_row, mi_col,
                   NULL, NULL, mbmi_ext->mode_context);

  int_mv nearestmv, nearmv;
#if CONFIG_AMVR
  av1_find_best_ref_mvs(0, candidates, &nearestmv, &nearmv, 0);
#else
  av1_find_best_ref_mvs(0, candidates, &nearestmv, &nearmv);
#endif

  // Prefer the nearest candidate as the DV predictor; fall back to a
  // default reference DV when no non-zero candidate exists.
  int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
  if (dv_ref.as_int == 0)
    av1_find_ref_dv(&dv_ref, tile, cm->mib_size, mi_row, mi_col);
  // Ref DV should not have sub-pel.
  assert((dv_ref.as_mv.col & 7) == 0);
  assert((dv_ref.as_mv.row & 7) == 0);
  mbmi_ext->ref_mvs[INTRA_FRAME][0] = dv_ref;

  // Point the prediction buffers of all planes at the current
  // (partially reconstructed) frame, since IntraBC predicts from it.
  struct buf_2d yv12_mb[MAX_MB_PLANE];
  av1_setup_pred_block(xd, yv12_mb, xd->cur_buf, mi_row, mi_col, NULL, NULL,
                       num_planes);
  for (int i = 0; i < num_planes; ++i) {
    xd->plane[i].pre[0] = yv12_mb[i];
  }

  enum IntrabcMotionDirection {
    IBC_MOTION_ABOVE,
    IBC_MOTION_LEFT,
    IBC_MOTION_DIRECTIONS
  };

  MB_MODE_INFO *mbmi = &mi->mbmi;
  MB_MODE_INFO best_mbmi = *mbmi;
  RD_STATS best_rdcost = *rd_cost;
  int best_skip = x->skip;

  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 8] = { 0 };
  // Run one full-pel DV search per allowed source region (above, then left).
  for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE;
       dir < IBC_MOTION_DIRECTIONS; ++dir) {
    const MvLimits tmp_mv_limits = x->mv_limits;
    // Restrict the search range so the DV can only reference already-coded
    // pixels for the chosen direction.
    switch (dir) {
      case IBC_MOTION_ABOVE:
        x->mv_limits.col_min = (tile->mi_col_start - mi_col) * MI_SIZE;
        x->mv_limits.col_max = (tile->mi_col_end - mi_col) * MI_SIZE - w;
        x->mv_limits.row_min = (tile->mi_row_start - mi_row) * MI_SIZE;
        x->mv_limits.row_max = (sb_row * cm->mib_size - mi_row) * MI_SIZE - h;
        break;
      case IBC_MOTION_LEFT:
        x->mv_limits.col_min = (tile->mi_col_start - mi_col) * MI_SIZE;
        x->mv_limits.col_max = (sb_col * cm->mib_size - mi_col) * MI_SIZE - w;
        // TODO(aconverse@google.com): Minimize the overlap between above and
        // left areas.
        x->mv_limits.row_min = (tile->mi_row_start - mi_row) * MI_SIZE;
        int bottom_coded_mi_edge =
            AOMMIN((sb_row + 1) * cm->mib_size, tile->mi_row_end);
        x->mv_limits.row_max = (bottom_coded_mi_edge - mi_row) * MI_SIZE - h;
        break;
      default: assert(0);
    }
    // The restricted range must stay within the original limits.
    assert(x->mv_limits.col_min >= tmp_mv_limits.col_min);
    assert(x->mv_limits.col_max <= tmp_mv_limits.col_max);
    assert(x->mv_limits.row_min >= tmp_mv_limits.row_min);
    assert(x->mv_limits.row_max <= tmp_mv_limits.row_max);
    av1_set_mv_search_range(&x->mv_limits, &dv_ref.as_mv);

    // Skip this direction when the clamped range is empty.
    if (x->mv_limits.col_max < x->mv_limits.col_min ||
        x->mv_limits.row_max < x->mv_limits.row_min) {
      x->mv_limits = tmp_mv_limits;
      continue;
    }

    int step_param = cpi->mv_step_param;
    MV mvp_full = dv_ref.as_mv;
    // Convert the 1/8-pel predictor to full-pel for the search start point.
    mvp_full.col >>= 3;
    mvp_full.row >>= 3;
    int sadpb = x->sadperbit16;
    int cost_list[5];
#if CONFIG_HASH_ME
    int bestsme = av1_full_pixel_search(
        cpi, x, bsize, &mvp_full, step_param, sadpb,
        cond_cost_list(cpi, cost_list), &dv_ref.as_mv, INT_MAX, 1,
        (MI_SIZE * mi_col), (MI_SIZE * mi_row), 1);
#else
    int bestsme = av1_full_pixel_search(cpi, x, bsize, &mvp_full, step_param,
                                        sadpb, cond_cost_list(cpi, cost_list),
                                        &dv_ref.as_mv, INT_MAX, 1);
#endif

    // Restore the caller's limits before validating the search result.
    x->mv_limits = tmp_mv_limits;
    if (bestsme == INT_MAX) continue;
    mvp_full = x->best_mv.as_mv;
    // Scale the full-pel result back to 1/8-pel units.
    MV dv = {.row = mvp_full.row * 8, .col = mvp_full.col * 8 };
    if (mv_check_bounds(&x->mv_limits, &dv)) continue;
    if (!av1_is_dv_valid(dv, tile, mi_row, mi_col, bsize, cm->mib_size_log2))
      continue;

    // DV should not have sub-pel.
    assert((dv.col & 7) == 0);
    assert((dv.row & 7) == 0);
    // Configure the block as an IntraBC block and build its prediction.
    memset(&mbmi->palette_mode_info, 0, sizeof(mbmi->palette_mode_info));
    mbmi->use_intrabc = 1;
    mbmi->mode = DC_PRED;
    mbmi->uv_mode = UV_DC_PRED;
    mbmi->mv[0].as_mv = dv;
    mbmi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
    mbmi->skip = 0;
    x->skip = 0;
    av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);

    int *dvcost[2] = { (int *)&cpi->dv_cost[0][MV_MAX],
                       (int *)&cpi->dv_cost[1][MV_MAX] };
    // TODO(aconverse@google.com): The full motion field defining discount
    // in MV_COST_WEIGHT is too large. Explore other values.
    int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, cpi->dv_joint_cost,
                                  dvcost, MV_COST_WEIGHT_SUB);
    const int rate_mode = x->intrabc_cost[1];
    RD_STATS rd_stats, rd_stats_uv;
    av1_subtract_plane(x, bsize, 0);
    if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) {
      // Intrabc
      select_tx_type_yrd(cpi, x, &rd_stats, bsize, mi_row, mi_col, INT64_MAX);
    } else {
      // Fixed transform size: propagate the chosen tx_size to all sub-units
      // and record a uniform block-skip map.
      int idx, idy;
      super_block_yrd(cpi, x, &rd_stats, bsize, INT64_MAX);
      for (idy = 0; idy < xd->n8_h; ++idy)
        for (idx = 0; idx < xd->n8_w; ++idx)
          mbmi->inter_tx_size[idy][idx] = mbmi->tx_size;
      memset(x->blk_skip[0], rd_stats.skip,
             sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
    }

    super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
    av1_merge_rd_stats(&rd_stats, &rd_stats_uv);
#if CONFIG_RD_DEBUG
    mbmi->rd_stats = rd_stats;
#endif

    const int skip_ctx = av1_get_skip_context(xd);

    // Evaluate the non-skip variant (code the residual).
    RD_STATS rdc_noskip;
    av1_init_rd_stats(&rdc_noskip);
    rdc_noskip.rate =
        rate_mode + rate_mv + rd_stats.rate + x->skip_cost[skip_ctx][0];
    rdc_noskip.dist = rd_stats.dist;
    rdc_noskip.rdcost = RDCOST(x->rdmult, rdc_noskip.rate, rdc_noskip.dist);
    if (rdc_noskip.rdcost < best_rd) {
      best_rd = rdc_noskip.rdcost;
      best_mbmi = *mbmi;
      best_skip = x->skip;
      best_rdcost = rdc_noskip;
      memcpy(best_blk_skip, x->blk_skip[0],
             sizeof(x->blk_skip[0][0]) * xd->n8_h * xd->n8_w * 4);
    }

    // Evaluate the skip variant (drop the residual; distortion becomes the
    // SSE of the pure prediction). Not applicable in lossless mode.
    if (!xd->lossless[mbmi->segment_id]) {
      x->skip = 1;
      mbmi->skip = 1;
      RD_STATS rdc_skip;
      av1_init_rd_stats(&rdc_skip);
      rdc_skip.rate = rate_mode + rate_mv + x->skip_cost[skip_ctx][1];
      rdc_skip.dist = rd_stats.sse;
      rdc_skip.rdcost = RDCOST(x->rdmult, rdc_skip.rate, rdc_skip.dist);
      if (rdc_skip.rdcost < best_rd) {
        best_rd = rdc_skip.rdcost;
        best_mbmi = *mbmi;
        best_skip = x->skip;
        best_rdcost = rdc_skip;
        memcpy(best_blk_skip, x->blk_skip[0],
               sizeof(x->blk_skip[0][0]) * xd->n8_h * xd->n8_w * 4);
      }
    }
  }
  // Commit the overall best decision back to the encoder state.
  *mbmi = best_mbmi;
  *rd_cost = best_rdcost;
  x->skip = best_skip;
  memcpy(x->blk_skip[0], best_blk_skip,
         sizeof(x->blk_skip[0][0]) * xd->n8_h * xd->n8_w * 4);
  return best_rd;
}
#endif  // CONFIG_INTRABC
8835
// Picks the best intra (and, when enabled, IntraBC) mode for the current
// block: runs luma intra RDO, then chroma intra RDO when luma beat best_rd,
// fills *rd_cost with the combined rate/distortion/RD cost, and snapshots
// the winning mode info into ctx. Leaves rd_cost->rate == INT_MAX when no
// mode beats best_rd.
void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
                               int mi_col, RD_STATS *rd_cost, BLOCK_SIZE bsize,
                               PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const int num_planes = av1_num_planes(cm);
  int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
  int y_skip = 0, uv_skip = 0;
  int64_t dist_y = 0, dist_uv = 0;
  TX_SIZE max_uv_tx_size;

  // mi_row/mi_col (and cm under some configs) are only used inside the
  // CONFIG_CFL block below; silence unused-parameter warnings otherwise.
  (void)cm;
  (void)mi_row;
  (void)mi_col;

  ctx->skip = 0;
  mbmi->ref_frame[0] = INTRA_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
#if CONFIG_INTRABC
  mbmi->use_intrabc = 0;
  mbmi->mv[0].as_int = 0;
#endif  // CONFIG_INTRABC

  const int64_t intra_yrd =
      rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y,
                             &y_skip, bsize, best_rd, ctx);

  // Only bother with chroma if the luma intra mode already beats best_rd.
  if (intra_yrd < best_rd) {
#if CONFIG_CFL
    // Only store reconstructed luma when there's chroma RDO. When there's no
    // chroma RDO, the reconstructed luma will be stored in encode_superblock().
    xd->cfl.store_y = !x->skip_chroma_rd;
    if (xd->cfl.store_y) {
      // Perform one extra call to txfm_rd_in_plane(), with the values chosen
      // during luma RDO, so we can store reconstructed luma values
      memcpy(x->blk_skip[0], ctx->blk_skip[0],
             sizeof(uint8_t) * ctx->num_4x4_blk);
      av1_encode_intra_block_plane(cpi, x, bsize, AOM_PLANE_Y, x->optimize,
                                   mi_row, mi_col);
      xd->cfl.store_y = 0;
    }
#endif  // CONFIG_CFL
    if (num_planes > 1) {
      max_uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
      init_sbuv_mode(mbmi);
      if (!x->skip_chroma_rd)
        rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, &dist_uv,
                                &uv_skip, bsize, max_uv_tx_size);
    }

    if (y_skip && (uv_skip || x->skip_chroma_rd)) {
      // Whole block is skipped: drop the token-only rate and pay the
      // skip-flag cost instead.
      rd_cost->rate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
                      x->skip_cost[av1_get_skip_context(xd)][1];
      rd_cost->dist = dist_y + dist_uv;
    } else {
      rd_cost->rate =
          rate_y + rate_uv + x->skip_cost[av1_get_skip_context(xd)][0];
      rd_cost->dist = dist_y + dist_uv;
    }
    rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
  } else {
    // Luma alone already exceeds best_rd: signal failure to the caller.
    rd_cost->rate = INT_MAX;
  }

#if CONFIG_INTRABC
  // Let IntraBC compete against the intra result found above.
  if (rd_cost->rate != INT_MAX && rd_cost->rdcost < best_rd)
    best_rd = rd_cost->rdcost;
  if (rd_pick_intrabc_mode_sb(cpi, x, rd_cost, bsize, best_rd) < best_rd) {
    ctx->skip = x->skip;  // FIXME where is the proper place to set this?!
    memcpy(ctx->blk_skip[0], x->blk_skip[0],
           sizeof(x->blk_skip[0][0]) * ctx->num_4x4_blk);
    assert(rd_cost->rate != INT_MAX);
  }
#endif
  if (rd_cost->rate == INT_MAX) return;

  // Snapshot the winning mode decision for the caller.
  ctx->mic = *xd->mi[0];
  ctx->mbmi_ext = *x->mbmi_ext;
}
8916
Yaowu Xuc27fc142016-08-22 16:08:15 -07008917// Do we have an internal image edge (e.g. formatting bars).
Urvang Joshi52648442016-10-13 17:27:51 -07008918int av1_internal_image_edge(const AV1_COMP *cpi) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07008919 return (cpi->oxcf.pass == 2) &&
8920 ((cpi->twopass.this_frame_stats.inactive_zone_rows > 0) ||
8921 (cpi->twopass.this_frame_stats.inactive_zone_cols > 0));
8922}
8923
8924// Checks to see if a super block is on a horizontal image edge.
8925// In most cases this is the "real" edge unless there are formatting
8926// bars embedded in the stream.
Urvang Joshi52648442016-10-13 17:27:51 -07008927int av1_active_h_edge(const AV1_COMP *cpi, int mi_row, int mi_step) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07008928 int top_edge = 0;
8929 int bottom_edge = cpi->common.mi_rows;
8930 int is_active_h_edge = 0;
8931
8932 // For two pass account for any formatting bars detected.
8933 if (cpi->oxcf.pass == 2) {
Urvang Joshi52648442016-10-13 17:27:51 -07008934 const TWO_PASS *const twopass = &cpi->twopass;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008935
8936 // The inactive region is specified in MBs not mi units.
8937 // The image edge is in the following MB row.
8938 top_edge += (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
8939
8940 bottom_edge -= (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
Yaowu Xuf883b422016-08-30 14:01:10 -07008941 bottom_edge = AOMMAX(top_edge, bottom_edge);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008942 }
8943
8944 if (((top_edge >= mi_row) && (top_edge < (mi_row + mi_step))) ||
8945 ((bottom_edge >= mi_row) && (bottom_edge < (mi_row + mi_step)))) {
8946 is_active_h_edge = 1;
8947 }
8948 return is_active_h_edge;
8949}
8950
8951// Checks to see if a super block is on a vertical image edge.
8952// In most cases this is the "real" edge unless there are formatting
8953// bars embedded in the stream.
Urvang Joshi52648442016-10-13 17:27:51 -07008954int av1_active_v_edge(const AV1_COMP *cpi, int mi_col, int mi_step) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07008955 int left_edge = 0;
8956 int right_edge = cpi->common.mi_cols;
8957 int is_active_v_edge = 0;
8958
8959 // For two pass account for any formatting bars detected.
8960 if (cpi->oxcf.pass == 2) {
Urvang Joshi52648442016-10-13 17:27:51 -07008961 const TWO_PASS *const twopass = &cpi->twopass;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008962
8963 // The inactive region is specified in MBs not mi units.
8964 // The image edge is in the following MB row.
8965 left_edge += (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
8966
8967 right_edge -= (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
Yaowu Xuf883b422016-08-30 14:01:10 -07008968 right_edge = AOMMAX(left_edge, right_edge);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008969 }
8970
8971 if (((left_edge >= mi_col) && (left_edge < (mi_col + mi_step))) ||
8972 ((right_edge >= mi_col) && (right_edge < (mi_col + mi_step)))) {
8973 is_active_v_edge = 1;
8974 }
8975 return is_active_v_edge;
8976}
8977
8978// Checks to see if a super block is at the edge of the active image.
8979// In most cases this is the "real" edge unless there are formatting
8980// bars embedded in the stream.
Urvang Joshi52648442016-10-13 17:27:51 -07008981int av1_active_edge_sb(const AV1_COMP *cpi, int mi_row, int mi_col) {
Yaowu Xuf883b422016-08-30 14:01:10 -07008982 return av1_active_h_edge(cpi, mi_row, cpi->common.mib_size) ||
8983 av1_active_v_edge(cpi, mi_col, cpi->common.mib_size);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008984}
8985
Urvang Joshi52648442016-10-13 17:27:51 -07008986static void restore_uv_color_map(const AV1_COMP *const cpi, MACROBLOCK *x) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07008987 MACROBLOCKD *const xd = &x->e_mbd;
8988 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
8989 PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
8990 const BLOCK_SIZE bsize = mbmi->sb_type;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008991 int src_stride = x->plane[1].src.stride;
8992 const uint8_t *const src_u = x->plane[1].src.buf;
8993 const uint8_t *const src_v = x->plane[2].src.buf;
Hui Su5891f982017-12-18 16:18:23 -08008994 int *const data = x->palette_buffer->kmeans_data_buf;
8995 int centroids[2 * PALETTE_MAX_SIZE];
Yaowu Xuc27fc142016-08-22 16:08:15 -07008996 uint8_t *const color_map = xd->plane[1].color_index_map;
8997 int r, c;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008998 const uint16_t *const src_u16 = CONVERT_TO_SHORTPTR(src_u);
8999 const uint16_t *const src_v16 = CONVERT_TO_SHORTPTR(src_v);
Urvang Joshi56ba91b2017-01-10 13:22:09 -08009000 int plane_block_width, plane_block_height, rows, cols;
9001 av1_get_block_dimensions(bsize, 1, xd, &plane_block_width,
9002 &plane_block_height, &rows, &cols);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009003 (void)cpi;
9004
9005 for (r = 0; r < rows; ++r) {
9006 for (c = 0; c < cols; ++c) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07009007 if (cpi->common.use_highbitdepth) {
9008 data[(r * cols + c) * 2] = src_u16[r * src_stride + c];
9009 data[(r * cols + c) * 2 + 1] = src_v16[r * src_stride + c];
9010 } else {
Yaowu Xuc27fc142016-08-22 16:08:15 -07009011 data[(r * cols + c) * 2] = src_u[r * src_stride + c];
9012 data[(r * cols + c) * 2 + 1] = src_v[r * src_stride + c];
Yaowu Xuc27fc142016-08-22 16:08:15 -07009013 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07009014 }
9015 }
9016
9017 for (r = 1; r < 3; ++r) {
9018 for (c = 0; c < pmi->palette_size[1]; ++c) {
9019 centroids[c * 2 + r - 1] = pmi->palette_colors[r * PALETTE_MAX_SIZE + c];
9020 }
9021 }
9022
Yaowu Xuf883b422016-08-30 14:01:10 -07009023 av1_calc_indices(data, centroids, color_map, rows * cols,
9024 pmi->palette_size[1], 2);
Urvang Joshi56ba91b2017-01-10 13:22:09 -08009025 extend_palette_color_map(color_map, cols, rows, plane_block_width,
9026 plane_block_height);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009027}
9028
Yaowu Xuf883b422016-08-30 14:01:10 -07009029static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x,
9030 const MACROBLOCKD *xd, int mi_row,
9031 int mi_col, const uint8_t *above,
9032 int above_stride, const uint8_t *left,
Yue Chene9638cc2016-10-10 12:37:54 -07009033 int left_stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009034
#if CONFIG_EXT_SKIP
// Estimates the RD cost of coding the current block in skip mode.
// Scans the per-block-size mode map for the single mode that matches skip
// mode's constraints (compound NEAREST_NEARESTMV over the frame's designated
// skip-mode reference pair), configures mbmi accordingly, and evaluates it
// via skip_mode_rd(). At most one mode is evaluated; the loop always exits
// via `break` or `continue`.
// Results are communicated through x (skip_mode_index_candidate and, via
// skip_mode_rd, x->skip_mode_rdcost) — NOTE(review): caller appears to
// pre-initialize x->skip_mode_rdcost; confirm, since this function only sets
// it to INT64_MAX on out-of-bounds MVs before the early `break` checks.
static void estimate_skip_mode_rdcost(
    const AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *const x,
    BLOCK_SIZE bsize, int mi_row, int mi_col,
    int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME],
    struct buf_2d yv12_mb[TOTAL_REFS_PER_FRAME][MAX_MB_PLANE]) {
  const AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;

  int *mode_map = tile_data->mode_map[bsize];
  // Per-reference availability flags, indexed by MV_REFERENCE_FRAME
  // (entry 0 corresponds to INTRA_FRAME and is unused).
  static const int flag_list[TOTAL_REFS_PER_FRAME] = { 0,
                                                       AOM_LAST_FLAG,
                                                       AOM_LAST2_FLAG,
                                                       AOM_LAST3_FLAG,
                                                       AOM_GOLD_FLAG,
                                                       AOM_BWD_FLAG,
                                                       AOM_ALT2_FLAG,
                                                       AOM_ALT_FLAG };
  int i;

  for (int midx = 0; midx < MAX_MODES; ++midx) {
    const int mode_index = mode_map[midx];
    // Record which mode-map entry is being tried for skip mode.
    x->skip_mode_index_candidate = mode_index;

    const MV_REFERENCE_FRAME ref_frame =
        av1_mode_order[mode_index].ref_frame[0];
    const MV_REFERENCE_FRAME second_ref_frame =
        av1_mode_order[mode_index].ref_frame[1];
    const int comp_pred = second_ref_frame > INTRA_FRAME;

    // Skip mode is only defined for compound prediction.
    if (!comp_pred) continue;

    const PREDICTION_MODE this_mode = av1_mode_order[mode_index].mode;

    // Both references must be available in this frame.
    if (!(cpi->ref_frame_flags & flag_list[ref_frame])) continue;
    if (comp_pred && !(cpi->ref_frame_flags & flag_list[second_ref_frame]))
      continue;
    // Check whether current refs/mode align with skip_mode
    if (!(ref_frame == (LAST_FRAME + cm->ref_frame_idx_0) &&
          second_ref_frame == (LAST_FRAME + cm->ref_frame_idx_1) &&
          this_mode == NEAREST_NEARESTMV)) {
      continue;
    }

    // Derive the compound MVs from the corresponding single-reference
    // NEAREST MVs.
    frame_mv[this_mode][ref_frame].as_int =
        frame_mv[compound_ref0_mode(this_mode)][ref_frame].as_int;
    frame_mv[this_mode][second_ref_frame].as_int =
        frame_mv[compound_ref1_mode(this_mode)][second_ref_frame].as_int;

    // No valid MV for either reference: skip mode cannot be evaluated.
    if (frame_mv[this_mode][ref_frame].as_int == INVALID_MV ||
        frame_mv[this_mode][second_ref_frame].as_int == INVALID_MV)
      break;

    mbmi->mode = this_mode;
    mbmi->uv_mode = UV_DC_PRED;
    mbmi->ref_frame[0] = ref_frame;
    mbmi->ref_frame[1] = second_ref_frame;

    // Obtain NEAREST_NEARESTMV.
    {
      // First pass: clamp the derived compound MVs; abandon skip mode if
      // either falls outside the MV limits.
      for (i = 0; i < 2; ++i) {
        int_mv cur_mv = frame_mv[mbmi->mode][mbmi->ref_frame[i]];
        clamp_mv2(&cur_mv.as_mv, xd);
        if (mv_check_bounds(&x->mv_limits, &cur_mv.as_mv)) {
          x->skip_mode_rdcost = INT64_MAX;
          break;
        }
        mbmi->mv[i].as_int = cur_mv.as_int;
      }
      if (x->skip_mode_rdcost == INT64_MAX) break;

      // If a ref-MV stack exists for this reference pair, prefer its top
      // entry (this_mv/comp_mv) over the frame_mv-derived values.
      const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
      if (mbmi_ext->ref_mv_count[ref_frame_type] > 0) {
        for (i = 0; i < 2; ++i) {
          int_mv cur_mv =
              (i == 0) ? mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv
                       : mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
          clamp_mv2(&cur_mv.as_mv, xd);
          if (mv_check_bounds(&x->mv_limits, &cur_mv.as_mv)) {
            x->skip_mode_rdcost = INT64_MAX;
            break;
          }
          mbmi->mv[i].as_int = cur_mv.as_int;
        }
        if (x->skip_mode_rdcost == INT64_MAX) break;
      }
    }

    // Skip mode implies the plainest compound configuration: no filter
    // intra, no interintra, average compound, simple translation.
#if CONFIG_FILTER_INTRA
    mbmi->filter_intra_mode_info.use_filter_intra = 0;
#endif  // CONFIG_FILTER_INTRA
    mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
#if CONFIG_JNT_COMP
    mbmi->comp_group_idx = 0;
    mbmi->compound_idx = x->compound_idx;
#endif  // CONFIG_JNT_COMP
    mbmi->interinter_compound_type = COMPOUND_AVERAGE;
    mbmi->motion_mode = SIMPLE_TRANSLATION;
    mbmi->ref_mv_idx = 0;
    mbmi->skip_mode = mbmi->skip = 1;

    set_default_interp_filters(mbmi, cm->interp_filter);

    // Point the prediction buffers at the two selected references.
    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
    for (i = 0; i < num_planes; i++) {
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
      xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
    }

    // Save the current destination buffers so skip_mode_rd can predict into
    // them (and/or restore them) without losing the originals.
    BUFFER_SET orig_dst;
    for (i = 0; i < num_planes; i++) {
      orig_dst.plane[i] = xd->plane[i].dst.buf;
      orig_dst.stride[i] = xd->plane[i].dst.stride;
    }

    // Obtain the rdcost for skip_mode.
    skip_mode_rd(cpi, x, bsize, mi_row, mi_col, &orig_dst);
    break;
  }
}
#endif  // CONFIG_EXT_SKIP
9159
Urvang Joshi52648442016-10-13 17:27:51 -07009160void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
Yaowu Xuf883b422016-08-30 14:01:10 -07009161 MACROBLOCK *x, int mi_row, int mi_col,
Sebastien Alaiwan0cf54d42017-10-16 16:10:04 +02009162 RD_STATS *rd_cost, BLOCK_SIZE bsize,
9163 PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far) {
Urvang Joshi52648442016-10-13 17:27:51 -07009164 const AV1_COMMON *const cm = &cpi->common;
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00009165 const int num_planes = av1_num_planes(cm);
Urvang Joshi52648442016-10-13 17:27:51 -07009166 const RD_OPT *const rd_opt = &cpi->rd;
9167 const SPEED_FEATURES *const sf = &cpi->sf;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009168 MACROBLOCKD *const xd = &x->e_mbd;
9169 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
Hui Sue87fb232017-10-05 15:00:15 -07009170 const int try_palette =
9171 av1_allow_palette(cm->allow_screen_content_tools, mbmi->sb_type);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009172 PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
9173 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
9174 const struct segmentation *const seg = &cm->seg;
9175 PREDICTION_MODE this_mode;
9176 MV_REFERENCE_FRAME ref_frame, second_ref_frame;
9177 unsigned char segment_id = mbmi->segment_id;
9178 int comp_pred, i, k;
9179 int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
9180 struct buf_2d yv12_mb[TOTAL_REFS_PER_FRAME][MAX_MB_PLANE];
Yunqing Wang5f4f7382018-01-09 10:33:09 -08009181 // Save a set of single_newmv for each checked ref_mv.
9182 int_mv single_newmv[MAX_REF_MV_SERCH][TOTAL_REFS_PER_FRAME] = { { { 0 } } };
9183 int single_newmv_rate[MAX_REF_MV_SERCH][TOTAL_REFS_PER_FRAME] = { { 0 } };
9184 int single_newmv_valid[MAX_REF_MV_SERCH][TOTAL_REFS_PER_FRAME] = { { 0 } };
Zoe Liu7f24e1b2017-03-17 17:42:05 -07009185 int64_t modelled_rd[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
Sebastien Alaiwan365e6442017-10-16 11:35:00 +02009186 static const int flag_list[TOTAL_REFS_PER_FRAME] = { 0,
9187 AOM_LAST_FLAG,
9188 AOM_LAST2_FLAG,
9189 AOM_LAST3_FLAG,
9190 AOM_GOLD_FLAG,
9191 AOM_BWD_FLAG,
9192 AOM_ALT2_FLAG,
9193 AOM_ALT_FLAG };
Yaowu Xuc27fc142016-08-22 16:08:15 -07009194 int64_t best_rd = best_rd_so_far;
9195 int best_rate_y = INT_MAX, best_rate_uv = INT_MAX;
9196 int64_t best_pred_diff[REFERENCE_MODES];
9197 int64_t best_pred_rd[REFERENCE_MODES];
9198 MB_MODE_INFO best_mbmode;
Zoe Liu1eed2df2017-10-16 17:13:15 -07009199 const int skip_ctx = av1_get_skip_context(xd);
9200 int rate_skip0 = x->skip_cost[skip_ctx][0];
9201 int rate_skip1 = x->skip_cost[skip_ctx][1];
Yaowu Xuc27fc142016-08-22 16:08:15 -07009202 int best_mode_skippable = 0;
9203 int midx, best_mode_index = -1;
9204 unsigned int ref_costs_single[TOTAL_REFS_PER_FRAME];
Zoe Liuc082bbc2017-05-17 13:31:37 -07009205#if CONFIG_EXT_COMP_REFS
9206 unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME][TOTAL_REFS_PER_FRAME];
9207#else
Yaowu Xuc27fc142016-08-22 16:08:15 -07009208 unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME];
Zoe Liuc082bbc2017-05-17 13:31:37 -07009209#endif // CONFIG_EXT_COMP_REFS
Hui Su9d0c03d2017-12-27 16:05:23 -08009210 int *comp_inter_cost =
9211 x->comp_inter_cost[av1_get_reference_mode_context(cm, xd)];
Yaowu Xuc27fc142016-08-22 16:08:15 -07009212 int64_t best_intra_rd = INT64_MAX;
9213 unsigned int best_pred_sse = UINT_MAX;
9214 PREDICTION_MODE best_intra_mode = DC_PRED;
Urvang Joshifeb925f2016-12-05 10:37:29 -08009215 int rate_uv_intra[TX_SIZES_ALL], rate_uv_tokenonly[TX_SIZES_ALL];
9216 int64_t dist_uvs[TX_SIZES_ALL];
9217 int skip_uvs[TX_SIZES_ALL];
Luc Trudeaud6d9eee2017-07-12 12:36:50 -04009218 UV_PREDICTION_MODE mode_uv[TX_SIZES_ALL];
Urvang Joshifeb925f2016-12-05 10:37:29 -08009219 PALETTE_MODE_INFO pmi_uv[TX_SIZES_ALL];
Urvang Joshifeb925f2016-12-05 10:37:29 -08009220 int8_t uv_angle_delta[TX_SIZES_ALL];
Yaowu Xuc27fc142016-08-22 16:08:15 -07009221 int is_directional_mode, angle_stats_ready = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009222 uint8_t directional_mode_skip_mask[INTRA_MODES];
Yaowu Xuf883b422016-08-30 14:01:10 -07009223 const int intra_cost_penalty = av1_get_intra_cost_penalty(
Yaowu Xuc27fc142016-08-22 16:08:15 -07009224 cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
Yue Chenb23d00a2017-07-28 17:01:21 -07009225 const int *const intra_mode_cost = x->mbmode_cost[size_group_lookup[bsize]];
Yaowu Xuc27fc142016-08-22 16:08:15 -07009226 int best_skip2 = 0;
Zoe Liu97ad0582017-02-09 10:51:00 -08009227 uint16_t ref_frame_skip_mask[2] = { 0 };
Yaowu Xuc27fc142016-08-22 16:08:15 -07009228 uint32_t mode_skip_mask[TOTAL_REFS_PER_FRAME] = { 0 };
Yaowu Xuc27fc142016-08-22 16:08:15 -07009229 int mode_skip_start = sf->mode_skip_start + 1;
9230 const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];
9231 const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
9232 int64_t mode_threshold[MAX_MODES];
9233 int *mode_map = tile_data->mode_map[bsize];
9234 const int mode_search_skip_flags = sf->mode_search_skip_flags;
Yushin Cho77bba8d2016-11-04 16:36:56 -07009235
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -08009236 HandleInterModeArgs args = {
Yunqing Wang5f4f7382018-01-09 10:33:09 -08009237 { NULL }, { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
9238 { NULL }, { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
9239 NULL, NULL,
9240 NULL, NULL,
9241 { { 0 } },
Fergus Simpson073c6f32017-02-17 12:13:48 -08009242 };
9243
Jingning Hanae5cfde2016-11-30 12:01:44 -08009244 const int rows = block_size_high[bsize];
9245 const int cols = block_size_wide[bsize];
Yaowu Xuc27fc142016-08-22 16:08:15 -07009246 int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
9247 int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
9248 int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
9249 int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
Yaowu Xuc27fc142016-08-22 16:08:15 -07009250
Cheng Chenc683bf92017-12-13 09:21:40 -08009251 int64_t dist_refs[TOTAL_REFS_PER_FRAME];
9252 int dist_order_refs[TOTAL_REFS_PER_FRAME];
9253 int num_available_refs = 0;
9254 memset(dist_refs, -1, sizeof(dist_refs));
9255 memset(dist_order_refs, -1, sizeof(dist_order_refs));
9256
Yaowu Xuc27fc142016-08-22 16:08:15 -07009257 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
9258 int len = sizeof(uint16_t);
Jingning Hand064cf02017-06-01 10:00:39 -07009259 args.above_pred_buf[0] = CONVERT_TO_BYTEPTR(x->above_pred_buf);
9260 args.above_pred_buf[1] =
9261 CONVERT_TO_BYTEPTR(x->above_pred_buf + MAX_SB_SQUARE * len);
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -08009262 args.above_pred_buf[2] =
Jingning Hand064cf02017-06-01 10:00:39 -07009263 CONVERT_TO_BYTEPTR(x->above_pred_buf + 2 * MAX_SB_SQUARE * len);
9264 args.left_pred_buf[0] = CONVERT_TO_BYTEPTR(x->left_pred_buf);
9265 args.left_pred_buf[1] =
9266 CONVERT_TO_BYTEPTR(x->left_pred_buf + MAX_SB_SQUARE * len);
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -08009267 args.left_pred_buf[2] =
Jingning Hand064cf02017-06-01 10:00:39 -07009268 CONVERT_TO_BYTEPTR(x->left_pred_buf + 2 * MAX_SB_SQUARE * len);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009269 } else {
Jingning Hand064cf02017-06-01 10:00:39 -07009270 args.above_pred_buf[0] = x->above_pred_buf;
9271 args.above_pred_buf[1] = x->above_pred_buf + MAX_SB_SQUARE;
9272 args.above_pred_buf[2] = x->above_pred_buf + 2 * MAX_SB_SQUARE;
9273 args.left_pred_buf[0] = x->left_pred_buf;
9274 args.left_pred_buf[1] = x->left_pred_buf + MAX_SB_SQUARE;
9275 args.left_pred_buf[2] = x->left_pred_buf + 2 * MAX_SB_SQUARE;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009276 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07009277
Yaowu Xuf883b422016-08-30 14:01:10 -07009278 av1_zero(best_mbmode);
Urvang Joshib100db72016-10-12 16:28:56 -07009279 av1_zero(pmi_uv);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009280
Zoe Liufa8bad12018-01-23 14:32:31 -08009281 av1_collect_neighbors_ref_counts(xd);
9282
Yue Chen170678a2017-10-17 13:43:10 -07009283 estimate_ref_frame_costs(cm, xd, x, segment_id, ref_costs_single,
Hui Su9d0c03d2017-12-27 16:05:23 -08009284 ref_costs_comp);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009285
9286 for (i = 0; i < REFERENCE_MODES; ++i) best_pred_rd[i] = INT64_MAX;
Urvang Joshifeb925f2016-12-05 10:37:29 -08009287 for (i = 0; i < TX_SIZES_ALL; i++) rate_uv_intra[i] = INT_MAX;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009288 for (i = 0; i < TOTAL_REFS_PER_FRAME; ++i) x->pred_sse[i] = INT_MAX;
9289 for (i = 0; i < MB_MODE_COUNT; ++i) {
9290 for (k = 0; k < TOTAL_REFS_PER_FRAME; ++k) {
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -08009291 args.single_filter[i][k] = SWITCHABLE;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009292 }
9293 }
9294
Hui Suc05a4aa2018-01-12 15:34:32 -08009295 av1_invalid_rd_stats(rd_cost);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009296
9297 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
9298 x->pred_mv_sad[ref_frame] = INT_MAX;
9299 x->mbmi_ext->mode_context[ref_frame] = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009300 x->mbmi_ext->compound_mode_context[ref_frame] = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009301 if (cpi->ref_frame_flags & flag_list[ref_frame]) {
9302 assert(get_ref_frame_buffer(cpi, ref_frame) != NULL);
9303 setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
9304 frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
9305 }
9306 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
Sarah Parker2b9ec2e2017-10-30 17:34:08 -07009307 frame_mv[GLOBALMV][ref_frame].as_int =
Sarah Parkerae7c4582017-02-28 16:30:30 -08009308 gm_get_motion_vector(&cm->global_motion[ref_frame],
Luc Trudeau2eb9b842017-12-13 11:19:16 -05009309 cm->allow_high_precision_mv, bsize, mi_col, mi_row
RogerZhou3b635242017-09-19 10:06:46 -07009310#if CONFIG_AMVR
9311 ,
RogerZhou10a03802017-10-26 11:49:48 -07009312 cm->cur_frame_force_integer_mv
RogerZhou3b635242017-09-19 10:06:46 -07009313#endif
9314 )
David Barkercdcac6d2016-12-01 17:04:16 +00009315 .as_int;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009316 frame_mv[NEW_NEWMV][ref_frame].as_int = INVALID_MV;
Sarah Parker2b9ec2e2017-10-30 17:34:08 -07009317 frame_mv[GLOBAL_GLOBALMV][ref_frame].as_int =
Sarah Parkerae7c4582017-02-28 16:30:30 -08009318 gm_get_motion_vector(&cm->global_motion[ref_frame],
Luc Trudeau2eb9b842017-12-13 11:19:16 -05009319 cm->allow_high_precision_mv, bsize, mi_col, mi_row
RogerZhou3b635242017-09-19 10:06:46 -07009320#if CONFIG_AMVR
9321 ,
RogerZhou10a03802017-10-26 11:49:48 -07009322 cm->cur_frame_force_integer_mv
RogerZhou3b635242017-09-19 10:06:46 -07009323#endif
9324 )
Sarah Parkerc2d38712017-01-24 15:15:41 -08009325 .as_int;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009326 }
9327
Yaowu Xuc27fc142016-08-22 16:08:15 -07009328 for (; ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
9329 MODE_INFO *const mi = xd->mi[0];
9330 int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
9331 x->mbmi_ext->mode_context[ref_frame] = 0;
Yaowu Xuf883b422016-08-30 14:01:10 -07009332 av1_find_mv_refs(cm, xd, mi, ref_frame, &mbmi_ext->ref_mv_count[ref_frame],
9333 mbmi_ext->ref_mv_stack[ref_frame],
Sebastien Alaiwan0bdea0d2017-10-02 15:15:05 +02009334 mbmi_ext->compound_mode_context, candidates, mi_row,
9335 mi_col, NULL, NULL, mbmi_ext->mode_context);
Jingning Han731af492016-11-17 11:53:23 -08009336 if (mbmi_ext->ref_mv_count[ref_frame] < 2) {
9337 MV_REFERENCE_FRAME rf[2];
9338 av1_set_ref_frame(rf, ref_frame);
David Barkercdcac6d2016-12-01 17:04:16 +00009339 if (mbmi_ext->ref_mvs[rf[0]][0].as_int !=
Sarah Parker2b9ec2e2017-10-30 17:34:08 -07009340 frame_mv[GLOBALMV][rf[0]].as_int ||
David Barkercdcac6d2016-12-01 17:04:16 +00009341 mbmi_ext->ref_mvs[rf[0]][1].as_int !=
Sarah Parker2b9ec2e2017-10-30 17:34:08 -07009342 frame_mv[GLOBALMV][rf[0]].as_int ||
David Barkercdcac6d2016-12-01 17:04:16 +00009343 mbmi_ext->ref_mvs[rf[1]][0].as_int !=
Sarah Parker2b9ec2e2017-10-30 17:34:08 -07009344 frame_mv[GLOBALMV][rf[1]].as_int ||
9345 mbmi_ext->ref_mvs[rf[1]][1].as_int !=
9346 frame_mv[GLOBALMV][rf[1]].as_int)
Jingning Han731af492016-11-17 11:53:23 -08009347 mbmi_ext->mode_context[ref_frame] &= ~(1 << ALL_ZERO_FLAG_OFFSET);
9348 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07009349 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07009350
Yue Chen5329a2b2017-02-28 17:33:00 +08009351 av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col);
Jingning Hanad586b92017-05-23 10:24:57 -07009352
Yue Chenf7ba6472017-04-19 11:08:58 -07009353 if (check_num_overlappable_neighbors(mbmi) &&
9354 is_motion_variation_allowed_bsize(bsize)) {
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -08009355 av1_build_prediction_by_above_preds(cm, xd, mi_row, mi_col,
9356 args.above_pred_buf, dst_width1,
9357 dst_height1, args.above_pred_stride);
Yue Chen5329a2b2017-02-28 17:33:00 +08009358 av1_build_prediction_by_left_preds(cm, xd, mi_row, mi_col,
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -08009359 args.left_pred_buf, dst_width2,
9360 dst_height2, args.left_pred_stride);
Jingning Han91d9a792017-04-18 12:01:52 -07009361 av1_setup_dst_planes(xd->plane, bsize, get_frame_new_buffer(cm), mi_row,
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00009362 mi_col, num_planes);
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -08009363 calc_target_weighted_pred(cm, x, xd, mi_row, mi_col, args.above_pred_buf[0],
9364 args.above_pred_stride[0], args.left_pred_buf[0],
9365 args.left_pred_stride[0]);
Yue Chen5329a2b2017-02-28 17:33:00 +08009366 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07009367
9368 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
9369 if (!(cpi->ref_frame_flags & flag_list[ref_frame])) {
Zoe Liue9b15e22017-07-19 15:53:01 -07009370 // Skip checking missing references in both single and compound reference
9371 // modes. Note that a mode will be skipped iff both reference frames
9372 // are masked out.
Zoe Liuc082bbc2017-05-17 13:31:37 -07009373 ref_frame_skip_mask[0] |= (1 << ref_frame);
9374 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009375 } else {
9376 for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
9377 // Skip fixed mv modes for poor references
9378 if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) {
9379 mode_skip_mask[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
9380 break;
9381 }
9382 }
9383 }
9384 // If the segment reference frame feature is enabled....
9385 // then do nothing if the current ref frame is not allowed..
9386 if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
9387 get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
9388 ref_frame_skip_mask[0] |= (1 << ref_frame);
9389 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
9390 }
9391 }
9392
9393 // Disable this drop out case if the ref frame
9394 // segment level feature is enabled for this segment. This is to
9395 // prevent the possibility that we end up unable to pick any mode.
9396 if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
Sarah Parker2b9ec2e2017-10-30 17:34:08 -07009397 // Only consider GLOBALMV/ALTREF_FRAME for alt ref frame,
Yaowu Xuc27fc142016-08-22 16:08:15 -07009398 // unless ARNR filtering is enabled in which case we want
9399 // an unfiltered alternative. We allow near/nearest as well
9400 // because they may result in zero-zero MVs but be cheaper.
9401 if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
Sarah Parkere5299862016-08-16 14:57:37 -07009402 int_mv zeromv;
Sebastien Alaiwan365e6442017-10-16 11:35:00 +02009403 ref_frame_skip_mask[0] = (1 << LAST_FRAME) | (1 << LAST2_FRAME) |
9404 (1 << LAST3_FRAME) | (1 << BWDREF_FRAME) |
9405 (1 << ALTREF2_FRAME) | (1 << GOLDEN_FRAME);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009406 ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
9407 // TODO(zoeliu): To further explore whether following needs to be done for
9408 // BWDREF_FRAME as well.
9409 mode_skip_mask[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
David Barkercdcac6d2016-12-01 17:04:16 +00009410 zeromv.as_int = gm_get_motion_vector(&cm->global_motion[ALTREF_FRAME],
Sarah Parkerae7c4582017-02-28 16:30:30 -08009411 cm->allow_high_precision_mv, bsize,
Luc Trudeau2eb9b842017-12-13 11:19:16 -05009412 mi_col, mi_row
RogerZhou3b635242017-09-19 10:06:46 -07009413#if CONFIG_AMVR
9414 ,
RogerZhou10a03802017-10-26 11:49:48 -07009415 cm->cur_frame_force_integer_mv
RogerZhou3b635242017-09-19 10:06:46 -07009416#endif
9417 )
David Barkercdcac6d2016-12-01 17:04:16 +00009418 .as_int;
Sarah Parkere5299862016-08-16 14:57:37 -07009419 if (frame_mv[NEARMV][ALTREF_FRAME].as_int != zeromv.as_int)
Yaowu Xuc27fc142016-08-22 16:08:15 -07009420 mode_skip_mask[ALTREF_FRAME] |= (1 << NEARMV);
Sarah Parkere5299862016-08-16 14:57:37 -07009421 if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != zeromv.as_int)
Yaowu Xuc27fc142016-08-22 16:08:15 -07009422 mode_skip_mask[ALTREF_FRAME] |= (1 << NEARESTMV);
Sarah Parkere5299862016-08-16 14:57:37 -07009423 if (frame_mv[NEAREST_NEARESTMV][ALTREF_FRAME].as_int != zeromv.as_int)
Yaowu Xuc27fc142016-08-22 16:08:15 -07009424 mode_skip_mask[ALTREF_FRAME] |= (1 << NEAREST_NEARESTMV);
Sarah Parkere5299862016-08-16 14:57:37 -07009425 if (frame_mv[NEAR_NEARMV][ALTREF_FRAME].as_int != zeromv.as_int)
Yaowu Xuc27fc142016-08-22 16:08:15 -07009426 mode_skip_mask[ALTREF_FRAME] |= (1 << NEAR_NEARMV);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009427 }
9428 }
9429
9430 if (cpi->rc.is_src_frame_alt_ref) {
9431 if (sf->alt_ref_search_fp) {
9432 assert(cpi->ref_frame_flags & flag_list[ALTREF_FRAME]);
9433 mode_skip_mask[ALTREF_FRAME] = 0;
9434 ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME);
9435 ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
9436 }
9437 }
9438
9439 if (sf->alt_ref_search_fp)
9440 if (!cm->show_frame && x->pred_mv_sad[GOLDEN_FRAME] < INT_MAX)
9441 if (x->pred_mv_sad[ALTREF_FRAME] > (x->pred_mv_sad[GOLDEN_FRAME] << 1))
9442 mode_skip_mask[ALTREF_FRAME] |= INTER_ALL;
9443
9444 if (sf->adaptive_mode_search) {
9445 if (cm->show_frame && !cpi->rc.is_src_frame_alt_ref &&
9446 cpi->rc.frames_since_golden >= 3)
Yaowu Xu36bad472017-05-16 18:29:53 -07009447 if ((x->pred_mv_sad[GOLDEN_FRAME] >> 1) > x->pred_mv_sad[LAST_FRAME])
Yaowu Xuc27fc142016-08-22 16:08:15 -07009448 mode_skip_mask[GOLDEN_FRAME] |= INTER_ALL;
9449 }
9450
9451 if (bsize > sf->max_intra_bsize) {
9452 ref_frame_skip_mask[0] |= (1 << INTRA_FRAME);
9453 ref_frame_skip_mask[1] |= (1 << INTRA_FRAME);
9454 }
9455
9456 mode_skip_mask[INTRA_FRAME] |=
9457 ~(sf->intra_y_mode_mask[max_txsize_lookup[bsize]]);
9458
9459 for (i = 0; i <= LAST_NEW_MV_INDEX; ++i) mode_threshold[i] = 0;
9460 for (i = LAST_NEW_MV_INDEX + 1; i < MAX_MODES; ++i)
9461 mode_threshold[i] = ((int64_t)rd_threshes[i] * rd_thresh_freq_fact[i]) >> 5;
9462
9463 midx = sf->schedule_mode_search ? mode_skip_start : 0;
9464 while (midx > 4) {
9465 uint8_t end_pos = 0;
9466 for (i = 5; i < midx; ++i) {
9467 if (mode_threshold[mode_map[i - 1]] > mode_threshold[mode_map[i]]) {
9468 uint8_t tmp = mode_map[i];
9469 mode_map[i] = mode_map[i - 1];
9470 mode_map[i - 1] = tmp;
9471 end_pos = i;
9472 }
9473 }
9474 midx = end_pos;
9475 }
9476
9477 if (cpi->sf.tx_type_search.fast_intra_tx_type_search)
9478 x->use_default_intra_tx_type = 1;
9479 else
9480 x->use_default_intra_tx_type = 0;
9481
9482 if (cpi->sf.tx_type_search.fast_inter_tx_type_search)
9483 x->use_default_inter_tx_type = 1;
9484 else
9485 x->use_default_inter_tx_type = 0;
Yushin Chod0b77ac2017-10-20 17:33:16 -07009486
Yaowu Xuc27fc142016-08-22 16:08:15 -07009487 for (i = 0; i < MB_MODE_COUNT; ++i)
9488 for (ref_frame = 0; ref_frame < TOTAL_REFS_PER_FRAME; ++ref_frame)
9489 modelled_rd[i][ref_frame] = INT64_MAX;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009490
Zoe Liuf40a9572017-10-13 12:37:19 -07009491#if CONFIG_EXT_SKIP
9492 x->skip_mode_rdcost = -1;
9493 x->skip_mode_index = -1;
9494#endif // CONFIG_EXT_SKIP
9495
Yaowu Xuc27fc142016-08-22 16:08:15 -07009496 for (midx = 0; midx < MAX_MODES; ++midx) {
9497 int mode_index;
9498 int mode_excluded = 0;
9499 int64_t this_rd = INT64_MAX;
9500 int disable_skip = 0;
9501 int compmode_cost = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009502 int rate2 = 0, rate_y = 0, rate_uv = 0;
9503 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
9504 int skippable = 0;
9505 int this_skip2 = 0;
9506 int64_t total_sse = INT64_MAX;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009507 uint8_t ref_frame_type;
Yushin Chod0b77ac2017-10-20 17:33:16 -07009508
Yaowu Xuc27fc142016-08-22 16:08:15 -07009509 mode_index = mode_map[midx];
Zoe Liuf40a9572017-10-13 12:37:19 -07009510#if CONFIG_EXT_SKIP
9511 x->skip_mode_index_candidate = mode_index;
9512#endif // CONFIG_EXT_SKIP
Yaowu Xuf883b422016-08-30 14:01:10 -07009513 this_mode = av1_mode_order[mode_index].mode;
9514 ref_frame = av1_mode_order[mode_index].ref_frame[0];
9515 second_ref_frame = av1_mode_order[mode_index].ref_frame[1];
Yaowu Xu4306b6e2016-09-27 12:55:32 -07009516 mbmi->ref_mv_idx = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009517
Cheng Chenc683bf92017-12-13 09:21:40 -08009518 if (sf->drop_ref) {
9519 if (ref_frame > INTRA_FRAME && second_ref_frame > INTRA_FRAME) {
9520 if (num_available_refs > 2) {
9521 if ((ref_frame == dist_order_refs[0] &&
9522 second_ref_frame == dist_order_refs[1]) ||
9523 (ref_frame == dist_order_refs[1] &&
9524 second_ref_frame == dist_order_refs[0]))
9525 continue;
9526 }
9527 }
9528 }
9529
Yaowu Xuc27fc142016-08-22 16:08:15 -07009530 if (ref_frame > INTRA_FRAME && second_ref_frame == INTRA_FRAME) {
9531 // Mode must by compatible
Debargha Mukherjee37f6fe62017-02-10 21:44:13 -08009532 if (!is_interintra_allowed_mode(this_mode)) continue;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009533 if (!is_interintra_allowed_bsize(bsize)) continue;
9534 }
9535
9536 if (is_inter_compound_mode(this_mode)) {
9537 frame_mv[this_mode][ref_frame].as_int =
9538 frame_mv[compound_ref0_mode(this_mode)][ref_frame].as_int;
9539 frame_mv[this_mode][second_ref_frame].as_int =
9540 frame_mv[compound_ref1_mode(this_mode)][second_ref_frame].as_int;
9541 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07009542
9543 // Look at the reference frame of the best mode so far and set the
9544 // skip mask to look at a subset of the remaining modes.
9545 if (midx == mode_skip_start && best_mode_index >= 0) {
9546 switch (best_mbmode.ref_frame[0]) {
9547 case INTRA_FRAME: break;
9548 case LAST_FRAME:
9549 ref_frame_skip_mask[0] |= LAST_FRAME_MODE_MASK;
9550 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
9551 break;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009552 case LAST2_FRAME:
9553 ref_frame_skip_mask[0] |= LAST2_FRAME_MODE_MASK;
9554 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
9555 break;
9556 case LAST3_FRAME:
9557 ref_frame_skip_mask[0] |= LAST3_FRAME_MODE_MASK;
9558 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
9559 break;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009560 case GOLDEN_FRAME:
9561 ref_frame_skip_mask[0] |= GOLDEN_FRAME_MODE_MASK;
9562 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
9563 break;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009564 case BWDREF_FRAME:
9565 ref_frame_skip_mask[0] |= BWDREF_FRAME_MODE_MASK;
9566 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
9567 break;
Zoe Liue9b15e22017-07-19 15:53:01 -07009568 case ALTREF2_FRAME:
9569 ref_frame_skip_mask[0] |= ALTREF2_FRAME_MODE_MASK;
9570 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
9571 break;
Sebastien Alaiwan365e6442017-10-16 11:35:00 +02009572 case ALTREF_FRAME:
9573 ref_frame_skip_mask[0] |= ALTREF_FRAME_MODE_MASK;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009574 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009575 break;
Emil Keyder01770b32017-01-20 18:03:11 -05009576 case NONE_FRAME:
Yaowu Xuc27fc142016-08-22 16:08:15 -07009577 case TOTAL_REFS_PER_FRAME:
9578 assert(0 && "Invalid Reference frame");
9579 break;
9580 }
9581 }
9582
9583 if ((ref_frame_skip_mask[0] & (1 << ref_frame)) &&
Yaowu Xuf883b422016-08-30 14:01:10 -07009584 (ref_frame_skip_mask[1] & (1 << AOMMAX(0, second_ref_frame))))
Yaowu Xuc27fc142016-08-22 16:08:15 -07009585 continue;
9586
9587 if (mode_skip_mask[ref_frame] & (1 << this_mode)) continue;
9588
9589 // Test best rd so far against threshold for trying this mode.
9590 if (best_mode_skippable && sf->schedule_mode_search)
9591 mode_threshold[mode_index] <<= 1;
9592
9593 if (best_rd < mode_threshold[mode_index]) continue;
9594
Yunqing Wangff4fa062017-04-21 10:56:08 -07009595 // This is only used in motion vector unit test.
9596 if (cpi->oxcf.motion_vector_unit_test && ref_frame == INTRA_FRAME) continue;
9597
Zoe Liuc01dddb2017-11-07 08:44:06 -08009598#if !CONFIG_EXT_COMP_REFS // Changes LL bitstream
Arild Fuldseth (arilfuld)3f429082017-04-28 15:54:28 +02009599 if (cpi->oxcf.pass == 0) {
9600 // Complexity-compression trade-offs
9601 // if (ref_frame == ALTREF_FRAME) continue;
9602 // if (ref_frame == BWDREF_FRAME) continue;
9603 if (second_ref_frame == ALTREF_FRAME) continue;
9604 // if (second_ref_frame == BWDREF_FRAME) continue;
9605 }
Zoe Liuc01dddb2017-11-07 08:44:06 -08009606#endif // !CONFIG_EXT_COMP_REFS
Yaowu Xuc27fc142016-08-22 16:08:15 -07009607 comp_pred = second_ref_frame > INTRA_FRAME;
9608 if (comp_pred) {
9609 if (!cpi->allow_comp_inter_inter) continue;
9610
9611 // Skip compound inter modes if ARF is not available.
9612 if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) continue;
9613
9614 // Do not allow compound prediction if the segment level reference frame
9615 // feature is in use as in this case there can only be one reference.
9616 if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) continue;
9617
9618 if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
9619 best_mode_index >= 0 && best_mbmode.ref_frame[0] == INTRA_FRAME)
9620 continue;
9621
9622 mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
9623 } else {
Zoe Liud4d8b862017-12-06 10:56:01 -08009624 if (ref_frame != INTRA_FRAME) mode_excluded = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009625 }
9626
9627 if (ref_frame == INTRA_FRAME) {
9628 if (sf->adaptive_mode_search)
9629 if ((x->source_variance << num_pels_log2_lookup[bsize]) > best_pred_sse)
9630 continue;
9631
9632 if (this_mode != DC_PRED) {
9633 // Disable intra modes other than DC_PRED for blocks with low variance
9634 // Threshold for intra skipping based on source variance
9635 // TODO(debargha): Specialize the threshold for super block sizes
9636 const unsigned int skip_intra_var_thresh = 64;
9637 if ((mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
9638 x->source_variance < skip_intra_var_thresh)
9639 continue;
9640 // Only search the oblique modes if the best so far is
9641 // one of the neighboring directional modes
9642 if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
Urvang Joshi96d1c0a2017-10-10 13:15:32 -07009643 (this_mode >= D45_PRED && this_mode <= PAETH_PRED)) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07009644 if (best_mode_index >= 0 && best_mbmode.ref_frame[0] > INTRA_FRAME)
9645 continue;
9646 }
9647 if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
9648 if (conditional_skipintra(this_mode, best_intra_mode)) continue;
9649 }
9650 }
David Barkercf3d0b02016-11-10 10:14:49 +00009651 } else if (cm->global_motion[ref_frame].wmtype == IDENTITY &&
Sarah Parkere5299862016-08-16 14:57:37 -07009652 (!comp_pred ||
David Barkercf3d0b02016-11-10 10:14:49 +00009653 cm->global_motion[second_ref_frame].wmtype == IDENTITY)) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07009654 const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, second_ref_frame };
Yue Chenb23d00a2017-07-28 17:01:21 -07009655 if (!check_best_zero_mv(cpi, x, mbmi_ext->mode_context,
Sebastien Alaiwan0bdea0d2017-10-02 15:15:05 +02009656 mbmi_ext->compound_mode_context, frame_mv,
Luc Trudeau15a18e32017-12-13 14:15:25 -05009657 this_mode, ref_frames, bsize, mi_row, mi_col))
Yaowu Xuc27fc142016-08-22 16:08:15 -07009658 continue;
9659 }
9660
9661 mbmi->mode = this_mode;
Luc Trudeaud6d9eee2017-07-12 12:36:50 -04009662 mbmi->uv_mode = UV_DC_PRED;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009663 mbmi->ref_frame[0] = ref_frame;
9664 mbmi->ref_frame[1] = second_ref_frame;
9665 pmi->palette_size[0] = 0;
9666 pmi->palette_size[1] = 0;
hui su5db97432016-10-14 16:10:14 -07009667#if CONFIG_FILTER_INTRA
Yue Chenb0571872017-12-18 18:12:59 -08009668 mbmi->filter_intra_mode_info.use_filter_intra = 0;
hui su5db97432016-10-14 16:10:14 -07009669#endif // CONFIG_FILTER_INTRA
Yaowu Xuc27fc142016-08-22 16:08:15 -07009670 // Evaluate all sub-pel filters irrespective of whether we can use
9671 // them for this frame.
Debargha Mukherjee0df711f2017-05-02 16:00:20 -07009672
9673 set_default_interp_filters(mbmi, cm->interp_filter);
9674
Yaowu Xuc27fc142016-08-22 16:08:15 -07009675 mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
Yue Chencb60b182016-10-13 15:18:22 -07009676 mbmi->motion_mode = SIMPLE_TRANSLATION;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009677
9678 x->skip = 0;
9679 set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
9680
9681 // Select prediction reference frames.
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00009682 for (i = 0; i < num_planes; i++) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07009683 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
9684 if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
9685 }
9686
Debargha Mukherjeecb603792016-10-04 13:10:23 -07009687 mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009688
Jingning Hancf842ad2017-10-25 12:02:30 -07009689#if CONFIG_FRAME_MARKER
9690 if (sf->selective_ref_frame) {
Debargha Mukherjee06b40cc2017-11-02 13:39:39 -07009691 if (sf->selective_ref_frame == 2) {
9692 if (mbmi->ref_frame[0] == ALTREF2_FRAME ||
9693 mbmi->ref_frame[1] == ALTREF2_FRAME)
9694 if (cm->cur_frame->alt2_frame_offset < cm->frame_offset) continue;
9695 if (mbmi->ref_frame[0] == BWDREF_FRAME ||
9696 mbmi->ref_frame[1] == BWDREF_FRAME)
9697 if (cm->cur_frame->bwd_frame_offset < cm->frame_offset) continue;
9698 }
Jingning Hancf842ad2017-10-25 12:02:30 -07009699 if (mbmi->ref_frame[0] == LAST3_FRAME ||
9700 mbmi->ref_frame[1] == LAST3_FRAME)
9701 if (cm->cur_frame->lst3_frame_offset <= cm->cur_frame->gld_frame_offset)
9702 continue;
9703 if (mbmi->ref_frame[0] == LAST2_FRAME ||
9704 mbmi->ref_frame[1] == LAST2_FRAME)
9705 if (cm->cur_frame->lst2_frame_offset <= cm->cur_frame->gld_frame_offset)
9706 continue;
9707 }
Zoe Liu77fb5be2017-11-02 14:36:19 -07009708
9709 // One-sided compound is used only when all reference frames are one-sided.
9710 if (sf->selective_ref_frame && comp_pred && !cpi->all_one_sided_refs) {
9711 unsigned int ref_offsets[2];
9712 for (i = 0; i < 2; ++i) {
9713 const int buf_idx = cm->frame_refs[mbmi->ref_frame[i] - LAST_FRAME].idx;
9714 assert(buf_idx >= 0);
9715 ref_offsets[i] = cm->buffer_pool->frame_bufs[buf_idx].cur_frame_offset;
9716 }
9717 if ((ref_offsets[0] <= cm->frame_offset &&
9718 ref_offsets[1] <= cm->frame_offset) ||
9719 (ref_offsets[0] > cm->frame_offset &&
9720 ref_offsets[1] > cm->frame_offset))
9721 continue;
9722 }
9723#endif // CONFIG_FRAME_MARKER
Jingning Hancf842ad2017-10-25 12:02:30 -07009724
Yaowu Xuc27fc142016-08-22 16:08:15 -07009725 if (ref_frame == INTRA_FRAME) {
Angie Chiang0e9a2e92016-11-08 09:45:40 -08009726 RD_STATS rd_stats_y;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009727 TX_SIZE uv_tx;
hui su45dc5972016-12-08 17:42:50 -08009728 is_directional_mode = av1_is_directional_mode(mbmi->mode, bsize);
Joe Young830d4ce2017-05-30 17:48:13 -07009729 if (is_directional_mode && av1_use_angle_delta(bsize)) {
hui su45dc5972016-12-08 17:42:50 -08009730 int rate_dummy;
hui su9a416f52017-01-13 11:37:53 -08009731 int64_t model_rd = INT64_MAX;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009732 if (!angle_stats_ready) {
9733 const int src_stride = x->plane[0].src.stride;
9734 const uint8_t *src = x->plane[0].src.buf;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009735 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
hui su9cc10652017-04-27 17:22:07 -07009736 highbd_angle_estimation(src, src_stride, rows, cols, bsize,
Yaowu Xuc27fc142016-08-22 16:08:15 -07009737 directional_mode_skip_mask);
9738 else
hui su9cc10652017-04-27 17:22:07 -07009739 angle_estimation(src, src_stride, rows, cols, bsize,
Yaowu Xuc27fc142016-08-22 16:08:15 -07009740 directional_mode_skip_mask);
9741 angle_stats_ready = 1;
9742 }
9743 if (directional_mode_skip_mask[mbmi->mode]) continue;
hui su45dc5972016-12-08 17:42:50 -08009744 rd_stats_y.rate = INT_MAX;
Yue Chenb0f808b2017-04-26 11:55:14 -07009745 rd_pick_intra_angle_sby(cpi, x, &rate_dummy, &rd_stats_y, bsize,
9746 intra_mode_cost[mbmi->mode], best_rd,
9747 &model_rd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009748 } else {
9749 mbmi->angle_delta[0] = 0;
Angie Chiang0e9a2e92016-11-08 09:45:40 -08009750 super_block_yrd(cpi, x, &rd_stats_y, bsize, best_rd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009751 }
Angie Chiang0e9a2e92016-11-08 09:45:40 -08009752 rate_y = rd_stats_y.rate;
9753 distortion_y = rd_stats_y.dist;
9754 skippable = rd_stats_y.skip;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009755
hui su5db97432016-10-14 16:10:14 -07009756#if CONFIG_FILTER_INTRA
Jingning Han12384b12018-01-05 11:38:08 -08009757 uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 8];
Angie Chiangdf682312018-01-08 11:19:55 -08009758 memcpy(best_blk_skip, x->blk_skip[0],
9759 sizeof(best_blk_skip[0]) * ctx->num_4x4_blk);
Jingning Han12384b12018-01-05 11:38:08 -08009760
Yue Chen250dd962017-12-18 17:32:32 -08009761 if (mbmi->mode == DC_PRED && !xd->lossless[mbmi->segment_id]) {
Yue Chen57b8ff62017-10-10 23:37:31 -07009762 RD_STATS rd_stats_y_fi;
9763 int filter_intra_selected_flag = 0;
9764 TX_SIZE best_tx_size = mbmi->tx_size;
9765 TX_TYPE best_tx_type = mbmi->tx_type;
Jingning Han571e1cb2018-01-02 17:05:29 -08009766#if CONFIG_TXK_SEL
9767 TX_TYPE best_txk_type[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
9768 memcpy(best_txk_type, mbmi->txk_type,
9769 sizeof(*best_txk_type) *
9770 (MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)));
9771#endif
Yue Chen57b8ff62017-10-10 23:37:31 -07009772 FILTER_INTRA_MODE best_fi_mode = FILTER_DC_PRED;
Yue Chen95e13e22017-11-01 23:56:35 -07009773 int64_t best_rd_tmp = INT64_MAX;
9774 if (rate_y != INT_MAX &&
9775 av1_filter_intra_allowed_txsize(best_tx_size)) {
Yue Chen4eba69b2017-11-09 22:37:35 -08009776 best_rd_tmp = RDCOST(x->rdmult,
9777 rate_y + x->filter_intra_cost[mbmi->tx_size][0] +
9778 intra_mode_cost[mbmi->mode],
9779 distortion_y);
Yue Chen95e13e22017-11-01 23:56:35 -07009780 }
Yue Chen57b8ff62017-10-10 23:37:31 -07009781
Yue Chenb0571872017-12-18 18:12:59 -08009782 mbmi->filter_intra_mode_info.use_filter_intra = 1;
Yue Chen57b8ff62017-10-10 23:37:31 -07009783 for (FILTER_INTRA_MODE fi_mode = FILTER_DC_PRED;
9784 fi_mode < FILTER_INTRA_MODES; ++fi_mode) {
Yue Chen57b8ff62017-10-10 23:37:31 -07009785 int64_t this_rd_tmp;
Yue Chenb0571872017-12-18 18:12:59 -08009786 mbmi->filter_intra_mode_info.filter_intra_mode = fi_mode;
Yue Chen57b8ff62017-10-10 23:37:31 -07009787
9788 super_block_yrd(cpi, x, &rd_stats_y_fi, bsize, best_rd);
9789 if (rd_stats_y_fi.rate == INT_MAX) continue;
Hui Su4665f092018-01-17 18:10:06 -08009790 const int this_rate_tmp =
9791 rd_stats_y_fi.rate +
9792 intra_mode_info_cost_y(cpi, x, mbmi, bsize,
9793 intra_mode_cost[mbmi->mode]);
Yue Chen57b8ff62017-10-10 23:37:31 -07009794 this_rd_tmp = RDCOST(x->rdmult, this_rate_tmp, rd_stats_y_fi.dist);
9795
9796 if (this_rd_tmp < best_rd_tmp) {
9797 best_tx_size = mbmi->tx_size;
9798 best_tx_type = mbmi->tx_type;
Jingning Han571e1cb2018-01-02 17:05:29 -08009799#if CONFIG_TXK_SEL
9800 memcpy(best_txk_type, mbmi->txk_type,
9801 sizeof(*best_txk_type) *
9802 (MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)));
9803#endif
Jingning Han12384b12018-01-05 11:38:08 -08009804 memcpy(best_blk_skip, x->blk_skip[0],
Angie Chiangdf682312018-01-08 11:19:55 -08009805 sizeof(best_blk_skip[0]) * ctx->num_4x4_blk);
Yue Chen57b8ff62017-10-10 23:37:31 -07009806 best_fi_mode = fi_mode;
9807 rd_stats_y = rd_stats_y_fi;
9808 rate_y = rd_stats_y_fi.rate;
9809 distortion_y = rd_stats_y_fi.dist;
9810 skippable = rd_stats_y_fi.skip;
9811 filter_intra_selected_flag = 1;
9812 best_rd_tmp = this_rd_tmp;
9813 }
9814 }
9815
9816 mbmi->tx_size = best_tx_size;
9817 mbmi->tx_type = best_tx_type;
Jingning Han571e1cb2018-01-02 17:05:29 -08009818#if CONFIG_TXK_SEL
9819 memcpy(mbmi->txk_type, best_txk_type,
9820 sizeof(*best_txk_type) *
9821 (MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)));
9822#endif
Jingning Han12384b12018-01-05 11:38:08 -08009823 memcpy(x->blk_skip[0], best_blk_skip,
Angie Chiangdf682312018-01-08 11:19:55 -08009824 sizeof(x->blk_skip[0][0]) * ctx->num_4x4_blk);
Jingning Han12384b12018-01-05 11:38:08 -08009825
Yue Chen57b8ff62017-10-10 23:37:31 -07009826 if (filter_intra_selected_flag) {
Yue Chenb0571872017-12-18 18:12:59 -08009827 mbmi->filter_intra_mode_info.use_filter_intra = 1;
9828 mbmi->filter_intra_mode_info.filter_intra_mode = best_fi_mode;
Yue Chen57b8ff62017-10-10 23:37:31 -07009829 } else {
Yue Chenb0571872017-12-18 18:12:59 -08009830 mbmi->filter_intra_mode_info.use_filter_intra = 0;
Yue Chen57b8ff62017-10-10 23:37:31 -07009831 }
9832 }
9833#endif
9834
9835 if (rate_y == INT_MAX) continue;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009836
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00009837 if (num_planes > 1) {
9838 uv_tx = av1_get_tx_size(AOM_PLANE_U, xd);
9839 if (rate_uv_intra[uv_tx] == INT_MAX) {
9840 choose_intra_uv_mode(cpi, x, bsize, uv_tx, &rate_uv_intra[uv_tx],
9841 &rate_uv_tokenonly[uv_tx], &dist_uvs[uv_tx],
9842 &skip_uvs[uv_tx], &mode_uv[uv_tx]);
9843 if (try_palette) pmi_uv[uv_tx] = *pmi;
9844 uv_angle_delta[uv_tx] = mbmi->angle_delta[1];
9845 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07009846
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00009847 rate_uv = rate_uv_tokenonly[uv_tx];
9848 distortion_uv = dist_uvs[uv_tx];
9849 skippable = skippable && skip_uvs[uv_tx];
9850 mbmi->uv_mode = mode_uv[uv_tx];
9851 if (try_palette) {
9852 pmi->palette_size[1] = pmi_uv[uv_tx].palette_size[1];
9853 memcpy(pmi->palette_colors + PALETTE_MAX_SIZE,
9854 pmi_uv[uv_tx].palette_colors + PALETTE_MAX_SIZE,
9855 2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0]));
9856 }
9857 mbmi->angle_delta[1] = uv_angle_delta[uv_tx];
Yaowu Xuc27fc142016-08-22 16:08:15 -07009858 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07009859
Hui Su4665f092018-01-17 18:10:06 -08009860 rate2 = rate_y + intra_mode_info_cost_y(cpi, x, mbmi, bsize,
9861 intra_mode_cost[mbmi->mode]);
Rupert Swarbrickfcff0b22017-10-05 09:26:04 +01009862 if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(bsize)) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07009863 // super_block_yrd above includes the cost of the tx_size in the
9864 // tokenonly rate, but for intra blocks, tx_size is always coded
9865 // (prediction granularity), so we account for it in the full rate,
9866 // not the tokenonly rate.
Yue Chen3dd03e32017-10-17 15:39:52 -07009867 rate_y -= tx_size_cost(cm, x, bsize, mbmi->tx_size);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009868 }
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00009869 if (num_planes > 1 && !x->skip_chroma_rd) {
Hui Sub6d058d2018-01-18 14:12:36 -08009870 const int uv_mode_cost =
Hui Su4665f092018-01-17 18:10:06 -08009871#if CONFIG_CFL
Hui Sub6d058d2018-01-18 14:12:36 -08009872 x->intra_uv_mode_cost[is_cfl_allowed(mbmi)][mbmi->mode]
9873 [mbmi->uv_mode];
Joe Young3ca43bf2017-10-06 15:12:46 -07009874#else
Hui Sub6d058d2018-01-18 14:12:36 -08009875 x->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode];
Hui Su4665f092018-01-17 18:10:06 -08009876#endif
Hui Sub6d058d2018-01-18 14:12:36 -08009877 rate2 += rate_uv +
9878 intra_mode_info_cost_uv(cpi, x, mbmi, bsize, uv_mode_cost);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009879 }
Urvang Joshi96d1c0a2017-10-10 13:15:32 -07009880 if (mbmi->mode != DC_PRED && mbmi->mode != PAETH_PRED)
Yaowu Xuc27fc142016-08-22 16:08:15 -07009881 rate2 += intra_cost_penalty;
9882 distortion2 = distortion_y + distortion_uv;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009883 } else {
Yaowu Xuc27fc142016-08-22 16:08:15 -07009884 int_mv backup_ref_mv[2];
9885
Debargha Mukherjee0f248c42017-09-07 12:40:18 -07009886 if (!is_comp_ref_allowed(bsize) && mbmi->ref_frame[1] > INTRA_FRAME)
9887 continue;
Jingning Hanc41a5492017-02-24 11:18:52 -08009888
Yaowu Xuc27fc142016-08-22 16:08:15 -07009889 backup_ref_mv[0] = mbmi_ext->ref_mvs[ref_frame][0];
9890 if (comp_pred) backup_ref_mv[1] = mbmi_ext->ref_mvs[second_ref_frame][0];
Yue Chenb8aa3992017-12-13 15:27:52 -08009891 mbmi->angle_delta[0] = 0;
9892 mbmi->angle_delta[1] = 0;
hui su5db97432016-10-14 16:10:14 -07009893#if CONFIG_FILTER_INTRA
Yue Chenb8aa3992017-12-13 15:27:52 -08009894 mbmi->filter_intra_mode_info.use_filter_intra = 0;
hui su5db97432016-10-14 16:10:14 -07009895#endif // CONFIG_FILTER_INTRA
Yaowu Xuc27fc142016-08-22 16:08:15 -07009896 mbmi->ref_mv_idx = 0;
Yaowu Xuf883b422016-08-30 14:01:10 -07009897 ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009898
David Barker404b2e82017-03-27 13:07:47 +01009899 if (comp_pred) {
9900 if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
David Barker3dfba992017-04-03 16:10:09 +01009901 int ref_mv_idx = 0;
9902 // Special case: NEAR_NEWMV and NEW_NEARMV modes use
9903 // 1 + mbmi->ref_mv_idx (like NEARMV) instead of
9904 // mbmi->ref_mv_idx (like NEWMV)
9905 if (mbmi->mode == NEAR_NEWMV || mbmi->mode == NEW_NEARMV)
9906 ref_mv_idx = 1;
9907
9908 if (compound_ref0_mode(mbmi->mode) == NEWMV) {
9909 int_mv this_mv =
9910 mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
David Barker404b2e82017-03-27 13:07:47 +01009911 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
9912 xd->n8_h << MI_SIZE_LOG2, xd);
9913 mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv;
9914 }
David Barker3dfba992017-04-03 16:10:09 +01009915 if (compound_ref1_mode(mbmi->mode) == NEWMV) {
9916 int_mv this_mv =
9917 mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
David Barker404b2e82017-03-27 13:07:47 +01009918 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
9919 xd->n8_h << MI_SIZE_LOG2, xd);
9920 mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0] = this_mv;
9921 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07009922 }
David Barker404b2e82017-03-27 13:07:47 +01009923 } else {
Zoe Liu1157d502017-04-30 07:57:14 -07009924 if (mbmi->mode == NEWMV && mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
David Barker404b2e82017-03-27 13:07:47 +01009925 int ref;
9926 for (ref = 0; ref < 1 + comp_pred; ++ref) {
9927 int_mv this_mv =
9928 (ref == 0) ? mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv
9929 : mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
9930 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
9931 xd->n8_h << MI_SIZE_LOG2, xd);
9932 mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0] = this_mv;
9933 }
9934 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07009935 }
Angie Chiang76159122016-11-09 12:13:22 -08009936 {
9937 RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
9938 av1_init_rd_stats(&rd_stats);
9939 rd_stats.rate = rate2;
Fergus Simpson073c6f32017-02-17 12:13:48 -08009940
Zoe Liu7f24e1b2017-03-17 17:42:05 -07009941 // Point to variables that are maintained between loop iterations
Yunqing Wang5f4f7382018-01-09 10:33:09 -08009942 args.single_newmv = single_newmv[0];
9943 args.single_newmv_rate = single_newmv_rate[0];
9944 args.single_newmv_valid = single_newmv_valid[0];
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -08009945 args.modelled_rd = modelled_rd;
Fergus Simpson3424c2d2017-03-09 11:48:15 -08009946 this_rd = handle_inter_mode(cpi, x, bsize, &rd_stats, &rd_stats_y,
9947 &rd_stats_uv, &disable_skip, frame_mv,
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -08009948 mi_row, mi_col, &args, best_rd);
Angie Chiang76159122016-11-09 12:13:22 -08009949 rate2 = rd_stats.rate;
9950 skippable = rd_stats.skip;
9951 distortion2 = rd_stats.dist;
9952 total_sse = rd_stats.sse;
9953 rate_y = rd_stats_y.rate;
9954 rate_uv = rd_stats_uv.rate;
9955 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07009956
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01009957 // TODO(jingning): This needs some refactoring to improve code quality
9958 // and reduce redundant steps.
David Barker3dfba992017-04-03 16:10:09 +01009959 if ((have_nearmv_in_inter_mode(mbmi->mode) &&
David Barker404b2e82017-03-27 13:07:47 +01009960 mbmi_ext->ref_mv_count[ref_frame_type] > 2) ||
9961 ((mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) &&
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01009962 mbmi_ext->ref_mv_count[ref_frame_type] > 1)) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07009963 int_mv backup_mv = frame_mv[NEARMV][ref_frame];
9964 MB_MODE_INFO backup_mbmi = *mbmi;
9965 int backup_skip = x->skip;
9966 int64_t tmp_ref_rd = this_rd;
9967 int ref_idx;
9968
Sebastien Alaiwan0bdea0d2017-10-02 15:15:05 +02009969 // TODO(jingning): This should be deprecated shortly.
David Barker3dfba992017-04-03 16:10:09 +01009970 int idx_offset = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009971 int ref_set =
Yunqing Wang5f4f7382018-01-09 10:33:09 -08009972 AOMMIN(MAX_REF_MV_SERCH - 1,
9973 mbmi_ext->ref_mv_count[ref_frame_type] - 1 - idx_offset);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009974
9975 uint8_t drl_ctx =
Yaowu Xuf883b422016-08-30 14:01:10 -07009976 av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx_offset);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009977 // Dummy
9978 int_mv backup_fmv[2];
9979 backup_fmv[0] = frame_mv[NEWMV][ref_frame];
9980 if (comp_pred) backup_fmv[1] = frame_mv[NEWMV][second_ref_frame];
9981
Yue Chenb23d00a2017-07-28 17:01:21 -07009982 rate2 += (rate2 < INT_MAX ? x->drl_mode_cost0[drl_ctx][0] : 0);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009983
9984 if (this_rd < INT64_MAX) {
Urvang Joshi70006e42017-06-14 16:08:55 -07009985 if (RDCOST(x->rdmult, rate_y + rate_uv, distortion2) <
9986 RDCOST(x->rdmult, 0, total_sse))
9987 tmp_ref_rd = RDCOST(
Zoe Liu1eed2df2017-10-16 17:13:15 -07009988 x->rdmult, rate2 + x->skip_cost[av1_get_skip_context(xd)][0],
Urvang Joshi70006e42017-06-14 16:08:55 -07009989 distortion2);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009990 else
clang-format4eafefe2017-09-04 12:51:20 -07009991 tmp_ref_rd =
9992 RDCOST(x->rdmult,
Zoe Liu1eed2df2017-10-16 17:13:15 -07009993 rate2 + x->skip_cost[av1_get_skip_context(xd)][1] -
clang-format4eafefe2017-09-04 12:51:20 -07009994 rate_y - rate_uv,
9995 total_sse);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009996 }
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +00009997 for (i = 0; i < num_planes; ++i)
Yaowu Xuc27fc142016-08-22 16:08:15 -07009998 memcpy(x->blk_skip_drl[i], x->blk_skip[i],
9999 sizeof(uint8_t) * ctx->num_4x4_blk);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010000
Cheng Chen4a6977a2017-12-27 12:27:39 -080010001 for (ref_idx = 0; ref_idx < ref_set; ++ref_idx) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070010002 int64_t tmp_alt_rd = INT64_MAX;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010003 int dummy_disable_skip = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010004 int_mv cur_mv;
Angie Chiang76159122016-11-09 12:13:22 -080010005 RD_STATS tmp_rd_stats, tmp_rd_stats_y, tmp_rd_stats_uv;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010006
Yaowu Xu5bfbfdf2016-11-22 16:43:34 -080010007 av1_invalid_rd_stats(&tmp_rd_stats);
Yushin Choc0f6bf22017-06-09 16:08:02 -070010008
Jingning Han52617b22017-04-11 12:50:08 -070010009 x->skip = 0;
Yaowu Xu5bfbfdf2016-11-22 16:43:34 -080010010
Yaowu Xuc27fc142016-08-22 16:08:15 -070010011 mbmi->ref_mv_idx = 1 + ref_idx;
10012
Jingning Han8db5f172018-01-23 15:15:51 -080010013 if (cpi->sf.reduce_inter_modes) {
10014 if (mbmi->ref_frame[0] == LAST2_FRAME ||
10015 mbmi->ref_frame[0] == LAST3_FRAME ||
10016 mbmi->ref_frame[1] == LAST2_FRAME ||
10017 mbmi->ref_frame[1] == LAST3_FRAME) {
10018 if (mbmi_ext
10019 ->ref_mv_stack[ref_frame_type]
10020 [mbmi->ref_mv_idx + idx_offset]
10021 .weight < REF_CAT_LEVEL) {
10022 *mbmi = backup_mbmi;
10023 x->skip = backup_skip;
10024 continue;
10025 }
10026 }
10027 }
10028
David Barker3dfba992017-04-03 16:10:09 +010010029 if (comp_pred) {
10030 int ref_mv_idx = mbmi->ref_mv_idx;
10031 // Special case: NEAR_NEWMV and NEW_NEARMV modes use
10032 // 1 + mbmi->ref_mv_idx (like NEARMV) instead of
10033 // mbmi->ref_mv_idx (like NEWMV)
10034 if (mbmi->mode == NEAR_NEWMV || mbmi->mode == NEW_NEARMV)
10035 ref_mv_idx = 1 + mbmi->ref_mv_idx;
10036
10037 if (compound_ref0_mode(mbmi->mode) == NEWMV) {
10038 int_mv this_mv =
10039 mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
10040 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
10041 xd->n8_h << MI_SIZE_LOG2, xd);
10042 mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv;
10043 } else if (compound_ref0_mode(mbmi->mode) == NEARESTMV) {
10044 int_mv this_mv =
10045 mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv;
10046 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
10047 xd->n8_h << MI_SIZE_LOG2, xd);
10048 mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv;
10049 }
10050
10051 if (compound_ref1_mode(mbmi->mode) == NEWMV) {
10052 int_mv this_mv =
10053 mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
10054 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
10055 xd->n8_h << MI_SIZE_LOG2, xd);
10056 mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0] = this_mv;
10057 } else if (compound_ref1_mode(mbmi->mode) == NEARESTMV) {
10058 int_mv this_mv =
10059 mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
10060 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
10061 xd->n8_h << MI_SIZE_LOG2, xd);
10062 mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0] = this_mv;
10063 }
10064 } else {
Imdad Sardharwallabe5ca552017-11-29 14:38:28 +000010065 int_mv this_mv = mbmi_ext
10066 ->ref_mv_stack[ref_frame_type]
10067 [mbmi->ref_mv_idx + idx_offset]
10068 .this_mv;
Jingning Hanc3ef32a2017-12-04 09:56:53 -080010069 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
10070 xd->n8_h << MI_SIZE_LOG2, xd);
10071 mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010072 }
10073
10074 cur_mv =
10075 mbmi_ext->ref_mv_stack[ref_frame][mbmi->ref_mv_idx + idx_offset]
10076 .this_mv;
10077 clamp_mv2(&cur_mv.as_mv, xd);
10078
Alex Converse0fa0f422017-04-24 12:51:14 -070010079 if (!mv_check_bounds(&x->mv_limits, &cur_mv.as_mv)) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070010080 frame_mv[NEARMV][ref_frame] = cur_mv;
Angie Chiang76159122016-11-09 12:13:22 -080010081 av1_init_rd_stats(&tmp_rd_stats);
Yushin Cho8e75e8b2017-09-12 16:33:28 -070010082
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -080010083 args.modelled_rd = NULL;
Yunqing Wang5f4f7382018-01-09 10:33:09 -080010084 args.single_newmv = single_newmv[mbmi->ref_mv_idx];
10085 args.single_newmv_rate = single_newmv_rate[mbmi->ref_mv_idx];
10086 args.single_newmv_valid = single_newmv_valid[mbmi->ref_mv_idx];
10087
Sebastien Alaiwan34d55662017-11-15 09:36:03 +010010088 tmp_alt_rd = handle_inter_mode(
10089 cpi, x, bsize, &tmp_rd_stats, &tmp_rd_stats_y, &tmp_rd_stats_uv,
10090 &dummy_disable_skip, frame_mv, mi_row, mi_col, &args, best_rd);
Zoe Liu7f24e1b2017-03-17 17:42:05 -070010091 // Prevent pointers from escaping local scope
Yunqing Wang5f4f7382018-01-09 10:33:09 -080010092 args.single_newmv = single_newmv[0];
10093 args.single_newmv_rate = single_newmv_rate[0];
10094 args.single_newmv_valid = single_newmv_valid[0];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010095 }
10096
10097 for (i = 0; i < mbmi->ref_mv_idx; ++i) {
10098 uint8_t drl1_ctx = 0;
Yaowu Xuf883b422016-08-30 14:01:10 -070010099 drl1_ctx = av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type],
10100 i + idx_offset);
Angie Chiang76159122016-11-09 12:13:22 -080010101 tmp_rd_stats.rate +=
Yue Chenb23d00a2017-07-28 17:01:21 -070010102 (tmp_rd_stats.rate < INT_MAX ? x->drl_mode_cost0[drl1_ctx][1]
Angie Chiang76159122016-11-09 12:13:22 -080010103 : 0);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010104 }
10105
10106 if (mbmi_ext->ref_mv_count[ref_frame_type] >
10107 mbmi->ref_mv_idx + idx_offset + 1 &&
10108 ref_idx < ref_set - 1) {
10109 uint8_t drl1_ctx =
Yaowu Xuf883b422016-08-30 14:01:10 -070010110 av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type],
10111 mbmi->ref_mv_idx + idx_offset);
Yaowu Xu83ed6fe2016-11-22 11:15:29 -080010112 tmp_rd_stats.rate +=
Yue Chenb23d00a2017-07-28 17:01:21 -070010113 (tmp_rd_stats.rate < INT_MAX ? x->drl_mode_cost0[drl1_ctx][0]
Yaowu Xu83ed6fe2016-11-22 11:15:29 -080010114 : 0);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010115 }
10116
10117 if (tmp_alt_rd < INT64_MAX) {
Urvang Joshi70006e42017-06-14 16:08:55 -070010118 tmp_alt_rd =
10119 RDCOST(x->rdmult, tmp_rd_stats.rate, tmp_rd_stats.dist);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010120 }
10121
10122 if (tmp_ref_rd > tmp_alt_rd) {
Angie Chiang76159122016-11-09 12:13:22 -080010123 rate2 = tmp_rd_stats.rate;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010124 disable_skip = dummy_disable_skip;
Angie Chiang76159122016-11-09 12:13:22 -080010125 distortion2 = tmp_rd_stats.dist;
10126 skippable = tmp_rd_stats.skip;
10127 rate_y = tmp_rd_stats_y.rate;
10128 rate_uv = tmp_rd_stats_uv.rate;
10129 total_sse = tmp_rd_stats.sse;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010130 this_rd = tmp_alt_rd;
10131 tmp_ref_rd = tmp_alt_rd;
10132 backup_mbmi = *mbmi;
10133 backup_skip = x->skip;
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +000010134 for (i = 0; i < num_planes; ++i)
Yaowu Xuc27fc142016-08-22 16:08:15 -070010135 memcpy(x->blk_skip_drl[i], x->blk_skip[i],
10136 sizeof(uint8_t) * ctx->num_4x4_blk);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010137 } else {
10138 *mbmi = backup_mbmi;
10139 x->skip = backup_skip;
10140 }
10141 }
10142
10143 frame_mv[NEARMV][ref_frame] = backup_mv;
10144 frame_mv[NEWMV][ref_frame] = backup_fmv[0];
10145 if (comp_pred) frame_mv[NEWMV][second_ref_frame] = backup_fmv[1];
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +000010146 for (i = 0; i < num_planes; ++i)
Yaowu Xuc27fc142016-08-22 16:08:15 -070010147 memcpy(x->blk_skip[i], x->blk_skip_drl[i],
10148 sizeof(uint8_t) * ctx->num_4x4_blk);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010149 }
10150 mbmi_ext->ref_mvs[ref_frame][0] = backup_ref_mv[0];
10151 if (comp_pred) mbmi_ext->ref_mvs[second_ref_frame][0] = backup_ref_mv[1];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010152
10153 if (this_rd == INT64_MAX) continue;
10154
Debargha Mukherjee0f248c42017-09-07 12:40:18 -070010155 if (is_comp_ref_allowed(mbmi->sb_type))
Hui Su9d0c03d2017-12-27 16:05:23 -080010156 compmode_cost = comp_inter_cost[comp_pred];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010157
10158 if (cm->reference_mode == REFERENCE_MODE_SELECT) rate2 += compmode_cost;
10159 }
10160
Yaowu Xuc27fc142016-08-22 16:08:15 -070010161 // Estimate the reference frame signaling cost and add it
10162 // to the rolling cost variable.
10163 if (comp_pred) {
Zoe Liuc082bbc2017-05-17 13:31:37 -070010164#if CONFIG_EXT_COMP_REFS
10165 rate2 += ref_costs_comp[ref_frame][second_ref_frame];
Sebastien Alaiwan365e6442017-10-16 11:35:00 +020010166#else // !CONFIG_EXT_COMP_REFS
Yaowu Xuc27fc142016-08-22 16:08:15 -070010167 rate2 += ref_costs_comp[ref_frame];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010168 rate2 += ref_costs_comp[second_ref_frame];
Zoe Liuc082bbc2017-05-17 13:31:37 -070010169#endif // CONFIG_EXT_COMP_REFS
Yaowu Xuc27fc142016-08-22 16:08:15 -070010170 } else {
10171 rate2 += ref_costs_single[ref_frame];
10172 }
10173
Sebastien Alaiwan1bc94fc2017-10-31 10:25:17 +010010174 if (ref_frame == INTRA_FRAME) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070010175 if (skippable) {
10176 // Back out the coefficient coding costs
10177 rate2 -= (rate_y + rate_uv);
10178 rate_y = 0;
10179 rate_uv = 0;
10180 // Cost the skip mb case
Zoe Liu1eed2df2017-10-16 17:13:15 -070010181 rate2 += x->skip_cost[av1_get_skip_context(xd)][1];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010182 } else if (ref_frame != INTRA_FRAME && !xd->lossless[mbmi->segment_id]) {
Urvang Joshi70006e42017-06-14 16:08:55 -070010183 if (RDCOST(x->rdmult, rate_y + rate_uv + rate_skip0, distortion2) <
10184 RDCOST(x->rdmult, rate_skip1, total_sse)) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070010185 // Add in the cost of the no skip flag.
Zoe Liu1eed2df2017-10-16 17:13:15 -070010186 rate2 += x->skip_cost[av1_get_skip_context(xd)][0];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010187 } else {
10188 // FIXME(rbultje) make this work for splitmv also
Zoe Liu1eed2df2017-10-16 17:13:15 -070010189 rate2 += x->skip_cost[av1_get_skip_context(xd)][1];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010190 distortion2 = total_sse;
10191 assert(total_sse >= 0);
10192 rate2 -= (rate_y + rate_uv);
10193 this_skip2 = 1;
10194 rate_y = 0;
10195 rate_uv = 0;
10196 }
10197 } else {
10198 // Add in the cost of the no skip flag.
Zoe Liu1eed2df2017-10-16 17:13:15 -070010199 rate2 += x->skip_cost[av1_get_skip_context(xd)][0];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010200 }
10201
10202 // Calculate the final RD estimate for this mode.
Urvang Joshi70006e42017-06-14 16:08:55 -070010203 this_rd = RDCOST(x->rdmult, rate2, distortion2);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010204 } else {
10205 this_skip2 = mbmi->skip;
Urvang Joshi70006e42017-06-14 16:08:55 -070010206 this_rd = RDCOST(x->rdmult, rate2, distortion2);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010207 if (this_skip2) {
10208 rate_y = 0;
10209 rate_uv = 0;
10210 }
Yaowu Xuc27fc142016-08-22 16:08:15 -070010211 }
10212
Yaowu Xuc27fc142016-08-22 16:08:15 -070010213 if (ref_frame == INTRA_FRAME) {
10214 // Keep record of best intra rd
10215 if (this_rd < best_intra_rd) {
10216 best_intra_rd = this_rd;
10217 best_intra_mode = mbmi->mode;
10218 }
Yaowu Xuc27fc142016-08-22 16:08:15 -070010219 }
10220
10221 if (!disable_skip && ref_frame == INTRA_FRAME) {
10222 for (i = 0; i < REFERENCE_MODES; ++i)
Yaowu Xuf883b422016-08-30 14:01:10 -070010223 best_pred_rd[i] = AOMMIN(best_pred_rd[i], this_rd);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010224 }
10225
10226 // Did this mode help.. i.e. is it the new best mode
10227 if (this_rd < best_rd || x->skip) {
10228 if (!mode_excluded) {
10229 // Note index of best mode so far
10230 best_mode_index = mode_index;
10231
10232 if (ref_frame == INTRA_FRAME) {
10233 /* required for left and above block mv */
10234 mbmi->mv[0].as_int = 0;
10235 } else {
10236 best_pred_sse = x->pred_sse[ref_frame];
10237 }
10238
10239 rd_cost->rate = rate2;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010240 rd_cost->dist = distortion2;
10241 rd_cost->rdcost = this_rd;
10242 best_rd = this_rd;
10243 best_mbmode = *mbmi;
10244 best_skip2 = this_skip2;
10245 best_mode_skippable = skippable;
Zoe Liu1eed2df2017-10-16 17:13:15 -070010246 best_rate_y =
10247 rate_y +
10248 x->skip_cost[av1_get_skip_context(xd)][this_skip2 || skippable];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010249 best_rate_uv = rate_uv;
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +000010250 for (i = 0; i < num_planes; ++i)
Yaowu Xuc27fc142016-08-22 16:08:15 -070010251 memcpy(ctx->blk_skip[i], x->blk_skip[i],
10252 sizeof(uint8_t) * ctx->num_4x4_blk);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010253 }
10254 }
Yushin Cho8e75e8b2017-09-12 16:33:28 -070010255
Yaowu Xuc27fc142016-08-22 16:08:15 -070010256 /* keep record of best compound/single-only prediction */
10257 if (!disable_skip && ref_frame != INTRA_FRAME) {
10258 int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
10259
10260 if (cm->reference_mode == REFERENCE_MODE_SELECT) {
10261 single_rate = rate2 - compmode_cost;
10262 hybrid_rate = rate2;
10263 } else {
10264 single_rate = rate2;
10265 hybrid_rate = rate2 + compmode_cost;
10266 }
10267
Urvang Joshi70006e42017-06-14 16:08:55 -070010268 single_rd = RDCOST(x->rdmult, single_rate, distortion2);
10269 hybrid_rd = RDCOST(x->rdmult, hybrid_rate, distortion2);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010270
10271 if (!comp_pred) {
10272 if (single_rd < best_pred_rd[SINGLE_REFERENCE])
10273 best_pred_rd[SINGLE_REFERENCE] = single_rd;
10274 } else {
10275 if (single_rd < best_pred_rd[COMPOUND_REFERENCE])
10276 best_pred_rd[COMPOUND_REFERENCE] = single_rd;
10277 }
10278 if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
10279 best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
10280 }
10281
Cheng Chenc683bf92017-12-13 09:21:40 -080010282 if (sf->drop_ref) {
10283 if (second_ref_frame == NONE_FRAME) {
10284 const int idx = ref_frame - LAST_FRAME;
10285 if (idx && distortion2 > dist_refs[idx]) {
10286 dist_refs[idx] = distortion2;
10287 dist_order_refs[idx] = ref_frame;
10288 }
10289
10290 // Reach the last single ref prediction mode
10291 if (ref_frame == ALTREF_FRAME && this_mode == GLOBALMV) {
10292 // bubble sort dist_refs and the order index
10293 for (i = 0; i < TOTAL_REFS_PER_FRAME; ++i) {
10294 for (k = i + 1; k < TOTAL_REFS_PER_FRAME; ++k) {
10295 if (dist_refs[i] < dist_refs[k]) {
10296 int64_t tmp_dist = dist_refs[i];
10297 dist_refs[i] = dist_refs[k];
10298 dist_refs[k] = tmp_dist;
10299
10300 int tmp_idx = dist_order_refs[i];
10301 dist_order_refs[i] = dist_order_refs[k];
10302 dist_order_refs[k] = tmp_idx;
10303 }
10304 }
10305 }
10306
10307 for (i = 0; i < TOTAL_REFS_PER_FRAME; ++i) {
10308 if (dist_refs[i] == -1) break;
10309 num_available_refs = i;
10310 }
10311 num_available_refs++;
10312 }
10313 }
10314 }
10315
Yaowu Xuc27fc142016-08-22 16:08:15 -070010316 if (x->skip && !comp_pred) break;
10317 }
10318
10319 if (xd->lossless[mbmi->segment_id] == 0 && best_mode_index >= 0 &&
10320 ((sf->tx_type_search.fast_inter_tx_type_search == 1 &&
10321 is_inter_mode(best_mbmode.mode)) ||
10322 (sf->tx_type_search.fast_intra_tx_type_search == 1 &&
10323 !is_inter_mode(best_mbmode.mode)))) {
Angie Chiang0e9a2e92016-11-08 09:45:40 -080010324 int skip_blk = 0;
10325 RD_STATS rd_stats_y, rd_stats_uv;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010326
10327 x->use_default_inter_tx_type = 0;
10328 x->use_default_intra_tx_type = 0;
10329
10330 *mbmi = best_mbmode;
10331
10332 set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
10333
10334 // Select prediction reference frames.
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +000010335 for (i = 0; i < num_planes; i++) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070010336 xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
10337 if (has_second_ref(mbmi))
10338 xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
10339 }
10340
10341 if (is_inter_mode(mbmi->mode)) {
Jingning Hanc44009c2017-05-06 11:36:49 -070010342 av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
Sarah Parker19234cc2017-03-10 16:43:25 -080010343 if (mbmi->motion_mode == OBMC_CAUSAL) {
Fergus Simpson073c6f32017-02-17 12:13:48 -080010344 av1_build_obmc_inter_prediction(
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -080010345 cm, xd, mi_row, mi_col, args.above_pred_buf, args.above_pred_stride,
10346 args.left_pred_buf, args.left_pred_stride);
Sarah Parker19234cc2017-03-10 16:43:25 -080010347 }
Yaowu Xuf883b422016-08-30 14:01:10 -070010348 av1_subtract_plane(x, bsize, 0);
Rupert Swarbrick6f149692017-12-11 15:52:05 +000010349 if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) {
Debargha Mukherjee51666862017-10-24 14:29:13 -070010350 // av1_rd_pick_inter_mode_sb
Yue Chen25dc0702017-10-18 23:36:06 -070010351 select_tx_type_yrd(cpi, x, &rd_stats_y, bsize, mi_row, mi_col,
10352 INT64_MAX);
Hui Su1ddf2312017-08-19 15:21:34 -070010353 assert(rd_stats_y.rate != INT_MAX);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010354 } else {
10355 int idx, idy;
Angie Chiang0e9a2e92016-11-08 09:45:40 -080010356 super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010357 for (idy = 0; idy < xd->n8_h; ++idy)
10358 for (idx = 0; idx < xd->n8_w; ++idx)
10359 mbmi->inter_tx_size[idy][idx] = mbmi->tx_size;
Angie Chiang0e9a2e92016-11-08 09:45:40 -080010360 memset(x->blk_skip[0], rd_stats_y.skip,
Yaowu Xuc27fc142016-08-22 16:08:15 -070010361 sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
10362 }
10363
Debargha Mukherjee51666862017-10-24 14:29:13 -070010364 inter_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX, 0);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010365 } else {
Angie Chiang0e9a2e92016-11-08 09:45:40 -080010366 super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
Angie Chiang284d7772016-11-08 11:06:45 -080010367 super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010368 }
10369
Urvang Joshi70006e42017-06-14 16:08:55 -070010370 if (RDCOST(x->rdmult, rd_stats_y.rate + rd_stats_uv.rate,
Angie Chiang0e9a2e92016-11-08 09:45:40 -080010371 (rd_stats_y.dist + rd_stats_uv.dist)) >
Urvang Joshi70006e42017-06-14 16:08:55 -070010372 RDCOST(x->rdmult, 0, (rd_stats_y.sse + rd_stats_uv.sse))) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070010373 skip_blk = 1;
Zoe Liu1eed2df2017-10-16 17:13:15 -070010374 rd_stats_y.rate = x->skip_cost[av1_get_skip_context(xd)][1];
Angie Chiang0e9a2e92016-11-08 09:45:40 -080010375 rd_stats_uv.rate = 0;
10376 rd_stats_y.dist = rd_stats_y.sse;
10377 rd_stats_uv.dist = rd_stats_uv.sse;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010378 } else {
10379 skip_blk = 0;
Zoe Liu1eed2df2017-10-16 17:13:15 -070010380 rd_stats_y.rate += x->skip_cost[av1_get_skip_context(xd)][0];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010381 }
10382
Urvang Joshi70006e42017-06-14 16:08:55 -070010383 if (RDCOST(x->rdmult, best_rate_y + best_rate_uv, rd_cost->dist) >
10384 RDCOST(x->rdmult, rd_stats_y.rate + rd_stats_uv.rate,
Angie Chiang0e9a2e92016-11-08 09:45:40 -080010385 (rd_stats_y.dist + rd_stats_uv.dist))) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070010386 int idx, idy;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010387 best_mbmode.tx_type = mbmi->tx_type;
10388 best_mbmode.tx_size = mbmi->tx_size;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010389 for (idy = 0; idy < xd->n8_h; ++idy)
10390 for (idx = 0; idx < xd->n8_w; ++idx)
10391 best_mbmode.inter_tx_size[idy][idx] = mbmi->inter_tx_size[idy][idx];
10392
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +000010393 for (i = 0; i < num_planes; ++i)
Yaowu Xuc27fc142016-08-22 16:08:15 -070010394 memcpy(ctx->blk_skip[i], x->blk_skip[i],
10395 sizeof(uint8_t) * ctx->num_4x4_blk);
Jingning Hane67b38a2016-11-04 10:30:00 -070010396
10397 best_mbmode.min_tx_size = mbmi->min_tx_size;
Angie Chiang0e9a2e92016-11-08 09:45:40 -080010398 rd_cost->rate +=
10399 (rd_stats_y.rate + rd_stats_uv.rate - best_rate_y - best_rate_uv);
10400 rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
Urvang Joshi70006e42017-06-14 16:08:55 -070010401 rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010402 best_skip2 = skip_blk;
10403 }
10404 }
10405
10406 // Only try palette mode when the best mode so far is an intra mode.
hui su9bc1d8d2017-03-24 12:36:03 -070010407 if (try_palette && !is_inter_mode(best_mbmode.mode)) {
Angie Chiang0e9a2e92016-11-08 09:45:40 -080010408 int rate2 = 0;
Urvang Joshi451e0f22017-01-31 11:18:31 -080010409 int64_t distortion2 = 0, best_rd_palette = best_rd, this_rd,
10410 best_model_rd_palette = INT64_MAX;
Urvang Joshi626591d2016-10-24 14:13:55 -070010411 int skippable = 0, rate_overhead_palette = 0;
Angie Chiang0e9a2e92016-11-08 09:45:40 -080010412 RD_STATS rd_stats_y;
hui sude0c70a2017-01-09 17:12:17 -080010413 TX_SIZE uv_tx;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010414 uint8_t *const best_palette_color_map =
10415 x->palette_buffer->best_palette_color_map;
10416 uint8_t *const color_map = xd->plane[0].color_index_map;
Hui Suefb755c2017-10-26 16:09:05 -070010417 MB_MODE_INFO best_mbmi_palette = *mbmi;
Jingning Hanc5c37032018-01-04 16:43:43 -080010418 uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 8];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010419
10420 mbmi->mode = DC_PRED;
Luc Trudeaud6d9eee2017-07-12 12:36:50 -040010421 mbmi->uv_mode = UV_DC_PRED;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010422 mbmi->ref_frame[0] = INTRA_FRAME;
Emil Keyder01770b32017-01-20 18:03:11 -050010423 mbmi->ref_frame[1] = NONE_FRAME;
Urvang Joshi626591d2016-10-24 14:13:55 -070010424 rate_overhead_palette = rd_pick_palette_intra_sby(
Hui Su4665f092018-01-17 18:10:06 -080010425 cpi, x, bsize, intra_mode_cost[DC_PRED], &best_mbmi_palette,
10426 best_palette_color_map, &best_rd_palette, &best_model_rd_palette, NULL,
10427 NULL, NULL, NULL, ctx, best_blk_skip);
Jingning Hanc5c37032018-01-04 16:43:43 -080010428
10429 memcpy(x->blk_skip[0], best_blk_skip,
10430 sizeof(best_blk_skip[0]) * bsize_to_num_blk(bsize));
10431
hui sude0c70a2017-01-09 17:12:17 -080010432 if (pmi->palette_size[0] == 0) goto PALETTE_EXIT;
10433 memcpy(color_map, best_palette_color_map,
10434 rows * cols * sizeof(best_palette_color_map[0]));
Angie Chiang0e9a2e92016-11-08 09:45:40 -080010435 super_block_yrd(cpi, x, &rd_stats_y, bsize, best_rd);
10436 if (rd_stats_y.rate == INT_MAX) goto PALETTE_EXIT;
Debargha Mukherjee80592c72017-12-16 08:23:34 -080010437 uv_tx = av1_get_tx_size(AOM_PLANE_U, xd);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010438 if (rate_uv_intra[uv_tx] == INT_MAX) {
Luc Trudeau9d4cbb82017-07-27 17:01:32 -040010439 choose_intra_uv_mode(cpi, x, bsize, uv_tx, &rate_uv_intra[uv_tx],
Urvang Joshi368fbc92016-10-17 16:31:34 -070010440 &rate_uv_tokenonly[uv_tx], &dist_uvs[uv_tx],
10441 &skip_uvs[uv_tx], &mode_uv[uv_tx]);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010442 pmi_uv[uv_tx] = *pmi;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010443 uv_angle_delta[uv_tx] = mbmi->angle_delta[1];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010444 }
10445 mbmi->uv_mode = mode_uv[uv_tx];
10446 pmi->palette_size[1] = pmi_uv[uv_tx].palette_size[1];
hui sude0c70a2017-01-09 17:12:17 -080010447 if (pmi->palette_size[1] > 0) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070010448 memcpy(pmi->palette_colors + PALETTE_MAX_SIZE,
10449 pmi_uv[uv_tx].palette_colors + PALETTE_MAX_SIZE,
10450 2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0]));
hui sude0c70a2017-01-09 17:12:17 -080010451 }
Yaowu Xuc27fc142016-08-22 16:08:15 -070010452 mbmi->angle_delta[1] = uv_angle_delta[uv_tx];
Angie Chiang0e9a2e92016-11-08 09:45:40 -080010453 skippable = rd_stats_y.skip && skip_uvs[uv_tx];
10454 distortion2 = rd_stats_y.dist + dist_uvs[uv_tx];
10455 rate2 = rd_stats_y.rate + rate_overhead_palette + rate_uv_intra[uv_tx];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010456 rate2 += ref_costs_single[INTRA_FRAME];
10457
10458 if (skippable) {
Angie Chiang0e9a2e92016-11-08 09:45:40 -080010459 rate2 -= (rd_stats_y.rate + rate_uv_tokenonly[uv_tx]);
Zoe Liu1eed2df2017-10-16 17:13:15 -070010460 rate2 += x->skip_cost[av1_get_skip_context(xd)][1];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010461 } else {
Zoe Liu1eed2df2017-10-16 17:13:15 -070010462 rate2 += x->skip_cost[av1_get_skip_context(xd)][0];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010463 }
Urvang Joshi70006e42017-06-14 16:08:55 -070010464 this_rd = RDCOST(x->rdmult, rate2, distortion2);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010465 if (this_rd < best_rd) {
10466 best_mode_index = 3;
10467 mbmi->mv[0].as_int = 0;
10468 rd_cost->rate = rate2;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010469 rd_cost->dist = distortion2;
10470 rd_cost->rdcost = this_rd;
10471 best_rd = this_rd;
10472 best_mbmode = *mbmi;
10473 best_skip2 = 0;
10474 best_mode_skippable = skippable;
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +000010475 for (i = 0; i < num_planes; ++i)
Jingning Han12384b12018-01-05 11:38:08 -080010476 memcpy(ctx->blk_skip[i], x->blk_skip[i],
10477 sizeof(uint8_t) * ctx->num_4x4_blk);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010478 }
10479 }
10480PALETTE_EXIT:
Zoe Liuf40a9572017-10-13 12:37:19 -070010481
10482#if CONFIG_EXT_SKIP
Zoe Liu8a5d3432017-11-30 16:33:44 -080010483 best_mbmode.skip_mode = 0;
10484 if (cm->skip_mode_flag &&
Zoe Liuf40a9572017-10-13 12:37:19 -070010485 !segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
10486 is_comp_ref_allowed(bsize)) {
Zoe Liu104d62e2017-12-07 12:44:45 -080010487// Obtain the rdcost for skip_mode.
10488#if CONFIG_JNT_COMP
10489 x->compound_idx = 1; // COMPOUND_AVERAGE
Zoe Liu56644192017-12-19 13:16:18 -080010490#endif // CONFIG_JNT_COMP
10491 estimate_skip_mode_rdcost(cpi, tile_data, x, bsize, mi_row, mi_col,
10492 frame_mv, yv12_mb);
Zoe Liuf40a9572017-10-13 12:37:19 -070010493
Zoe Liu8a5d3432017-11-30 16:33:44 -080010494 if (x->skip_mode_rdcost >= 0 && x->skip_mode_rdcost < INT64_MAX) {
10495 // Update skip mode rdcost.
10496 const int skip_mode_ctx = av1_get_skip_mode_context(xd);
10497 x->skip_mode_rate += x->skip_mode_cost[skip_mode_ctx][1];
10498 x->skip_mode_rdcost =
10499 RDCOST(x->rdmult, x->skip_mode_rate, x->skip_mode_dist);
Zoe Liuf40a9572017-10-13 12:37:19 -070010500
Zoe Liu8a5d3432017-11-30 16:33:44 -080010501 // Compare the use of skip_mode with the best intra/inter mode obtained.
10502 const int64_t best_intra_inter_mode_cost =
Yaowu Xu8f699e02017-12-28 14:34:48 -080010503 (rd_cost->dist < INT64_MAX && rd_cost->rate < INT32_MAX)
10504 ? RDCOST(x->rdmult,
10505 rd_cost->rate + x->skip_mode_cost[skip_mode_ctx][0],
10506 rd_cost->dist)
10507 : INT64_MAX;
Zoe Liu8a5d3432017-11-30 16:33:44 -080010508
Zoe Liu50c810d2017-12-05 16:09:37 -080010509 if (x->skip_mode_rdcost <= best_intra_inter_mode_cost)
Zoe Liu8a5d3432017-11-30 16:33:44 -080010510 best_mbmode.skip_mode = 1;
10511 }
10512
10513 if (best_mbmode.skip_mode) {
Zoe Liuf40a9572017-10-13 12:37:19 -070010514 best_mbmode = *mbmi;
10515
10516 best_mbmode.skip_mode = best_mbmode.skip = 1;
10517 best_mbmode.mode = NEAREST_NEARESTMV;
10518 best_mbmode.ref_frame[0] = x->skip_mode_ref_frame[0];
10519 best_mbmode.ref_frame[1] = x->skip_mode_ref_frame[1];
10520 best_mbmode.mv[0].as_int = x->skip_mode_mv[0].as_int;
10521 best_mbmode.mv[1].as_int = x->skip_mode_mv[1].as_int;
Zoe Liu8a5d3432017-11-30 16:33:44 -080010522 best_mbmode.ref_mv_idx = 0;
Zoe Liuf40a9572017-10-13 12:37:19 -070010523
10524 // Set up tx_size related variables for skip-specific loop filtering.
10525 best_mbmode.tx_size = block_signals_txsize(bsize)
10526 ? tx_size_from_tx_mode(bsize, cm->tx_mode, 1)
Debargha Mukherjeee4e18fc2017-12-06 23:43:24 -080010527 : max_txsize_rect_lookup[1][bsize];
Zoe Liuf40a9572017-10-13 12:37:19 -070010528 {
10529 const int width = block_size_wide[bsize] >> tx_size_wide_log2[0];
10530 const int height = block_size_high[bsize] >> tx_size_high_log2[0];
10531 for (int idy = 0; idy < height; ++idy)
10532 for (int idx = 0; idx < width; ++idx)
10533 best_mbmode.inter_tx_size[idy >> 1][idx >> 1] = best_mbmode.tx_size;
10534 }
Debargha Mukherjee3ebb0d02017-12-14 05:05:18 -080010535 best_mbmode.min_tx_size = best_mbmode.tx_size;
Zoe Liuf40a9572017-10-13 12:37:19 -070010536 set_txfm_ctxs(best_mbmode.tx_size, xd->n8_w, xd->n8_h, best_mbmode.skip,
10537 xd);
10538
10539 // Set up color-related variables for skip mode.
10540 best_mbmode.uv_mode = UV_DC_PRED;
10541 best_mbmode.palette_mode_info.palette_size[0] = 0;
10542 best_mbmode.palette_mode_info.palette_size[1] = 0;
Zoe Liu104d62e2017-12-07 12:44:45 -080010543
10544#if CONFIG_JNT_COMP
10545 best_mbmode.comp_group_idx = 0;
10546 best_mbmode.compound_idx = x->compound_idx;
10547#endif // CONFIG_JNT_COMP
Zoe Liuf40a9572017-10-13 12:37:19 -070010548 best_mbmode.interinter_compound_type = COMPOUND_AVERAGE;
10549 best_mbmode.motion_mode = SIMPLE_TRANSLATION;
Zoe Liu104d62e2017-12-07 12:44:45 -080010550
10551 best_mbmode.interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
Zoe Liuf40a9572017-10-13 12:37:19 -070010552#if CONFIG_FILTER_INTRA
Yue Chen994dba22017-12-19 15:27:26 -080010553 best_mbmode.filter_intra_mode_info.use_filter_intra = 0;
Zoe Liuf40a9572017-10-13 12:37:19 -070010554#endif // CONFIG_FILTER_INTRA
10555
Zoe Liuf40a9572017-10-13 12:37:19 -070010556 set_default_interp_filters(&best_mbmode, cm->interp_filter);
10557
Zoe Liu8a5d3432017-11-30 16:33:44 -080010558 best_mode_index = x->skip_mode_index;
10559
Zoe Liuf40a9572017-10-13 12:37:19 -070010560 // Update rd_cost
Zoe Liu8a5d3432017-11-30 16:33:44 -080010561 rd_cost->rate = x->skip_mode_rate;
Zoe Liuf40a9572017-10-13 12:37:19 -070010562 rd_cost->dist = rd_cost->sse = x->skip_mode_dist;
10563 rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
Zoe Liu8a5d3432017-11-30 16:33:44 -080010564
Zoe Liuf40a9572017-10-13 12:37:19 -070010565 best_rd = rd_cost->rdcost;
Zoe Liu8a5d3432017-11-30 16:33:44 -080010566 best_skip2 = 1;
10567 best_mode_skippable = (x->skip_mode_sse == 0);
Zoe Liuf40a9572017-10-13 12:37:19 -070010568
10569 x->skip = 1;
10570#if 0
Zoe Liu8a5d3432017-11-30 16:33:44 -080010571 // TODO(zoeliu): To investigate why following cause performance drop.
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +000010572 for (i = 0; i < num_planes; ++i) {
Zoe Liu8a5d3432017-11-30 16:33:44 -080010573 memset(x->blk_skip[i], x->skip, sizeof(uint8_t) * ctx->num_4x4_blk);
10574 memcpy(ctx->blk_skip[i], x->blk_skip[i],
Zoe Liuf40a9572017-10-13 12:37:19 -070010575 sizeof(uint8_t) * ctx->num_4x4_blk);
Zoe Liu8a5d3432017-11-30 16:33:44 -080010576 }
Zoe Liuf40a9572017-10-13 12:37:19 -070010577#endif // 0
10578 }
10579 }
10580#endif // CONFIG_EXT_SKIP
10581
Sebastien Alaiwan34d55662017-11-15 09:36:03 +010010582 // The inter modes' rate costs are not calculated precisely in some cases.
10583 // Therefore, sometimes, NEWMV is chosen instead of NEARESTMV, NEARMV, and
10584 // GLOBALMV. Here, checks are added for those cases, and the mode decisions
10585 // are corrected.
Yunqing Wang62fe78e2017-12-22 16:03:43 -080010586 if (best_mbmode.mode == NEWMV || best_mbmode.mode == NEW_NEWMV) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070010587 const MV_REFERENCE_FRAME refs[2] = { best_mbmode.ref_frame[0],
10588 best_mbmode.ref_frame[1] };
10589 int comp_pred_mode = refs[1] > INTRA_FRAME;
Sarah Parkere5299862016-08-16 14:57:37 -070010590 int_mv zeromv[2];
Yaowu Xuf883b422016-08-30 14:01:10 -070010591 const uint8_t rf_type = av1_ref_frame_type(best_mbmode.ref_frame);
Luc Trudeau2eb9b842017-12-13 11:19:16 -050010592 zeromv[0].as_int =
10593 gm_get_motion_vector(&cm->global_motion[refs[0]],
10594 cm->allow_high_precision_mv, bsize, mi_col, mi_row
RogerZhou3b635242017-09-19 10:06:46 -070010595#if CONFIG_AMVR
Luc Trudeau2eb9b842017-12-13 11:19:16 -050010596 ,
10597 cm->cur_frame_force_integer_mv
RogerZhou3b635242017-09-19 10:06:46 -070010598#endif
Luc Trudeau2eb9b842017-12-13 11:19:16 -050010599 )
10600 .as_int;
RogerZhou10a03802017-10-26 11:49:48 -070010601 zeromv[1].as_int = comp_pred_mode
10602 ? gm_get_motion_vector(&cm->global_motion[refs[1]],
10603 cm->allow_high_precision_mv,
Luc Trudeau2eb9b842017-12-13 11:19:16 -050010604 bsize, mi_col, mi_row
RogerZhou3b635242017-09-19 10:06:46 -070010605#if CONFIG_AMVR
RogerZhou10a03802017-10-26 11:49:48 -070010606 ,
10607 cm->cur_frame_force_integer_mv
RogerZhou3b635242017-09-19 10:06:46 -070010608#endif
RogerZhou10a03802017-10-26 11:49:48 -070010609 )
10610 .as_int
10611 : 0;
Angie Chiang6f90fb22017-12-20 16:30:20 -080010612
10613 // Check if the global motion mode is non-translational.
10614 int is_nontran_gm = cm->global_motion[refs[0]].wmtype <= TRANSLATION;
10615 if (comp_pred_mode)
10616 is_nontran_gm &= cm->global_motion[refs[1]].wmtype <= TRANSLATION;
10617 if (AOMMIN(block_size_wide[bsize], block_size_high[bsize]) < 8)
10618 is_nontran_gm = 1;
10619
Yaowu Xuc27fc142016-08-22 16:08:15 -070010620 if (!comp_pred_mode) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070010621 int ref_set = (mbmi_ext->ref_mv_count[rf_type] >= 2)
Yaowu Xuf883b422016-08-30 14:01:10 -070010622 ? AOMMIN(2, mbmi_ext->ref_mv_count[rf_type] - 2)
Yaowu Xuc27fc142016-08-22 16:08:15 -070010623 : INT_MAX;
10624
10625 for (i = 0; i <= ref_set && ref_set != INT_MAX; ++i) {
10626 int_mv cur_mv = mbmi_ext->ref_mv_stack[rf_type][i + 1].this_mv;
10627 if (cur_mv.as_int == best_mbmode.mv[0].as_int) {
10628 best_mbmode.mode = NEARMV;
10629 best_mbmode.ref_mv_idx = i;
10630 }
10631 }
10632
10633 if (frame_mv[NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int)
10634 best_mbmode.mode = NEARESTMV;
Angie Chiang6f90fb22017-12-20 16:30:20 -080010635 else if (best_mbmode.mv[0].as_int == zeromv[0].as_int && is_nontran_gm)
Sarah Parker2b9ec2e2017-10-30 17:34:08 -070010636 best_mbmode.mode = GLOBALMV;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010637 } else {
10638 int_mv nearestmv[2];
10639 int_mv nearmv[2];
10640
Yaowu Xuc27fc142016-08-22 16:08:15 -070010641 if (mbmi_ext->ref_mv_count[rf_type] > 1) {
10642 nearmv[0] = mbmi_ext->ref_mv_stack[rf_type][1].this_mv;
10643 nearmv[1] = mbmi_ext->ref_mv_stack[rf_type][1].comp_mv;
10644 } else {
10645 nearmv[0] = frame_mv[NEARMV][refs[0]];
10646 nearmv[1] = frame_mv[NEARMV][refs[1]];
10647 }
Yaowu Xuc27fc142016-08-22 16:08:15 -070010648 if (mbmi_ext->ref_mv_count[rf_type] >= 1) {
10649 nearestmv[0] = mbmi_ext->ref_mv_stack[rf_type][0].this_mv;
10650 nearestmv[1] = mbmi_ext->ref_mv_stack[rf_type][0].comp_mv;
10651 } else {
10652 nearestmv[0] = frame_mv[NEARESTMV][refs[0]];
10653 nearestmv[1] = frame_mv[NEARESTMV][refs[1]];
10654 }
10655
10656 if (nearestmv[0].as_int == best_mbmode.mv[0].as_int &&
Sebastien Alaiwan0bdea0d2017-10-02 15:15:05 +020010657 nearestmv[1].as_int == best_mbmode.mv[1].as_int) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070010658 best_mbmode.mode = NEAREST_NEARESTMV;
David Barker404b2e82017-03-27 13:07:47 +010010659 } else {
10660 int ref_set = (mbmi_ext->ref_mv_count[rf_type] >= 2)
10661 ? AOMMIN(2, mbmi_ext->ref_mv_count[rf_type] - 2)
10662 : INT_MAX;
10663
10664 for (i = 0; i <= ref_set && ref_set != INT_MAX; ++i) {
10665 nearmv[0] = mbmi_ext->ref_mv_stack[rf_type][i + 1].this_mv;
10666 nearmv[1] = mbmi_ext->ref_mv_stack[rf_type][i + 1].comp_mv;
10667
Debargha Mukherjeebb6e1342017-04-17 16:05:04 -070010668 // Try switching to the NEAR_NEARMV mode
10669 if (nearmv[0].as_int == best_mbmode.mv[0].as_int &&
David Barker404b2e82017-03-27 13:07:47 +010010670 nearmv[1].as_int == best_mbmode.mv[1].as_int) {
10671 best_mbmode.mode = NEAR_NEARMV;
10672 best_mbmode.ref_mv_idx = i;
10673 }
10674 }
10675
David Barker3dfba992017-04-03 16:10:09 +010010676 if (best_mbmode.mode == NEW_NEWMV &&
David Barker404b2e82017-03-27 13:07:47 +010010677 best_mbmode.mv[0].as_int == zeromv[0].as_int &&
Angie Chiang6f90fb22017-12-20 16:30:20 -080010678 best_mbmode.mv[1].as_int == zeromv[1].as_int && is_nontran_gm)
Sarah Parker2b9ec2e2017-10-30 17:34:08 -070010679 best_mbmode.mode = GLOBAL_GLOBALMV;
David Barker404b2e82017-03-27 13:07:47 +010010680 }
Yaowu Xuc27fc142016-08-22 16:08:15 -070010681 }
Yaowu Xuc27fc142016-08-22 16:08:15 -070010682 }
10683
David Barker9620bcd2017-03-22 14:46:42 +000010684 // Make sure that the ref_mv_idx is only nonzero when we're
10685 // using a mode which can support ref_mv_idx
10686 if (best_mbmode.ref_mv_idx != 0 &&
David Barker3dfba992017-04-03 16:10:09 +010010687 !(best_mbmode.mode == NEWMV || best_mbmode.mode == NEW_NEWMV ||
Sebastien Alaiwan34d55662017-11-15 09:36:03 +010010688 have_nearmv_in_inter_mode(best_mbmode.mode))) {
David Barker9620bcd2017-03-22 14:46:42 +000010689 best_mbmode.ref_mv_idx = 0;
10690 }
10691
Rupert Swarbrick799ff702017-10-04 17:37:52 +010010692 if (best_mbmode.ref_frame[0] > INTRA_FRAME &&
Yunqing Wang876a8b02017-11-13 17:13:27 -080010693 best_mbmode.ref_frame[1] <= INTRA_FRAME
Zoe Liuf40a9572017-10-13 12:37:19 -070010694#if CONFIG_EXT_SKIP
10695 && !best_mbmode.skip_mode
10696#endif // CONFIG_EXT_SKIP
Yunqing Wang876a8b02017-11-13 17:13:27 -080010697 ) {
Jingning Han731af492016-11-17 11:53:23 -080010698 int8_t ref_frame_type = av1_ref_frame_type(best_mbmode.ref_frame);
10699 int16_t mode_ctx = mbmi_ext->mode_context[ref_frame_type];
David Barker68e6e862016-11-24 15:10:15 +000010700 if (mode_ctx & (1 << ALL_ZERO_FLAG_OFFSET)) {
Rupert Swarbrick799ff702017-10-04 17:37:52 +010010701 int_mv zeromv;
Rupert Swarbrick799ff702017-10-04 17:37:52 +010010702 const MV_REFERENCE_FRAME ref = best_mbmode.ref_frame[0];
10703 zeromv.as_int = gm_get_motion_vector(&cm->global_motion[ref],
10704 cm->allow_high_precision_mv, bsize,
Luc Trudeau2eb9b842017-12-13 11:19:16 -050010705 mi_col, mi_row
RogerZhou3b635242017-09-19 10:06:46 -070010706#if CONFIG_AMVR
Rupert Swarbrick799ff702017-10-04 17:37:52 +010010707 ,
RogerZhou10a03802017-10-26 11:49:48 -070010708 cm->cur_frame_force_integer_mv
RogerZhou3b635242017-09-19 10:06:46 -070010709#endif
Rupert Swarbrick799ff702017-10-04 17:37:52 +010010710 )
10711 .as_int;
Rupert Swarbrick799ff702017-10-04 17:37:52 +010010712 if (best_mbmode.mv[0].as_int == zeromv.as_int) {
Sarah Parker2b9ec2e2017-10-30 17:34:08 -070010713 best_mbmode.mode = GLOBALMV;
David Barkercdcac6d2016-12-01 17:04:16 +000010714 }
David Barker68e6e862016-11-24 15:10:15 +000010715 }
Yaowu Xuc27fc142016-08-22 16:08:15 -070010716 }
Yaowu Xuc27fc142016-08-22 16:08:15 -070010717
10718 if (best_mode_index < 0 || best_rd >= best_rd_so_far) {
10719 rd_cost->rate = INT_MAX;
10720 rd_cost->rdcost = INT64_MAX;
10721 return;
10722 }
10723
Rupert Swarbrick27e90292017-09-28 17:46:50 +010010724 assert((cm->interp_filter == SWITCHABLE) ||
10725 (cm->interp_filter ==
10726 av1_extract_interp_filter(best_mbmode.interp_filters, 0)) ||
10727 !is_inter_block(&best_mbmode));
Yaowu Xuc27fc142016-08-22 16:08:15 -070010728#if CONFIG_DUAL_FILTER
10729 assert((cm->interp_filter == SWITCHABLE) ||
Rupert Swarbrick27e90292017-09-28 17:46:50 +010010730 (cm->interp_filter ==
10731 av1_extract_interp_filter(best_mbmode.interp_filters, 1)) ||
Yaowu Xuc27fc142016-08-22 16:08:15 -070010732 !is_inter_block(&best_mbmode));
Fergus Simpson4063a682017-02-28 16:52:22 -080010733#endif // CONFIG_DUAL_FILTER
Yaowu Xuc27fc142016-08-22 16:08:15 -070010734
10735 if (!cpi->rc.is_src_frame_alt_ref)
Yaowu Xuf883b422016-08-30 14:01:10 -070010736 av1_update_rd_thresh_fact(cm, tile_data->thresh_freq_fact,
10737 sf->adaptive_rd_thresh, bsize, best_mode_index);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010738
10739 // macroblock modes
10740 *mbmi = best_mbmode;
10741 x->skip |= best_skip2;
10742
Sebastien Alaiwan48795802017-10-30 12:07:13 +010010743 // Note: this section is needed since the mode may have been forced to
Sarah Parker2b9ec2e2017-10-30 17:34:08 -070010744 // GLOBALMV by the all-zero mode handling of ref-mv.
10745 if (mbmi->mode == GLOBALMV || mbmi->mode == GLOBAL_GLOBALMV) {
10746 // Correct the motion mode for GLOBALMV
Sarah Parker0eea89f2017-07-11 11:56:36 -070010747 const MOTION_MODE last_motion_mode_allowed =
Luc Trudeau2eb9b842017-12-13 11:19:16 -050010748 motion_mode_allowed(xd->global_motion, xd, xd->mi[0]);
Angie Chiang6f90fb22017-12-20 16:30:20 -080010749 if (mbmi->motion_mode > last_motion_mode_allowed) assert(0);
Yue Chen19e7aa82016-11-30 14:05:39 -080010750 if (is_nontrans_global_motion(xd)) {
Angie Chiang6f90fb22017-12-20 16:30:20 -080010751 assert(mbmi->interp_filters ==
10752 av1_broadcast_interp_filter(
10753 av1_unswitchable_filter(cm->interp_filter)));
Yue Chen19e7aa82016-11-30 14:05:39 -080010754 }
10755 }
Yue Chen19e7aa82016-11-30 14:05:39 -080010756
Yaowu Xuc27fc142016-08-22 16:08:15 -070010757 for (i = 0; i < 1 + has_second_ref(mbmi); ++i) {
10758 if (mbmi->mode != NEWMV)
10759 mbmi->pred_mv[i].as_int = mbmi->mv[i].as_int;
10760 else
10761 mbmi->pred_mv[i].as_int = mbmi_ext->ref_mvs[mbmi->ref_frame[i]][0].as_int;
10762 }
Yaowu Xuc27fc142016-08-22 16:08:15 -070010763
10764 for (i = 0; i < REFERENCE_MODES; ++i) {
10765 if (best_pred_rd[i] == INT64_MAX)
10766 best_pred_diff[i] = INT_MIN;
10767 else
10768 best_pred_diff[i] = best_rd - best_pred_rd[i];
10769 }
10770
10771 x->skip |= best_mode_skippable;
10772
10773 assert(best_mode_index >= 0);
10774
10775 store_coding_context(x, ctx, best_mode_index, best_pred_diff,
10776 best_mode_skippable);
10777
Urvang Joshic9e71d42017-08-09 18:58:33 -070010778 if (pmi->palette_size[1] > 0) {
10779 assert(try_palette);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010780 restore_uv_color_map(cpi, x);
10781 }
10782}
10783
// Mode decision for a block whose segment has the SEG_LVL_SKIP feature
// active: no mode search is performed.  The block is hard-wired to the
// global-motion mode (GLOBALMV, single reference) with no coded residual
// (x->skip = 1, distortion fixed at 0); only the interpolation filter may be
// searched, and then purely by signalling cost.  On success *rd_cost holds
// the rate/dist/rdcost of the forced mode and the coding context is stored in
// ctx; if the forced mode is not better than best_rd_so_far, rd_cost->rate is
// set to INT_MAX to signal failure to the caller.
void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
                                        TileDataEnc *tile_data, MACROBLOCK *x,
                                        int mi_row, int mi_col,
                                        RD_STATS *rd_cost, BLOCK_SIZE bsize,
                                        PICK_MODE_CONTEXT *ctx,
                                        int64_t best_rd_so_far) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  unsigned char segment_id = mbmi->segment_id;
  const int comp_pred = 0;  // this path always codes single prediction
  int i;
  int64_t best_pred_diff[REFERENCE_MODES];
  unsigned int ref_costs_single[TOTAL_REFS_PER_FRAME];
#if CONFIG_EXT_COMP_REFS
  unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME][TOTAL_REFS_PER_FRAME];
#else
  unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME];
#endif  // CONFIG_EXT_COMP_REFS
  int *comp_inter_cost =
      x->comp_inter_cost[av1_get_reference_mode_context(cm, xd)];
  InterpFilter best_filter = SWITCHABLE;
  int64_t this_rd = INT64_MAX;
  int rate2 = 0;
  // A skip-segment block codes no residual, so distortion is fixed at 0.
  const int64_t distortion2 = 0;
  (void)mi_row;
  (void)mi_col;

  av1_collect_neighbors_ref_counts(xd);

  estimate_ref_frame_costs(cm, xd, x, segment_id, ref_costs_single,
                           ref_costs_comp);

  // Reset per-reference search statistics read elsewhere in the encoder.
  for (i = 0; i < TOTAL_REFS_PER_FRAME; ++i) x->pred_sse[i] = INT_MAX;
  for (i = LAST_FRAME; i < TOTAL_REFS_PER_FRAME; ++i)
    x->pred_mv_sad[i] = INT_MAX;

  rd_cost->rate = INT_MAX;

  // Caller contract: only invoked for segments with SEG_LVL_SKIP enabled.
  assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));

  mbmi->palette_mode_info.palette_size[0] = 0;
  mbmi->palette_mode_info.palette_size[1] = 0;

#if CONFIG_FILTER_INTRA
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
#endif  // CONFIG_FILTER_INTRA
  // Force the global-motion mode with simple translation.
  mbmi->mode = GLOBALMV;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->uv_mode = UV_DC_PRED;
  // Use the reference frame pinned by the segment if present, else LAST.
  if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME))
    mbmi->ref_frame[0] = get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
  else
    mbmi->ref_frame[0] = LAST_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
  // The motion vector is derived from the reference's global motion model.
  mbmi->mv[0].as_int =
      gm_get_motion_vector(&cm->global_motion[mbmi->ref_frame[0]],
                           cm->allow_high_precision_mv, bsize, mi_col, mi_row
#if CONFIG_AMVR
                           ,
                           cm->cur_frame_force_integer_mv
#endif
                           )
          .as_int;
  mbmi->tx_size = max_txsize_lookup[bsize];
  x->skip = 1;  // no residual coding for the skip segment

  mbmi->ref_mv_idx = 0;
  mbmi->pred_mv[0].as_int = 0;

  mbmi->motion_mode = SIMPLE_TRANSLATION;
  av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col);
  // Collect motion-projection samples for this block so num_proj_ref is
  // populated even though the motion mode stays SIMPLE_TRANSLATION.
  if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) {
    int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
    mbmi->num_proj_ref[0] = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref);
#if CONFIG_EXT_WARPED_MOTION
    // Select the samples according to motion vector difference
    if (mbmi->num_proj_ref[0] > 1)
      mbmi->num_proj_ref[0] = selectSamples(&mbmi->mv[0].as_mv, pts, pts_inref,
                                            mbmi->num_proj_ref[0], bsize);
#endif  // CONFIG_EXT_WARPED_MOTION
  }

  set_default_interp_filters(mbmi, cm->interp_filter);

  if (cm->interp_filter != SWITCHABLE) {
    best_filter = cm->interp_filter;
  } else {
    best_filter = EIGHTTAP_REGULAR;
    // With zero distortion the filter choice only affects signalling cost,
    // so pick the switchable filter that is cheapest to code.
    if (av1_is_interp_needed(xd) && av1_is_interp_search_needed(xd) &&
        x->source_variance >= cpi->sf.disable_filter_search_var_thresh) {
      int rs;
      int best_rs = INT_MAX;
      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
        mbmi->interp_filters = av1_broadcast_interp_filter(i);
        rs = av1_get_switchable_rate(cm, x, xd);
        if (rs < best_rs) {
          best_rs = rs;
          best_filter = av1_extract_interp_filter(mbmi->interp_filters, 0);
        }
      }
    }
  }
  // Set the appropriate filter
  mbmi->interp_filters = av1_broadcast_interp_filter(best_filter);
  rate2 += av1_get_switchable_rate(cm, x, xd);

  if (cm->reference_mode == REFERENCE_MODE_SELECT)
    rate2 += comp_inter_cost[comp_pred];

  // Estimate the reference frame signaling cost and add it
  // to the rolling cost variable.
  // NOTE(review): LAST_FRAME's cost is charged even when the segment pins a
  // different reference above — confirm this is intended.
  rate2 += ref_costs_single[LAST_FRAME];
  this_rd = RDCOST(x->rdmult, rate2, distortion2);

  rd_cost->rate = rate2;
  rd_cost->dist = distortion2;
  rd_cost->rdcost = this_rd;

  // Report failure if the forced mode cannot beat the caller's best RD cost.
  if (this_rd >= best_rd_so_far) {
    rd_cost->rate = INT_MAX;
    rd_cost->rdcost = INT64_MAX;
    return;
  }

  assert((cm->interp_filter == SWITCHABLE) ||
         (cm->interp_filter ==
          av1_extract_interp_filter(mbmi->interp_filters, 0)));

  av1_update_rd_thresh_fact(cm, tile_data->thresh_freq_fact,
                            cpi->sf.adaptive_rd_thresh, bsize, THR_GLOBALMV);

  av1_zero(best_pred_diff);

  store_coding_context(x, ctx, THR_GLOBALMV, best_pred_diff, 0);
}
10920
// Context threaded through the per-neighbour callbacks invoked by
// foreach_overlappable_nb_{above,left} on behalf of
// calc_target_weighted_pred().
struct calc_target_weighted_pred_ctxt {
  const MACROBLOCK *x;  // current block; its wsrc_buf/mask_buf are written
  const uint8_t *tmp;   // neighbour prediction buffer (above or left)
  int tmp_stride;       // stride of tmp, in pixels
  int overlap;          // OBMC overlap extent, in pixels
};
10927
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +000010928static INLINE void calc_target_weighted_pred_above(
10929 MACROBLOCKD *xd, int rel_mi_col, uint8_t nb_mi_width, MODE_INFO *nb_mi,
10930 void *fun_ctxt, const int num_planes) {
Rupert Swarbrickc0cea7f2017-08-22 14:06:56 +010010931 (void)nb_mi;
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +000010932 (void)num_planes;
Rupert Swarbrickc0cea7f2017-08-22 14:06:56 +010010933
10934 struct calc_target_weighted_pred_ctxt *ctxt =
10935 (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
10936
Rupert Swarbrickc0cea7f2017-08-22 14:06:56 +010010937 const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
Rupert Swarbrickc0cea7f2017-08-22 14:06:56 +010010938
10939 const int bw = xd->n8_w << MI_SIZE_LOG2;
10940 const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
10941
10942 int32_t *wsrc = ctxt->x->wsrc_buf + (rel_mi_col * MI_SIZE);
10943 int32_t *mask = ctxt->x->mask_buf + (rel_mi_col * MI_SIZE);
10944 const uint8_t *tmp = ctxt->tmp + rel_mi_col * MI_SIZE;
10945
10946 if (!is_hbd) {
10947 for (int row = 0; row < ctxt->overlap; ++row) {
10948 const uint8_t m0 = mask1d[row];
10949 const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
10950 for (int col = 0; col < nb_mi_width * MI_SIZE; ++col) {
10951 wsrc[col] = m1 * tmp[col];
10952 mask[col] = m0;
10953 }
10954 wsrc += bw;
10955 mask += bw;
10956 tmp += ctxt->tmp_stride;
10957 }
Rupert Swarbrickc0cea7f2017-08-22 14:06:56 +010010958 } else {
10959 const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
10960
10961 for (int row = 0; row < ctxt->overlap; ++row) {
10962 const uint8_t m0 = mask1d[row];
10963 const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
10964 for (int col = 0; col < nb_mi_width * MI_SIZE; ++col) {
10965 wsrc[col] = m1 * tmp16[col];
10966 mask[col] = m0;
10967 }
10968 wsrc += bw;
10969 mask += bw;
10970 tmp16 += ctxt->tmp_stride;
10971 }
Rupert Swarbrickc0cea7f2017-08-22 14:06:56 +010010972 }
10973}
10974
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +000010975static INLINE void calc_target_weighted_pred_left(
10976 MACROBLOCKD *xd, int rel_mi_row, uint8_t nb_mi_height, MODE_INFO *nb_mi,
10977 void *fun_ctxt, const int num_planes) {
Rupert Swarbrickc0cea7f2017-08-22 14:06:56 +010010978 (void)nb_mi;
Imdad Sardharwallaaf8e2642018-01-19 11:46:34 +000010979 (void)num_planes;
Rupert Swarbrickc0cea7f2017-08-22 14:06:56 +010010980
10981 struct calc_target_weighted_pred_ctxt *ctxt =
10982 (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
10983
Rupert Swarbrickc0cea7f2017-08-22 14:06:56 +010010984 const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
Rupert Swarbrickc0cea7f2017-08-22 14:06:56 +010010985
10986 const int bw = xd->n8_w << MI_SIZE_LOG2;
10987 const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
10988
10989 int32_t *wsrc = ctxt->x->wsrc_buf + (rel_mi_row * MI_SIZE * bw);
10990 int32_t *mask = ctxt->x->mask_buf + (rel_mi_row * MI_SIZE * bw);
10991 const uint8_t *tmp = ctxt->tmp + (rel_mi_row * MI_SIZE * ctxt->tmp_stride);
10992
10993 if (!is_hbd) {
10994 for (int row = 0; row < nb_mi_height * MI_SIZE; ++row) {
10995 for (int col = 0; col < ctxt->overlap; ++col) {
10996 const uint8_t m0 = mask1d[col];
10997 const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
10998 wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
10999 (tmp[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
11000 mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
11001 }
11002 wsrc += bw;
11003 mask += bw;
11004 tmp += ctxt->tmp_stride;
11005 }
Rupert Swarbrickc0cea7f2017-08-22 14:06:56 +010011006 } else {
11007 const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
11008
11009 for (int row = 0; row < nb_mi_height * MI_SIZE; ++row) {
11010 for (int col = 0; col < ctxt->overlap; ++col) {
11011 const uint8_t m0 = mask1d[col];
11012 const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
11013 wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
11014 (tmp16[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
11015 mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
11016 }
11017 wsrc += bw;
11018 mask += bw;
11019 tmp16 += ctxt->tmp_stride;
11020 }
Rupert Swarbrickc0cea7f2017-08-22 14:06:56 +010011021 }
11022}
11023
Yaowu Xuf883b422016-08-30 14:01:10 -070011024// This function has a structure similar to av1_build_obmc_inter_prediction
Yaowu Xuc27fc142016-08-22 16:08:15 -070011025//
11026// The OBMC predictor is computed as:
11027//
11028// PObmc(x,y) =
Yaowu Xuf883b422016-08-30 14:01:10 -070011029// AOM_BLEND_A64(Mh(x),
11030// AOM_BLEND_A64(Mv(y), P(x,y), PAbove(x,y)),
Yaowu Xuc27fc142016-08-22 16:08:15 -070011031// PLeft(x, y))
11032//
Yaowu Xuf883b422016-08-30 14:01:10 -070011033// Scaling up by AOM_BLEND_A64_MAX_ALPHA ** 2 and omitting the intermediate
Yaowu Xuc27fc142016-08-22 16:08:15 -070011034// rounding, this can be written as:
11035//
Yaowu Xuf883b422016-08-30 14:01:10 -070011036// AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * Pobmc(x,y) =
Yaowu Xuc27fc142016-08-22 16:08:15 -070011037// Mh(x) * Mv(y) * P(x,y) +
11038// Mh(x) * Cv(y) * Pabove(x,y) +
Yaowu Xuf883b422016-08-30 14:01:10 -070011039// AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
Yaowu Xuc27fc142016-08-22 16:08:15 -070011040//
11041// Where :
11042//
Yaowu Xuf883b422016-08-30 14:01:10 -070011043// Cv(y) = AOM_BLEND_A64_MAX_ALPHA - Mv(y)
//  Ch(x) = AOM_BLEND_A64_MAX_ALPHA - Mh(x)
Yaowu Xuc27fc142016-08-22 16:08:15 -070011045//
11046// This function computes 'wsrc' and 'mask' as:
11047//
11048// wsrc(x, y) =
Yaowu Xuf883b422016-08-30 14:01:10 -070011049// AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * src(x, y) -
//      Mh(x) * Cv(y) * Pabove(x,y) -
//      AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
Yaowu Xuc27fc142016-08-22 16:08:15 -070011052//
11053// mask(x, y) = Mh(x) * Mv(y)
11054//
11055// These can then be used to efficiently approximate the error for any
11056// predictor P in the context of the provided neighbouring predictors by
11057// computing:
11058//
11059// error(x, y) =
Yaowu Xuf883b422016-08-30 14:01:10 -070011060// wsrc(x, y) - mask(x, y) * P(x, y) / (AOM_BLEND_A64_MAX_ALPHA ** 2)
Yaowu Xuc27fc142016-08-22 16:08:15 -070011061//
Yaowu Xuf883b422016-08-30 14:01:10 -070011062static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x,
Yaowu Xuc27fc142016-08-22 16:08:15 -070011063 const MACROBLOCKD *xd, int mi_row,
11064 int mi_col, const uint8_t *above,
11065 int above_stride, const uint8_t *left,
Yue Chene9638cc2016-10-10 12:37:54 -070011066 int left_stride) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070011067 const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
Jingning Hanff6ee6a2016-12-07 09:55:21 -080011068 const int bw = xd->n8_w << MI_SIZE_LOG2;
11069 const int bh = xd->n8_h << MI_SIZE_LOG2;
Yue Chene9638cc2016-10-10 12:37:54 -070011070 int32_t *mask_buf = x->mask_buf;
11071 int32_t *wsrc_buf = x->wsrc_buf;
Rupert Swarbrickc0cea7f2017-08-22 14:06:56 +010011072
Yaowu Xuf883b422016-08-30 14:01:10 -070011073 const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA;
Yaowu Xuc27fc142016-08-22 16:08:15 -070011074 const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -070011075
11076 // plane 0 should not be subsampled
11077 assert(xd->plane[0].subsampling_x == 0);
11078 assert(xd->plane[0].subsampling_y == 0);
11079
Yaowu Xuf883b422016-08-30 14:01:10 -070011080 av1_zero_array(wsrc_buf, bw * bh);
Rupert Swarbrickc0cea7f2017-08-22 14:06:56 +010011081 for (int i = 0; i < bw * bh; ++i) mask_buf[i] = AOM_BLEND_A64_MAX_ALPHA;
Yaowu Xuc27fc142016-08-22 16:08:15 -070011082
11083 // handle above row
11084 if (xd->up_available) {
Jingning Han440d4252017-07-24 14:07:34 -070011085 const int overlap =
Rupert Swarbrickc0cea7f2017-08-22 14:06:56 +010011086 AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
11087 struct calc_target_weighted_pred_ctxt ctxt = { x, above, above_stride,
11088 overlap };
11089 foreach_overlappable_nb_above(cm, (MACROBLOCKD *)xd, mi_col,
11090 max_neighbor_obmc[b_width_log2_lookup[bsize]],
11091 calc_target_weighted_pred_above, &ctxt);
Yaowu Xuc27fc142016-08-22 16:08:15 -070011092 }
11093
Rupert Swarbrickc0cea7f2017-08-22 14:06:56 +010011094 for (int i = 0; i < bw * bh; ++i) {
Yaowu Xuf883b422016-08-30 14:01:10 -070011095 wsrc_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
11096 mask_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
Yaowu Xuc27fc142016-08-22 16:08:15 -070011097 }
11098
11099 // handle left column
11100 if (xd->left_available) {
Jingning Han440d4252017-07-24 14:07:34 -070011101 const int overlap =
Rupert Swarbrickc0cea7f2017-08-22 14:06:56 +010011102 AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
11103 struct calc_target_weighted_pred_ctxt ctxt = { x, left, left_stride,
11104 overlap };
11105 foreach_overlappable_nb_left(cm, (MACROBLOCKD *)xd, mi_row,
11106 max_neighbor_obmc[b_height_log2_lookup[bsize]],
11107 calc_target_weighted_pred_left, &ctxt);
Yaowu Xuc27fc142016-08-22 16:08:15 -070011108 }
11109
11110 if (!is_hbd) {
11111 const uint8_t *src = x->plane[0].src.buf;
11112
Rupert Swarbrickc0cea7f2017-08-22 14:06:56 +010011113 for (int row = 0; row < bh; ++row) {
11114 for (int col = 0; col < bw; ++col) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070011115 wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
11116 }
Rupert Swarbrickc0cea7f2017-08-22 14:06:56 +010011117 wsrc_buf += bw;
Yaowu Xuc27fc142016-08-22 16:08:15 -070011118 src += x->plane[0].src.stride;
11119 }
Yaowu Xuc27fc142016-08-22 16:08:15 -070011120 } else {
11121 const uint16_t *src = CONVERT_TO_SHORTPTR(x->plane[0].src.buf);
11122
Rupert Swarbrickc0cea7f2017-08-22 14:06:56 +010011123 for (int row = 0; row < bh; ++row) {
11124 for (int col = 0; col < bw; ++col) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070011125 wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
11126 }
Rupert Swarbrickc0cea7f2017-08-22 14:06:56 +010011127 wsrc_buf += bw;
Yaowu Xuc27fc142016-08-22 16:08:15 -070011128 src += x->plane[0].src.stride;
11129 }
Yaowu Xuc27fc142016-08-22 16:08:15 -070011130 }
11131}