blob: 3ad9e392170131100bc93fc9776ff0134724341c [file] [log] [blame]
Yaowu Xuc27fc142016-08-22 16:08:15 -07001/*
Yaowu Xu2ab7ff02016-09-02 12:04:54 -07002 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
Yaowu Xuc27fc142016-08-22 16:08:15 -07003 *
Yaowu Xu2ab7ff02016-09-02 12:04:54 -07004 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
Yaowu Xuc27fc142016-08-22 16:08:15 -070010 */
11
12#include <assert.h>
13#include <math.h>
14
Yaowu Xuf883b422016-08-30 14:01:10 -070015#include "./aom_dsp_rtcd.h"
Jingning Han1aab8182016-06-03 11:09:06 -070016#include "./av1_rtcd.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070017
Yaowu Xuf883b422016-08-30 14:01:10 -070018#include "aom_dsp/aom_dsp_common.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070019#include "aom_dsp/blend.h"
Yaowu Xuf883b422016-08-30 14:01:10 -070020#include "aom_mem/aom_mem.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070021#include "aom_ports/mem.h"
22#include "aom_ports/system_state.h"
23
David Michael Barr5b2021e2017-08-17 18:12:39 +090024#if CONFIG_CFL
25#include "av1/common/cfl.h"
26#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -070027#include "av1/common/common.h"
28#include "av1/common/common_data.h"
29#include "av1/common/entropy.h"
30#include "av1/common/entropymode.h"
31#include "av1/common/idct.h"
32#include "av1/common/mvref_common.h"
Rupert Swarbrickc0cea7f2017-08-22 14:06:56 +010033#include "av1/common/obmc.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070034#include "av1/common/pred_common.h"
35#include "av1/common/quant_common.h"
36#include "av1/common/reconinter.h"
37#include "av1/common/reconintra.h"
38#include "av1/common/scan.h"
39#include "av1/common/seg_common.h"
Angie Chiang47e4b362017-03-24 11:25:10 -070040#if CONFIG_LV_MAP
41#include "av1/common/txb_common.h"
42#endif
Yue Chen69f18e12016-09-08 14:48:15 -070043#include "av1/common/warped_motion.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070044
Jingning Han1aab8182016-06-03 11:09:06 -070045#include "av1/encoder/aq_variance.h"
Tom Finegan17ce8b12017-02-08 12:46:31 -080046#include "av1/encoder/av1_quantize.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070047#include "av1/encoder/cost.h"
48#include "av1/encoder/encodemb.h"
49#include "av1/encoder/encodemv.h"
50#include "av1/encoder/encoder.h"
Angie Chiang47e4b362017-03-24 11:25:10 -070051#if CONFIG_LV_MAP
52#include "av1/encoder/encodetxb.h"
53#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -070054#include "av1/encoder/hybrid_fwd_txfm.h"
55#include "av1/encoder/mcomp.h"
56#include "av1/encoder/palette.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070057#include "av1/encoder/ratectrl.h"
58#include "av1/encoder/rd.h"
59#include "av1/encoder/rdopt.h"
Debargha Mukherjeeceebb702016-10-11 05:26:50 -070060#include "av1/encoder/tokenize.h"
Alexander Bokov0c7eb102017-09-07 18:49:00 -070061#include "av1/encoder/tx_prune_model_weights.h"
Yushin Chod0b77ac2017-10-20 17:33:16 -070062
Yaowu Xuc27fc142016-08-22 16:08:15 -070063#if CONFIG_DUAL_FILTER
Angie Chiang5678ad92016-11-21 09:38:40 -080064#define DUAL_FILTER_SET_SIZE (SWITCHABLE_FILTERS * SWITCHABLE_FILTERS)
Angie Chiangaadbb022017-06-01 16:08:03 -070065#if USE_EXTRA_FILTER
Angie Chiang5678ad92016-11-21 09:38:40 -080066static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = {
Angie Chiangd91ab372016-11-21 18:16:49 -080067 { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 }, { 1, 0 }, { 1, 1 },
68 { 1, 2 }, { 1, 3 }, { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 },
69 { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 },
Yaowu Xuc27fc142016-08-22 16:08:15 -070070};
Angie Chiangaadbb022017-06-01 16:08:03 -070071#else // USE_EXTRA_FILTER
72static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = {
73 { 0, 0 }, { 0, 1 }, { 0, 2 }, { 1, 0 }, { 1, 1 },
74 { 1, 2 }, { 2, 0 }, { 2, 1 }, { 2, 2 },
75};
76#endif // USE_EXTRA_FILTER
Angie Chiang5678ad92016-11-21 09:38:40 -080077#endif // CONFIG_DUAL_FILTER
Yaowu Xuc27fc142016-08-22 16:08:15 -070078
Zoe Liue9b15e22017-07-19 15:53:01 -070079#define LAST_FRAME_MODE_MASK \
80 ((1 << INTRA_FRAME) | (1 << LAST2_FRAME) | (1 << LAST3_FRAME) | \
81 (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF2_FRAME) | \
82 (1 << ALTREF_FRAME))
83#define LAST2_FRAME_MODE_MASK \
84 ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST3_FRAME) | \
85 (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF2_FRAME) | \
86 (1 << ALTREF_FRAME))
87#define LAST3_FRAME_MODE_MASK \
88 ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
89 (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF2_FRAME) | \
90 (1 << ALTREF_FRAME))
91#define GOLDEN_FRAME_MODE_MASK \
92 ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
93 (1 << LAST3_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF2_FRAME) | \
94 (1 << ALTREF_FRAME))
95#define BWDREF_FRAME_MODE_MASK \
96 ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
97 (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << ALTREF2_FRAME) | \
98 (1 << ALTREF_FRAME))
99#define ALTREF2_FRAME_MODE_MASK \
100 ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
101 (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | \
102 (1 << ALTREF_FRAME))
103#define ALTREF_FRAME_MODE_MASK \
104 ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) | \
105 (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | \
106 (1 << ALTREF2_FRAME))
107
Zoe Liuc082bbc2017-05-17 13:31:37 -0700108#if CONFIG_EXT_COMP_REFS
Zoe Liuac889702017-08-23 14:22:58 -0700109#define SECOND_REF_FRAME_MASK \
110 ((1 << ALTREF_FRAME) | (1 << ALTREF2_FRAME) | (1 << BWDREF_FRAME) | \
111 (1 << GOLDEN_FRAME) | (1 << LAST2_FRAME) | 0x01)
Zoe Liu3ac20932017-08-30 16:35:55 -0700112#else // !CONFIG_EXT_COMP_REFS
Zoe Liue9b15e22017-07-19 15:53:01 -0700113#define SECOND_REF_FRAME_MASK \
114 ((1 << ALTREF_FRAME) | (1 << ALTREF2_FRAME) | (1 << BWDREF_FRAME) | 0x01)
Zoe Liuc082bbc2017-05-17 13:31:37 -0700115#endif // CONFIG_EXT_COMP_REFS
Yaowu Xuc27fc142016-08-22 16:08:15 -0700116
117#define MIN_EARLY_TERM_INDEX 3
118#define NEW_MV_DISCOUNT_FACTOR 8
119
120#if CONFIG_EXT_INTRA
Yaowu Xuc27fc142016-08-22 16:08:15 -0700121#define ANGLE_SKIP_THRESH 10
122#define FILTER_FAST_SEARCH 1
123#endif // CONFIG_EXT_INTRA
124
James Zern67932792017-08-21 11:13:19 -0700125static const double ADST_FLIP_SVM[8] = {
126 /* vertical */
127 -6.6623, -2.8062, -3.2531, 3.1671,
128 /* horizontal */
129 -7.7051, -3.2234, -3.6193, 3.4533
130};
Yaowu Xuc27fc142016-08-22 16:08:15 -0700131
132typedef struct {
133 PREDICTION_MODE mode;
134 MV_REFERENCE_FRAME ref_frame[2];
135} MODE_DEFINITION;
136
137typedef struct { MV_REFERENCE_FRAME ref_frame[2]; } REF_DEFINITION;
138
139struct rdcost_block_args {
Yaowu Xuf883b422016-08-30 14:01:10 -0700140 const AV1_COMP *cpi;
Yaowu Xuc27fc142016-08-22 16:08:15 -0700141 MACROBLOCK *x;
142 ENTROPY_CONTEXT t_above[2 * MAX_MIB_SIZE];
143 ENTROPY_CONTEXT t_left[2 * MAX_MIB_SIZE];
Angie Chiang7c2b7f22016-11-07 16:00:00 -0800144 RD_STATS rd_stats;
Yaowu Xuc27fc142016-08-22 16:08:15 -0700145 int64_t this_rd;
146 int64_t best_rd;
147 int exit_early;
148 int use_fast_coef_costing;
Yaowu Xuc27fc142016-08-22 16:08:15 -0700149};
150
151#define LAST_NEW_MV_INDEX 6
Yaowu Xuf883b422016-08-30 14:01:10 -0700152static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
Emil Keyder01770b32017-01-20 18:03:11 -0500153 { NEARESTMV, { LAST_FRAME, NONE_FRAME } },
Emil Keyder01770b32017-01-20 18:03:11 -0500154 { NEARESTMV, { LAST2_FRAME, NONE_FRAME } },
155 { NEARESTMV, { LAST3_FRAME, NONE_FRAME } },
156 { NEARESTMV, { BWDREF_FRAME, NONE_FRAME } },
Zoe Liue9b15e22017-07-19 15:53:01 -0700157 { NEARESTMV, { ALTREF2_FRAME, NONE_FRAME } },
Emil Keyder01770b32017-01-20 18:03:11 -0500158 { NEARESTMV, { ALTREF_FRAME, NONE_FRAME } },
159 { NEARESTMV, { GOLDEN_FRAME, NONE_FRAME } },
Yaowu Xuc27fc142016-08-22 16:08:15 -0700160
Emil Keyder01770b32017-01-20 18:03:11 -0500161 { DC_PRED, { INTRA_FRAME, NONE_FRAME } },
Yaowu Xuc27fc142016-08-22 16:08:15 -0700162
Emil Keyder01770b32017-01-20 18:03:11 -0500163 { NEWMV, { LAST_FRAME, NONE_FRAME } },
Emil Keyder01770b32017-01-20 18:03:11 -0500164 { NEWMV, { LAST2_FRAME, NONE_FRAME } },
165 { NEWMV, { LAST3_FRAME, NONE_FRAME } },
166 { NEWMV, { BWDREF_FRAME, NONE_FRAME } },
Zoe Liue9b15e22017-07-19 15:53:01 -0700167 { NEWMV, { ALTREF2_FRAME, NONE_FRAME } },
Emil Keyder01770b32017-01-20 18:03:11 -0500168 { NEWMV, { ALTREF_FRAME, NONE_FRAME } },
169 { NEWMV, { GOLDEN_FRAME, NONE_FRAME } },
Yaowu Xuc27fc142016-08-22 16:08:15 -0700170
Emil Keyder01770b32017-01-20 18:03:11 -0500171 { NEARMV, { LAST_FRAME, NONE_FRAME } },
Emil Keyder01770b32017-01-20 18:03:11 -0500172 { NEARMV, { LAST2_FRAME, NONE_FRAME } },
173 { NEARMV, { LAST3_FRAME, NONE_FRAME } },
174 { NEARMV, { BWDREF_FRAME, NONE_FRAME } },
Zoe Liue9b15e22017-07-19 15:53:01 -0700175 { NEARMV, { ALTREF2_FRAME, NONE_FRAME } },
Emil Keyder01770b32017-01-20 18:03:11 -0500176 { NEARMV, { ALTREF_FRAME, NONE_FRAME } },
177 { NEARMV, { GOLDEN_FRAME, NONE_FRAME } },
Yaowu Xuc27fc142016-08-22 16:08:15 -0700178
Sarah Parker2b9ec2e2017-10-30 17:34:08 -0700179 { GLOBALMV, { LAST_FRAME, NONE_FRAME } },
180 { GLOBALMV, { LAST2_FRAME, NONE_FRAME } },
181 { GLOBALMV, { LAST3_FRAME, NONE_FRAME } },
182 { GLOBALMV, { BWDREF_FRAME, NONE_FRAME } },
183 { GLOBALMV, { ALTREF2_FRAME, NONE_FRAME } },
184 { GLOBALMV, { GOLDEN_FRAME, NONE_FRAME } },
185 { GLOBALMV, { ALTREF_FRAME, NONE_FRAME } },
Yaowu Xuc27fc142016-08-22 16:08:15 -0700186
Sebastien Alaiwan34d55662017-11-15 09:36:03 +0100187 // TODO(zoeliu): May need to reconsider the order on the modes to check
Zoe Liu85b66462017-04-20 14:28:19 -0700188
Yaowu Xuc27fc142016-08-22 16:08:15 -0700189 { NEAREST_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
Yaowu Xuc27fc142016-08-22 16:08:15 -0700190 { NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
191 { NEAREST_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
Yaowu Xuc27fc142016-08-22 16:08:15 -0700192 { NEAREST_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
Yaowu Xuc27fc142016-08-22 16:08:15 -0700193 { NEAREST_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
194 { NEAREST_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
195 { NEAREST_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
196 { NEAREST_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
Zoe Liue9b15e22017-07-19 15:53:01 -0700197 { NEAREST_NEARESTMV, { LAST_FRAME, ALTREF2_FRAME } },
198 { NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF2_FRAME } },
199 { NEAREST_NEARESTMV, { LAST3_FRAME, ALTREF2_FRAME } },
200 { NEAREST_NEARESTMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
Zoe Liuc082bbc2017-05-17 13:31:37 -0700201
202#if CONFIG_EXT_COMP_REFS
203 { NEAREST_NEARESTMV, { LAST_FRAME, LAST2_FRAME } },
Zoe Liufcf5fa22017-06-26 16:00:38 -0700204 { NEAREST_NEARESTMV, { LAST_FRAME, LAST3_FRAME } },
Zoe Liuc082bbc2017-05-17 13:31:37 -0700205 { NEAREST_NEARESTMV, { LAST_FRAME, GOLDEN_FRAME } },
206 { NEAREST_NEARESTMV, { BWDREF_FRAME, ALTREF_FRAME } },
207#endif // CONFIG_EXT_COMP_REFS
Yaowu Xuc27fc142016-08-22 16:08:15 -0700208
Urvang Joshi96d1c0a2017-10-10 13:15:32 -0700209 { PAETH_PRED, { INTRA_FRAME, NONE_FRAME } },
Yaowu Xuc27fc142016-08-22 16:08:15 -0700210
Emil Keyder01770b32017-01-20 18:03:11 -0500211 { SMOOTH_PRED, { INTRA_FRAME, NONE_FRAME } },
Urvang Joshie6ca8e82017-03-15 14:57:41 -0700212 { SMOOTH_V_PRED, { INTRA_FRAME, NONE_FRAME } },
213 { SMOOTH_H_PRED, { INTRA_FRAME, NONE_FRAME } },
Urvang Joshi6be4a542016-11-03 15:24:05 -0700214
Yaowu Xuc27fc142016-08-22 16:08:15 -0700215 { NEAR_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
216 { NEW_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
217 { NEAREST_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
218 { NEW_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
219 { NEAR_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
220 { NEW_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
Sarah Parker2b9ec2e2017-10-30 17:34:08 -0700221 { GLOBAL_GLOBALMV, { LAST_FRAME, ALTREF_FRAME } },
Yaowu Xuc27fc142016-08-22 16:08:15 -0700222
Yaowu Xuc27fc142016-08-22 16:08:15 -0700223 { NEAR_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
224 { NEW_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
225 { NEAREST_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
226 { NEW_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
227 { NEAR_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
228 { NEW_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
Sarah Parker2b9ec2e2017-10-30 17:34:08 -0700229 { GLOBAL_GLOBALMV, { LAST2_FRAME, ALTREF_FRAME } },
Yaowu Xuc27fc142016-08-22 16:08:15 -0700230
Yaowu Xuc27fc142016-08-22 16:08:15 -0700231 { NEAR_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
232 { NEW_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
233 { NEAREST_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
234 { NEW_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
235 { NEAR_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
236 { NEW_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
Sarah Parker2b9ec2e2017-10-30 17:34:08 -0700237 { GLOBAL_GLOBALMV, { LAST3_FRAME, ALTREF_FRAME } },
Yaowu Xuc27fc142016-08-22 16:08:15 -0700238
Yaowu Xuc27fc142016-08-22 16:08:15 -0700239 { NEAR_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
240 { NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
241 { NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
242 { NEW_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
243 { NEAR_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
244 { NEW_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
Sarah Parker2b9ec2e2017-10-30 17:34:08 -0700245 { GLOBAL_GLOBALMV, { GOLDEN_FRAME, ALTREF_FRAME } },
Yaowu Xuc27fc142016-08-22 16:08:15 -0700246
Yaowu Xuc27fc142016-08-22 16:08:15 -0700247 { NEAR_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
248 { NEW_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
249 { NEAREST_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
250 { NEW_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
251 { NEAR_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
252 { NEW_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
Sarah Parker2b9ec2e2017-10-30 17:34:08 -0700253 { GLOBAL_GLOBALMV, { LAST_FRAME, BWDREF_FRAME } },
Yaowu Xuc27fc142016-08-22 16:08:15 -0700254
Yaowu Xuc27fc142016-08-22 16:08:15 -0700255 { NEAR_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
256 { NEW_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
257 { NEAREST_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
258 { NEW_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
259 { NEAR_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
260 { NEW_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
Sarah Parker2b9ec2e2017-10-30 17:34:08 -0700261 { GLOBAL_GLOBALMV, { LAST2_FRAME, BWDREF_FRAME } },
Yaowu Xuc27fc142016-08-22 16:08:15 -0700262
Yaowu Xuc27fc142016-08-22 16:08:15 -0700263 { NEAR_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
264 { NEW_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
265 { NEAREST_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
266 { NEW_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
267 { NEAR_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
268 { NEW_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
Sarah Parker2b9ec2e2017-10-30 17:34:08 -0700269 { GLOBAL_GLOBALMV, { LAST3_FRAME, BWDREF_FRAME } },
Yaowu Xuc27fc142016-08-22 16:08:15 -0700270
Yaowu Xuc27fc142016-08-22 16:08:15 -0700271 { NEAR_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
272 { NEW_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
273 { NEAREST_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
274 { NEW_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
275 { NEAR_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
276 { NEW_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
Sarah Parker2b9ec2e2017-10-30 17:34:08 -0700277 { GLOBAL_GLOBALMV, { GOLDEN_FRAME, BWDREF_FRAME } },
Zoe Liuc082bbc2017-05-17 13:31:37 -0700278
Zoe Liue9b15e22017-07-19 15:53:01 -0700279 { NEAR_NEARMV, { LAST_FRAME, ALTREF2_FRAME } },
280 { NEW_NEARESTMV, { LAST_FRAME, ALTREF2_FRAME } },
281 { NEAREST_NEWMV, { LAST_FRAME, ALTREF2_FRAME } },
282 { NEW_NEARMV, { LAST_FRAME, ALTREF2_FRAME } },
283 { NEAR_NEWMV, { LAST_FRAME, ALTREF2_FRAME } },
284 { NEW_NEWMV, { LAST_FRAME, ALTREF2_FRAME } },
Sarah Parker2b9ec2e2017-10-30 17:34:08 -0700285 { GLOBAL_GLOBALMV, { LAST_FRAME, ALTREF2_FRAME } },
Zoe Liue9b15e22017-07-19 15:53:01 -0700286
287 { NEAR_NEARMV, { LAST2_FRAME, ALTREF2_FRAME } },
288 { NEW_NEARESTMV, { LAST2_FRAME, ALTREF2_FRAME } },
289 { NEAREST_NEWMV, { LAST2_FRAME, ALTREF2_FRAME } },
290 { NEW_NEARMV, { LAST2_FRAME, ALTREF2_FRAME } },
291 { NEAR_NEWMV, { LAST2_FRAME, ALTREF2_FRAME } },
292 { NEW_NEWMV, { LAST2_FRAME, ALTREF2_FRAME } },
Sarah Parker2b9ec2e2017-10-30 17:34:08 -0700293 { GLOBAL_GLOBALMV, { LAST2_FRAME, ALTREF2_FRAME } },
Zoe Liue9b15e22017-07-19 15:53:01 -0700294
295 { NEAR_NEARMV, { LAST3_FRAME, ALTREF2_FRAME } },
296 { NEW_NEARESTMV, { LAST3_FRAME, ALTREF2_FRAME } },
297 { NEAREST_NEWMV, { LAST3_FRAME, ALTREF2_FRAME } },
298 { NEW_NEARMV, { LAST3_FRAME, ALTREF2_FRAME } },
299 { NEAR_NEWMV, { LAST3_FRAME, ALTREF2_FRAME } },
300 { NEW_NEWMV, { LAST3_FRAME, ALTREF2_FRAME } },
Sarah Parker2b9ec2e2017-10-30 17:34:08 -0700301 { GLOBAL_GLOBALMV, { LAST3_FRAME, ALTREF2_FRAME } },
Zoe Liue9b15e22017-07-19 15:53:01 -0700302
303 { NEAR_NEARMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
304 { NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
305 { NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
306 { NEW_NEARMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
307 { NEAR_NEWMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
308 { NEW_NEWMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
Sarah Parker2b9ec2e2017-10-30 17:34:08 -0700309 { GLOBAL_GLOBALMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
Zoe Liue9b15e22017-07-19 15:53:01 -0700310
Emil Keyder01770b32017-01-20 18:03:11 -0500311 { H_PRED, { INTRA_FRAME, NONE_FRAME } },
312 { V_PRED, { INTRA_FRAME, NONE_FRAME } },
313 { D135_PRED, { INTRA_FRAME, NONE_FRAME } },
314 { D207_PRED, { INTRA_FRAME, NONE_FRAME } },
315 { D153_PRED, { INTRA_FRAME, NONE_FRAME } },
316 { D63_PRED, { INTRA_FRAME, NONE_FRAME } },
317 { D117_PRED, { INTRA_FRAME, NONE_FRAME } },
318 { D45_PRED, { INTRA_FRAME, NONE_FRAME } },
Yaowu Xuc27fc142016-08-22 16:08:15 -0700319
Sarah Parker2b9ec2e2017-10-30 17:34:08 -0700320 { GLOBALMV, { LAST_FRAME, INTRA_FRAME } },
Yaowu Xuc27fc142016-08-22 16:08:15 -0700321 { NEARESTMV, { LAST_FRAME, INTRA_FRAME } },
322 { NEARMV, { LAST_FRAME, INTRA_FRAME } },
323 { NEWMV, { LAST_FRAME, INTRA_FRAME } },
324
Sarah Parker2b9ec2e2017-10-30 17:34:08 -0700325 { GLOBALMV, { LAST2_FRAME, INTRA_FRAME } },
Yaowu Xuc27fc142016-08-22 16:08:15 -0700326 { NEARESTMV, { LAST2_FRAME, INTRA_FRAME } },
327 { NEARMV, { LAST2_FRAME, INTRA_FRAME } },
328 { NEWMV, { LAST2_FRAME, INTRA_FRAME } },
329
Sarah Parker2b9ec2e2017-10-30 17:34:08 -0700330 { GLOBALMV, { LAST3_FRAME, INTRA_FRAME } },
Yaowu Xuc27fc142016-08-22 16:08:15 -0700331 { NEARESTMV, { LAST3_FRAME, INTRA_FRAME } },
332 { NEARMV, { LAST3_FRAME, INTRA_FRAME } },
333 { NEWMV, { LAST3_FRAME, INTRA_FRAME } },
Yaowu Xuc27fc142016-08-22 16:08:15 -0700334
Sarah Parker2b9ec2e2017-10-30 17:34:08 -0700335 { GLOBALMV, { GOLDEN_FRAME, INTRA_FRAME } },
Yaowu Xuc27fc142016-08-22 16:08:15 -0700336 { NEARESTMV, { GOLDEN_FRAME, INTRA_FRAME } },
337 { NEARMV, { GOLDEN_FRAME, INTRA_FRAME } },
338 { NEWMV, { GOLDEN_FRAME, INTRA_FRAME } },
339
Sarah Parker2b9ec2e2017-10-30 17:34:08 -0700340 { GLOBALMV, { BWDREF_FRAME, INTRA_FRAME } },
Yaowu Xuc27fc142016-08-22 16:08:15 -0700341 { NEARESTMV, { BWDREF_FRAME, INTRA_FRAME } },
342 { NEARMV, { BWDREF_FRAME, INTRA_FRAME } },
343 { NEWMV, { BWDREF_FRAME, INTRA_FRAME } },
Zoe Liue9b15e22017-07-19 15:53:01 -0700344
Sarah Parker2b9ec2e2017-10-30 17:34:08 -0700345 { GLOBALMV, { ALTREF2_FRAME, INTRA_FRAME } },
Zoe Liue9b15e22017-07-19 15:53:01 -0700346 { NEARESTMV, { ALTREF2_FRAME, INTRA_FRAME } },
347 { NEARMV, { ALTREF2_FRAME, INTRA_FRAME } },
348 { NEWMV, { ALTREF2_FRAME, INTRA_FRAME } },
Yaowu Xuc27fc142016-08-22 16:08:15 -0700349
Sarah Parker2b9ec2e2017-10-30 17:34:08 -0700350 { GLOBALMV, { ALTREF_FRAME, INTRA_FRAME } },
Yaowu Xuc27fc142016-08-22 16:08:15 -0700351 { NEARESTMV, { ALTREF_FRAME, INTRA_FRAME } },
352 { NEARMV, { ALTREF_FRAME, INTRA_FRAME } },
353 { NEWMV, { ALTREF_FRAME, INTRA_FRAME } },
Debargha Mukherjeec1077e92017-11-06 20:17:33 -0800354
355#if CONFIG_EXT_COMP_REFS
356 { NEAR_NEARMV, { LAST_FRAME, LAST2_FRAME } },
357 { NEW_NEARESTMV, { LAST_FRAME, LAST2_FRAME } },
358 { NEAREST_NEWMV, { LAST_FRAME, LAST2_FRAME } },
359 { NEW_NEARMV, { LAST_FRAME, LAST2_FRAME } },
360 { NEAR_NEWMV, { LAST_FRAME, LAST2_FRAME } },
361 { NEW_NEWMV, { LAST_FRAME, LAST2_FRAME } },
362 { GLOBAL_GLOBALMV, { LAST_FRAME, LAST2_FRAME } },
363
364 { NEAR_NEARMV, { LAST_FRAME, LAST3_FRAME } },
365 { NEW_NEARESTMV, { LAST_FRAME, LAST3_FRAME } },
366 { NEAREST_NEWMV, { LAST_FRAME, LAST3_FRAME } },
367 { NEW_NEARMV, { LAST_FRAME, LAST3_FRAME } },
368 { NEAR_NEWMV, { LAST_FRAME, LAST3_FRAME } },
369 { NEW_NEWMV, { LAST_FRAME, LAST3_FRAME } },
370 { GLOBAL_GLOBALMV, { LAST_FRAME, LAST3_FRAME } },
371
372 { NEAR_NEARMV, { LAST_FRAME, GOLDEN_FRAME } },
373 { NEW_NEARESTMV, { LAST_FRAME, GOLDEN_FRAME } },
374 { NEAREST_NEWMV, { LAST_FRAME, GOLDEN_FRAME } },
375 { NEW_NEARMV, { LAST_FRAME, GOLDEN_FRAME } },
376 { NEAR_NEWMV, { LAST_FRAME, GOLDEN_FRAME } },
377 { NEW_NEWMV, { LAST_FRAME, GOLDEN_FRAME } },
378 { GLOBAL_GLOBALMV, { LAST_FRAME, GOLDEN_FRAME } },
379
380 { NEAR_NEARMV, { BWDREF_FRAME, ALTREF_FRAME } },
381 { NEW_NEARESTMV, { BWDREF_FRAME, ALTREF_FRAME } },
382 { NEAREST_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } },
383 { NEW_NEARMV, { BWDREF_FRAME, ALTREF_FRAME } },
384 { NEAR_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } },
385 { NEW_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } },
386 { GLOBAL_GLOBALMV, { BWDREF_FRAME, ALTREF_FRAME } },
387#endif // CONFIG_EXT_COMP_REFS
Yaowu Xuc27fc142016-08-22 16:08:15 -0700388};
389
hui su8a516a82017-07-06 10:00:36 -0700390static const PREDICTION_MODE intra_rd_search_mode_order[INTRA_MODES] = {
Urvang Joshi96d1c0a2017-10-10 13:15:32 -0700391 DC_PRED, H_PRED, V_PRED, SMOOTH_PRED, PAETH_PRED,
Urvang Joshib7301cd2017-11-09 15:08:56 -0800392 SMOOTH_V_PRED, SMOOTH_H_PRED, D135_PRED, D207_PRED, D153_PRED,
393 D63_PRED, D117_PRED, D45_PRED,
hui su8a516a82017-07-06 10:00:36 -0700394};
395
Luc Trudeaud6d9eee2017-07-12 12:36:50 -0400396#if CONFIG_CFL
397static const UV_PREDICTION_MODE uv_rd_search_mode_order[UV_INTRA_MODES] = {
Urvang Joshib7301cd2017-11-09 15:08:56 -0800398 UV_DC_PRED, UV_CFL_PRED, UV_H_PRED, UV_V_PRED,
399 UV_SMOOTH_PRED, UV_PAETH_PRED, UV_SMOOTH_V_PRED, UV_SMOOTH_H_PRED,
400 UV_D135_PRED, UV_D207_PRED, UV_D153_PRED, UV_D63_PRED,
401 UV_D117_PRED, UV_D45_PRED,
Luc Trudeaud6d9eee2017-07-12 12:36:50 -0400402};
403#else
404#define uv_rd_search_mode_order intra_rd_search_mode_order
405#endif // CONFIG_CFL
406
Yaowu Xuc27fc142016-08-22 16:08:15 -0700407static INLINE int write_uniform_cost(int n, int v) {
hui su37499292017-04-26 09:49:53 -0700408 const int l = get_unsigned_bits(n);
409 const int m = (1 << l) - n;
Yaowu Xuc27fc142016-08-22 16:08:15 -0700410 if (l == 0) return 0;
411 if (v < m)
Yaowu Xuf883b422016-08-30 14:01:10 -0700412 return (l - 1) * av1_cost_bit(128, 0);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700413 else
Yaowu Xuf883b422016-08-30 14:01:10 -0700414 return l * av1_cost_bit(128, 0);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700415}
416
417// constants for prune 1 and prune 2 decision boundaries
418#define FAST_EXT_TX_CORR_MID 0.0
419#define FAST_EXT_TX_EDST_MID 0.1
420#define FAST_EXT_TX_CORR_MARGIN 0.5
421#define FAST_EXT_TX_EDST_MARGIN 0.3
422
Debargha Mukherjee51666862017-10-24 14:29:13 -0700423int inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_stats,
424 BLOCK_SIZE bsize, int64_t ref_best_rd, int fast);
425int inter_block_uvrd(const AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_stats,
426 BLOCK_SIZE bsize, int64_t ref_best_rd, int fast);
427
Yushin Cho2f025aa2017-09-28 17:39:21 -0700428static unsigned pixel_dist_visible_only(
429 const AV1_COMP *const cpi, const MACROBLOCK *x, const uint8_t *src,
430 const int src_stride, const uint8_t *dst, const int dst_stride,
431 const BLOCK_SIZE tx_bsize, int txb_rows, int txb_cols, int visible_rows,
432 int visible_cols) {
433 unsigned sse;
434
Debargha Mukherjee35a4db32017-11-14 11:58:16 -0800435 if (txb_rows == visible_rows && txb_cols == visible_cols) {
Yushin Cho2f025aa2017-09-28 17:39:21 -0700436 cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse);
437 return sse;
438 }
439#if CONFIG_HIGHBITDEPTH
440 const MACROBLOCKD *xd = &x->e_mbd;
441
442 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
443 uint64_t sse64 = aom_highbd_sse_odd_size(src, src_stride, dst, dst_stride,
444 visible_cols, visible_rows);
445 return (unsigned int)ROUND_POWER_OF_TWO(sse64, (xd->bd - 8) * 2);
446 }
447#else
448 (void)x;
449#endif // CONFIG_HIGHBITDEPTH
450 sse = aom_sse_odd_size(src, src_stride, dst, dst_stride, visible_cols,
451 visible_rows);
452 return sse;
453}
454
Yushin Choe30a47c2017-08-15 13:08:30 -0700455#if CONFIG_DIST_8X8
Yushin Choc49177e2017-07-18 17:18:09 -0700456static uint64_t cdef_dist_8x8_16bit(uint16_t *dst, int dstride, uint16_t *src,
457 int sstride, int coeff_shift) {
458 uint64_t svar = 0;
459 uint64_t dvar = 0;
460 uint64_t sum_s = 0;
461 uint64_t sum_d = 0;
462 uint64_t sum_s2 = 0;
463 uint64_t sum_d2 = 0;
464 uint64_t sum_sd = 0;
465 uint64_t dist = 0;
466
467 int i, j;
468 for (i = 0; i < 8; i++) {
469 for (j = 0; j < 8; j++) {
470 sum_s += src[i * sstride + j];
471 sum_d += dst[i * dstride + j];
472 sum_s2 += src[i * sstride + j] * src[i * sstride + j];
473 sum_d2 += dst[i * dstride + j] * dst[i * dstride + j];
474 sum_sd += src[i * sstride + j] * dst[i * dstride + j];
475 }
476 }
477 /* Compute the variance -- the calculation cannot go negative. */
478 svar = sum_s2 - ((sum_s * sum_s + 32) >> 6);
479 dvar = sum_d2 - ((sum_d * sum_d + 32) >> 6);
480
481 // Tuning of jm's original dering distortion metric used in CDEF tool,
482 // suggested by jm
483 const uint64_t a = 4;
484 const uint64_t b = 2;
485 const uint64_t c1 = (400 * a << 2 * coeff_shift);
486 const uint64_t c2 = (b * 20000 * a * a << 4 * coeff_shift);
487
488 dist =
489 (uint64_t)floor(.5 +
490 (sum_d2 + sum_s2 - 2 * sum_sd) * .5 * (svar + dvar + c1) /
491 (sqrt(svar * (double)dvar + c2)));
492
493 // Calibrate dist to have similar rate for the same QP with MSE only
494 // distortion (as in master branch)
495 dist = (uint64_t)((float)dist * 0.75);
496
497 return dist;
498}
Yushin Choc49177e2017-07-18 17:18:09 -0700499
/* Integer variance of a 4x4 block of 16-bit pixels at the given stride:
   (sum of squares - rounded-down mean energy) >> 4.
   Accumulation is done in 64 bits so that full-range 16-bit inputs cannot
   overflow the (promoted) 32-bit signed products, which would be undefined
   behavior. */
static int od_compute_var_4x4(uint16_t *x, int stride) {
  int64_t sum = 0;
  int64_t s2 = 0;
  int i;
  for (i = 0; i < 4; i++) {
    int j;
    for (j = 0; j < 4; j++) {
      const int64_t t = x[i * stride + j];
      sum += t;
      s2 += t * t;
    }
  }
  return (int)((s2 - (sum * sum >> 4)) >> 4);
}
519
Jean-Marc Valin79c0f322017-01-18 01:58:33 -0500520/* OD_DIST_LP_MID controls the frequency weighting filter used for computing
521 the distortion. For a value X, the filter is [1 X 1]/(X + 2) and
522 is applied both horizontally and vertically. For X=5, the filter is
523 a good approximation for the OD_QM8_Q4_HVS quantization matrix. */
524#define OD_DIST_LP_MID (5)
525#define OD_DIST_LP_NORM (OD_DIST_LP_MID + 2)
526
/* Perceptual distortion of one 8x8 sub-block for the DIST_8X8 metric.
   x, y: source and reconstructed 8x8 pixel blocks (16-bit, given stride).
   e_lp: low-pass filtered error for the same block (computed by the
         caller, see od_compute_dist_common()).
   Combines the filtered-error energy with a variance-mismatch term
   ("vardist") and scales the result by an activity factor derived from the
   local variance. */
static double od_compute_dist_8x8(int use_activity_masking, uint16_t *x,
                                  uint16_t *y, od_coeff *e_lp, int stride) {
  double sum;
  int min_var;
  double mean_var;
  double var_stat;
  double activity;
  double calibration;
  int i;
  int j;
  double vardist;

  vardist = 0;

#if 1
  min_var = INT_MAX;
  mean_var = 0;
  /* Scan the nine overlapping 4x4 sub-blocks (offsets 0/2/4 in each
     dimension) to gather variance statistics for both blocks. */
  for (i = 0; i < 3; i++) {
    for (j = 0; j < 3; j++) {
      int varx;
      int vary;
      varx = od_compute_var_4x4(x + 2 * i * stride + 2 * j, stride);
      vary = od_compute_var_4x4(y + 2 * i * stride + 2 * j, stride);
      min_var = OD_MINI(min_var, varx);
      mean_var += 1. / (1 + varx);
      /* The cast to (double) is to avoid an overflow before the sqrt.*/
      vardist += varx - 2 * sqrt(varx * (double)vary) + vary;
    }
  }
  /* We use a different variance statistic depending on whether activity
     masking is used, since the harmonic mean appeared slightly worse with
     masking off. The calibration constant just ensures that we preserve the
     rate compared to activity=1. */
  if (use_activity_masking) {
    calibration = 1.95;
    var_stat = 9. / mean_var;
  } else {
    calibration = 1.62;
    var_stat = min_var;
  }
  /* 1.62 is a calibration constant, 0.25 is a noise floor and 1/6 is the
     activity masking constant. */
  activity = calibration * pow(.25 + var_stat, -1. / 6);
#else
  activity = 1;
#endif  // 1
  /* Energy of the low-pass filtered error. */
  sum = 0;
  for (i = 0; i < 8; i++) {
    for (j = 0; j < 8; j++)
      sum += e_lp[i * stride + j] * (double)e_lp[i * stride + j];
  }
  /* Normalize the filter to unit DC response. */
  sum *= 1. / (OD_DIST_LP_NORM * OD_DIST_LP_NORM * OD_DIST_LP_NORM *
               OD_DIST_LP_NORM);
  return activity * activity * (sum + vardist);
}
583
584// Note : Inputs x and y are in a pixel domain
Yushin Chob7b60c52017-07-14 16:18:52 -0700585static double od_compute_dist_common(int activity_masking, uint16_t *x,
586 uint16_t *y, int bsize_w, int bsize_h,
Yushin Cho75b01002017-06-21 13:43:57 -0700587 int qindex, od_coeff *tmp,
588 od_coeff *e_lp) {
589 int i, j;
590 double sum = 0;
591 const int mid = OD_DIST_LP_MID;
592
593 for (j = 0; j < bsize_w; j++) {
594 e_lp[j] = mid * tmp[j] + 2 * tmp[bsize_w + j];
595 e_lp[(bsize_h - 1) * bsize_w + j] = mid * tmp[(bsize_h - 1) * bsize_w + j] +
596 2 * tmp[(bsize_h - 2) * bsize_w + j];
597 }
598 for (i = 1; i < bsize_h - 1; i++) {
599 for (j = 0; j < bsize_w; j++) {
600 e_lp[i * bsize_w + j] = mid * tmp[i * bsize_w + j] +
601 tmp[(i - 1) * bsize_w + j] +
602 tmp[(i + 1) * bsize_w + j];
603 }
604 }
605 for (i = 0; i < bsize_h; i += 8) {
606 for (j = 0; j < bsize_w; j += 8) {
Yushin Chob7b60c52017-07-14 16:18:52 -0700607 sum += od_compute_dist_8x8(activity_masking, &x[i * bsize_w + j],
Yushin Cho75b01002017-06-21 13:43:57 -0700608 &y[i * bsize_w + j], &e_lp[i * bsize_w + j],
609 bsize_w);
610 }
611 }
612 /* Scale according to linear regression against SSE, for 8x8 blocks. */
613 if (activity_masking) {
614 sum *= 2.2 + (1.7 - 2.2) * (qindex - 99) / (210 - 99) +
615 (qindex < 99 ? 2.5 * (qindex - 99) / 99 * (qindex - 99) / 99 : 0);
616 } else {
617 sum *= qindex >= 128
618 ? 1.4 + (0.9 - 1.4) * (qindex - 128) / (209 - 128)
619 : qindex <= 43 ? 1.5 + (2.0 - 1.5) * (qindex - 43) / (16 - 43)
620 : 1.5 + (1.4 - 1.5) * (qindex - 43) / (128 - 43);
621 }
622
623 return sum;
624}
625
Yushin Chob7b60c52017-07-14 16:18:52 -0700626static double od_compute_dist(uint16_t *x, uint16_t *y, int bsize_w,
627 int bsize_h, int qindex) {
Yushin Cho7a428ba2017-01-12 16:28:49 -0800628 assert(bsize_w >= 8 && bsize_h >= 8);
Yushin Chod0b77ac2017-10-20 17:33:16 -0700629
Yushin Chob7b60c52017-07-14 16:18:52 -0700630 int activity_masking = 0;
Yushin Chod0b77ac2017-10-20 17:33:16 -0700631
Luc Trudeau1f7c4112017-09-13 15:10:08 -0400632 int i, j;
633 DECLARE_ALIGNED(16, od_coeff, e[MAX_TX_SQUARE]);
634 DECLARE_ALIGNED(16, od_coeff, tmp[MAX_TX_SQUARE]);
635 DECLARE_ALIGNED(16, od_coeff, e_lp[MAX_TX_SQUARE]);
636 for (i = 0; i < bsize_h; i++) {
637 for (j = 0; j < bsize_w; j++) {
638 e[i * bsize_w + j] = x[i * bsize_w + j] - y[i * bsize_w + j];
Jean-Marc Valin79c0f322017-01-18 01:58:33 -0500639 }
Yushin Cho75b01002017-06-21 13:43:57 -0700640 }
Luc Trudeau1f7c4112017-09-13 15:10:08 -0400641 int mid = OD_DIST_LP_MID;
642 for (i = 0; i < bsize_h; i++) {
643 tmp[i * bsize_w] = mid * e[i * bsize_w] + 2 * e[i * bsize_w + 1];
644 tmp[i * bsize_w + bsize_w - 1] =
645 mid * e[i * bsize_w + bsize_w - 1] + 2 * e[i * bsize_w + bsize_w - 2];
646 for (j = 1; j < bsize_w - 1; j++) {
647 tmp[i * bsize_w + j] = mid * e[i * bsize_w + j] + e[i * bsize_w + j - 1] +
648 e[i * bsize_w + j + 1];
649 }
650 }
651 return od_compute_dist_common(activity_masking, x, y, bsize_w, bsize_h,
652 qindex, tmp, e_lp);
Yushin Cho75b01002017-06-21 13:43:57 -0700653}
654
Yushin Chob7b60c52017-07-14 16:18:52 -0700655static double od_compute_dist_diff(uint16_t *x, int16_t *e, int bsize_w,
656 int bsize_h, int qindex) {
Yushin Cho75b01002017-06-21 13:43:57 -0700657 assert(bsize_w >= 8 && bsize_h >= 8);
Yushin Chod0b77ac2017-10-20 17:33:16 -0700658
Yushin Chob7b60c52017-07-14 16:18:52 -0700659 int activity_masking = 0;
Yushin Chod0b77ac2017-10-20 17:33:16 -0700660
Luc Trudeau1f7c4112017-09-13 15:10:08 -0400661 DECLARE_ALIGNED(16, uint16_t, y[MAX_TX_SQUARE]);
662 DECLARE_ALIGNED(16, od_coeff, tmp[MAX_TX_SQUARE]);
663 DECLARE_ALIGNED(16, od_coeff, e_lp[MAX_TX_SQUARE]);
664 int i, j;
665 for (i = 0; i < bsize_h; i++) {
666 for (j = 0; j < bsize_w; j++) {
667 y[i * bsize_w + j] = x[i * bsize_w + j] - e[i * bsize_w + j];
Jean-Marc Valin79c0f322017-01-18 01:58:33 -0500668 }
Yushin Cho7a428ba2017-01-12 16:28:49 -0800669 }
Luc Trudeau1f7c4112017-09-13 15:10:08 -0400670 int mid = OD_DIST_LP_MID;
671 for (i = 0; i < bsize_h; i++) {
672 tmp[i * bsize_w] = mid * e[i * bsize_w] + 2 * e[i * bsize_w + 1];
673 tmp[i * bsize_w + bsize_w - 1] =
674 mid * e[i * bsize_w + bsize_w - 1] + 2 * e[i * bsize_w + bsize_w - 2];
675 for (j = 1; j < bsize_w - 1; j++) {
676 tmp[i * bsize_w + j] = mid * e[i * bsize_w + j] + e[i * bsize_w + j - 1] +
677 e[i * bsize_w + j + 1];
678 }
679 }
680 return od_compute_dist_common(activity_masking, x, y, bsize_w, bsize_h,
681 qindex, tmp, e_lp);
Yushin Cho7a428ba2017-01-12 16:28:49 -0800682}
683
Yushin Choe30a47c2017-08-15 13:08:30 -0700684int64_t av1_dist_8x8(const AV1_COMP *const cpi, const MACROBLOCK *x,
Yushin Chob7b60c52017-07-14 16:18:52 -0700685 const uint8_t *src, int src_stride, const uint8_t *dst,
686 int dst_stride, const BLOCK_SIZE tx_bsize, int bsw,
687 int bsh, int visible_w, int visible_h, int qindex) {
688 int64_t d = 0;
Yushin Cho7a428ba2017-01-12 16:28:49 -0800689 int i, j;
Yushin Choe30a47c2017-08-15 13:08:30 -0700690 const MACROBLOCKD *xd = &x->e_mbd;
Yushin Chob7b60c52017-07-14 16:18:52 -0700691
692 DECLARE_ALIGNED(16, uint16_t, orig[MAX_TX_SQUARE]);
693 DECLARE_ALIGNED(16, uint16_t, rec[MAX_TX_SQUARE]);
Yushin Chob7b60c52017-07-14 16:18:52 -0700694
Yushin Choee810272017-09-13 17:30:25 -0700695 assert(bsw >= 8);
696 assert(bsh >= 8);
697 assert((bsw & 0x07) == 0);
698 assert((bsh & 0x07) == 0);
699
Yushin Choe30a47c2017-08-15 13:08:30 -0700700 if (x->tune_metric == AOM_TUNE_CDEF_DIST ||
701 x->tune_metric == AOM_TUNE_DAALA_DIST) {
Yushin Cho8ab875d2017-06-23 14:47:21 -0700702#if CONFIG_HIGHBITDEPTH
Yushin Choe30a47c2017-08-15 13:08:30 -0700703 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
Yushin Cho75b01002017-06-21 13:43:57 -0700704 for (j = 0; j < bsh; j++)
Yushin Cho8ab875d2017-06-23 14:47:21 -0700705 for (i = 0; i < bsw; i++)
Yushin Choe30a47c2017-08-15 13:08:30 -0700706 orig[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
Yushin Cho75b01002017-06-21 13:43:57 -0700707
Yushin Choe30a47c2017-08-15 13:08:30 -0700708 if ((bsw == visible_w) && (bsh == visible_h)) {
Yushin Cho8ab875d2017-06-23 14:47:21 -0700709 for (j = 0; j < bsh; j++)
Yushin Cho8ab875d2017-06-23 14:47:21 -0700710 for (i = 0; i < bsw; i++)
Yushin Choe30a47c2017-08-15 13:08:30 -0700711 rec[j * bsw + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i];
712 } else {
713 for (j = 0; j < visible_h; j++)
714 for (i = 0; i < visible_w; i++)
715 rec[j * bsw + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i];
Yushin Cho8ab875d2017-06-23 14:47:21 -0700716
Yushin Choe30a47c2017-08-15 13:08:30 -0700717 if (visible_w < bsw) {
718 for (j = 0; j < bsh; j++)
719 for (i = visible_w; i < bsw; i++)
720 rec[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
721 }
722
723 if (visible_h < bsh) {
724 for (j = visible_h; j < bsh; j++)
725 for (i = 0; i < bsw; i++)
726 rec[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
727 }
728 }
Yushin Cho8ab875d2017-06-23 14:47:21 -0700729 } else {
Yushin Choe30a47c2017-08-15 13:08:30 -0700730#endif
731 for (j = 0; j < bsh; j++)
732 for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i];
Yushin Cho8ab875d2017-06-23 14:47:21 -0700733
Yushin Choe30a47c2017-08-15 13:08:30 -0700734 if ((bsw == visible_w) && (bsh == visible_h)) {
Yushin Cho8ab875d2017-06-23 14:47:21 -0700735 for (j = 0; j < bsh; j++)
Yushin Choe30a47c2017-08-15 13:08:30 -0700736 for (i = 0; i < bsw; i++) rec[j * bsw + i] = dst[j * dst_stride + i];
737 } else {
738 for (j = 0; j < visible_h; j++)
739 for (i = 0; i < visible_w; i++)
740 rec[j * bsw + i] = dst[j * dst_stride + i];
Yushin Cho8ab875d2017-06-23 14:47:21 -0700741
Yushin Choe30a47c2017-08-15 13:08:30 -0700742 if (visible_w < bsw) {
743 for (j = 0; j < bsh; j++)
744 for (i = visible_w; i < bsw; i++)
745 rec[j * bsw + i] = src[j * src_stride + i];
746 }
747
748 if (visible_h < bsh) {
749 for (j = visible_h; j < bsh; j++)
750 for (i = 0; i < bsw; i++)
751 rec[j * bsw + i] = src[j * src_stride + i];
752 }
Yushin Cho8ab875d2017-06-23 14:47:21 -0700753 }
Yushin Cho8ab875d2017-06-23 14:47:21 -0700754#if CONFIG_HIGHBITDEPTH
Yushin Choe30a47c2017-08-15 13:08:30 -0700755 }
Yushin Cho8ab875d2017-06-23 14:47:21 -0700756#endif // CONFIG_HIGHBITDEPTH
Yushin Choe30a47c2017-08-15 13:08:30 -0700757 }
Yushin Cho8ab875d2017-06-23 14:47:21 -0700758
Yushin Choe30a47c2017-08-15 13:08:30 -0700759 if (x->tune_metric == AOM_TUNE_DAALA_DIST) {
760 d = (int64_t)od_compute_dist(orig, rec, bsw, bsh, qindex);
761 } else if (x->tune_metric == AOM_TUNE_CDEF_DIST) {
Yushin Choc49177e2017-07-18 17:18:09 -0700762 int coeff_shift = AOMMAX(xd->bd - 8, 0);
763
764 for (i = 0; i < bsh; i += 8) {
765 for (j = 0; j < bsw; j += 8) {
766 d += cdef_dist_8x8_16bit(&rec[i * bsw + j], bsw, &orig[i * bsw + j],
767 bsw, coeff_shift);
768 }
769 }
770#if CONFIG_HIGHBITDEPTH
771 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
772 d = ((uint64_t)d) >> 2 * coeff_shift;
773#endif
Yushin Choe30a47c2017-08-15 13:08:30 -0700774 } else {
775 // Otherwise, MSE by default
Yushin Cho2f025aa2017-09-28 17:39:21 -0700776 d = pixel_dist_visible_only(cpi, x, src, src_stride, dst, dst_stride,
777 tx_bsize, bsh, bsw, visible_h, visible_w);
Yushin Choc49177e2017-07-18 17:18:09 -0700778 }
Yushin Chob7b60c52017-07-14 16:18:52 -0700779
Yushin Cho7a428ba2017-01-12 16:28:49 -0800780 return d;
781}
Yushin Cho75b01002017-06-21 13:43:57 -0700782
Yushin Choe30a47c2017-08-15 13:08:30 -0700783static int64_t av1_dist_8x8_diff(const MACROBLOCK *x, const uint8_t *src,
Yushin Chob7b60c52017-07-14 16:18:52 -0700784 int src_stride, const int16_t *diff,
785 int diff_stride, int bsw, int bsh,
786 int visible_w, int visible_h, int qindex) {
787 int64_t d = 0;
Yushin Cho75b01002017-06-21 13:43:57 -0700788 int i, j;
Yushin Choe30a47c2017-08-15 13:08:30 -0700789 const MACROBLOCKD *xd = &x->e_mbd;
Yushin Chob7b60c52017-07-14 16:18:52 -0700790
791 DECLARE_ALIGNED(16, uint16_t, orig[MAX_TX_SQUARE]);
792 DECLARE_ALIGNED(16, int16_t, diff16[MAX_TX_SQUARE]);
Yushin Chob7b60c52017-07-14 16:18:52 -0700793
Yushin Choee810272017-09-13 17:30:25 -0700794 assert(bsw >= 8);
795 assert(bsh >= 8);
796 assert((bsw & 0x07) == 0);
797 assert((bsh & 0x07) == 0);
798
Yushin Choe30a47c2017-08-15 13:08:30 -0700799 if (x->tune_metric == AOM_TUNE_CDEF_DIST ||
800 x->tune_metric == AOM_TUNE_DAALA_DIST) {
Yushin Cho8ab875d2017-06-23 14:47:21 -0700801#if CONFIG_HIGHBITDEPTH
Yushin Choe30a47c2017-08-15 13:08:30 -0700802 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
803 for (j = 0; j < bsh; j++)
804 for (i = 0; i < bsw; i++)
805 orig[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
806 } else {
Yushin Cho8ab875d2017-06-23 14:47:21 -0700807#endif
Yushin Choe30a47c2017-08-15 13:08:30 -0700808 for (j = 0; j < bsh; j++)
809 for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i];
Yushin Cho8ab875d2017-06-23 14:47:21 -0700810#if CONFIG_HIGHBITDEPTH
Yushin Choe30a47c2017-08-15 13:08:30 -0700811 }
Yushin Cho8ab875d2017-06-23 14:47:21 -0700812#endif // CONFIG_HIGHBITDEPTH
Yushin Cho75b01002017-06-21 13:43:57 -0700813
Yushin Choe30a47c2017-08-15 13:08:30 -0700814 if ((bsw == visible_w) && (bsh == visible_h)) {
Yushin Cho75b01002017-06-21 13:43:57 -0700815 for (j = 0; j < bsh; j++)
Yushin Choe30a47c2017-08-15 13:08:30 -0700816 for (i = 0; i < bsw; i++)
817 diff16[j * bsw + i] = diff[j * diff_stride + i];
818 } else {
819 for (j = 0; j < visible_h; j++)
820 for (i = 0; i < visible_w; i++)
821 diff16[j * bsw + i] = diff[j * diff_stride + i];
Yushin Cho75b01002017-06-21 13:43:57 -0700822
Yushin Choe30a47c2017-08-15 13:08:30 -0700823 if (visible_w < bsw) {
824 for (j = 0; j < bsh; j++)
825 for (i = visible_w; i < bsw; i++) diff16[j * bsw + i] = 0;
826 }
827
828 if (visible_h < bsh) {
829 for (j = visible_h; j < bsh; j++)
830 for (i = 0; i < bsw; i++) diff16[j * bsw + i] = 0;
831 }
Yushin Cho75b01002017-06-21 13:43:57 -0700832 }
833 }
Yushin Cho8ab875d2017-06-23 14:47:21 -0700834
Yushin Choe30a47c2017-08-15 13:08:30 -0700835 if (x->tune_metric == AOM_TUNE_DAALA_DIST) {
836 d = (int64_t)od_compute_dist_diff(orig, diff16, bsw, bsh, qindex);
837 } else if (x->tune_metric == AOM_TUNE_CDEF_DIST) {
Yushin Choc49177e2017-07-18 17:18:09 -0700838 int coeff_shift = AOMMAX(xd->bd - 8, 0);
839 DECLARE_ALIGNED(16, uint16_t, dst16[MAX_TX_SQUARE]);
840
841 for (i = 0; i < bsh; i++) {
842 for (j = 0; j < bsw; j++) {
843 dst16[i * bsw + j] = orig[i * bsw + j] - diff16[i * bsw + j];
844 }
845 }
846
847 for (i = 0; i < bsh; i += 8) {
848 for (j = 0; j < bsw; j += 8) {
849 d += cdef_dist_8x8_16bit(&dst16[i * bsw + j], bsw, &orig[i * bsw + j],
850 bsw, coeff_shift);
851 }
852 }
853 // Don't scale 'd' for HBD since it will be done by caller side for diff
854 // input
Yushin Choe30a47c2017-08-15 13:08:30 -0700855 } else {
856 // Otherwise, MSE by default
Yushin Cho2f025aa2017-09-28 17:39:21 -0700857 d = aom_sum_squares_2d_i16(diff, diff_stride, visible_w, visible_h);
Yushin Choc49177e2017-07-18 17:18:09 -0700858 }
Yushin Cho75b01002017-06-21 13:43:57 -0700859
860 return d;
861}
Yushin Chob7b60c52017-07-14 16:18:52 -0700862#endif // CONFIG_DIST_8X8
Yushin Cho7a428ba2017-01-12 16:28:49 -0800863
Yaowu Xuf883b422016-08-30 14:01:10 -0700864static void get_energy_distribution_fine(const AV1_COMP *cpi, BLOCK_SIZE bsize,
Alex Converse9f217762017-04-20 15:34:54 -0700865 const uint8_t *src, int src_stride,
866 const uint8_t *dst, int dst_stride,
Yaowu Xuc27fc142016-08-22 16:08:15 -0700867 double *hordist, double *verdist) {
Alex Converse9f217762017-04-20 15:34:54 -0700868 const int bw = block_size_wide[bsize];
869 const int bh = block_size_high[bsize];
Yaowu Xuc27fc142016-08-22 16:08:15 -0700870 unsigned int esq[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
Yaowu Xuc27fc142016-08-22 16:08:15 -0700871
872 const int f_index = bsize - BLOCK_16X16;
873 if (f_index < 0) {
Alex Converse9f217762017-04-20 15:34:54 -0700874 const int w_shift = bw == 8 ? 1 : 2;
875 const int h_shift = bh == 8 ? 1 : 2;
Sebastien Alaiwan71e87842017-04-12 16:03:28 +0200876#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -0700877 if (cpi->common.use_highbitdepth) {
Alex Converse9f217762017-04-20 15:34:54 -0700878 const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
879 const uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
880 for (int i = 0; i < bh; ++i)
881 for (int j = 0; j < bw; ++j) {
882 const int index = (j >> w_shift) + ((i >> h_shift) << 2);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700883 esq[index] +=
884 (src16[j + i * src_stride] - dst16[j + i * dst_stride]) *
885 (src16[j + i * src_stride] - dst16[j + i * dst_stride]);
886 }
887 } else {
Sebastien Alaiwan71e87842017-04-12 16:03:28 +0200888#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -0700889
Alex Converse9f217762017-04-20 15:34:54 -0700890 for (int i = 0; i < bh; ++i)
891 for (int j = 0; j < bw; ++j) {
892 const int index = (j >> w_shift) + ((i >> h_shift) << 2);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700893 esq[index] += (src[j + i * src_stride] - dst[j + i * dst_stride]) *
894 (src[j + i * src_stride] - dst[j + i * dst_stride]);
895 }
Sebastien Alaiwan71e87842017-04-12 16:03:28 +0200896#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -0700897 }
Sebastien Alaiwan71e87842017-04-12 16:03:28 +0200898#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -0700899 } else {
Alex Converse9f217762017-04-20 15:34:54 -0700900 cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[0]);
901 cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
902 &esq[1]);
903 cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
904 &esq[2]);
905 cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
906 dst_stride, &esq[3]);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700907 src += bh / 4 * src_stride;
908 dst += bh / 4 * dst_stride;
909
Alex Converse9f217762017-04-20 15:34:54 -0700910 cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[4]);
911 cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
912 &esq[5]);
913 cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
914 &esq[6]);
915 cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
916 dst_stride, &esq[7]);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700917 src += bh / 4 * src_stride;
918 dst += bh / 4 * dst_stride;
919
Alex Converse9f217762017-04-20 15:34:54 -0700920 cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[8]);
921 cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
922 &esq[9]);
923 cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
924 &esq[10]);
925 cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
926 dst_stride, &esq[11]);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700927 src += bh / 4 * src_stride;
928 dst += bh / 4 * dst_stride;
929
Alex Converse9f217762017-04-20 15:34:54 -0700930 cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[12]);
931 cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
932 &esq[13]);
933 cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
934 &esq[14]);
935 cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
936 dst_stride, &esq[15]);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700937 }
938
Alex Converse9f217762017-04-20 15:34:54 -0700939 double total = (double)esq[0] + esq[1] + esq[2] + esq[3] + esq[4] + esq[5] +
940 esq[6] + esq[7] + esq[8] + esq[9] + esq[10] + esq[11] +
941 esq[12] + esq[13] + esq[14] + esq[15];
Yaowu Xuc27fc142016-08-22 16:08:15 -0700942 if (total > 0) {
943 const double e_recip = 1.0 / total;
Alex Converse9f217762017-04-20 15:34:54 -0700944 hordist[0] = ((double)esq[0] + esq[4] + esq[8] + esq[12]) * e_recip;
945 hordist[1] = ((double)esq[1] + esq[5] + esq[9] + esq[13]) * e_recip;
946 hordist[2] = ((double)esq[2] + esq[6] + esq[10] + esq[14]) * e_recip;
947 verdist[0] = ((double)esq[0] + esq[1] + esq[2] + esq[3]) * e_recip;
948 verdist[1] = ((double)esq[4] + esq[5] + esq[6] + esq[7]) * e_recip;
949 verdist[2] = ((double)esq[8] + esq[9] + esq[10] + esq[11]) * e_recip;
Yaowu Xuc27fc142016-08-22 16:08:15 -0700950 } else {
951 hordist[0] = verdist[0] = 0.25;
952 hordist[1] = verdist[1] = 0.25;
953 hordist[2] = verdist[2] = 0.25;
954 }
Yaowu Xuc27fc142016-08-22 16:08:15 -0700955}
956
Urvang Joshi0d1e4ff2017-04-27 16:17:25 -0700957static int adst_vs_flipadst(const AV1_COMP *cpi, BLOCK_SIZE bsize,
958 const uint8_t *src, int src_stride,
959 const uint8_t *dst, int dst_stride) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700960 int prune_bitmask = 0;
961 double svm_proj_h = 0, svm_proj_v = 0;
Alex Converse89912f92017-04-21 13:28:50 -0700962 double hdist[3] = { 0, 0, 0 }, vdist[3] = { 0, 0, 0 };
Yaowu Xuc27fc142016-08-22 16:08:15 -0700963 get_energy_distribution_fine(cpi, bsize, src, src_stride, dst, dst_stride,
964 hdist, vdist);
965
966 svm_proj_v = vdist[0] * ADST_FLIP_SVM[0] + vdist[1] * ADST_FLIP_SVM[1] +
967 vdist[2] * ADST_FLIP_SVM[2] + ADST_FLIP_SVM[3];
968 svm_proj_h = hdist[0] * ADST_FLIP_SVM[4] + hdist[1] * ADST_FLIP_SVM[5] +
969 hdist[2] * ADST_FLIP_SVM[6] + ADST_FLIP_SVM[7];
970 if (svm_proj_v > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
971 prune_bitmask |= 1 << FLIPADST_1D;
972 else if (svm_proj_v < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
973 prune_bitmask |= 1 << ADST_1D;
974
975 if (svm_proj_h > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
976 prune_bitmask |= 1 << (FLIPADST_1D + 8);
977 else if (svm_proj_h < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
978 prune_bitmask |= 1 << (ADST_1D + 8);
979
980 return prune_bitmask;
981}
982
Alex Converse89912f92017-04-21 13:28:50 -0700983static void get_horver_correlation(const int16_t *diff, int stride, int w,
984 int h, double *hcorr, double *vcorr) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700985 // Returns hor/ver correlation coefficient
986 const int num = (h - 1) * (w - 1);
987 double num_r;
988 int i, j;
989 int64_t xy_sum = 0, xz_sum = 0;
990 int64_t x_sum = 0, y_sum = 0, z_sum = 0;
991 int64_t x2_sum = 0, y2_sum = 0, z2_sum = 0;
992 double x_var_n, y_var_n, z_var_n, xy_var_n, xz_var_n;
993 *hcorr = *vcorr = 1;
994
995 assert(num > 0);
996 num_r = 1.0 / num;
997 for (i = 1; i < h; ++i) {
998 for (j = 1; j < w; ++j) {
999 const int16_t x = diff[i * stride + j];
1000 const int16_t y = diff[i * stride + j - 1];
1001 const int16_t z = diff[(i - 1) * stride + j];
1002 xy_sum += x * y;
1003 xz_sum += x * z;
1004 x_sum += x;
1005 y_sum += y;
1006 z_sum += z;
1007 x2_sum += x * x;
1008 y2_sum += y * y;
1009 z2_sum += z * z;
1010 }
1011 }
1012 x_var_n = x2_sum - (x_sum * x_sum) * num_r;
1013 y_var_n = y2_sum - (y_sum * y_sum) * num_r;
1014 z_var_n = z2_sum - (z_sum * z_sum) * num_r;
1015 xy_var_n = xy_sum - (x_sum * y_sum) * num_r;
1016 xz_var_n = xz_sum - (x_sum * z_sum) * num_r;
1017 if (x_var_n > 0 && y_var_n > 0) {
1018 *hcorr = xy_var_n / sqrt(x_var_n * y_var_n);
1019 *hcorr = *hcorr < 0 ? 0 : *hcorr;
1020 }
1021 if (x_var_n > 0 && z_var_n > 0) {
1022 *vcorr = xz_var_n / sqrt(x_var_n * z_var_n);
1023 *vcorr = *vcorr < 0 ? 0 : *vcorr;
1024 }
1025}
1026
Alex Converse89912f92017-04-21 13:28:50 -07001027int dct_vs_idtx(const int16_t *diff, int stride, int w, int h) {
1028 double hcorr, vcorr;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001029 int prune_bitmask = 0;
Alex Converse89912f92017-04-21 13:28:50 -07001030 get_horver_correlation(diff, stride, w, h, &hcorr, &vcorr);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001031
Alex Converse89912f92017-04-21 13:28:50 -07001032 if (vcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
Yaowu Xuc27fc142016-08-22 16:08:15 -07001033 prune_bitmask |= 1 << IDTX_1D;
Alex Converse89912f92017-04-21 13:28:50 -07001034 else if (vcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
Yaowu Xuc27fc142016-08-22 16:08:15 -07001035 prune_bitmask |= 1 << DCT_1D;
1036
Alex Converse89912f92017-04-21 13:28:50 -07001037 if (hcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
Yaowu Xuc27fc142016-08-22 16:08:15 -07001038 prune_bitmask |= 1 << (IDTX_1D + 8);
Alex Converse89912f92017-04-21 13:28:50 -07001039 else if (hcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
Yaowu Xuc27fc142016-08-22 16:08:15 -07001040 prune_bitmask |= 1 << (DCT_1D + 8);
1041 return prune_bitmask;
1042}
1043
1044// Performance drop: 0.5%, Speed improvement: 24%
Yaowu Xuf883b422016-08-30 14:01:10 -07001045static int prune_two_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
Alex Converse89912f92017-04-21 13:28:50 -07001046 MACROBLOCK *x, const MACROBLOCKD *xd,
1047 int adst_flipadst, int dct_idtx) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001048 int prune = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001049
Alex Converse89912f92017-04-21 13:28:50 -07001050 if (adst_flipadst) {
1051 const struct macroblock_plane *const p = &x->plane[0];
1052 const struct macroblockd_plane *const pd = &xd->plane[0];
Yaowu Xuc27fc142016-08-22 16:08:15 -07001053 prune |= adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride,
Alex Converse89912f92017-04-21 13:28:50 -07001054 pd->dst.buf, pd->dst.stride);
1055 }
1056 if (dct_idtx) {
1057 av1_subtract_plane(x, bsize, 0);
1058 const struct macroblock_plane *const p = &x->plane[0];
1059 const int bw = 4 << (b_width_log2_lookup[bsize]);
1060 const int bh = 4 << (b_height_log2_lookup[bsize]);
1061 prune |= dct_vs_idtx(p->src_diff, bw, bw, bh);
1062 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07001063
1064 return prune;
1065}
Yaowu Xuc27fc142016-08-22 16:08:15 -07001066
1067// Performance drop: 0.3%, Speed improvement: 5%
Yaowu Xuf883b422016-08-30 14:01:10 -07001068static int prune_one_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
Alex Converse89912f92017-04-21 13:28:50 -07001069 const MACROBLOCK *x, const MACROBLOCKD *xd) {
1070 const struct macroblock_plane *const p = &x->plane[0];
1071 const struct macroblockd_plane *const pd = &xd->plane[0];
Yaowu Xuc27fc142016-08-22 16:08:15 -07001072 return adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride, pd->dst.buf,
Alex Converse89912f92017-04-21 13:28:50 -07001073 pd->dst.stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001074}
1075
Hui Su032ab8b2017-09-19 14:53:40 -07001076// 1D Transforms used in inter set, this needs to be changed if
1077// ext_tx_used_inter is changed
1078static const int ext_tx_used_inter_1D[EXT_TX_SETS_INTER][TX_TYPES_1D] = {
1079 { 1, 0, 0, 0 }, { 1, 1, 1, 1 }, { 1, 1, 1, 1 }, { 1, 0, 0, 1 },
Hui Su032ab8b2017-09-19 14:53:40 -07001080};
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001081
// Builds a downscaled block-energy map and projects it onto normalized
// horizontal (hordist, esq_w - 1 entries) and vertical (verdist,
// esq_h - 1 entries) profiles. Dimensions larger than 8 are downscaled by
// a factor of 2; the last entry of each profile is left implicit since the
// full distribution sums to 1.
static void get_energy_distribution_finer(const int16_t *diff, int stride,
                                          int bw, int bh, float *hordist,
                                          float *verdist) {
  unsigned int esq[256];
  const int w_shift = bw <= 8 ? 0 : 1;
  const int h_shift = bh <= 8 ? 0 : 1;
  const int esq_w = bw <= 8 ? bw : bw / 2;
  const int esq_h = bh <= 8 ? bh : bh / 2;
  const int esq_sz = esq_w * esq_h;
  memset(esq, 0, esq_sz * sizeof(esq[0]));

  // Accumulate squared residual into the downscaled energy map.
  for (int i = 0; i < bh; i++) {
    unsigned int *esq_row = esq + (i >> h_shift) * esq_w;
    const int16_t *diff_row = diff + i * stride;
    for (int j = 0; j < bw; j++) {
      esq_row[j >> w_shift] += diff_row[j] * diff_row[j];
    }
  }

  uint64_t total = 0;
  for (int k = 0; k < esq_sz; k++) total += esq[k];

  // Flat residual: fall back to uniform distributions.
  if (total == 0) {
    const float hor_val = 1.0f / esq_w;
    const float ver_val = 1.0f / esq_h;
    for (int j = 0; j < esq_w - 1; j++) hordist[j] = hor_val;
    for (int i = 0; i < esq_h - 1; i++) verdist[i] = ver_val;
    return;
  }

  const float e_recip = 1.0f / (float)total;
  memset(hordist, 0, (esq_w - 1) * sizeof(hordist[0]));
  memset(verdist, 0, (esq_h - 1) * sizeof(verdist[0]));
  // Project the energy map onto columns (hordist) and rows (verdist),
  // skipping the last column/row of the output.
  for (int i = 0; i < esq_h; i++) {
    for (int j = 0; j < esq_w; j++) {
      const float energy = (float)esq[i * esq_w + j];
      if (j < esq_w - 1) hordist[j] += energy;
      if (i < esq_h - 1) verdist[i] += energy;
    }
  }
  for (int j = 0; j < esq_w - 1; j++) hordist[j] *= e_recip;
  for (int i = 0; i < esq_h - 1; i++) verdist[i] *= e_recip;
}
1133
Alexander Bokov79a37242017-09-29 11:25:55 -07001134// Instead of 1D projections of the block energy distribution computed by
1135// get_energy_distribution_finer() this function computes a full
1136// two-dimensional energy distribution of the input block.
1137static void get_2D_energy_distribution(const int16_t *diff, int stride, int bw,
1138 int bh, float *edist) {
1139 unsigned int esq[256] = { 0 };
1140 const int esq_w = bw >> 2;
1141 const int esq_h = bh >> 2;
1142 const int esq_sz = esq_w * esq_h;
1143 uint64_t total = 0;
1144 for (int i = 0; i < bh; i += 4) {
1145 for (int j = 0; j < bw; j += 4) {
1146 unsigned int cur_sum_energy = 0;
1147 for (int k = 0; k < 4; k++) {
1148 const int16_t *cur_diff = diff + (i + k) * stride + j;
1149 cur_sum_energy += cur_diff[0] * cur_diff[0] +
1150 cur_diff[1] * cur_diff[1] +
1151 cur_diff[2] * cur_diff[2] + cur_diff[3] * cur_diff[3];
1152 }
1153 esq[(i >> 2) * esq_w + (j >> 2)] = cur_sum_energy;
1154 total += cur_sum_energy;
1155 }
1156 }
1157
1158 const float e_recip = 1.0f / (float)total;
1159 for (int i = 0; i < esq_sz - 1; i++) edist[i] = esq[i] * e_recip;
1160}
1161
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001162// Similar to get_horver_correlation, but also takes into account first
1163// row/column, when computing horizontal/vertical correlation.
1164static void get_horver_correlation_full(const int16_t *diff, int stride, int w,
1165 int h, float *hcorr, float *vcorr) {
Yaowu Xu29373ee2017-10-19 15:50:34 -07001166 const float num_hor = (float)(h * (w - 1));
1167 const float num_ver = (float)((h - 1) * w);
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001168 int i, j;
1169
1170 // The following notation is used:
1171 // x - current pixel
1172 // y - left neighbor pixel
1173 // z - top neighbor pixel
1174 int64_t xy_sum = 0, xz_sum = 0;
1175 int64_t xhor_sum = 0, xver_sum = 0, y_sum = 0, z_sum = 0;
1176 int64_t x2hor_sum = 0, x2ver_sum = 0, y2_sum = 0, z2_sum = 0;
1177
1178 int16_t x, y, z;
1179 for (j = 1; j < w; ++j) {
1180 x = diff[j];
1181 y = diff[j - 1];
1182 xy_sum += x * y;
1183 xhor_sum += x;
1184 y_sum += y;
1185 x2hor_sum += x * x;
1186 y2_sum += y * y;
1187 }
1188 for (i = 1; i < h; ++i) {
1189 x = diff[i * stride];
1190 z = diff[(i - 1) * stride];
1191 xz_sum += x * z;
1192 xver_sum += x;
1193 z_sum += z;
1194 x2ver_sum += x * x;
1195 z2_sum += z * z;
1196 for (j = 1; j < w; ++j) {
1197 x = diff[i * stride + j];
1198 y = diff[i * stride + j - 1];
1199 z = diff[(i - 1) * stride + j];
1200 xy_sum += x * y;
1201 xz_sum += x * z;
1202 xhor_sum += x;
1203 xver_sum += x;
1204 y_sum += y;
1205 z_sum += z;
1206 x2hor_sum += x * x;
1207 x2ver_sum += x * x;
1208 y2_sum += y * y;
1209 z2_sum += z * z;
1210 }
1211 }
1212 const float xhor_var_n = x2hor_sum - (xhor_sum * xhor_sum) / num_hor;
1213 const float y_var_n = y2_sum - (y_sum * y_sum) / num_hor;
1214 const float xy_var_n = xy_sum - (xhor_sum * y_sum) / num_hor;
1215 const float xver_var_n = x2ver_sum - (xver_sum * xver_sum) / num_ver;
1216 const float z_var_n = z2_sum - (z_sum * z_sum) / num_ver;
1217 const float xz_var_n = xz_sum - (xver_sum * z_sum) / num_ver;
1218
1219 *hcorr = *vcorr = 1;
1220 if (xhor_var_n > 0 && y_var_n > 0) {
1221 *hcorr = xy_var_n / sqrtf(xhor_var_n * y_var_n);
1222 *hcorr = *hcorr < 0 ? 0 : *hcorr;
1223 }
1224 if (xver_var_n > 0 && z_var_n > 0) {
1225 *vcorr = xz_var_n / sqrtf(xver_var_n * z_var_n);
1226 *vcorr = *vcorr < 0 ? 0 : *vcorr;
1227 }
1228}
1229
// Performs a forward pass through a neural network with 2 fully-connected
// layers, assuming ReLU as activation function. Number of output neurons
// is always equal to 4.
// fc1, fc2 - weight matrices of the respective layers.
// b1, b2 - bias vectors of the respective layers.
static void compute_1D_scores(float *features, int num_features,
                              const float *fc1, const float *b1,
                              const float *fc2, const float *b2,
                              int num_hidden_units, float *dst_scores) {
  assert(num_hidden_units <= 32);
  float hidden_layer[32];
  // Hidden layer: ReLU(fc1 * features + b1).
  for (int unit = 0; unit < num_hidden_units; unit++) {
    const float *weights = fc1 + unit * num_features;
    float acc = 0.0f;
    for (int f = 0; f < num_features; f++) acc += weights[f] * features[f];
    const float pre_act = acc + b1[unit];
    hidden_layer[unit] = pre_act > 0.0f ? pre_act : 0.0f;
  }
  // Output layer: fc2 * hidden + b2 (linear, 4 outputs).
  for (int out = 0; out < 4; out++) {
    const float *weights = fc2 + out * num_hidden_units;
    float acc = 0.0f;
    for (int u = 0; u < num_hidden_units; u++)
      acc += weights[u] * hidden_layer[u];
    dst_scores[out] = acc + b2[out];
  }
}
1256
// Transforms raw scores into a probability distribution across 16 TX types
// by raising each shifted, clipped-at-zero score to the 8th power and
// normalizing so the entries sum to 1.
static void score_2D_transform_pow8(float *scores_2D, float shift) {
  float norm = 0.0f;

  for (int i = 0; i < 16; i++) {
    const float shifted = scores_2D[i] + shift;
    const float clipped = shifted > 0.0f ? shifted : 0.0f;
    const float p2 = clipped * clipped;
    const float p4 = p2 * p2;
    scores_2D[i] = p4 * p4;
    norm += scores_2D[i];
  }
  for (int i = 0; i < 16; i++) scores_2D[i] /= norm;
}
1272
Alexander Bokov79a37242017-09-29 11:25:55 -07001273// Similarly to compute_1D_scores() performs a forward pass through a
1274// neural network with two fully-connected layers. The only difference
1275// is that it assumes 1 output neuron, as required by the classifier used
1276// for TX size pruning.
1277static float compute_tx_split_prune_score(float *features, int num_features,
1278 const float *fc1, const float *b1,
1279 const float *fc2, float b2,
1280 int num_hidden_units) {
1281 assert(num_hidden_units <= 64);
1282 float hidden_layer[64];
1283 for (int i = 0; i < num_hidden_units; i++) {
1284 const float *cur_coef = fc1 + i * num_features;
1285 hidden_layer[i] = 0.0f;
1286 for (int j = 0; j < num_features; j++)
1287 hidden_layer[i] += cur_coef[j] * features[j];
1288 hidden_layer[i] = AOMMAX(hidden_layer[i] + b1[i], 0.0f);
1289 }
1290 float dst_score = 0.0f;
1291 for (int j = 0; j < num_hidden_units; j++)
1292 dst_score += fc2[j] * hidden_layer[j];
1293 dst_score += b2;
1294 return dst_score;
1295}
1296
1297static int prune_tx_split(BLOCK_SIZE bsize, const int16_t *diff, float hcorr,
1298 float vcorr) {
1299 if (bsize <= BLOCK_4X4 || bsize > BLOCK_16X16) return 0;
1300
1301 float features[17];
1302 const int bw = block_size_wide[bsize], bh = block_size_high[bsize];
1303 const int feature_num = (bw / 4) * (bh / 4) + 1;
1304 assert(feature_num <= 17);
1305
1306 get_2D_energy_distribution(diff, bw, bw, bh, features);
1307 features[feature_num - 2] = hcorr;
1308 features[feature_num - 1] = vcorr;
1309
1310 const int bidx = bsize - BLOCK_4X4 - 1;
1311 const float *fc1 = av1_prune_tx_split_learned_weights[bidx];
1312 const float *b1 =
1313 fc1 + av1_prune_tx_split_num_hidden_units[bidx] * feature_num;
1314 const float *fc2 = b1 + av1_prune_tx_split_num_hidden_units[bidx];
1315 float b2 = *(fc2 + av1_prune_tx_split_num_hidden_units[bidx]);
1316 float score =
1317 compute_tx_split_prune_score(features, feature_num, fc1, b1, fc2, b2,
1318 av1_prune_tx_split_num_hidden_units[bidx]);
1319
1320 return (score > av1_prune_tx_split_thresholds[bidx]);
1321}
1322
// Neural-net based pruning of 2D transform types. Computes horizontal and
// vertical 1D scores from the source residual, combines them into 16
// 2D-transform scores, and returns a bitmask of transform types whose score
// falls below a per-block-size / per-aggressiveness threshold (the highest
// scoring usable type is always kept). When use_tx_split_prune is set, bit
// TX_TYPES of the returned mask additionally carries the TX-size-split
// pruning decision from prune_tx_split().
static int prune_tx_2D(BLOCK_SIZE bsize, const MACROBLOCK *x, int tx_set_type,
                       int tx_type_pruning_aggressiveness,
                       int use_tx_split_prune) {
  // The model only covers block sizes below 32x32.
  if (bsize >= BLOCK_32X32) return 0;
  // Reset the FPU state before doing float computations.
  aom_clear_system_state();
  const struct macroblock_plane *const p = &x->plane[0];
  const int bidx = AOMMAX(bsize - BLOCK_4X4, 0);
  const float score_thresh =
      av1_prune_2D_adaptive_thresholds[bidx]
                                      [tx_type_pruning_aggressiveness - 1];
  float hfeatures[16], vfeatures[16];
  float hscores[4], vscores[4];
  float scores_2D[16];
  // Row-major table mapping (vertical score i, horizontal score j) to the
  // corresponding 2D transform type: scores_2D[i * 4 + j].
  int tx_type_table_2D[16] = {
    DCT_DCT, DCT_ADST, DCT_FLIPADST, V_DCT,
    ADST_DCT, ADST_ADST, ADST_FLIPADST, V_ADST,
    FLIPADST_DCT, FLIPADST_ADST, FLIPADST_FLIPADST, V_FLIPADST,
    H_DCT, H_ADST, H_FLIPADST, IDTX
  };
  const int bw = block_size_wide[bsize], bh = block_size_high[bsize];
  const int hfeatures_num = bw <= 8 ? bw : bw / 2;
  const int vfeatures_num = bh <= 8 ? bh : bh / 2;
  assert(hfeatures_num <= 16);
  assert(vfeatures_num <= 16);

  // Energy features fill the arrays; the last slot of each receives the
  // horizontal/vertical correlation of the residual.
  get_energy_distribution_finer(p->src_diff, bw, bw, bh, hfeatures, vfeatures);
  get_horver_correlation_full(p->src_diff, bw, bw, bh,
                              &hfeatures[hfeatures_num - 1],
                              &vfeatures[vfeatures_num - 1]);

  // Horizontal network: learned parameters packed as fc1 | b1 | fc2 | b2.
  const float *fc1_hor = av1_prune_2D_learned_weights_hor[bidx];
  const float *b1_hor =
      fc1_hor + av1_prune_2D_num_hidden_units_hor[bidx] * hfeatures_num;
  const float *fc2_hor = b1_hor + av1_prune_2D_num_hidden_units_hor[bidx];
  const float *b2_hor = fc2_hor + av1_prune_2D_num_hidden_units_hor[bidx] * 4;
  compute_1D_scores(hfeatures, hfeatures_num, fc1_hor, b1_hor, fc2_hor, b2_hor,
                    av1_prune_2D_num_hidden_units_hor[bidx], hscores);

  // Vertical network: same parameter layout.
  const float *fc1_ver = av1_prune_2D_learned_weights_ver[bidx];
  const float *b1_ver =
      fc1_ver + av1_prune_2D_num_hidden_units_ver[bidx] * vfeatures_num;
  const float *fc2_ver = b1_ver + av1_prune_2D_num_hidden_units_ver[bidx];
  const float *b2_ver = fc2_ver + av1_prune_2D_num_hidden_units_ver[bidx] * 4;
  compute_1D_scores(vfeatures, vfeatures_num, fc1_ver, b1_ver, fc2_ver, b2_ver,
                    av1_prune_2D_num_hidden_units_ver[bidx], vscores);

  // Each 2D score is the product of its 1D scores; also accumulate the mean
  // used by the sharpening transform below.
  float score_2D_average = 0.0f;
  for (int i = 0; i < 4; i++) {
    float *cur_scores_2D = scores_2D + i * 4;
    cur_scores_2D[0] = vscores[i] * hscores[0];
    cur_scores_2D[1] = vscores[i] * hscores[1];
    cur_scores_2D[2] = vscores[i] * hscores[2];
    cur_scores_2D[3] = vscores[i] * hscores[3];
    score_2D_average += cur_scores_2D[0] + cur_scores_2D[1] + cur_scores_2D[2] +
                        cur_scores_2D[3];
  }
  score_2D_average /= 16;
  score_2D_transform_pow8(scores_2D, (20 - score_2D_average));

  // Always keep the TX type with the highest score, prune all others with
  // score below score_thresh.
  int max_score_i = 0;
  float max_score = 0.0f;
  for (int i = 0; i < 16; i++) {
    if (scores_2D[i] > max_score &&
        av1_ext_tx_used[tx_set_type][tx_type_table_2D[i]]) {
      max_score = scores_2D[i];
      max_score_i = i;
    }
  }

  int prune_bitmask = 0;
  for (int i = 0; i < 16; i++) {
    if (scores_2D[i] < score_thresh && i != max_score_i)
      prune_bitmask |= (1 << tx_type_table_2D[i]);
  }

  // Also apply TX size pruning if it's turned on. The value
  // of prune_tx_split_flag indicates whether we should do
  // full TX size search (flag=0) or use the largest available
  // TX size without performing any further search (flag=1).
  int prune_tx_split_flag = 0;
  if (use_tx_split_prune) {
    prune_tx_split_flag =
        prune_tx_split(bsize, p->src_diff, hfeatures[hfeatures_num - 1],
                       vfeatures[vfeatures_num - 1]);
  }
  prune_bitmask |= (prune_tx_split_flag << TX_TYPES);
  return prune_bitmask;
}
Hui Su032ab8b2017-09-19 14:53:40 -07001413
Alexander Bokov79a37242017-09-29 11:25:55 -07001414static int prune_tx(const AV1_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x,
1415 const MACROBLOCKD *const xd, int tx_set_type,
1416 int use_tx_split_prune) {
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001417 int tx_set = ext_tx_set_index[1][tx_set_type];
1418 assert(tx_set >= 0);
1419 const int *tx_set_1D = ext_tx_used_inter_1D[tx_set];
Yaowu Xuc27fc142016-08-22 16:08:15 -07001420
1421 switch (cpi->sf.tx_type_search.prune_mode) {
1422 case NO_PRUNE: return 0; break;
1423 case PRUNE_ONE:
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001424 if (!(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D])) return 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001425 return prune_one_for_sby(cpi, bsize, x, xd);
1426 break;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001427 case PRUNE_TWO:
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001428 if (!(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D])) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001429 if (!(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D])) return 0;
1430 return prune_two_for_sby(cpi, bsize, x, xd, 0, 1);
1431 }
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001432 if (!(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D]))
Yaowu Xuc27fc142016-08-22 16:08:15 -07001433 return prune_two_for_sby(cpi, bsize, x, xd, 1, 0);
1434 return prune_two_for_sby(cpi, bsize, x, xd, 1, 1);
1435 break;
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001436 case PRUNE_2D_ACCURATE:
1437 if (tx_set_type == EXT_TX_SET_ALL16)
Alexander Bokov79a37242017-09-29 11:25:55 -07001438 return prune_tx_2D(bsize, x, tx_set_type, 6, use_tx_split_prune);
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001439 else if (tx_set_type == EXT_TX_SET_DTT9_IDTX_1DDCT)
Alexander Bokov79a37242017-09-29 11:25:55 -07001440 return prune_tx_2D(bsize, x, tx_set_type, 4, use_tx_split_prune);
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001441 else
1442 return 0;
1443 break;
1444 case PRUNE_2D_FAST:
1445 if (tx_set_type == EXT_TX_SET_ALL16)
Alexander Bokov79a37242017-09-29 11:25:55 -07001446 return prune_tx_2D(bsize, x, tx_set_type, 10, use_tx_split_prune);
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001447 else if (tx_set_type == EXT_TX_SET_DTT9_IDTX_1DDCT)
Alexander Bokov79a37242017-09-29 11:25:55 -07001448 return prune_tx_2D(bsize, x, tx_set_type, 7, use_tx_split_prune);
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001449 else
1450 return 0;
1451 break;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001452 }
1453 assert(0);
1454 return 0;
1455}
1456
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001457static int do_tx_type_search(TX_TYPE tx_type, int prune,
1458 TX_TYPE_PRUNE_MODE mode) {
Sebastien Alaiwan3bac9922017-11-02 12:34:41 +01001459 // TODO(sarahparker) implement for non ext tx
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001460 if (mode >= PRUNE_2D_ACCURATE) {
1461 return !((prune >> tx_type) & 1);
1462 } else {
1463 return !(((prune >> vtx_tab[tx_type]) & 1) |
1464 ((prune >> (htx_tab[tx_type] + 8)) & 1));
1465 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07001466}
1467
Yaowu Xuf883b422016-08-30 14:01:10 -07001468static void model_rd_from_sse(const AV1_COMP *const cpi,
Yaowu Xuc27fc142016-08-22 16:08:15 -07001469 const MACROBLOCKD *const xd, BLOCK_SIZE bsize,
1470 int plane, int64_t sse, int *rate,
1471 int64_t *dist) {
1472 const struct macroblockd_plane *const pd = &xd->plane[plane];
1473 const int dequant_shift =
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001474#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07001475 (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 :
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001476#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07001477 3;
1478
1479 // Fast approximate the modelling function.
1480 if (cpi->sf.simple_model_rd_from_var) {
1481 const int64_t square_error = sse;
Monty Montgomery125c0fc2017-10-26 00:44:35 -04001482 int quantizer = (pd->dequant_Q3[1] >> dequant_shift);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001483
1484 if (quantizer < 120)
1485 *rate = (int)((square_error * (280 - quantizer)) >>
Yaowu Xuf883b422016-08-30 14:01:10 -07001486 (16 - AV1_PROB_COST_SHIFT));
Yaowu Xuc27fc142016-08-22 16:08:15 -07001487 else
1488 *rate = 0;
1489 *dist = (square_error * quantizer) >> 8;
1490 } else {
Yaowu Xuf883b422016-08-30 14:01:10 -07001491 av1_model_rd_from_var_lapndz(sse, num_pels_log2_lookup[bsize],
Monty Montgomery125c0fc2017-10-26 00:44:35 -04001492 pd->dequant_Q3[1] >> dequant_shift, rate,
1493 dist);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001494 }
1495
1496 *dist <<= 4;
1497}
1498
Yaowu Xuf883b422016-08-30 14:01:10 -07001499static void model_rd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
Yaowu Xuc27fc142016-08-22 16:08:15 -07001500 MACROBLOCK *x, MACROBLOCKD *xd, int plane_from,
1501 int plane_to, int *out_rate_sum,
1502 int64_t *out_dist_sum, int *skip_txfm_sb,
1503 int64_t *skip_sse_sb) {
1504 // Note our transform coeffs are 8 times an orthogonal transform.
1505 // Hence quantizer step is also 8 times. To get effective quantizer
1506 // we need to divide by 8 before sending to modeling function.
1507 int plane;
1508 const int ref = xd->mi[0]->mbmi.ref_frame[0];
1509
1510 int64_t rate_sum = 0;
1511 int64_t dist_sum = 0;
1512 int64_t total_sse = 0;
1513
1514 x->pred_sse[ref] = 0;
1515
1516 for (plane = plane_from; plane <= plane_to; ++plane) {
1517 struct macroblock_plane *const p = &x->plane[plane];
1518 struct macroblockd_plane *const pd = &xd->plane[plane];
Jingning Han9ce464c2017-02-20 15:36:30 -08001519 const BLOCK_SIZE bs = AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd));
Yaowu Xuc27fc142016-08-22 16:08:15 -07001520 unsigned int sse;
1521 int rate;
1522 int64_t dist;
1523
Jingning Han9ce464c2017-02-20 15:36:30 -08001524 if (x->skip_chroma_rd && plane) continue;
Jingning Han9ce464c2017-02-20 15:36:30 -08001525
Yaowu Xuc27fc142016-08-22 16:08:15 -07001526 // TODO(geza): Write direct sse functions that do not compute
1527 // variance as well.
1528 cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
1529 &sse);
1530
1531 if (plane == 0) x->pred_sse[ref] = sse;
1532
1533 total_sse += sse;
1534
1535 model_rd_from_sse(cpi, xd, bs, plane, sse, &rate, &dist);
1536
1537 rate_sum += rate;
1538 dist_sum += dist;
1539 }
1540
1541 *skip_txfm_sb = total_sse == 0;
1542 *skip_sse_sb = total_sse << 4;
1543 *out_rate_sum = (int)rate_sum;
1544 *out_dist_sum = dist_sum;
1545}
1546
Yaowu Xuf883b422016-08-30 14:01:10 -07001547int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
1548 intptr_t block_size, int64_t *ssz) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001549 int i;
1550 int64_t error = 0, sqcoeff = 0;
1551
1552 for (i = 0; i < block_size; i++) {
1553 const int diff = coeff[i] - dqcoeff[i];
1554 error += diff * diff;
1555 sqcoeff += coeff[i] * coeff[i];
1556 }
1557
1558 *ssz = sqcoeff;
1559 return error;
1560}
1561
Yaowu Xuf883b422016-08-30 14:01:10 -07001562int64_t av1_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff,
1563 int block_size) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001564 int i;
1565 int64_t error = 0;
1566
1567 for (i = 0; i < block_size; i++) {
1568 const int diff = coeff[i] - dqcoeff[i];
1569 error += diff * diff;
1570 }
1571
1572 return error;
1573}
1574
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001575#if CONFIG_HIGHBITDEPTH
Yaowu Xuf883b422016-08-30 14:01:10 -07001576int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
1577 const tran_low_t *dqcoeff, intptr_t block_size,
1578 int64_t *ssz, int bd) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001579 int i;
1580 int64_t error = 0, sqcoeff = 0;
Monty Montgomery4a05a582017-11-01 21:21:07 -04001581#if CONFIG_DAALA_TX
1582 (void)bd;
1583 int shift = 2 * (TX_COEFF_DEPTH - 11);
1584#else
Yaowu Xuc27fc142016-08-22 16:08:15 -07001585 int shift = 2 * (bd - 8);
Monty Montgomery4a05a582017-11-01 21:21:07 -04001586#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -07001587 int rounding = shift > 0 ? 1 << (shift - 1) : 0;
1588
1589 for (i = 0; i < block_size; i++) {
1590 const int64_t diff = coeff[i] - dqcoeff[i];
1591 error += diff * diff;
1592 sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
1593 }
1594 assert(error >= 0 && sqcoeff >= 0);
1595 error = (error + rounding) >> shift;
1596 sqcoeff = (sqcoeff + rounding) >> shift;
1597
1598 *ssz = sqcoeff;
1599 return error;
1600}
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001601#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07001602
Angie Chiang47e4b362017-03-24 11:25:10 -07001603#if !CONFIG_LV_MAP
1604static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane,
1605 int block, TX_SIZE tx_size, const SCAN_ORDER *scan_order,
1606 const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l,
1607 int use_fast_coef_costing) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001608 MACROBLOCKD *const xd = &x->e_mbd;
1609 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
1610 const struct macroblock_plane *p = &x->plane[plane];
1611 const struct macroblockd_plane *pd = &xd->plane[plane];
1612 const PLANE_TYPE type = pd->plane_type;
1613 const uint16_t *band_count = &band_count_table[tx_size][1];
1614 const int eob = p->eobs[block];
1615 const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
Debargha Mukherjeeb3eda2f2017-11-28 16:00:20 -08001616 const TX_SIZE tx_size_ctx = get_txsize_entropy_ctx(tx_size);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001617 uint8_t token_cache[MAX_TX_SQUARE];
Angie Chiang77368af2017-03-23 16:22:07 -07001618 int pt = combine_entropy_contexts(*a, *l);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001619 int c, cost;
Angie Chiang77368af2017-03-23 16:22:07 -07001620 const int16_t *scan = scan_order->scan;
1621 const int16_t *nb = scan_order->neighbors;
Thomas Daviesed8e2d22017-01-04 16:42:09 +00001622 const int ref = is_inter_block(mbmi);
hui suc0cf71d2017-07-20 16:38:50 -07001623 int(*head_token_costs)[COEFF_CONTEXTS][TAIL_TOKENS] =
1624 x->token_head_costs[tx_size_ctx][type][ref];
1625 int(*tail_token_costs)[COEFF_CONTEXTS][TAIL_TOKENS] =
1626 x->token_tail_costs[tx_size_ctx][type][ref];
1627 const int seg_eob = av1_get_tx_eob(&cm->seg, mbmi->segment_id, tx_size);
Yaowu Xuabe52152017-10-20 14:37:54 -07001628 int8_t eob_val;
Thomas Davies10525752017-03-06 12:10:46 +00001629
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001630#if CONFIG_HIGHBITDEPTH
Alex Converseda3d94f2017-03-15 14:54:29 -07001631 const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, xd->bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001632#else
Alex Converseda3d94f2017-03-15 14:54:29 -07001633 const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, 8);
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001634#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07001635
Angie Chiang22ba7512016-10-20 17:10:33 -07001636 (void)cm;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001637
1638 if (eob == 0) {
hui suc0cf71d2017-07-20 16:38:50 -07001639 // block zero
1640 cost = (*head_token_costs)[pt][0];
Yaowu Xuc27fc142016-08-22 16:08:15 -07001641 } else {
1642 if (use_fast_coef_costing) {
1643 int band_left = *band_count++;
1644
1645 // dc token
1646 int v = qcoeff[0];
1647 int16_t prev_t;
Alex Converseda3d94f2017-03-15 14:54:29 -07001648 cost = av1_get_token_cost(v, &prev_t, cat6_bits);
hui suc0cf71d2017-07-20 16:38:50 -07001649 eob_val = (eob == 1) ? EARLY_EOB : NO_EOB;
1650 cost += av1_get_coeff_token_cost(
1651 prev_t, eob_val, 1, (*head_token_costs)[pt], (*tail_token_costs)[pt]);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001652
Yaowu Xuf883b422016-08-30 14:01:10 -07001653 token_cache[0] = av1_pt_energy_class[prev_t];
hui suc0cf71d2017-07-20 16:38:50 -07001654 ++head_token_costs;
1655 ++tail_token_costs;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001656
1657 // ac tokens
1658 for (c = 1; c < eob; c++) {
1659 const int rc = scan[c];
1660 int16_t t;
1661
1662 v = qcoeff[rc];
Alex Converseda3d94f2017-03-15 14:54:29 -07001663 cost += av1_get_token_cost(v, &t, cat6_bits);
hui suc0cf71d2017-07-20 16:38:50 -07001664 eob_val =
1665 (c + 1 == eob) ? (c + 1 == seg_eob ? LAST_EOB : EARLY_EOB) : NO_EOB;
1666 cost += av1_get_coeff_token_cost(t, eob_val, 0,
1667 (*head_token_costs)[!prev_t],
1668 (*tail_token_costs)[!prev_t]);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001669 prev_t = t;
1670 if (!--band_left) {
1671 band_left = *band_count++;
hui suc0cf71d2017-07-20 16:38:50 -07001672 ++head_token_costs;
1673 ++tail_token_costs;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001674 }
1675 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07001676 } else { // !use_fast_coef_costing
1677 int band_left = *band_count++;
1678
1679 // dc token
1680 int v = qcoeff[0];
1681 int16_t tok;
Alex Converseda3d94f2017-03-15 14:54:29 -07001682 cost = av1_get_token_cost(v, &tok, cat6_bits);
hui suc0cf71d2017-07-20 16:38:50 -07001683 eob_val = (eob == 1) ? EARLY_EOB : NO_EOB;
1684 cost += av1_get_coeff_token_cost(tok, eob_val, 1, (*head_token_costs)[pt],
1685 (*tail_token_costs)[pt]);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001686
Yaowu Xuf883b422016-08-30 14:01:10 -07001687 token_cache[0] = av1_pt_energy_class[tok];
hui suc0cf71d2017-07-20 16:38:50 -07001688 ++head_token_costs;
1689 ++tail_token_costs;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001690
Yaowu Xuc27fc142016-08-22 16:08:15 -07001691 // ac tokens
1692 for (c = 1; c < eob; c++) {
1693 const int rc = scan[c];
1694
1695 v = qcoeff[rc];
Alex Converseda3d94f2017-03-15 14:54:29 -07001696 cost += av1_get_token_cost(v, &tok, cat6_bits);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001697 pt = get_coef_context(nb, token_cache, c);
hui suc0cf71d2017-07-20 16:38:50 -07001698 eob_val =
1699 (c + 1 == eob) ? (c + 1 == seg_eob ? LAST_EOB : EARLY_EOB) : NO_EOB;
1700 cost += av1_get_coeff_token_cost(
1701 tok, eob_val, 0, (*head_token_costs)[pt], (*tail_token_costs)[pt]);
Yaowu Xuf883b422016-08-30 14:01:10 -07001702 token_cache[rc] = av1_pt_energy_class[tok];
Yaowu Xuc27fc142016-08-22 16:08:15 -07001703 if (!--band_left) {
1704 band_left = *band_count++;
hui suc0cf71d2017-07-20 16:38:50 -07001705 ++head_token_costs;
1706 ++tail_token_costs;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001707 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07001708 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07001709 }
1710 }
1711
Yaowu Xuc27fc142016-08-22 16:08:15 -07001712 return cost;
1713}
Angie Chiang47e4b362017-03-24 11:25:10 -07001714#endif // !CONFIG_LV_MAP
1715
Angie Chiang05917872017-04-15 12:28:56 -07001716int av1_cost_coeffs(const AV1_COMP *const cpi, MACROBLOCK *x, int plane,
Jingning Han7eab9ff2017-07-06 10:12:54 -07001717 int blk_row, int blk_col, int block, TX_SIZE tx_size,
1718 const SCAN_ORDER *scan_order, const ENTROPY_CONTEXT *a,
1719 const ENTROPY_CONTEXT *l, int use_fast_coef_costing) {
Angie Chiang3627de22017-08-18 20:15:59 -07001720 const AV1_COMMON *const cm = &cpi->common;
Angie Chiang47e4b362017-03-24 11:25:10 -07001721#if !CONFIG_LV_MAP
Jingning Han7eab9ff2017-07-06 10:12:54 -07001722 (void)blk_row;
1723 (void)blk_col;
Angie Chiang47e4b362017-03-24 11:25:10 -07001724 return cost_coeffs(cm, x, plane, block, tx_size, scan_order, a, l,
1725 use_fast_coef_costing);
Hui Su9fa96232017-10-23 15:46:04 -07001726#else // !CONFIG_LV_MAP
Angie Chiang47e4b362017-03-24 11:25:10 -07001727 (void)scan_order;
1728 (void)use_fast_coef_costing;
1729 const MACROBLOCKD *xd = &x->e_mbd;
1730 const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
1731 const struct macroblockd_plane *pd = &xd->plane[plane];
1732 const BLOCK_SIZE bsize = mbmi->sb_type;
Angie Chiang47e4b362017-03-24 11:25:10 -07001733 const BLOCK_SIZE plane_bsize =
1734 AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd));
Angie Chiang47e4b362017-03-24 11:25:10 -07001735 TXB_CTX txb_ctx;
1736 get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx);
Angie Chiang3627de22017-08-18 20:15:59 -07001737 return av1_cost_coeffs_txb(cm, x, plane, blk_row, blk_col, block, tx_size,
Jingning Han7eab9ff2017-07-06 10:12:54 -07001738 &txb_ctx);
Angie Chiang47e4b362017-03-24 11:25:10 -07001739#endif // !CONFIG_LV_MAP
1740}
Yaowu Xuc27fc142016-08-22 16:08:15 -07001741
Alex Converse61f37b82017-03-29 15:26:03 -07001742// Get transform block visible dimensions cropped to the MI units.
1743static void get_txb_dimensions(const MACROBLOCKD *xd, int plane,
1744 BLOCK_SIZE plane_bsize, int blk_row, int blk_col,
1745 BLOCK_SIZE tx_bsize, int *width, int *height,
1746 int *visible_width, int *visible_height) {
1747 assert(tx_bsize <= plane_bsize);
1748 int txb_height = block_size_high[tx_bsize];
1749 int txb_width = block_size_wide[tx_bsize];
1750 const int block_height = block_size_high[plane_bsize];
1751 const int block_width = block_size_wide[plane_bsize];
1752 const struct macroblockd_plane *const pd = &xd->plane[plane];
1753 // TODO(aconverse@google.com): Investigate using crop_width/height here rather
1754 // than the MI size
1755 const int block_rows =
1756 (xd->mb_to_bottom_edge >= 0)
1757 ? block_height
1758 : (xd->mb_to_bottom_edge >> (3 + pd->subsampling_y)) + block_height;
1759 const int block_cols =
1760 (xd->mb_to_right_edge >= 0)
1761 ? block_width
1762 : (xd->mb_to_right_edge >> (3 + pd->subsampling_x)) + block_width;
1763 const int tx_unit_size = tx_size_wide_log2[0];
1764 if (width) *width = txb_width;
1765 if (height) *height = txb_height;
1766 *visible_width = clamp(block_cols - (blk_col << tx_unit_size), 0, txb_width);
1767 *visible_height =
1768 clamp(block_rows - (blk_row << tx_unit_size), 0, txb_height);
1769}
1770
Yushin Cho75b01002017-06-21 13:43:57 -07001771// Compute the pixel domain distortion from src and dst on all visible 4x4s in
1772// the
Alex Converse61f37b82017-03-29 15:26:03 -07001773// transform block.
Yushin Cho75b01002017-06-21 13:43:57 -07001774static unsigned pixel_dist(const AV1_COMP *const cpi, const MACROBLOCK *x,
1775 int plane, const uint8_t *src, const int src_stride,
1776 const uint8_t *dst, const int dst_stride,
1777 int blk_row, int blk_col,
1778 const BLOCK_SIZE plane_bsize,
1779 const BLOCK_SIZE tx_bsize) {
Alex Converse61f37b82017-03-29 15:26:03 -07001780 int txb_rows, txb_cols, visible_rows, visible_cols;
Yushin Cho75b01002017-06-21 13:43:57 -07001781 const MACROBLOCKD *xd = &x->e_mbd;
Yushin Chob7b60c52017-07-14 16:18:52 -07001782
Alex Converse61f37b82017-03-29 15:26:03 -07001783 get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize,
1784 &txb_cols, &txb_rows, &visible_cols, &visible_rows);
1785 assert(visible_rows > 0);
1786 assert(visible_cols > 0);
Yushin Cho75b01002017-06-21 13:43:57 -07001787
Yushin Chob7b60c52017-07-14 16:18:52 -07001788#if CONFIG_DIST_8X8
Yushin Cho55104332017-08-14 16:15:43 -07001789 if (x->using_dist_8x8 && plane == 0 && txb_cols >= 8 && txb_rows >= 8)
Yushin Chofcddadf2017-08-30 13:49:38 -07001790 return (unsigned)av1_dist_8x8(cpi, x, src, src_stride, dst, dst_stride,
1791 tx_bsize, txb_cols, txb_rows, visible_cols,
1792 visible_rows, x->qindex);
Yushin Chob7b60c52017-07-14 16:18:52 -07001793#endif // CONFIG_DIST_8X8
Yushin Cho75b01002017-06-21 13:43:57 -07001794
Yushin Cho2f025aa2017-09-28 17:39:21 -07001795 unsigned sse = pixel_dist_visible_only(cpi, x, src, src_stride, dst,
1796 dst_stride, tx_bsize, txb_rows,
1797 txb_cols, visible_rows, visible_cols);
1798
Alex Converse61f37b82017-03-29 15:26:03 -07001799 return sse;
1800}
1801
Yushin Cho75b01002017-06-21 13:43:57 -07001802// Compute the pixel domain distortion from diff on all visible 4x4s in the
1803// transform block.
1804static int64_t pixel_diff_dist(const MACROBLOCK *x, int plane,
1805 const int16_t *diff, const int diff_stride,
1806 int blk_row, int blk_col,
1807 const BLOCK_SIZE plane_bsize,
1808 const BLOCK_SIZE tx_bsize) {
Alex Converse61f37b82017-03-29 15:26:03 -07001809 int visible_rows, visible_cols;
Yushin Cho75b01002017-06-21 13:43:57 -07001810 const MACROBLOCKD *xd = &x->e_mbd;
Yushin Chob7b60c52017-07-14 16:18:52 -07001811#if CONFIG_DIST_8X8
Yushin Cho75b01002017-06-21 13:43:57 -07001812 int txb_height = block_size_high[tx_bsize];
1813 int txb_width = block_size_wide[tx_bsize];
1814 const int src_stride = x->plane[plane].src.stride;
1815 const int src_idx = (blk_row * src_stride + blk_col) << tx_size_wide_log2[0];
1816 const uint8_t *src = &x->plane[plane].src.buf[src_idx];
Yushin Cho75b01002017-06-21 13:43:57 -07001817#endif
1818
Alex Converse61f37b82017-03-29 15:26:03 -07001819 get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize, NULL,
1820 NULL, &visible_cols, &visible_rows);
Yushin Cho75b01002017-06-21 13:43:57 -07001821
Yushin Chob7b60c52017-07-14 16:18:52 -07001822#if CONFIG_DIST_8X8
Yushin Cho55104332017-08-14 16:15:43 -07001823 if (x->using_dist_8x8 && plane == 0 && txb_width >= 8 && txb_height >= 8)
Yushin Choe30a47c2017-08-15 13:08:30 -07001824 return av1_dist_8x8_diff(x, src, src_stride, diff, diff_stride, txb_width,
Yushin Chob7b60c52017-07-14 16:18:52 -07001825 txb_height, visible_cols, visible_rows, x->qindex);
Yushin Cho75b01002017-06-21 13:43:57 -07001826 else
1827#endif
1828 return aom_sum_squares_2d_i16(diff, diff_stride, visible_cols,
1829 visible_rows);
Alex Converse61f37b82017-03-29 15:26:03 -07001830}
1831
Hui Su4d51bed2017-11-29 15:52:40 -08001832int av1_count_colors(const uint8_t *src, int stride, int rows, int cols,
1833 int *val_count) {
1834 const int max_pix_val = 1 << 8;
1835 memset(val_count, 0, max_pix_val * sizeof(val_count[0]));
hui sud9a812b2017-07-06 14:34:37 -07001836 for (int r = 0; r < rows; ++r) {
1837 for (int c = 0; c < cols; ++c) {
Hui Su4d51bed2017-11-29 15:52:40 -08001838 const int this_val = src[r * stride + c];
1839 assert(this_val < max_pix_val);
1840 ++val_count[this_val];
hui sud9a812b2017-07-06 14:34:37 -07001841 }
1842 }
1843 int n = 0;
Hui Su4d51bed2017-11-29 15:52:40 -08001844 for (int i = 0; i < max_pix_val; ++i) {
hui sud9a812b2017-07-06 14:34:37 -07001845 if (val_count[i]) ++n;
1846 }
1847 return n;
1848}
1849
#if CONFIG_HIGHBITDEPTH
// High bit-depth variant of av1_count_colors(): histogram the block into
// val_count (which must hold 1 << bit_depth entries) and return the number
// of distinct pixel values present.
int av1_count_colors_highbd(const uint8_t *src8, int stride, int rows, int cols,
                            int bit_depth, int *val_count) {
  assert(bit_depth <= 12);
  const int max_pix_val = 1 << bit_depth;
  const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  memset(val_count, 0, max_pix_val * sizeof(val_count[0]));
  for (int r = 0; r < rows; ++r) {
    for (int c = 0; c < cols; ++c) {
      const int this_val = src[r * stride + c];
      assert(this_val < max_pix_val);
      ++val_count[this_val];
    }
  }
  // Count the non-empty histogram bins.
  int n = 0;
  for (int i = 0; i < max_pix_val; ++i) {
    if (val_count[i]) ++n;
  }
  return n;
}
#endif  // CONFIG_HIGHBITDEPTH
hui sud9a812b2017-07-06 14:34:37 -07001871
Sebastien Alaiwan95137bd2017-11-13 14:47:37 +01001872void av1_inverse_transform_block_facade(MACROBLOCKD *xd, int plane, int block,
Frederic Barbier33b39f02017-11-21 11:11:24 +01001873 int blk_row, int blk_col, int eob,
1874 int reduced_tx_set) {
Sebastien Alaiwan95137bd2017-11-13 14:47:37 +01001875 struct macroblockd_plane *const pd = &xd->plane[plane];
1876 tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
Sebastien Alaiwan95137bd2017-11-13 14:47:37 +01001877 const PLANE_TYPE plane_type = get_plane_type(plane);
1878 const TX_SIZE tx_size = av1_get_tx_size(plane, xd);
1879 const TX_TYPE tx_type =
1880 av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size);
1881 const int dst_stride = pd->dst.stride;
1882 uint8_t *dst =
1883 &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
Sebastien Alaiwan9f001f32017-11-28 16:32:33 +01001884 av1_inverse_transform_block(xd, dqcoeff, plane, tx_type, tx_size, dst,
1885 dst_stride, eob, reduced_tx_set);
Sebastien Alaiwan95137bd2017-11-13 14:47:37 +01001886}
1887
Angie Chiang808d8592017-04-06 18:36:55 -07001888void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
1889 BLOCK_SIZE plane_bsize, int block, int blk_row, int blk_col,
1890 TX_SIZE tx_size, int64_t *out_dist, int64_t *out_sse,
1891 OUTPUT_STATUS output_status) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001892 MACROBLOCKD *const xd = &x->e_mbd;
1893 const struct macroblock_plane *const p = &x->plane[plane];
Yushin Chob7b60c52017-07-14 16:18:52 -07001894#if CONFIG_DIST_8X8
Yushin Cho4483e3d2017-04-18 19:41:20 -07001895 struct macroblockd_plane *const pd = &xd->plane[plane];
Yushin Chob7b60c52017-07-14 16:18:52 -07001896#else // CONFIG_DIST_8X8
Yushin Cho4483e3d2017-04-18 19:41:20 -07001897 const struct macroblockd_plane *const pd = &xd->plane[plane];
Yushin Chob7b60c52017-07-14 16:18:52 -07001898#endif // CONFIG_DIST_8X8
Yushin Cho7a428ba2017-01-12 16:28:49 -08001899
Yushin Cho55104332017-08-14 16:15:43 -07001900 if (cpi->sf.use_transform_domain_distortion
1901#if CONFIG_DIST_8X8
1902 && !x->using_dist_8x8
1903#endif
1904 ) {
hui sud2f12ba2017-04-12 10:08:43 -07001905 // Transform domain distortion computation is more efficient as it does
Yaowu Xuc27fc142016-08-22 16:08:15 -07001906 // not involve an inverse transform, but it is less accurate.
Urvang Joshi80893152017-10-27 11:51:14 -07001907 const int buffer_length = av1_get_max_eob(tx_size);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001908 int64_t this_sse;
Monty Montgomery4a05a582017-11-01 21:21:07 -04001909// TX-domain results need to shift down to Q2/D10 to match pixel
1910// domain distortion values which are in Q2^2
1911#if CONFIG_DAALA_TX
1912 int shift = (TX_COEFF_DEPTH - 10) * 2;
1913#else
Jingning Hanff705452017-04-27 11:32:15 -07001914 int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2;
Monty Montgomery4a05a582017-11-01 21:21:07 -04001915#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -07001916 tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
1917 tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
Thomas Daede6ff6af62017-02-03 16:29:24 -08001918
Monty Montgomerya26262c2017-10-31 07:32:13 -04001919#if CONFIG_DAALA_TX
1920 *out_dist = av1_highbd_block_error(coeff, dqcoeff, buffer_length, &this_sse,
1921 xd->bd);
1922#else
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001923#if CONFIG_HIGHBITDEPTH
Yi Luod61e6082017-05-26 16:14:39 -07001924 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
1925 *out_dist = av1_highbd_block_error(coeff, dqcoeff, buffer_length,
Debargha Mukherjeeb02d2f32017-10-03 11:06:40 -07001926 &this_sse, xd->bd);
Yi Luod61e6082017-05-26 16:14:39 -07001927 else
1928#endif
Debargha Mukherjeeb02d2f32017-10-03 11:06:40 -07001929 *out_dist = av1_block_error(coeff, dqcoeff, buffer_length, &this_sse);
Monty Montgomerya26262c2017-10-31 07:32:13 -04001930#endif
Yushin Chod0b77ac2017-10-20 17:33:16 -07001931
Debargha Mukherjeeb02d2f32017-10-03 11:06:40 -07001932 *out_dist = RIGHT_SIGNED_SHIFT(*out_dist, shift);
1933 *out_sse = RIGHT_SIGNED_SHIFT(this_sse, shift);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001934 } else {
1935 const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
Jingning Hanb9c57272016-10-25 10:15:39 -07001936 const int bsw = block_size_wide[tx_bsize];
1937 const int bsh = block_size_high[tx_bsize];
Yaowu Xuc27fc142016-08-22 16:08:15 -07001938 const int src_stride = x->plane[plane].src.stride;
1939 const int dst_stride = xd->plane[plane].dst.stride;
Jingning Hanb9c57272016-10-25 10:15:39 -07001940 // Scale the transform block index to pixel unit.
1941 const int src_idx = (blk_row * src_stride + blk_col)
1942 << tx_size_wide_log2[0];
1943 const int dst_idx = (blk_row * dst_stride + blk_col)
1944 << tx_size_wide_log2[0];
Yaowu Xuc27fc142016-08-22 16:08:15 -07001945 const uint8_t *src = &x->plane[plane].src.buf[src_idx];
1946 const uint8_t *dst = &xd->plane[plane].dst.buf[dst_idx];
1947 const tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
1948 const uint16_t eob = p->eobs[block];
1949
Yaowu Xuc27fc142016-08-22 16:08:15 -07001950 assert(cpi != NULL);
Jingning Hanb9c57272016-10-25 10:15:39 -07001951 assert(tx_size_wide_log2[0] == tx_size_high_log2[0]);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001952
Angie Chiangc0cf6c02017-04-04 13:47:27 -07001953 {
1954 const int diff_stride = block_size_wide[plane_bsize];
1955 const int diff_idx = (blk_row * diff_stride + blk_col)
1956 << tx_size_wide_log2[0];
1957 const int16_t *diff = &p->src_diff[diff_idx];
Yushin Cho75b01002017-06-21 13:43:57 -07001958 *out_sse = pixel_diff_dist(x, plane, diff, diff_stride, blk_row, blk_col,
1959 plane_bsize, tx_bsize);
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001960#if CONFIG_HIGHBITDEPTH
hui sub1cc1f92017-04-11 17:41:29 -07001961 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
Alex Conversef323f012017-04-24 09:26:33 -07001962 *out_sse = ROUND_POWER_OF_TWO(*out_sse, (xd->bd - 8) * 2);
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001963#endif // CONFIG_HIGHBITDEPTH
Angie Chiangc0cf6c02017-04-04 13:47:27 -07001964 }
Alex Conversef323f012017-04-24 09:26:33 -07001965 *out_sse *= 16;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001966
1967 if (eob) {
Angie Chiang228cc182017-04-07 15:22:16 -07001968 if (output_status == OUTPUT_HAS_DECODED_PIXELS) {
Yushin Cho75b01002017-06-21 13:43:57 -07001969 *out_dist = pixel_dist(cpi, x, plane, src, src_stride, dst, dst_stride,
1970 blk_row, blk_col, plane_bsize, tx_bsize);
Angie Chiang8f6ddec2017-04-04 17:07:00 -07001971 } else {
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001972#if CONFIG_HIGHBITDEPTH
Jingning Han6a9dbef2017-04-10 10:25:14 -07001973 uint8_t *recon;
Angie Chiang8f6ddec2017-04-04 17:07:00 -07001974 DECLARE_ALIGNED(16, uint16_t, recon16[MAX_TX_SQUARE]);
Jingning Han6a9dbef2017-04-10 10:25:14 -07001975
1976 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
1977 recon = CONVERT_TO_BYTEPTR(recon16);
1978 else
1979 recon = (uint8_t *)recon16;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001980#else
Angie Chiang8f6ddec2017-04-04 17:07:00 -07001981 DECLARE_ALIGNED(16, uint8_t, recon[MAX_TX_SQUARE]);
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001982#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07001983
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001984#if CONFIG_HIGHBITDEPTH
Angie Chiang8f6ddec2017-04-04 17:07:00 -07001985 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
1986 aom_highbd_convolve_copy(dst, dst_stride, recon, MAX_TX_SIZE, NULL, 0,
1987 NULL, 0, bsw, bsh, xd->bd);
1988 } else {
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001989#endif // CONFIG_HIGHBITDEPTH
Angie Chiang8f6ddec2017-04-04 17:07:00 -07001990 aom_convolve_copy(dst, dst_stride, recon, MAX_TX_SIZE, NULL, 0, NULL,
1991 0, bsw, bsh);
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001992#if CONFIG_HIGHBITDEPTH
Angie Chiang8f6ddec2017-04-04 17:07:00 -07001993 }
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001994#endif // CONFIG_HIGHBITDEPTH
Angie Chiang41fffae2017-04-03 10:33:18 -07001995
Angie Chiang8f6ddec2017-04-04 17:07:00 -07001996 const PLANE_TYPE plane_type = get_plane_type(plane);
Jingning Han19b5c8f2017-07-06 15:10:12 -07001997 TX_TYPE tx_type =
1998 av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size);
Sebastien Alaiwan9f001f32017-11-28 16:32:33 +01001999 av1_inverse_transform_block(xd, dqcoeff, plane, tx_type, tx_size, recon,
2000 MAX_TX_SIZE, eob,
2001 cpi->common.reduced_tx_set_used);
Angie Chiang41fffae2017-04-03 10:33:18 -07002002
Yushin Chob7b60c52017-07-14 16:18:52 -07002003#if CONFIG_DIST_8X8
Yushin Cho55104332017-08-14 16:15:43 -07002004 if (x->using_dist_8x8 && plane == 0 && (bsw < 8 || bsh < 8)) {
Yushin Cho75b01002017-06-21 13:43:57 -07002005 // Save decoded pixels for inter block in pd->pred to avoid
2006 // block_8x8_rd_txfm_daala_dist() need to produce them
2007 // by calling av1_inverse_transform_block() again.
2008 const int pred_stride = block_size_wide[plane_bsize];
2009 const int pred_idx = (blk_row * pred_stride + blk_col)
2010 << tx_size_wide_log2[0];
2011 int16_t *pred = &pd->pred[pred_idx];
2012 int i, j;
Yushin Cho4483e3d2017-04-18 19:41:20 -07002013
Yushin Cho8ab875d2017-06-23 14:47:21 -07002014#if CONFIG_HIGHBITDEPTH
2015 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
2016 for (j = 0; j < bsh; j++)
2017 for (i = 0; i < bsw; i++)
2018 pred[j * pred_stride + i] =
2019 CONVERT_TO_SHORTPTR(recon)[j * MAX_TX_SIZE + i];
2020 } else {
2021#endif
2022 for (j = 0; j < bsh; j++)
2023 for (i = 0; i < bsw; i++)
2024 pred[j * pred_stride + i] = recon[j * MAX_TX_SIZE + i];
2025#if CONFIG_HIGHBITDEPTH
2026 }
2027#endif // CONFIG_HIGHBITDEPTH
Angie Chiang8f6ddec2017-04-04 17:07:00 -07002028 }
Yushin Chob7b60c52017-07-14 16:18:52 -07002029#endif // CONFIG_DIST_8X8
Yushin Cho75b01002017-06-21 13:43:57 -07002030 *out_dist =
2031 pixel_dist(cpi, x, plane, src, src_stride, recon, MAX_TX_SIZE,
2032 blk_row, blk_col, plane_bsize, tx_bsize);
Angie Chiang8f6ddec2017-04-04 17:07:00 -07002033 }
Alex Conversef323f012017-04-24 09:26:33 -07002034 *out_dist *= 16;
2035 } else {
2036 *out_dist = *out_sse;
Yaowu Xuc27fc142016-08-22 16:08:15 -07002037 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07002038 }
2039}
2040
// Callback for av1_foreach_transformed_block_in_plane(): computes the RD
// cost of one transform block and accumulates it into args->rd_stats /
// args->this_rd. Sets args->exit_early as soon as the running cost can no
// longer beat args->best_rd. |arg| must point to a struct rdcost_block_args.
static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
                          BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const AV1_COMP *cpi = args->cpi;
  ENTROPY_CONTEXT *a = args->t_above + blk_col;
  ENTROPY_CONTEXT *l = args->t_left + blk_row;
  const AV1_COMMON *cm = &cpi->common;
  int64_t rd1, rd2, rd;
  RD_STATS this_rd_stats;

#if CONFIG_DIST_8X8
  // If sub8x8 tx, 8x8 or larger partition, and luma channel,
  // dist-8x8 disables early skip, because the distortion metrics for
  // sub8x8 tx (MSE) and reference distortion from 8x8 or larger partition
  // (new distortion metric) are different.
  // Exception is: dist-8x8 is enabled but still MSE is used,
  // i.e. "--tune=" encoder option is not used.
  int bw = block_size_wide[plane_bsize];
  int bh = block_size_high[plane_bsize];
  int disable_early_skip =
      x->using_dist_8x8 && plane == 0 && bw >= 8 && bh >= 8 &&
      (tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4) &&
      x->tune_metric != AOM_TUNE_PSNR;
#endif  // CONFIG_DIST_8X8

  av1_init_rd_stats(&this_rd_stats);

  // A previous block in this plane already pushed the cost past best_rd.
  if (args->exit_early) return;

  // For intra blocks, the prediction and residual for this transform block
  // are generated here.
  if (!is_inter_block(mbmi)) {
    av1_predict_intra_block_facade(cm, xd, plane, block, blk_col, blk_row,
                                   tx_size);
    av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size);
  }

#if !CONFIG_TXK_SEL
// full forward transform and quantization
#if DISABLE_TRELLISQ_SEARCH
  av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
                  AV1_XFORM_QUANT_B);
#else
  av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
                  AV1_XFORM_QUANT_FP);

// TX-domain results need to shift down to Q2/D10 to match pixel
// domain distortion values which are in Q2^2
#if CONFIG_DAALA_TX
  const int shift = (TX_COEFF_DEPTH - 10) * 2;
#else
  const int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2;
#endif
  tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block);
  tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
  const int buffer_length = av1_get_max_eob(tx_size);
  int64_t tmp_dist;
  int64_t tmp;
#if CONFIG_DAALA_TX
  tmp_dist =
      av1_highbd_block_error(coeff, dqcoeff, buffer_length, &tmp, xd->bd);
#else
#if CONFIG_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
    tmp_dist =
        av1_highbd_block_error(coeff, dqcoeff, buffer_length, &tmp, xd->bd);
  else
#endif
    tmp_dist = av1_block_error(coeff, dqcoeff, buffer_length, &tmp);
#endif
  tmp_dist = RIGHT_SIGNED_SHIFT(tmp_dist, shift);

  // Run trellis coefficient optimization only if the tx-domain distortion
  // estimate alone still leaves a chance of beating the best cost so far;
  // otherwise abandon this block immediately.
  if (
#if CONFIG_DIST_8X8
      disable_early_skip ||
#endif
      RDCOST(x->rdmult, 0, tmp_dist) + args->this_rd < args->best_rd) {
    av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize, tx_size,
                   a, l, CONFIG_LV_MAP);
  } else {
    args->exit_early = 1;
    return;
  }
#endif  // DISABLE_TRELLISQ_SEARCH

  if (!is_inter_block(mbmi)) {
    struct macroblock_plane *const p = &x->plane[plane];
    av1_inverse_transform_block_facade(xd, plane, block, blk_row, blk_col,
                                       p->eobs[block], cm->reduced_tx_set_used);
    av1_dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
                   tx_size, &this_rd_stats.dist, &this_rd_stats.sse,
                   OUTPUT_HAS_DECODED_PIXELS);
  } else {
    av1_dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
                   tx_size, &this_rd_stats.dist, &this_rd_stats.sse,
                   OUTPUT_HAS_PREDICTED_PIXELS);
  }
  // Even at zero rate, the distortion alone may already exceed the best cost.
  rd = RDCOST(x->rdmult, 0, this_rd_stats.dist);
  if (args->this_rd + rd > args->best_rd) {
    args->exit_early = 1;
    return;
  }
#if CONFIG_CFL
  // Record this luma transform block for later chroma-from-luma prediction
  // (see cfl_store_tx).
  if (plane == AOM_PLANE_Y && xd->cfl.store_y && is_cfl_allowed(mbmi)) {
    assert(!is_inter_block(mbmi) || plane_bsize < BLOCK_8X8);
    cfl_store_tx(xd, blk_row, blk_col, tx_size, plane_bsize);
  }
#endif  // CONFIG_CFL
  const PLANE_TYPE plane_type = get_plane_type(plane);
  const TX_TYPE tx_type =
      av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size);

  const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, mbmi);
  this_rd_stats.rate =
      av1_cost_coeffs(cpi, x, plane, blk_row, blk_col, block, tx_size,
                      scan_order, a, l, args->use_fast_coef_costing);
#else   // !CONFIG_TXK_SEL
  // With tx-kernel selection, the rate/distortion stats and the tx type
  // choice are produced jointly by this search.
  av1_search_txk_type(cpi, x, plane, block, blk_row, blk_col, plane_bsize,
                      tx_size, a, l, args->use_fast_coef_costing,
                      &this_rd_stats);
#endif  // !CONFIG_TXK_SEL

#if CONFIG_RD_DEBUG
  av1_update_txb_coeff_cost(&this_rd_stats, plane, tx_size, blk_row, blk_col,
                            this_rd_stats.rate);
#endif  // CONFIG_RD_DEBUG
  av1_set_txb_context(x, plane, block, tx_size, a, l);

  rd1 = RDCOST(x->rdmult, this_rd_stats.rate, this_rd_stats.dist);
  rd2 = RDCOST(x->rdmult, 0, this_rd_stats.sse);

  // TODO(jingning): temporarily enabled only for luma component
  rd = AOMMIN(rd1, rd2);

  this_rd_stats.skip &= !x->plane[plane].eobs[block];

  av1_merge_rd_stats(&args->rd_stats, &this_rd_stats);

  args->this_rd += rd;

#if CONFIG_DIST_8X8
  // dist-8x8 must evaluate the full partition before skipping (see above).
  if (!disable_early_skip)
#endif
    if (args->this_rd > args->best_rd) {
      args->exit_early = 1;
      return;
    }
}
2190
Yushin Chob7b60c52017-07-14 16:18:52 -07002191#if CONFIG_DIST_8X8
// Recomputes the whole-block SSE and distortion with the dist-8x8 metric for
// a block coded with sub-8x8 transforms, overwriting the MSE-based values
// previously accumulated into args->rd_stats, and re-derives the block's RD
// cost (args->rd_stats.rdcost / args->this_rd), setting args->exit_early if
// the new cost exceeds args->best_rd.
static void dist_8x8_sub8x8_txfm_rd(const AV1_COMP *const cpi, MACROBLOCK *x,
                                    BLOCK_SIZE bsize,
                                    struct rdcost_block_args *args) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblockd_plane *const pd = &xd->plane[0];
  const struct macroblock_plane *const p = &x->plane[0];
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const int src_stride = p->src.stride;
  const int dst_stride = pd->dst.stride;
  const uint8_t *src = &p->src.buf[0];
  const uint8_t *dst = &pd->dst.buf[0];
  const int16_t *pred = &pd->pred[0];
  int bw = block_size_wide[bsize];
  int bh = block_size_high[bsize];
  int visible_w = bw;
  int visible_h = bh;

  int i, j;
  int64_t rd, rd1, rd2;
  int64_t sse = INT64_MAX, dist = INT64_MAX;
  int qindex = x->qindex;

  // Only whole multiples of 8 pixels are supported by the dist-8x8 metric.
  assert((bw & 0x07) == 0);
  assert((bh & 0x07) == 0);

  // Clamp the measured area to the visible part of the frame.
  get_txb_dimensions(xd, 0, bsize, 0, 0, bsize, &bw, &bh, &visible_w,
                     &visible_h);

  const int diff_stride = block_size_wide[bsize];
  const int16_t *diff = p->src_diff;
  sse = av1_dist_8x8_diff(x, src, src_stride, diff, diff_stride, bw, bh,
                          visible_w, visible_h, qindex);
  // Normalize high-bitdepth SSE back to 8-bit scale, then apply the same
  // x16 scaling used by av1_dist_block().
  sse = ROUND_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
  sse *= 16;

  if (!is_inter_block(mbmi)) {
    // Intra: the reconstruction lives in the dst buffer.
    dist = av1_dist_8x8(cpi, x, src, src_stride, dst, dst_stride, bsize, bw, bh,
                        visible_w, visible_h, qindex);
    dist *= 16;
  } else {
// For inter mode, the decoded pixels are provided in pd->pred,
// while the predicted pixels are in dst.
#if CONFIG_HIGHBITDEPTH
    uint8_t *pred8;
    DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]);

    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
      pred8 = CONVERT_TO_BYTEPTR(pred16);
    else
      pred8 = (uint8_t *)pred16;
#else
    DECLARE_ALIGNED(16, uint8_t, pred8[MAX_SB_SQUARE]);
#endif  // CONFIG_HIGHBITDEPTH

// Repack the int16_t decoded pixels from pd->pred into a pixel buffer of
// the appropriate depth before measuring distortion.
#if CONFIG_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      for (j = 0; j < bh; j++)
        for (i = 0; i < bw; i++)
          CONVERT_TO_SHORTPTR(pred8)[j * bw + i] = pred[j * bw + i];
    } else {
#endif
      for (j = 0; j < bh; j++)
        for (i = 0; i < bw; i++) pred8[j * bw + i] = (uint8_t)pred[j * bw + i];
#if CONFIG_HIGHBITDEPTH
    }
#endif  // CONFIG_HIGHBITDEPTH

    dist = av1_dist_8x8(cpi, x, src, src_stride, pred8, bw, bsize, bw, bh,
                        visible_w, visible_h, qindex);
    dist *= 16;
  }

  // Sanity check: with PSNR tuning at 8-bit depth, the dist-8x8 values are
  // expected to match the previously accumulated MSE-based stats.
  if (x->tune_metric == AOM_TUNE_PSNR && xd->bd == 8) {
    assert(args->rd_stats.sse == sse);
    assert(args->rd_stats.dist == dist);
  }
  args->rd_stats.sse = sse;
  args->rd_stats.dist = dist;

  rd1 = RDCOST(x->rdmult, args->rd_stats.rate, args->rd_stats.dist);
  rd2 = RDCOST(x->rdmult, 0, args->rd_stats.sse);
  rd = AOMMIN(rd1, rd2);

  args->rd_stats.rdcost = rd;
  args->this_rd = rd;

  if (args->this_rd > args->best_rd) args->exit_early = 1;
}
Yushin Chob7b60c52017-07-14 16:18:52 -07002280#endif // CONFIG_DIST_8X8
Yushin Cho7a428ba2017-01-12 16:28:49 -08002281
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002282static void txfm_rd_in_plane(MACROBLOCK *x, const AV1_COMP *cpi,
2283 RD_STATS *rd_stats, int64_t ref_best_rd, int plane,
2284 BLOCK_SIZE bsize, TX_SIZE tx_size,
2285 int use_fast_coef_casting) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07002286 MACROBLOCKD *const xd = &x->e_mbd;
2287 const struct macroblockd_plane *const pd = &xd->plane[plane];
Yaowu Xuc27fc142016-08-22 16:08:15 -07002288 struct rdcost_block_args args;
Yaowu Xuf883b422016-08-30 14:01:10 -07002289 av1_zero(args);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002290 args.x = x;
2291 args.cpi = cpi;
2292 args.best_rd = ref_best_rd;
2293 args.use_fast_coef_costing = use_fast_coef_casting;
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002294 av1_init_rd_stats(&args.rd_stats);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002295
2296 if (plane == 0) xd->mi[0]->mbmi.tx_size = tx_size;
2297
Yaowu Xuf883b422016-08-30 14:01:10 -07002298 av1_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002299
Yushin Cho30a2c5f2017-06-07 14:18:54 -07002300 av1_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm,
2301 &args);
Yushin Chob7b60c52017-07-14 16:18:52 -07002302#if CONFIG_DIST_8X8
Yushin Chof9970a52017-10-13 12:57:13 -07002303 int bw = block_size_wide[bsize];
2304 int bh = block_size_high[bsize];
2305
2306 if (x->using_dist_8x8 && !args.exit_early && plane == 0 && bw >= 8 &&
2307 bh >= 8 && (tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4))
Yushin Chob7b60c52017-07-14 16:18:52 -07002308 dist_8x8_sub8x8_txfm_rd(cpi, x, bsize, &args);
Yushin Cho30a2c5f2017-06-07 14:18:54 -07002309#endif
Yushin Cho7a428ba2017-01-12 16:28:49 -08002310
Yaowu Xuc27fc142016-08-22 16:08:15 -07002311 if (args.exit_early) {
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002312 av1_invalid_rd_stats(rd_stats);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002313 } else {
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002314 *rd_stats = args.rd_stats;
Yaowu Xuc27fc142016-08-22 16:08:15 -07002315 }
2316}
2317
Yue Chen3dd03e32017-10-17 15:39:52 -07002318static int tx_size_cost(const AV1_COMMON *const cm, const MACROBLOCK *const x,
Urvang Joshiab8840e2017-10-06 16:38:24 -07002319 BLOCK_SIZE bsize, TX_SIZE tx_size) {
Urvang Joshi0d1e4ff2017-04-27 16:17:25 -07002320 const MACROBLOCKD *const xd = &x->e_mbd;
2321 const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
Urvang Joshifeb925f2016-12-05 10:37:29 -08002322
Rupert Swarbrickfcff0b22017-10-05 09:26:04 +01002323 if (cm->tx_mode == TX_MODE_SELECT && block_signals_txsize(mbmi->sb_type)) {
Urvang Joshifeb925f2016-12-05 10:37:29 -08002324 const int is_inter = is_inter_block(mbmi);
Debargha Mukherjeee4e18fc2017-12-06 23:43:24 -08002325 const int32_t tx_size_cat = bsize_to_tx_size_cat(bsize, is_inter);
Debargha Mukherjee0fa057f2017-12-06 17:06:29 -08002326 const int depth = tx_size_to_depth(tx_size, bsize, is_inter);
Urvang Joshiab8840e2017-10-06 16:38:24 -07002327 const int tx_size_ctx = get_tx_size_context(xd);
Yue Chenb23d00a2017-07-28 17:01:21 -07002328 int r_tx_size = x->tx_size_cost[tx_size_cat][tx_size_ctx][depth];
Urvang Joshifeb925f2016-12-05 10:37:29 -08002329 return r_tx_size;
2330 } else {
2331 return 0;
2332 }
2333}
2334
Hui Suddbcde22017-09-18 17:22:02 -07002335// TODO(angiebird): use this function whenever it's possible
// Returns the bit cost of signalling |tx_type| for the given block. Only the
// luma plane carries an explicit transform type, so plane > 0 always costs 0.
// A nonzero cost arises only when more than one extended transform type is
// available for this size/mode and the segment is not lossless.
int av1_tx_type_cost(const AV1_COMMON *cm, const MACROBLOCK *x,
                     const MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
                     TX_SIZE tx_size, TX_TYPE tx_type) {
  if (plane > 0) return 0;

  // Cost tables are indexed via the reduced tx size (see get_min_tx_size).
  tx_size = get_min_tx_size(tx_size);

  const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const int is_inter = is_inter_block(mbmi);
  if (get_ext_tx_types(tx_size, bsize, is_inter, cm->reduced_tx_set_used) > 1 &&
      !xd->lossless[xd->mi[0]->mbmi.segment_id]) {
    const int ext_tx_set =
        get_ext_tx_set(tx_size, bsize, is_inter, cm->reduced_tx_set_used);
    if (is_inter) {
      if (ext_tx_set > 0)
        return x
            ->inter_tx_type_costs[ext_tx_set][txsize_sqr_map[tx_size]][tx_type];
    } else {
      if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX) {
#if CONFIG_FILTER_INTRA
        // Intra tx-type costs are conditioned on the prediction direction;
        // filter-intra modes are mapped to an equivalent directional mode.
        PREDICTION_MODE intra_dir;
        if (mbmi->filter_intra_mode_info.use_filter_intra_mode[0])
          intra_dir = fimode_to_intradir[mbmi->filter_intra_mode_info
                                             .filter_intra_mode[0]];
        else
          intra_dir = mbmi->mode;
        return x->intra_tx_type_costs[ext_tx_set][txsize_sqr_map[tx_size]]
                                     [intra_dir][tx_type];
#else
        return x->intra_tx_type_costs[ext_tx_set][txsize_sqr_map[tx_size]]
                                     [mbmi->mode][tx_type];
#endif
      }
    }
  }
  return 0;
}
// Computes the full luma RD cost of coding block |bs| with the given
// tx_type/tx_size pair: runs the per-plane transform RD search, then adds
// the tx-size and tx-type signalling costs and the cost of the skip flag.
// Returns INT64_MAX when the combination cannot be coded. Writes the
// component stats into |rd_stats| and updates mbmi->tx_type / mbmi->tx_size
// as a side effect.
static int64_t txfm_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
                        RD_STATS *rd_stats, int64_t ref_best_rd, BLOCK_SIZE bs,
                        TX_TYPE tx_type, TX_SIZE tx_size) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int64_t rd = INT64_MAX;
  const int skip_ctx = av1_get_skip_context(xd);
  int s0, s1;
  const int is_inter = is_inter_block(mbmi);
  // The tx size is only signalled (and thus costed) under TX_MODE_SELECT
  // for blocks of at least 8x8.
  const int tx_select =
      cm->tx_mode == TX_MODE_SELECT && mbmi->sb_type >= BLOCK_8X8;

  const int r_tx_size = tx_size_cost(cm, x, bs, tx_size);

  assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed_bsize(bs)));

  // Bit costs of coding the skip flag as 0 / 1 in this context.
  s0 = x->skip_cost[skip_ctx][0];
  s1 = x->skip_cost[skip_ctx][1];

  mbmi->tx_type = tx_type;
  mbmi->tx_size = tx_size;
#if CONFIG_FILTER_INTRA
  // Filter-intra prediction is incompatible with some tx sizes; report the
  // combination as uncodable.
  if (!is_inter_block(mbmi) &&
      mbmi->filter_intra_mode_info.use_filter_intra_mode[0] &&
      !av1_filter_intra_allowed_txsize(tx_size)) {
    rd_stats->rate = INT_MAX;
    // Note: Initialize following to avoid uninitialized warnings.
    rd_stats->dist = INT64_MAX;
    rd_stats->skip = 0;
    rd_stats->sse = INT64_MAX;
    return INT64_MAX;
  }
#endif
  txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, 0, bs, tx_size,
                   cpi->sf.use_fast_coef_costing);
  if (rd_stats->rate == INT_MAX) return INT64_MAX;
#if !CONFIG_TXK_SEL
  int plane = 0;
  rd_stats->rate += av1_tx_type_cost(cm, x, xd, bs, plane, tx_size, tx_type);
#endif

  if (rd_stats->skip) {
    if (is_inter) {
      rd = RDCOST(x->rdmult, s1, rd_stats->sse);
    } else {
      // Intra still signals the tx size even when all coefficients are
      // skipped; tx_select (0 or 1) acts as an on/off multiplier.
      rd = RDCOST(x->rdmult, s1 + r_tx_size * tx_select, rd_stats->sse);
    }
  } else {
    rd = RDCOST(x->rdmult, rd_stats->rate + s0 + r_tx_size * tx_select,
                rd_stats->dist);
  }

  if (tx_select) rd_stats->rate += r_tx_size;

  // For inter, non-lossless blocks, also consider coding the block as fully
  // skipped and keep whichever alternative is cheaper.
  if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
      !(rd_stats->skip))
    rd = AOMMIN(rd, RDCOST(x->rdmult, s1, rd_stats->sse));

  return rd;
}
2434
Angie Chiang2d147c12017-04-05 11:23:59 -07002435static int skip_txfm_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs,
Alexander Bokov0c7eb102017-09-07 18:49:00 -07002436 TX_TYPE tx_type, TX_SIZE tx_size, int prune) {
Urvang Joshi0d1e4ff2017-04-27 16:17:25 -07002437 const MACROBLOCKD *const xd = &x->e_mbd;
2438 const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
Angie Chiang2d147c12017-04-05 11:23:59 -07002439 const int is_inter = is_inter_block(mbmi);
Angie Chiang2d147c12017-04-05 11:23:59 -07002440
Angie Chianga4fa1902017-04-05 15:26:09 -07002441 if (mbmi->ref_mv_idx > 0 && tx_type != DCT_DCT) return 1;
Angie Chiang2d147c12017-04-05 11:23:59 -07002442 if (FIXED_TX_TYPE && tx_type != get_default_tx_type(0, xd, 0, tx_size))
2443 return 1;
2444 if (!is_inter && x->use_default_intra_tx_type &&
2445 tx_type != get_default_tx_type(0, xd, 0, tx_size))
2446 return 1;
2447 if (is_inter && x->use_default_inter_tx_type &&
2448 tx_type != get_default_tx_type(0, xd, 0, tx_size))
2449 return 1;
Angie Chiang2d147c12017-04-05 11:23:59 -07002450 const AV1_COMMON *const cm = &cpi->common;
Hui Suddbcde22017-09-18 17:22:02 -07002451 const TxSetType tx_set_type =
2452 get_ext_tx_set_type(tx_size, bs, is_inter, cm->reduced_tx_set_used);
2453 if (!av1_ext_tx_used[tx_set_type][tx_type]) return 1;
Angie Chiang2d147c12017-04-05 11:23:59 -07002454 if (is_inter) {
Angie Chiang2d147c12017-04-05 11:23:59 -07002455 if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE) {
Alexander Bokov0c7eb102017-09-07 18:49:00 -07002456 if (!do_tx_type_search(tx_type, prune, cpi->sf.tx_type_search.prune_mode))
2457 return 1;
Angie Chiang2d147c12017-04-05 11:23:59 -07002458 }
2459 } else {
2460 if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) {
2461 if (tx_type != intra_mode_to_tx_type_context[mbmi->mode]) return 1;
2462 }
Angie Chiang2d147c12017-04-05 11:23:59 -07002463 }
Angie Chiang2d147c12017-04-05 11:23:59 -07002464 return 0;
2465}
2466
Urvang Joshi52648442016-10-13 17:27:51 -07002467static int64_t estimate_yrd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bs,
2468 MACROBLOCK *x, int *r, int64_t *d, int *s,
2469 int64_t *sse, int64_t ref_best_rd) {
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002470 RD_STATS rd_stats;
2471 int64_t rd = txfm_yrd(cpi, x, &rd_stats, ref_best_rd, bs, DCT_DCT,
2472 max_txsize_lookup[bs]);
2473 *r = rd_stats.rate;
2474 *d = rd_stats.dist;
2475 *s = rd_stats.skip;
2476 *sse = rd_stats.sse;
2477 return rd;
Yaowu Xuc27fc142016-08-22 16:08:15 -07002478}
Yaowu Xuc27fc142016-08-22 16:08:15 -07002479
Urvang Joshi52648442016-10-13 17:27:51 -07002480static void choose_largest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002481 RD_STATS *rd_stats, int64_t ref_best_rd,
Urvang Joshi52648442016-10-13 17:27:51 -07002482 BLOCK_SIZE bs) {
2483 const AV1_COMMON *const cm = &cpi->common;
Yaowu Xuc27fc142016-08-22 16:08:15 -07002484 MACROBLOCKD *const xd = &x->e_mbd;
2485 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
2486 TX_TYPE tx_type, best_tx_type = DCT_DCT;
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002487 int64_t this_rd, best_rd = INT64_MAX;
Zoe Liu1eed2df2017-10-16 17:13:15 -07002488 const int skip_ctx = av1_get_skip_context(xd);
2489 int s0 = x->skip_cost[skip_ctx][0];
2490 int s1 = x->skip_cost[skip_ctx][1];
Yaowu Xuc27fc142016-08-22 16:08:15 -07002491 const int is_inter = is_inter_block(mbmi);
2492 int prune = 0;
Angie Chiangaa0c34b2017-04-25 12:25:38 -07002493 const int plane = 0;
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002494 av1_invalid_rd_stats(rd_stats);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002495
2496 mbmi->tx_size = tx_size_from_tx_mode(bs, cm->tx_mode, is_inter);
Jingning Hane67b38a2016-11-04 10:30:00 -07002497 mbmi->min_tx_size = get_min_tx_size(mbmi->tx_size);
Hui Suddbcde22017-09-18 17:22:02 -07002498 const TxSetType tx_set_type =
2499 get_ext_tx_set_type(mbmi->tx_size, bs, is_inter, cm->reduced_tx_set_used);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002500
Alexander Bokov0c7eb102017-09-07 18:49:00 -07002501 if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE &&
2502 !x->use_default_inter_tx_type) {
Alexander Bokov79a37242017-09-29 11:25:55 -07002503 prune = prune_tx(cpi, bs, x, xd, tx_set_type, 0);
Alexander Bokov0c7eb102017-09-07 18:49:00 -07002504 }
Sarah Parkere68a3e42017-02-16 14:03:24 -08002505 if (get_ext_tx_types(mbmi->tx_size, bs, is_inter, cm->reduced_tx_set_used) >
2506 1 &&
Yaowu Xuc27fc142016-08-22 16:08:15 -07002507 !xd->lossless[mbmi->segment_id]) {
Yushin Cho77bba8d2016-11-04 16:36:56 -07002508 for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
Hui Suddbcde22017-09-18 17:22:02 -07002509 if (!av1_ext_tx_used[tx_set_type][tx_type]) continue;
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002510 RD_STATS this_rd_stats;
Yaowu Xuc27fc142016-08-22 16:08:15 -07002511 if (is_inter) {
2512 if (x->use_default_inter_tx_type &&
2513 tx_type != get_default_tx_type(0, xd, 0, mbmi->tx_size))
2514 continue;
Yaowu Xuc27fc142016-08-22 16:08:15 -07002515 if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE) {
Alexander Bokov0c7eb102017-09-07 18:49:00 -07002516 if (!do_tx_type_search(tx_type, prune,
2517 cpi->sf.tx_type_search.prune_mode))
2518 continue;
Yaowu Xuc27fc142016-08-22 16:08:15 -07002519 }
2520 } else {
2521 if (x->use_default_intra_tx_type &&
2522 tx_type != get_default_tx_type(0, xd, 0, mbmi->tx_size))
2523 continue;
2524 if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) {
2525 if (tx_type != intra_mode_to_tx_type_context[mbmi->mode]) continue;
2526 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07002527 }
2528
2529 mbmi->tx_type = tx_type;
2530
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002531 txfm_rd_in_plane(x, cpi, &this_rd_stats, ref_best_rd, 0, bs,
Yaowu Xuc27fc142016-08-22 16:08:15 -07002532 mbmi->tx_size, cpi->sf.use_fast_coef_costing);
Yushin Chod0b77ac2017-10-20 17:33:16 -07002533
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002534 if (this_rd_stats.rate == INT_MAX) continue;
Yue Chenb23d00a2017-07-28 17:01:21 -07002535 av1_tx_type_cost(cm, x, xd, bs, plane, mbmi->tx_size, tx_type);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002536
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002537 if (this_rd_stats.skip)
Urvang Joshi70006e42017-06-14 16:08:55 -07002538 this_rd = RDCOST(x->rdmult, s1, this_rd_stats.sse);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002539 else
Urvang Joshi70006e42017-06-14 16:08:55 -07002540 this_rd =
2541 RDCOST(x->rdmult, this_rd_stats.rate + s0, this_rd_stats.dist);
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002542 if (is_inter_block(mbmi) && !xd->lossless[mbmi->segment_id] &&
2543 !this_rd_stats.skip)
Urvang Joshi70006e42017-06-14 16:08:55 -07002544 this_rd = AOMMIN(this_rd, RDCOST(x->rdmult, s1, this_rd_stats.sse));
Yaowu Xuc27fc142016-08-22 16:08:15 -07002545
2546 if (this_rd < best_rd) {
2547 best_rd = this_rd;
2548 best_tx_type = mbmi->tx_type;
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002549 *rd_stats = this_rd_stats;
Yaowu Xuc27fc142016-08-22 16:08:15 -07002550 }
2551 }
Yushin Chod0b77ac2017-10-20 17:33:16 -07002552
Guillaume Martres4e4d3a02016-08-21 19:02:33 -07002553 } else {
2554 mbmi->tx_type = DCT_DCT;
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002555 txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, 0, bs, mbmi->tx_size,
2556 cpi->sf.use_fast_coef_costing);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002557 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07002558 mbmi->tx_type = best_tx_type;
Yaowu Xuc27fc142016-08-22 16:08:15 -07002559}
2560
Urvang Joshi52648442016-10-13 17:27:51 -07002561static void choose_smallest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002562 RD_STATS *rd_stats, int64_t ref_best_rd,
Yaowu Xuc27fc142016-08-22 16:08:15 -07002563 BLOCK_SIZE bs) {
2564 MACROBLOCKD *const xd = &x->e_mbd;
2565 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
2566
2567 mbmi->tx_size = TX_4X4;
2568 mbmi->tx_type = DCT_DCT;
Jingning Hane67b38a2016-11-04 10:30:00 -07002569 mbmi->min_tx_size = get_min_tx_size(TX_4X4);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002570
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002571 txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, 0, bs, mbmi->tx_size,
2572 cpi->sf.use_fast_coef_costing);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002573}
2574
Angie Chiangf1cb0752017-04-10 16:01:20 -07002575static INLINE int bsize_to_num_blk(BLOCK_SIZE bsize) {
2576 int num_blk = 1 << (num_pels_log2_lookup[bsize] - 2 * tx_size_wide_log2[0]);
2577 return num_blk;
2578}
Angie Chiangf1cb0752017-04-10 16:01:20 -07002579
// Jointly searches transform sizes and (unless CONFIG_TXK_SEL, which defers
// the type search to av1_search_txk_type) transform types for the luma
// plane, keeping the combination with the lowest RD cost. On return,
// mbmi->tx_size / mbmi->tx_type (and mbmi->txk_type under CONFIG_TXK_SEL)
// hold the winning configuration and *rd_stats its rate/distortion stats.
static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi,
                                        MACROBLOCK *x, RD_STATS *rd_stats,
                                        int64_t ref_best_rd, BLOCK_SIZE bs) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int64_t rd = INT64_MAX;
  int n;
  int start_tx;
  int depth;
  int64_t best_rd = INT64_MAX, last_rd = INT64_MAX;
  const int is_inter = is_inter_block(mbmi);
  const TX_SIZE max_rect_tx_size = get_max_rect_tx_size(bs, is_inter);
  TX_SIZE best_tx_size = max_rect_tx_size;
  TX_TYPE best_tx_type = DCT_DCT;
#if CONFIG_TXK_SEL
  // Per-4x4-unit transform types of the best candidate so far.
  TX_TYPE best_txk_type[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
#endif  // CONFIG_TXK_SEL
  // Whether tx size is signaled per block (search all depths) or fixed by
  // the frame-level tx mode (evaluate exactly one size).
  const int tx_select = cm->tx_mode == TX_MODE_SELECT;

  av1_invalid_rd_stats(rd_stats);

  if (tx_select) {
    start_tx = max_rect_tx_size;
    depth = 0;
  } else {
    const TX_SIZE chosen_tx_size =
        tx_size_from_tx_mode(bs, cm->tx_mode, is_inter);
    start_tx = chosen_tx_size;
    // Starting at MAX_TX_DEPTH makes the loop below run a single iteration.
    depth = MAX_TX_DEPTH;
  }

  int prune = 0;
  if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE &&
      !x->use_default_inter_tx_type) {
    prune = prune_tx(cpi, bs, x, xd, EXT_TX_SET_ALL16, 0);
  }

  last_rd = INT64_MAX;
  // Walk tx sizes from the largest candidate downward; sub_tx_size_map gives
  // the next-smaller size at each depth step.
  for (n = start_tx; depth <= MAX_TX_DEPTH;
       depth++, n = sub_tx_size_map[0][n]) {
    TX_TYPE tx_start = DCT_DCT;
    TX_TYPE tx_end = TX_TYPES;
#if CONFIG_TXK_SEL
    // The tx_type becomes dummy when lv_map is on. The tx_type search will be
    // performed in av1_search_txk_type()
    tx_end = DCT_DCT + 1;
#endif
    TX_TYPE tx_type;
    for (tx_type = tx_start; tx_type < tx_end; ++tx_type) {
      RD_STATS this_rd_stats;
      if (skip_txfm_search(cpi, x, bs, tx_type, n, prune)) continue;
      rd = txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs, tx_type, n);

      // Early termination in transform size search.
      if (cpi->sf.tx_size_search_breakout &&
          (rd == INT64_MAX ||
           (this_rd_stats.skip == 1 && tx_type != DCT_DCT && n != start_tx) ||
           (n != (int)start_tx && rd > last_rd))) {
        break;
      }

      last_rd = rd;
      // Tighten the RD budget for subsequent candidates.
      ref_best_rd = AOMMIN(rd, ref_best_rd);
      if (rd < best_rd) {
#if CONFIG_TXK_SEL
        memcpy(best_txk_type, mbmi->txk_type,
               sizeof(best_txk_type[0]) * MAX_SB_SQUARE /
                   (TX_SIZE_W_MIN * TX_SIZE_H_MIN));
#endif
        best_tx_type = tx_type;
        best_tx_size = n;
        best_rd = rd;
        *rd_stats = this_rd_stats;
      }
#if !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
      // NOTE(review): this 'is_inter' shadows the function-scope one above
      // (same value); consider removing the redeclaration.
      const int is_inter = is_inter_block(mbmi);
      if (mbmi->sb_type < BLOCK_8X8 && is_inter) break;
#endif  // !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
    }
  }
  // Commit the best configuration found.
  mbmi->tx_size = best_tx_size;
  mbmi->tx_type = best_tx_type;
#if CONFIG_TXK_SEL
  memcpy(mbmi->txk_type, best_txk_type,
         sizeof(best_txk_type[0]) * MAX_SB_SQUARE /
             (TX_SIZE_W_MIN * TX_SIZE_H_MIN));
#endif

  mbmi->min_tx_size = get_min_tx_size(mbmi->tx_size);
}
2671
Angie Chiang0e9a2e92016-11-08 09:45:40 -08002672static void super_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
2673 RD_STATS *rd_stats, BLOCK_SIZE bs,
2674 int64_t ref_best_rd) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07002675 MACROBLOCKD *xd = &x->e_mbd;
Angie Chiang0e9a2e92016-11-08 09:45:40 -08002676 av1_init_rd_stats(rd_stats);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002677
2678 assert(bs == xd->mi[0]->mbmi.sb_type);
2679
Yaowu Xu1e2aae12017-02-27 16:33:14 -08002680 if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
Angie Chiang0e9a2e92016-11-08 09:45:40 -08002681 choose_smallest_tx_size(cpi, x, rd_stats, ref_best_rd, bs);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002682 } else if (cpi->sf.tx_size_search_method == USE_LARGESTALL) {
Angie Chiang0e9a2e92016-11-08 09:45:40 -08002683 choose_largest_tx_size(cpi, x, rd_stats, ref_best_rd, bs);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002684 } else {
Angie Chiang0e9a2e92016-11-08 09:45:40 -08002685 choose_tx_size_type_from_rd(cpi, x, rd_stats, ref_best_rd, bs);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002686 }
2687}
2688
2689static int conditional_skipintra(PREDICTION_MODE mode,
2690 PREDICTION_MODE best_intra_mode) {
2691 if (mode == D117_PRED && best_intra_mode != V_PRED &&
2692 best_intra_mode != D135_PRED)
2693 return 1;
2694 if (mode == D63_PRED && best_intra_mode != V_PRED &&
2695 best_intra_mode != D45_PRED)
2696 return 1;
2697 if (mode == D207_PRED && best_intra_mode != H_PRED &&
2698 best_intra_mode != D45_PRED)
2699 return 1;
2700 if (mode == D153_PRED && best_intra_mode != H_PRED &&
2701 best_intra_mode != D135_PRED)
2702 return 1;
2703 return 0;
2704}
2705
// Model based RD estimation for luma intra blocks.
// Runs intra prediction for every transform block, estimates rate/distortion
// with model_rd_for_sb(), folds in the mode signaling costs (including
// angle-delta and filter-intra costs where enabled), and returns the
// resulting RD cost. 'mode_cost' is the base rate of signaling the mode.
static int64_t intra_model_yrd(const AV1_COMP *const cpi, MACROBLOCK *const x,
                               BLOCK_SIZE bsize, int mode_cost) {
  const AV1_COMMON *cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  assert(!is_inter_block(mbmi));
  RD_STATS this_rd_stats;
  int row, col;
  int64_t temp_sse, this_rd;
  const TX_SIZE tx_size = tx_size_from_tx_mode(bsize, cpi->common.tx_mode, 0);
  // Step sizes, in 4x4 units, between successive transform blocks.
  const int stepr = tx_size_high_unit[tx_size];
  const int stepc = tx_size_wide_unit[tx_size];
  const int max_blocks_wide = max_block_wide(xd, bsize, 0);
  const int max_blocks_high = max_block_high(xd, bsize, 0);
  mbmi->tx_size = tx_size;
  // Prediction.
  const int step = stepr * stepc;
  int block = 0;
  for (row = 0; row < max_blocks_high; row += stepr) {
    for (col = 0; col < max_blocks_wide; col += stepc) {
      av1_predict_intra_block_facade(cm, xd, 0, block, col, row, tx_size);
      block += step;
    }
  }
  // RD estimation.
  model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &this_rd_stats.rate,
                  &this_rd_stats.dist, &this_rd_stats.skip, &temp_sse);
#if CONFIG_EXT_INTRA
  // Directional modes additionally signal an angle delta.
  if (av1_is_directional_mode(mbmi->mode, bsize) &&
      av1_use_angle_delta(bsize)) {
#if CONFIG_EXT_INTRA_MOD
    mode_cost += x->angle_delta_cost[mbmi->mode - V_PRED]
                                    [MAX_ANGLE_DELTA + mbmi->angle_delta[0]];
#else
    mode_cost += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
                                    MAX_ANGLE_DELTA + mbmi->angle_delta[0]);
#endif  // CONFIG_EXT_INTRA_MOD
  }
#endif  // CONFIG_EXT_INTRA
#if CONFIG_FILTER_INTRA
  // DC_PRED may carry a filter-intra flag (and mode, when the flag is set).
  if (mbmi->mode == DC_PRED && av1_filter_intra_allowed_txsize(mbmi->tx_size)) {
    if (mbmi->filter_intra_mode_info.use_filter_intra_mode[0]) {
      const int mode = mbmi->filter_intra_mode_info.filter_intra_mode[0];
      mode_cost += x->filter_intra_cost[mbmi->tx_size][1] +
                   x->filter_intra_mode_cost[0][mode];
    } else {
      mode_cost += x->filter_intra_cost[mbmi->tx_size][0];
    }
  }
#endif  // CONFIG_FILTER_INTRA
  this_rd =
      RDCOST(x->rdmult, this_rd_stats.rate + mode_cost, this_rd_stats.dist);
  return this_rd;
}
2761
// Extends 'color_map' in place from 'orig_width x orig_height' entries to
// 'new_width x new_height'. Cells added on the right repeat each row's last
// valid pixel; rows added at the bottom repeat the last valid row.
static void extend_palette_color_map(uint8_t *const color_map, int orig_width,
                                     int orig_height, int new_width,
                                     int new_height) {
  assert(new_width >= orig_width);
  assert(new_height >= orig_height);
  if (new_width == orig_width && new_height == orig_height) return;

  // Walk the existing rows bottom-up so each memmove reads source data that
  // has not been overwritten yet, then pad the row tail with its last pixel.
  for (int r = orig_height - 1; r >= 0; --r) {
    uint8_t *const dst_row = color_map + r * new_width;
    memmove(dst_row, color_map + r * orig_width, orig_width);
    memset(dst_row + orig_width, dst_row[orig_width - 1],
           new_width - orig_width);
  }

  // Replicate the last valid (already widened) row into the new rows.
  const uint8_t *const last_row = color_map + (orig_height - 1) * new_width;
  for (int r = orig_height; r < new_height; ++r) {
    memcpy(color_map + r * new_width, last_row, new_width);
  }
}
2785
hui su33567b22017-04-30 16:40:19 -07002786#if CONFIG_PALETTE_DELTA_ENCODING
// Bias toward using colors in the cache.
// TODO(huisu): Try other schemes to improve compression.
// Snaps each candidate centroid (every 'stride'-th entry of 'centroids') to
// its nearest cached color when the two differ by less than 1.5.
static void optimize_palette_colors(uint16_t *color_cache, int n_cache,
                                    int n_colors, int stride,
                                    float *centroids) {
  if (n_cache <= 0) return;
  for (int i = 0; i < n_colors * stride; i += stride) {
    // Locate the cached color closest to this centroid.
    int best_idx = 0;
    float best_diff = fabsf(centroids[i] - color_cache[0]);
    for (int j = 1; j < n_cache; ++j) {
      const float diff = fabsf(centroids[i] - color_cache[j]);
      if (diff < best_diff) {
        best_diff = diff;
        best_idx = j;
      }
    }
    if (best_diff < 1.5) centroids[i] = color_cache[best_idx];
  }
}
2806#endif // CONFIG_PALETTE_DELTA_ENCODING
2807
// Given the base colors as specified in centroids[], calculate the RD cost
// of palette mode.
// On improvement over *best_rd, updates the best-so-far outputs (best_mbmi,
// best_palette_color_map, best_rd, rate/distortion stats, rate_overhead).
// Returns early if the deduplicated palette is too small or the model-based
// RD estimate is clearly worse than the current best.
static void palette_rd_y(const AV1_COMP *const cpi, MACROBLOCK *x,
                         MB_MODE_INFO *mbmi, BLOCK_SIZE bsize, int palette_ctx,
                         int dc_mode_cost, const float *data, float *centroids,
                         int n,
#if CONFIG_PALETTE_DELTA_ENCODING
                         uint16_t *color_cache, int n_cache,
#endif
                         MB_MODE_INFO *best_mbmi,
                         uint8_t *best_palette_color_map, int64_t *best_rd,
                         int64_t *best_model_rd, int *rate, int *rate_tokenonly,
                         int *rate_overhead, int64_t *distortion,
                         int *skippable) {
#if CONFIG_PALETTE_DELTA_ENCODING
  // Snap centroids toward cached colors, which are cheaper to signal.
  optimize_palette_colors(color_cache, n_cache, n, 1, centroids);
#endif  // CONFIG_PALETTE_DELTA_ENCODING
  int k = av1_remove_duplicates(centroids, n);
  if (k < PALETTE_MIN_SIZE) {
    // Too few unique colors to create a palette. And DC_PRED will work
    // well for that case anyway. So skip.
    return;
  }
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
#if CONFIG_HIGHBITDEPTH
  if (cpi->common.use_highbitdepth)
    for (int i = 0; i < k; ++i)
      pmi->palette_colors[i] =
          clip_pixel_highbd((int)centroids[i], cpi->common.bit_depth);
  else
#endif  // CONFIG_HIGHBITDEPTH
    for (int i = 0; i < k; ++i)
      pmi->palette_colors[i] = clip_pixel((int)centroids[i]);
  pmi->palette_size[0] = k;
  MACROBLOCKD *const xd = &x->e_mbd;
  uint8_t *const color_map = xd->plane[0].color_index_map;
  int block_width, block_height, rows, cols;
  av1_get_block_dimensions(bsize, 0, xd, &block_width, &block_height, &rows,
                           &cols);
  // Map each source pixel to its nearest palette entry, then pad the index
  // map out to the full block dimensions.
  av1_calc_indices(data, centroids, color_map, rows * cols, k, 1);
  extend_palette_color_map(color_map, cols, rows, block_width, block_height);
  // Rate of signaling the palette itself (size, colors, first index, flag).
  int palette_mode_cost =
      dc_mode_cost +
      x->palette_y_size_cost[bsize - BLOCK_8X8][k - PALETTE_MIN_SIZE] +
      write_uniform_cost(k, color_map[0]) +
      x->palette_y_mode_cost[bsize - BLOCK_8X8][palette_ctx][1];
  palette_mode_cost += av1_palette_color_cost_y(pmi,
#if CONFIG_PALETTE_DELTA_ENCODING
                                                color_cache, n_cache,
#endif  // CONFIG_PALETTE_DELTA_ENCODING
                                                cpi->common.bit_depth);
  palette_mode_cost +=
      av1_cost_color_map(x, 0, 0, bsize, mbmi->tx_size, PALETTE_MAP);
  int64_t this_model_rd = intra_model_yrd(cpi, x, bsize, palette_mode_cost);
  // Bail out if the model estimate is already 50% worse than the best model
  // RD seen so far.
  if (*best_model_rd != INT64_MAX &&
      this_model_rd > *best_model_rd + (*best_model_rd >> 1))
    return;
  if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd;
  RD_STATS tokenonly_rd_stats;
  super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
  if (tokenonly_rd_stats.rate == INT_MAX) return;
  int this_rate = tokenonly_rd_stats.rate + palette_mode_cost;
  int64_t this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
  if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(mbmi->sb_type)) {
    // Move the tx_size signaling cost out of the token-only rate so that it
    // is attributed to the overhead computed below instead.
    tokenonly_rd_stats.rate -=
        tx_size_cost(&cpi->common, x, bsize, mbmi->tx_size);
  }
  if (this_rd < *best_rd) {
    *best_rd = this_rd;
    memcpy(best_palette_color_map, color_map,
           block_width * block_height * sizeof(color_map[0]));
    *best_mbmi = *mbmi;
    *rate_overhead = this_rate - tokenonly_rd_stats.rate;
    if (rate) *rate = this_rate;
    if (rate_tokenonly) *rate_tokenonly = tokenonly_rd_stats.rate;
    if (distortion) *distortion = tokenonly_rd_stats.dist;
    if (skippable) *skippable = tokenonly_rd_stats.skip;
  }
}
2887
// Searches palette mode for the luma plane: counts distinct source colors,
// derives candidate palettes both from the dominant colors and from k-means
// clustering, and keeps the best candidate via palette_rd_y(). Returns the
// rate overhead of the chosen palette (0 when no palette candidate won).
static int rd_pick_palette_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
                                     BLOCK_SIZE bsize, int palette_ctx,
                                     int dc_mode_cost, MB_MODE_INFO *best_mbmi,
                                     uint8_t *best_palette_color_map,
                                     int64_t *best_rd, int64_t *best_model_rd,
                                     int *rate, int *rate_tokenonly,
                                     int64_t *distortion, int *skippable) {
  int rate_overhead = 0;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  MB_MODE_INFO *const mbmi = &mic->mbmi;
  assert(!is_inter_block(mbmi));
  assert(bsize >= BLOCK_8X8);
  int colors, n;
  const int src_stride = x->plane[0].src.stride;
  const uint8_t *const src = x->plane[0].src.buf;
  uint8_t *const color_map = xd->plane[0].color_index_map;
  int block_width, block_height, rows, cols;
  av1_get_block_dimensions(bsize, 0, xd, &block_width, &block_height, &rows,
                           &cols);

  assert(cpi->common.allow_screen_content_tools);

  // Histogram of source sample values, filled by av1_count_colors*().
  int count_buf[1 << 12];  // Maximum (1 << 12) color levels.
#if CONFIG_HIGHBITDEPTH
  if (cpi->common.use_highbitdepth)
    colors = av1_count_colors_highbd(src, src_stride, rows, cols,
                                     cpi->common.bit_depth, count_buf);
  else
#endif  // CONFIG_HIGHBITDEPTH
    colors = av1_count_colors(src, src_stride, rows, cols, count_buf);
#if CONFIG_FILTER_INTRA
  mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
#endif  // CONFIG_FILTER_INTRA

  // Palette is only attempted for sources with 2..64 distinct colors.
  if (colors > 1 && colors <= 64) {
    aom_clear_system_state();
    int r, c, i;
    const int max_itr = 50;
    float *const data = x->palette_buffer->kmeans_data_buf;
    float centroids[PALETTE_MAX_SIZE];
    // lb/ub track the min/max sample value seen; data[] holds the samples
    // as floats for k-means.
    float lb, ub, val;
#if CONFIG_HIGHBITDEPTH
    uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
    if (cpi->common.use_highbitdepth)
      lb = ub = src16[0];
    else
#endif  // CONFIG_HIGHBITDEPTH
      lb = ub = src[0];

#if CONFIG_HIGHBITDEPTH
    if (cpi->common.use_highbitdepth) {
      for (r = 0; r < rows; ++r) {
        for (c = 0; c < cols; ++c) {
          val = src16[r * src_stride + c];
          data[r * cols + c] = val;
          if (val < lb)
            lb = val;
          else if (val > ub)
            ub = val;
        }
      }
    } else {
#endif  // CONFIG_HIGHBITDEPTH
      for (r = 0; r < rows; ++r) {
        for (c = 0; c < cols; ++c) {
          val = src[r * src_stride + c];
          data[r * cols + c] = val;
          if (val < lb)
            lb = val;
          else if (val > ub)
            ub = val;
        }
      }
#if CONFIG_HIGHBITDEPTH
    }
#endif  // CONFIG_HIGHBITDEPTH

    mbmi->mode = DC_PRED;
#if CONFIG_FILTER_INTRA
    mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
#endif  // CONFIG_FILTER_INTRA

    if (rows * cols > MAX_PALETTE_SQUARE) return 0;

#if CONFIG_PALETTE_DELTA_ENCODING
    uint16_t color_cache[2 * PALETTE_MAX_SIZE];
    const int n_cache = av1_get_palette_cache(xd, 0, color_cache);
#endif  // CONFIG_PALETTE_DELTA_ENCODING

    // Find the dominant colors, stored in top_colors[].
    int top_colors[PALETTE_MAX_SIZE] = { 0 };
    for (i = 0; i < AOMMIN(colors, PALETTE_MAX_SIZE); ++i) {
      int max_count = 0;
      for (int j = 0; j < (1 << cpi->common.bit_depth); ++j) {
        if (count_buf[j] > max_count) {
          max_count = count_buf[j];
          top_colors[i] = j;
        }
      }
      assert(max_count > 0);
      // Zero out the winner so the next iteration finds the next-most
      // frequent color.
      count_buf[top_colors[i]] = 0;
    }

    // Try the dominant colors directly.
    // TODO(huisu@google.com): Try to avoid duplicate computation in cases
    // where the dominant colors and the k-means results are similar.
    for (n = AOMMIN(colors, PALETTE_MAX_SIZE); n >= 2; --n) {
      for (i = 0; i < n; ++i) centroids[i] = top_colors[i];
      palette_rd_y(cpi, x, mbmi, bsize, palette_ctx, dc_mode_cost, data,
                   centroids, n,
#if CONFIG_PALETTE_DELTA_ENCODING
                   color_cache, n_cache,
#endif  // CONFIG_PALETTE_DELTA_ENCODING
                   best_mbmi, best_palette_color_map, best_rd, best_model_rd,
                   rate, rate_tokenonly, &rate_overhead, distortion, skippable);
    }

    // K-means clustering.
    for (n = AOMMIN(colors, PALETTE_MAX_SIZE); n >= 2; --n) {
      if (colors == PALETTE_MIN_SIZE) {
        // Special case: These colors automatically become the centroids.
        assert(colors == n);
        assert(colors == 2);
        centroids[0] = lb;
        centroids[1] = ub;
      } else {
        // Evenly spaced initial centroids over [lb, ub], then refine.
        for (i = 0; i < n; ++i) {
          centroids[i] = lb + (2 * i + 1) * (ub - lb) / n / 2;
        }
        av1_k_means(data, centroids, color_map, rows * cols, n, 1, max_itr);
      }

      palette_rd_y(cpi, x, mbmi, bsize, palette_ctx, dc_mode_cost, data,
                   centroids, n,
#if CONFIG_PALETTE_DELTA_ENCODING
                   color_cache, n_cache,
#endif  // CONFIG_PALETTE_DELTA_ENCODING
                   best_mbmi, best_palette_color_map, best_rd, best_model_rd,
                   rate, rate_tokenonly, &rate_overhead, distortion, skippable);
    }
  }

  // Restore the winning color index map (palette_rd_y may have overwritten
  // it with a losing candidate) and commit the best mode info.
  if (best_mbmi->palette_mode_info.palette_size[0] > 0) {
    memcpy(color_map, best_palette_color_map,
           block_width * block_height * sizeof(best_palette_color_map[0]));
  }
  *mbmi = *best_mbmi;
  return rate_overhead;
}
3038
hui su5db97432016-10-14 16:10:14 -07003039#if CONFIG_FILTER_INTRA
// Returns 1 if a filter intra mode is selected; returns 0 otherwise.
// Loops over all filter-intra modes on top of DC_PRED, prunes with the
// model-based RD estimate, evaluates the survivors with a full transform
// search, and — when a mode beats *best_rd — writes the winning
// configuration back into mbmi and the rate/distortion outputs.
static int rd_pick_filter_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
                                    int *rate, int *rate_tokenonly,
                                    int64_t *distortion, int *skippable,
                                    BLOCK_SIZE bsize, int mode_cost,
                                    int64_t *best_rd, int64_t *best_model_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  MB_MODE_INFO *mbmi = &mic->mbmi;
  int filter_intra_selected_flag = 0;
  FILTER_INTRA_MODE mode;
  TX_SIZE best_tx_size = TX_8X8;
  FILTER_INTRA_MODE_INFO filter_intra_mode_info;
  // Only read after filter_intra_selected_flag is set, which also assigns it.
  TX_TYPE best_tx_type;

  av1_zero(filter_intra_mode_info);
  mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 1;
  mbmi->mode = DC_PRED;
  mbmi->palette_mode_info.palette_size[0] = 0;

  for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) {
    int this_rate;
    int64_t this_rd, this_model_rd;
    RD_STATS tokenonly_rd_stats;
    mbmi->filter_intra_mode_info.filter_intra_mode[0] = mode;
    this_model_rd = intra_model_yrd(cpi, x, bsize, mode_cost);
    // Prune modes whose model estimate is 50% worse than the best so far.
    if (*best_model_rd != INT64_MAX &&
        this_model_rd > *best_model_rd + (*best_model_rd >> 1))
      continue;
    if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd;
    super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
    if (tokenonly_rd_stats.rate == INT_MAX) continue;
    // Total rate: tokens + filter-intra flag + filter-intra mode + base mode.
    this_rate = tokenonly_rd_stats.rate +
                x->filter_intra_cost[mbmi->tx_size][1] +
                x->filter_intra_mode_cost[0][mode] + mode_cost;
    this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);

    if (this_rd < *best_rd) {
      *best_rd = this_rd;
      best_tx_size = mic->mbmi.tx_size;
      filter_intra_mode_info = mbmi->filter_intra_mode_info;
      best_tx_type = mic->mbmi.tx_type;
      *rate = this_rate;
      *rate_tokenonly = tokenonly_rd_stats.rate;
      *distortion = tokenonly_rd_stats.dist;
      *skippable = tokenonly_rd_stats.skip;
      filter_intra_selected_flag = 1;
    }
  }

  if (filter_intra_selected_flag) {
    // Commit the winning filter-intra configuration to the block.
    mbmi->mode = DC_PRED;
    mbmi->tx_size = best_tx_size;
    mbmi->filter_intra_mode_info.use_filter_intra_mode[0] =
        filter_intra_mode_info.use_filter_intra_mode[0];
    mbmi->filter_intra_mode_info.filter_intra_mode[0] =
        filter_intra_mode_info.filter_intra_mode[0];
    mbmi->tx_type = best_tx_type;
    return 1;
  } else {
    return 0;
  }
}
hui su5db97432016-10-14 16:10:14 -07003103#endif // CONFIG_FILTER_INTRA
Yaowu Xuc27fc142016-08-22 16:08:15 -07003104
hui su5db97432016-10-14 16:10:14 -07003105#if CONFIG_EXT_INTRA
// Run RD calculation with a given luma intra prediction angle delta, and
// return the RD cost. If the cost is the best so far, update the best mode
// info (angle delta, tx size/type, output rate and rd_stats).
// Returns INT64_MAX when the candidate is pruned: either its model RD
// estimate exceeds 1.5x the best model RD seen so far, or the token-only RD
// search could not produce a rate under best_rd_in.
static int64_t calc_rd_given_intra_angle(
    const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mode_cost,
    int64_t best_rd_in, int8_t angle_delta, int max_angle_delta, int *rate,
    RD_STATS *rd_stats, int *best_angle_delta, TX_SIZE *best_tx_size,
    TX_TYPE *best_tx_type, int64_t *best_rd, int64_t *best_model_rd,
    TX_TYPE *best_txk_type) {
  int this_rate;
  RD_STATS tokenonly_rd_stats;
  int64_t this_rd, this_model_rd;
  MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
  assert(!is_inter_block(mbmi));

#if !CONFIG_TXK_SEL
  (void)best_txk_type;
#endif

  // Install the candidate angle delta before any RD evaluation.
  mbmi->angle_delta[0] = angle_delta;
  // Cheap model-based RD estimate used for early pruning: skip the full
  // search when the model cost is far (>1.5x) above the best model seen.
  this_model_rd = intra_model_yrd(cpi, x, bsize, mode_cost);
  if (*best_model_rd != INT64_MAX &&
      this_model_rd > *best_model_rd + (*best_model_rd >> 1))
    return INT64_MAX;
  if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd;
  super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd_in);
  if (tokenonly_rd_stats.rate == INT_MAX) return INT64_MAX;

  // Total rate = token rate + mode cost + cost of signaling the angle delta.
  this_rate = tokenonly_rd_stats.rate + mode_cost +
#if CONFIG_EXT_INTRA_MOD
              x->angle_delta_cost[mbmi->mode - V_PRED]
                                 [max_angle_delta + mbmi->angle_delta[0]];
#else
              write_uniform_cost(2 * max_angle_delta + 1,
                                 mbmi->angle_delta[0] + max_angle_delta);
#endif  // CONFIG_EXT_INTRA_MOD
  this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);

  if (this_rd < *best_rd) {
#if CONFIG_TXK_SEL
    // Snapshot the per-block tx types chosen for this winning candidate.
    memcpy(best_txk_type, mbmi->txk_type,
           sizeof(*best_txk_type) *
               (MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)));
#endif
    *best_rd = this_rd;
    *best_angle_delta = mbmi->angle_delta[0];
    *best_tx_size = mbmi->tx_size;
    *best_tx_type = mbmi->tx_type;
    *rate = this_rate;
    rd_stats->rate = tokenonly_rd_stats.rate;
    rd_stats->dist = tokenonly_rd_stats.dist;
    rd_stats->skip = tokenonly_rd_stats.skip;
  }
  return this_rd;
}
3160
// With a given luma directional intra prediction mode, pick the best angle
// delta. Returns the RD cost corresponding to the best angle delta found
// (the incoming best_rd if nothing improved on it).
// Two-pass search: even angle deltas (including 0) are evaluated first and
// their RD costs recorded in rd_cost[]; an odd delta is then evaluated only
// when at least one of its even neighbors came within best_rd + best_rd/32.
static int64_t rd_pick_intra_angle_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
                                       int *rate, RD_STATS *rd_stats,
                                       BLOCK_SIZE bsize, int mode_cost,
                                       int64_t best_rd,
                                       int64_t *best_model_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  MB_MODE_INFO *mbmi = &mic->mbmi;
  assert(!is_inter_block(mbmi));
  int i, angle_delta, best_angle_delta = 0;
  int first_try = 1;
  // rd_cost is indexed by 2 * |delta| + i, where i == 0 is the positive
  // delta and i == 1 the negative delta.
  int64_t this_rd, best_rd_in, rd_cost[2 * (MAX_ANGLE_DELTA + 2)];
  TX_SIZE best_tx_size = mic->mbmi.tx_size;
  TX_TYPE best_tx_type = mbmi->tx_type;
#if CONFIG_TXK_SEL
  TX_TYPE best_txk_type[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
#else
  TX_TYPE *best_txk_type = NULL;
#endif

  for (i = 0; i < 2 * (MAX_ANGLE_DELTA + 2); ++i) rd_cost[i] = INT64_MAX;

  // Pass 1: evaluate delta 0 and all even deltas, both signs.
  for (angle_delta = 0; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
    for (i = 0; i < 2; ++i) {
      // Wider pruning margin (1/8) on the very first evaluation; 1/32 after.
      best_rd_in = (best_rd == INT64_MAX)
                       ? INT64_MAX
                       : (best_rd + (best_rd >> (first_try ? 3 : 5)));
      this_rd = calc_rd_given_intra_angle(
          cpi, x, bsize, mode_cost, best_rd_in, (1 - 2 * i) * angle_delta,
          MAX_ANGLE_DELTA, rate, rd_stats, &best_angle_delta, &best_tx_size,
          &best_tx_type, &best_rd, best_model_rd, best_txk_type);
      rd_cost[2 * angle_delta + i] = this_rd;
      // If even the first (delta == 0) candidate is pruned, give up entirely.
      if (first_try && this_rd == INT64_MAX) return best_rd;
      first_try = 0;
      if (angle_delta == 0) {
        // Delta 0 has no sign; mirror its cost into the "negative" slot.
        rd_cost[1] = this_rd;
        break;
      }
    }
  }

  assert(best_rd != INT64_MAX);
  // Pass 2: evaluate an odd delta only if one of its even neighbors was
  // within 1/32 of the best RD cost so far.
  for (angle_delta = 1; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
    int64_t rd_thresh;
    for (i = 0; i < 2; ++i) {
      int skip_search = 0;
      rd_thresh = best_rd + (best_rd >> 5);
      if (rd_cost[2 * (angle_delta + 1) + i] > rd_thresh &&
          rd_cost[2 * (angle_delta - 1) + i] > rd_thresh)
        skip_search = 1;
      if (!skip_search) {
        calc_rd_given_intra_angle(
            cpi, x, bsize, mode_cost, best_rd, (1 - 2 * i) * angle_delta,
            MAX_ANGLE_DELTA, rate, rd_stats, &best_angle_delta, &best_tx_size,
            &best_tx_type, &best_rd, best_model_rd, best_txk_type);
      }
    }
  }

  // Commit the winning configuration back into the mode info.
  mbmi->tx_size = best_tx_size;
  mbmi->angle_delta[0] = best_angle_delta;
  mbmi->tx_type = best_tx_type;
#if CONFIG_TXK_SEL
  memcpy(mbmi->txk_type, best_txk_type,
         sizeof(*best_txk_type) *
             (MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)));
#endif
  return best_rd;
}
3232
// Maps a pixel gradient to one of 8 directional histogram bins.
// Indices are sign, integer, and fractional part of the gradient value:
//   [sn]  : 0 when dx and dy have the same sign, 1 otherwise
//   [quot]: integer part of |dx| / |dy|, clamped to [0, 6]
//   [remd]: fractional part of |dx| / |dy| in 1/16 units, clamped to [0, 15]
static const uint8_t gradient_to_angle_bin[2][7][16] = {
  {
    { 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0 },
    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 },
    { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
    { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
    { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
    { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
    { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
  },
  {
    { 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4 },
    { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3 },
    { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
    { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
    { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
    { 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
    { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
  },
};
3254
/* clang-format off */
// Gradient-histogram bin (see gradient_to_angle_bin) associated with each
// intra prediction mode; non-directional modes map to bin 0.
static const uint8_t mode_to_angle_bin[INTRA_MODES] = {
  0, 2, 6, 0, 4, 3, 5, 7, 1, 0,
  0,
};
/* clang-format on */
Yaowu Xuc27fc142016-08-22 16:08:15 -07003261
3262static void angle_estimation(const uint8_t *src, int src_stride, int rows,
hui su9cc10652017-04-27 17:22:07 -07003263 int cols, BLOCK_SIZE bsize,
3264 uint8_t *directional_mode_skip_mask) {
3265 memset(directional_mode_skip_mask, 0,
3266 INTRA_MODES * sizeof(*directional_mode_skip_mask));
Joe Young830d4ce2017-05-30 17:48:13 -07003267 // Check if angle_delta is used
3268 if (!av1_use_angle_delta(bsize)) return;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003269 uint64_t hist[DIRECTIONAL_MODES];
Yaowu Xuc27fc142016-08-22 16:08:15 -07003270 memset(hist, 0, DIRECTIONAL_MODES * sizeof(hist[0]));
3271 src += src_stride;
hui su9cc10652017-04-27 17:22:07 -07003272 int r, c, dx, dy;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003273 for (r = 1; r < rows; ++r) {
3274 for (c = 1; c < cols; ++c) {
3275 dx = src[c] - src[c - 1];
3276 dy = src[c] - src[c - src_stride];
hui su9cc10652017-04-27 17:22:07 -07003277 int index;
3278 const int temp = dx * dx + dy * dy;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003279 if (dy == 0) {
3280 index = 2;
3281 } else {
hui su9cc10652017-04-27 17:22:07 -07003282 const int sn = (dx > 0) ^ (dy > 0);
Yaowu Xuc27fc142016-08-22 16:08:15 -07003283 dx = abs(dx);
3284 dy = abs(dy);
hui su9cc10652017-04-27 17:22:07 -07003285 const int remd = (dx % dy) * 16 / dy;
3286 const int quot = dx / dy;
Yaowu Xuf883b422016-08-30 14:01:10 -07003287 index = gradient_to_angle_bin[sn][AOMMIN(quot, 6)][AOMMIN(remd, 15)];
Yaowu Xuc27fc142016-08-22 16:08:15 -07003288 }
3289 hist[index] += temp;
3290 }
3291 src += src_stride;
3292 }
3293
hui su9cc10652017-04-27 17:22:07 -07003294 int i;
3295 uint64_t hist_sum = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003296 for (i = 0; i < DIRECTIONAL_MODES; ++i) hist_sum += hist[i];
3297 for (i = 0; i < INTRA_MODES; ++i) {
hui su9cc10652017-04-27 17:22:07 -07003298 if (av1_is_directional_mode(i, bsize)) {
Urvang Joshida70e7b2016-10-19 11:48:54 -07003299 const uint8_t angle_bin = mode_to_angle_bin[i];
3300 uint64_t score = 2 * hist[angle_bin];
Yaowu Xuc27fc142016-08-22 16:08:15 -07003301 int weight = 2;
Urvang Joshida70e7b2016-10-19 11:48:54 -07003302 if (angle_bin > 0) {
3303 score += hist[angle_bin - 1];
3304 ++weight;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003305 }
Urvang Joshida70e7b2016-10-19 11:48:54 -07003306 if (angle_bin < DIRECTIONAL_MODES - 1) {
3307 score += hist[angle_bin + 1];
3308 ++weight;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003309 }
3310 if (score * ANGLE_SKIP_THRESH < hist_sum * weight)
3311 directional_mode_skip_mask[i] = 1;
3312 }
3313 }
3314}
3315
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02003316#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07003317static void highbd_angle_estimation(const uint8_t *src8, int src_stride,
hui su9cc10652017-04-27 17:22:07 -07003318 int rows, int cols, BLOCK_SIZE bsize,
Yaowu Xuc27fc142016-08-22 16:08:15 -07003319 uint8_t *directional_mode_skip_mask) {
hui su9cc10652017-04-27 17:22:07 -07003320 memset(directional_mode_skip_mask, 0,
3321 INTRA_MODES * sizeof(*directional_mode_skip_mask));
Joe Young830d4ce2017-05-30 17:48:13 -07003322 // Check if angle_delta is used
3323 if (!av1_use_angle_delta(bsize)) return;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003324 uint16_t *src = CONVERT_TO_SHORTPTR(src8);
hui su9cc10652017-04-27 17:22:07 -07003325 uint64_t hist[DIRECTIONAL_MODES];
Yaowu Xuc27fc142016-08-22 16:08:15 -07003326 memset(hist, 0, DIRECTIONAL_MODES * sizeof(hist[0]));
3327 src += src_stride;
hui su9cc10652017-04-27 17:22:07 -07003328 int r, c, dx, dy;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003329 for (r = 1; r < rows; ++r) {
3330 for (c = 1; c < cols; ++c) {
3331 dx = src[c] - src[c - 1];
3332 dy = src[c] - src[c - src_stride];
hui su9cc10652017-04-27 17:22:07 -07003333 int index;
3334 const int temp = dx * dx + dy * dy;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003335 if (dy == 0) {
3336 index = 2;
3337 } else {
hui su9cc10652017-04-27 17:22:07 -07003338 const int sn = (dx > 0) ^ (dy > 0);
Yaowu Xuc27fc142016-08-22 16:08:15 -07003339 dx = abs(dx);
3340 dy = abs(dy);
hui su9cc10652017-04-27 17:22:07 -07003341 const int remd = (dx % dy) * 16 / dy;
3342 const int quot = dx / dy;
Yaowu Xuf883b422016-08-30 14:01:10 -07003343 index = gradient_to_angle_bin[sn][AOMMIN(quot, 6)][AOMMIN(remd, 15)];
Yaowu Xuc27fc142016-08-22 16:08:15 -07003344 }
3345 hist[index] += temp;
3346 }
3347 src += src_stride;
3348 }
3349
hui su9cc10652017-04-27 17:22:07 -07003350 int i;
3351 uint64_t hist_sum = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003352 for (i = 0; i < DIRECTIONAL_MODES; ++i) hist_sum += hist[i];
3353 for (i = 0; i < INTRA_MODES; ++i) {
hui su9cc10652017-04-27 17:22:07 -07003354 if (av1_is_directional_mode(i, bsize)) {
Urvang Joshida70e7b2016-10-19 11:48:54 -07003355 const uint8_t angle_bin = mode_to_angle_bin[i];
3356 uint64_t score = 2 * hist[angle_bin];
Yaowu Xuc27fc142016-08-22 16:08:15 -07003357 int weight = 2;
Urvang Joshida70e7b2016-10-19 11:48:54 -07003358 if (angle_bin > 0) {
3359 score += hist[angle_bin - 1];
3360 ++weight;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003361 }
Urvang Joshida70e7b2016-10-19 11:48:54 -07003362 if (angle_bin < DIRECTIONAL_MODES - 1) {
3363 score += hist[angle_bin + 1];
3364 ++weight;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003365 }
3366 if (score * ANGLE_SKIP_THRESH < hist_sum * weight)
3367 directional_mode_skip_mask[i] = 1;
3368 }
3369 }
3370}
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02003371#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07003372#endif // CONFIG_EXT_INTRA
3373
// This function is used only for intra_only frames.
// Searches all luma intra prediction modes (plus optional palette and
// filter-intra refinements) for the block and returns the best RD cost.
// Outputs, written only when a mode improves on the incoming best_rd:
//   *rate           - total rate of the winning mode
//   *rate_tokenonly - token-only part of that rate (tx_size cost removed)
//   *distortion     - distortion of the winning mode
//   *skippable      - whether all coefficients of the winner are zero
// The winning mode configuration is committed into xd->mi[0]->mbmi.
static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
                                      int *rate, int *rate_tokenonly,
                                      int64_t *distortion, int *skippable,
                                      BLOCK_SIZE bsize, int64_t best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  MB_MODE_INFO *const mbmi = &mic->mbmi;
  assert(!is_inter_block(mbmi));
  MB_MODE_INFO best_mbmi = *mbmi;
  int64_t best_model_rd = INT64_MAX;
#if CONFIG_EXT_INTRA
  const int rows = block_size_high[bsize];
  const int cols = block_size_wide[bsize];
  int is_directional_mode;
  uint8_t directional_mode_skip_mask[INTRA_MODES];
  const int src_stride = x->plane[0].src.stride;
  const uint8_t *src = x->plane[0].src.buf;
#endif  // CONFIG_EXT_INTRA
#if CONFIG_FILTER_INTRA
  // Set when any mode in the loop below beats the incoming best_rd; gates
  // the filter-intra refinement at the end.
  int beat_best_rd = 0;
#endif  // CONFIG_FILTER_INTRA
  const int *bmode_costs;
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
  int palette_y_mode_ctx = 0;
  const int try_palette =
      av1_allow_palette(cpi->common.allow_screen_content_tools, mbmi->sb_type);
  uint8_t *best_palette_color_map =
      try_palette ? x->palette_buffer->best_palette_color_map : NULL;
  const MODE_INFO *above_mi = xd->above_mi;
  const MODE_INFO *left_mi = xd->left_mi;
  const PREDICTION_MODE A = av1_above_block_mode(mic, above_mi, 0);
  const PREDICTION_MODE L = av1_left_block_mode(mic, left_mi, 0);
  // Sentinel mode index: re-runs the best mode with full tx-type search.
  const PREDICTION_MODE FINAL_MODE_SEARCH = PAETH_PRED + 1;

#if CONFIG_KF_CTX
  const int above_ctx = intra_mode_context[A];
  const int left_ctx = intra_mode_context[L];
  bmode_costs = x->y_mode_costs[above_ctx][left_ctx];
#else
  bmode_costs = x->y_mode_costs[A][L];
#endif

#if CONFIG_EXT_INTRA
  mbmi->angle_delta[0] = 0;
  // Pre-compute which directional modes can be skipped from the source
  // block's gradient statistics.
#if CONFIG_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
    highbd_angle_estimation(src, src_stride, rows, cols, bsize,
                            directional_mode_skip_mask);
  else
#endif  // CONFIG_HIGHBITDEPTH
    angle_estimation(src, src_stride, rows, cols, bsize,
                     directional_mode_skip_mask);
#endif  // CONFIG_EXT_INTRA
#if CONFIG_FILTER_INTRA
  mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
#endif  // CONFIG_FILTER_INTRA
  pmi->palette_size[0] = 0;
  // Palette y-mode context: number of neighbors (above/left) using palette.
  if (try_palette) {
    if (above_mi) {
      palette_y_mode_ctx +=
          (above_mi->mbmi.palette_mode_info.palette_size[0] > 0);
    }
    if (left_mi) {
      palette_y_mode_ctx +=
          (left_mi->mbmi.palette_mode_info.palette_size[0] > 0);
    }
  }

  if (cpi->sf.tx_type_search.fast_intra_tx_type_search)
    x->use_default_intra_tx_type = 1;
  else
    x->use_default_intra_tx_type = 0;

  /* Y Search for intra prediction mode */
  for (int mode_idx = DC_PRED; mode_idx <= FINAL_MODE_SEARCH; ++mode_idx) {
    RD_STATS this_rd_stats;
    int this_rate, this_rate_tokenonly, s;
    int64_t this_distortion, this_rd, this_model_rd;
    if (mode_idx == FINAL_MODE_SEARCH) {
      // Extra pass: re-evaluate the winner with the full tx-type search,
      // but only if the earlier passes used the fast default tx type.
      if (x->use_default_intra_tx_type == 0) break;
      mbmi->mode = best_mbmi.mode;
      x->use_default_intra_tx_type = 0;
    } else {
      assert(mode_idx < INTRA_MODES);
      mbmi->mode = intra_rd_search_mode_order[mode_idx];
    }

#if CONFIG_EXT_INTRA
    mbmi->angle_delta[0] = 0;
#endif  // CONFIG_EXT_INTRA
    // Model-RD pruning: drop modes estimated far (>1.5x) above the best.
    this_model_rd = intra_model_yrd(cpi, x, bsize, bmode_costs[mbmi->mode]);
    if (best_model_rd != INT64_MAX &&
        this_model_rd > best_model_rd + (best_model_rd >> 1))
      continue;
    if (this_model_rd < best_model_rd) best_model_rd = this_model_rd;
#if CONFIG_EXT_INTRA
    is_directional_mode = av1_is_directional_mode(mbmi->mode, bsize);
    if (is_directional_mode && directional_mode_skip_mask[mbmi->mode]) continue;
    if (is_directional_mode && av1_use_angle_delta(bsize)) {
      // Directional modes also search over the angle delta.
      this_rd_stats.rate = INT_MAX;
      rd_pick_intra_angle_sby(cpi, x, &this_rate, &this_rd_stats, bsize,
                              bmode_costs[mbmi->mode], best_rd, &best_model_rd);
    } else {
      super_block_yrd(cpi, x, &this_rd_stats, bsize, best_rd);
    }
#else
    super_block_yrd(cpi, x, &this_rd_stats, bsize, best_rd);
#endif  // CONFIG_EXT_INTRA
    this_rate_tokenonly = this_rd_stats.rate;
    this_distortion = this_rd_stats.dist;
    s = this_rd_stats.skip;

    if (this_rate_tokenonly == INT_MAX) continue;

    this_rate = this_rate_tokenonly + bmode_costs[mbmi->mode];

    if (!xd->lossless[mbmi->segment_id] &&
        block_signals_txsize(mbmi->sb_type)) {
      // super_block_yrd above includes the cost of the tx_size in the
      // tokenonly rate, but for intra blocks, tx_size is always coded
      // (prediction granularity), so we account for it in the full rate,
      // not the tokenonly rate.
      this_rate_tokenonly -=
          tx_size_cost(&cpi->common, x, bsize, mbmi->tx_size);
    }
    // Add the cost of signaling "no palette" when palette is available.
    if (try_palette && mbmi->mode == DC_PRED) {
      this_rate +=
          x->palette_y_mode_cost[bsize - BLOCK_8X8][palette_y_mode_ctx][0];
    }
#if CONFIG_FILTER_INTRA
    // Add the cost of signaling "no filter intra" where it could be coded.
    if (mbmi->mode == DC_PRED && av1_filter_intra_allowed_txsize(mbmi->tx_size))
      this_rate += x->filter_intra_cost[mbmi->tx_size][0];
#endif  // CONFIG_FILTER_INTRA
#if CONFIG_EXT_INTRA
    // Add the cost of signaling the chosen angle delta.
    if (is_directional_mode) {
      if (av1_use_angle_delta(bsize)) {
#if CONFIG_EXT_INTRA_MOD
        this_rate +=
            x->angle_delta_cost[mbmi->mode - V_PRED]
                               [MAX_ANGLE_DELTA + mbmi->angle_delta[0]];
#else
        this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
                                        MAX_ANGLE_DELTA + mbmi->angle_delta[0]);
#endif  // CONFIG_EXT_INTRA_MOD
      }
    }
#endif  // CONFIG_EXT_INTRA
#if CONFIG_INTRABC
    // Add the cost of signaling "not intrabc" where intrabc is available.
    if (bsize >= BLOCK_8X8 && cpi->common.allow_screen_content_tools)
      this_rate += x->intrabc_cost[0];
#endif  // CONFIG_INTRABC
    this_rd = RDCOST(x->rdmult, this_rate, this_distortion);

    if (this_rd < best_rd) {
      best_mbmi = *mbmi;
      best_rd = this_rd;
#if CONFIG_FILTER_INTRA
      beat_best_rd = 1;
#endif  // CONFIG_FILTER_INTRA
      *rate = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion = this_distortion;
      *skippable = s;
    }
  }

  // Palette refinement: may replace the winner in best_mbmi.
  if (try_palette) {
    rd_pick_palette_intra_sby(cpi, x, bsize, palette_y_mode_ctx,
                              bmode_costs[DC_PRED], &best_mbmi,
                              best_palette_color_map, &best_rd, &best_model_rd,
                              rate, rate_tokenonly, distortion, skippable);
  }

#if CONFIG_FILTER_INTRA
  // Filter-intra refinement: only tried when a regular mode already beat
  // the incoming best_rd, and only for eligible, non-lossless blocks.
  if (beat_best_rd && av1_filter_intra_allowed_bsize(bsize) &&
      !xd->lossless[mbmi->segment_id]) {
    if (rd_pick_filter_intra_sby(cpi, x, rate, rate_tokenonly, distortion,
                                 skippable, bsize, bmode_costs[DC_PRED],
                                 &best_rd, &best_model_rd)) {
      best_mbmi = *mbmi;
    }
  }
#endif  // CONFIG_FILTER_INTRA

  *mbmi = best_mbmi;
  return best_rd;
}
3562
Yue Chena1e48dc2016-08-29 17:29:33 -07003563// Return value 0: early termination triggered, no valid rd cost available;
3564// 1: rd cost values are valid.
Angie Chiang284d7772016-11-08 11:06:45 -08003565static int super_block_uvrd(const AV1_COMP *const cpi, MACROBLOCK *x,
3566 RD_STATS *rd_stats, BLOCK_SIZE bsize,
3567 int64_t ref_best_rd) {
Yue Chena1e48dc2016-08-29 17:29:33 -07003568 MACROBLOCKD *const xd = &x->e_mbd;
3569 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
Luc Trudeau52301a22017-11-29 11:02:55 -05003570 struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_U];
3571 const TX_SIZE uv_tx_size = av1_get_uv_tx_size(mbmi, pd);
Yue Chena1e48dc2016-08-29 17:29:33 -07003572 int plane;
Yue Chena1e48dc2016-08-29 17:29:33 -07003573 int is_cost_valid = 1;
Angie Chiang284d7772016-11-08 11:06:45 -08003574 av1_init_rd_stats(rd_stats);
Yue Chena1e48dc2016-08-29 17:29:33 -07003575
3576 if (ref_best_rd < 0) is_cost_valid = 0;
Jingning Han9ce464c2017-02-20 15:36:30 -08003577
Jingning Han9ce464c2017-02-20 15:36:30 -08003578 if (x->skip_chroma_rd) return is_cost_valid;
Jingning Han2d2dac22017-04-11 09:41:10 -07003579
Luc Trudeau52301a22017-11-29 11:02:55 -05003580 bsize = scale_chroma_bsize(bsize, pd->subsampling_x, pd->subsampling_y);
Jingning Han9ce464c2017-02-20 15:36:30 -08003581
Yue Chena1e48dc2016-08-29 17:29:33 -07003582 if (is_inter_block(mbmi) && is_cost_valid) {
Yue Chena1e48dc2016-08-29 17:29:33 -07003583 for (plane = 1; plane < MAX_MB_PLANE; ++plane)
3584 av1_subtract_plane(x, bsize, plane);
3585 }
Yue Chena1e48dc2016-08-29 17:29:33 -07003586
Yushin Cho09de28b2016-06-21 14:51:23 -07003587 if (is_cost_valid) {
3588 for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
Angie Chiang7c2b7f22016-11-07 16:00:00 -08003589 RD_STATS pn_rd_stats;
3590 txfm_rd_in_plane(x, cpi, &pn_rd_stats, ref_best_rd, plane, bsize,
3591 uv_tx_size, cpi->sf.use_fast_coef_costing);
3592 if (pn_rd_stats.rate == INT_MAX) {
Yushin Cho09de28b2016-06-21 14:51:23 -07003593 is_cost_valid = 0;
3594 break;
3595 }
Angie Chiang284d7772016-11-08 11:06:45 -08003596 av1_merge_rd_stats(rd_stats, &pn_rd_stats);
Urvang Joshi70006e42017-06-14 16:08:55 -07003597 if (RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) > ref_best_rd &&
3598 RDCOST(x->rdmult, 0, rd_stats->sse) > ref_best_rd) {
Yushin Cho09de28b2016-06-21 14:51:23 -07003599 is_cost_valid = 0;
3600 break;
3601 }
Yue Chena1e48dc2016-08-29 17:29:33 -07003602 }
3603 }
3604
3605 if (!is_cost_valid) {
3606 // reset cost value
Angie Chiang284d7772016-11-08 11:06:45 -08003607 av1_invalid_rd_stats(rd_stats);
Yue Chena1e48dc2016-08-29 17:29:33 -07003608 }
3609
3610 return is_cost_valid;
3611}
3612
Yaowu Xuf883b422016-08-30 14:01:10 -07003613void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
3614 int blk_row, int blk_col, int plane, int block,
Angie Chiang77368af2017-03-23 16:22:07 -07003615 int plane_bsize, const ENTROPY_CONTEXT *a,
Alexander Bokovc5ddf062017-10-17 16:41:46 -07003616 const ENTROPY_CONTEXT *l, RD_STATS *rd_stats, int fast,
3617 TX_SIZE_RD_INFO *rd_info_array) {
Angie Chiang22ba7512016-10-20 17:10:33 -07003618 const AV1_COMMON *const cm = &cpi->common;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003619 MACROBLOCKD *xd = &x->e_mbd;
3620 const struct macroblock_plane *const p = &x->plane[plane];
3621 struct macroblockd_plane *const pd = &xd->plane[plane];
Yaowu Xu5f2749b2017-11-02 09:12:10 -07003622#if DISABLE_TRELLISQ_SEARCH
3623 (void)fast;
3624#endif
Jingning Han243b66b2017-06-23 12:11:47 -07003625#if CONFIG_TXK_SEL
3626 av1_search_txk_type(cpi, x, plane, block, blk_row, blk_col, plane_bsize,
3627 tx_size, a, l, 0, rd_stats);
3628 return;
3629#endif
Debargha Mukherjeed2cfbef2017-12-03 16:15:27 -08003630 // This function is used only for inter
3631 assert(is_inter_block(&xd->mi[0]->mbmi));
Yaowu Xuc27fc142016-08-22 16:08:15 -07003632 int64_t tmp;
3633 tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
Luc Trudeau005feb62017-02-22 13:34:01 -05003634 PLANE_TYPE plane_type = get_plane_type(plane);
Jingning Han19b5c8f2017-07-06 15:10:12 -07003635 TX_TYPE tx_type =
3636 av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size);
Urvang Joshi03f6fdc2016-10-14 15:53:39 -07003637 const SCAN_ORDER *const scan_order =
Angie Chiangbd99b382017-06-20 15:11:16 -07003638 get_scan(cm, tx_size, tx_type, &xd->mi[0]->mbmi);
Yaowu Xuc27fc142016-08-22 16:08:15 -07003639 BLOCK_SIZE txm_bsize = txsize_to_bsize[tx_size];
Jingning Han9fdc4222016-10-27 21:32:19 -07003640 int bh = block_size_high[txm_bsize];
3641 int bw = block_size_wide[txm_bsize];
Yaowu Xuc27fc142016-08-22 16:08:15 -07003642 int src_stride = p->src.stride;
Jingning Han9ca05b72017-01-03 14:41:36 -08003643 uint8_t *src =
3644 &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
3645 uint8_t *dst =
3646 &pd->dst
3647 .buf[(blk_row * pd->dst.stride + blk_col) << tx_size_wide_log2[0]];
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02003648#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07003649 DECLARE_ALIGNED(16, uint16_t, rec_buffer16[MAX_TX_SQUARE]);
3650 uint8_t *rec_buffer;
3651#else
3652 DECLARE_ALIGNED(16, uint8_t, rec_buffer[MAX_TX_SQUARE]);
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02003653#endif // CONFIG_HIGHBITDEPTH
Timothy B. Terriberryd62e2a32017-06-10 16:04:21 -07003654 const int diff_stride = block_size_wide[plane_bsize];
Jingning Han9ca05b72017-01-03 14:41:36 -08003655 const int16_t *diff =
3656 &p->src_diff[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
Angie Chiangd81fdb42016-11-03 12:20:58 -07003657 int txb_coeff_cost;
Jingning Hand3fada82016-11-22 10:46:55 -08003658
3659 assert(tx_size < TX_SIZES_ALL);
Yaowu Xuc27fc142016-08-22 16:08:15 -07003660
Alexander Bokovc5ddf062017-10-17 16:41:46 -07003661 const int coeff_ctx = get_entropy_context(tx_size, a, l);
3662 const int coeff_ctx_one_byte = combine_entropy_contexts(*a, *l);
3663 const uint8_t cur_joint_ctx = (coeff_ctx << 2) + coeff_ctx_one_byte;
Angie Chiang77368af2017-03-23 16:22:07 -07003664
Monty Montgomery4a05a582017-11-01 21:21:07 -04003665 // Note: tmp below is pixel distortion, not TX domain
Jingning Han1a7f0a82017-07-27 09:48:05 -07003666 tmp = pixel_diff_dist(x, plane, diff, diff_stride, blk_row, blk_col,
3667 plane_bsize, txm_bsize);
3668
3669#if CONFIG_HIGHBITDEPTH
3670 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
3671 tmp = ROUND_POWER_OF_TWO(tmp, (xd->bd - 8) * 2);
3672#endif // CONFIG_HIGHBITDEPTH
Alexander Bokovc5ddf062017-10-17 16:41:46 -07003673
Jingning Han1a7f0a82017-07-27 09:48:05 -07003674 rd_stats->sse += tmp << 4;
3675
3676 if (rd_stats->invalid_rate) {
3677 rd_stats->dist += tmp << 4;
3678 rd_stats->rate += rd_stats->zero_rate;
3679 rd_stats->skip = 1;
3680 return;
3681 }
3682
Alexander Bokovc5ddf062017-10-17 16:41:46 -07003683 // Look up RD and terminate early in case when we've already processed exactly
3684 // the same residual with exactly the same entropy context.
3685 if (rd_info_array != NULL && rd_info_array[tx_type].valid &&
3686 rd_info_array[tx_type].entropy_context == cur_joint_ctx &&
3687 rd_info_array[tx_type].fast == fast) {
3688 rd_stats->dist += rd_info_array[tx_type].dist;
3689 rd_stats->rate += rd_info_array[tx_type].rate;
Hui Su8c2b9132017-12-09 10:40:15 -08003690 rd_stats->skip &= rd_info_array[tx_type].eob == 0;
3691 p->eobs[block] = rd_info_array[tx_type].eob;
Alexander Bokovc5ddf062017-10-17 16:41:46 -07003692 return;
3693 }
3694
3695 int64_t cur_dist = 0;
3696 int cur_rate = 0;
3697 uint8_t cur_skip = 1;
3698
Angie Chiang808d8592017-04-06 18:36:55 -07003699// TODO(any): Use av1_dist_block to compute distortion
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02003700#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07003701 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
3702 rec_buffer = CONVERT_TO_BYTEPTR(rec_buffer16);
Yaowu Xuf883b422016-08-30 14:01:10 -07003703 aom_highbd_convolve_copy(dst, pd->dst.stride, rec_buffer, MAX_TX_SIZE, NULL,
Jingning Han9fdc4222016-10-27 21:32:19 -07003704 0, NULL, 0, bw, bh, xd->bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07003705 } else {
3706 rec_buffer = (uint8_t *)rec_buffer16;
Yaowu Xuf883b422016-08-30 14:01:10 -07003707 aom_convolve_copy(dst, pd->dst.stride, rec_buffer, MAX_TX_SIZE, NULL, 0,
Jingning Han9fdc4222016-10-27 21:32:19 -07003708 NULL, 0, bw, bh);
Yaowu Xuc27fc142016-08-22 16:08:15 -07003709 }
3710#else
Yaowu Xuf883b422016-08-30 14:01:10 -07003711 aom_convolve_copy(dst, pd->dst.stride, rec_buffer, MAX_TX_SIZE, NULL, 0, NULL,
Jingning Han9fdc4222016-10-27 21:32:19 -07003712 0, bw, bh);
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02003713#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07003714
Jingning Han3bce7542017-07-25 10:53:57 -07003715#if DISABLE_TRELLISQ_SEARCH
3716 av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
Sarah Parkere8d0d4c2017-12-06 15:11:37 -08003717 AV1_XFORM_QUANT_B);
Jingning Han3bce7542017-07-25 10:53:57 -07003718
3719#else
3720 av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
Sarah Parkere8d0d4c2017-12-06 15:11:37 -08003721 AV1_XFORM_QUANT_FP);
Jingning Han3bce7542017-07-25 10:53:57 -07003722
Monty Montgomery4a05a582017-11-01 21:21:07 -04003723// TX-domain results need to shift down to Q2/D10 to match pixel
3724// domain distortion values which are in Q2^2
3725#if CONFIG_DAALA_TX
3726 const int shift = (TX_COEFF_DEPTH - 10) * 2;
3727#else
Jingning Han3bce7542017-07-25 10:53:57 -07003728 const int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2;
Monty Montgomery4a05a582017-11-01 21:21:07 -04003729#endif
Jingning Han3bce7542017-07-25 10:53:57 -07003730 tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
Urvang Joshi80893152017-10-27 11:51:14 -07003731 const int buffer_length = av1_get_max_eob(tx_size);
Jingning Han1a7f0a82017-07-27 09:48:05 -07003732 int64_t tmp_dist, tmp_sse;
Yunqing Wang24d2d5d2017-09-20 09:45:13 -07003733#if CONFIG_DIST_8X8
Yushin Chof9970a52017-10-13 12:57:13 -07003734 int blk_w = block_size_wide[plane_bsize];
3735 int blk_h = block_size_high[plane_bsize];
Yushin Choc00769a2017-09-14 14:44:30 -07003736 int disable_early_skip =
Yushin Chof9970a52017-10-13 12:57:13 -07003737 x->using_dist_8x8 && plane == 0 && blk_w >= 8 && blk_h >= 8 &&
Yushin Choc00769a2017-09-14 14:44:30 -07003738 (tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4) &&
3739 x->tune_metric != AOM_TUNE_PSNR;
Yunqing Wang24d2d5d2017-09-20 09:45:13 -07003740#endif // CONFIG_DIST_8X8
3741
Monty Montgomerya26262c2017-10-31 07:32:13 -04003742#if CONFIG_DAALA_TX
3743 tmp_dist =
3744 av1_highbd_block_error(coeff, dqcoeff, buffer_length, &tmp_sse, xd->bd);
3745#else
Jingning Han3bce7542017-07-25 10:53:57 -07003746#if CONFIG_HIGHBITDEPTH
3747 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
Debargha Mukherjeeb02d2f32017-10-03 11:06:40 -07003748 tmp_dist =
3749 av1_highbd_block_error(coeff, dqcoeff, buffer_length, &tmp_sse, xd->bd);
Jingning Han3bce7542017-07-25 10:53:57 -07003750 else
3751#endif
Debargha Mukherjeeb02d2f32017-10-03 11:06:40 -07003752 tmp_dist = av1_block_error(coeff, dqcoeff, buffer_length, &tmp_sse);
Monty Montgomerya26262c2017-10-31 07:32:13 -04003753#endif
Debargha Mukherjeeb02d2f32017-10-03 11:06:40 -07003754
3755 tmp_dist = RIGHT_SIGNED_SHIFT(tmp_dist, shift);
Jingning Han3bce7542017-07-25 10:53:57 -07003756
Yushin Choa4817a62017-07-27 13:09:43 -07003757 if (
3758#if CONFIG_DIST_8X8
Yushin Choc00769a2017-09-14 14:44:30 -07003759 disable_early_skip ||
Yushin Choa4817a62017-07-27 13:09:43 -07003760#endif
3761 RDCOST(x->rdmult, 0, tmp_dist) < rd_stats->ref_rdcost) {
Jingning Han3bce7542017-07-25 10:53:57 -07003762 av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize, tx_size,
Debargha Mukherjee51666862017-10-24 14:29:13 -07003763 a, l, fast);
Jingning Han1a7f0a82017-07-27 09:48:05 -07003764 } else {
3765 rd_stats->rate += rd_stats->zero_rate;
Yushin Cho952eae22017-10-03 16:21:06 -07003766 rd_stats->dist += tmp << 4;
Jingning Han1a7f0a82017-07-27 09:48:05 -07003767 rd_stats->skip = 1;
3768 rd_stats->invalid_rate = 1;
3769 return;
Jingning Han3bce7542017-07-25 10:53:57 -07003770 }
3771#endif // DISABLE_TRELLISQ_SEARCH
3772
Angie Chiang41fffae2017-04-03 10:33:18 -07003773 const int eob = p->eobs[block];
Yaowu Xuc27fc142016-08-22 16:08:15 -07003774
Sebastien Alaiwan9f001f32017-11-28 16:32:33 +01003775 av1_inverse_transform_block(xd, dqcoeff, plane, tx_type, tx_size, rec_buffer,
3776 MAX_TX_SIZE, eob, cm->reduced_tx_set_used);
Angie Chiang41fffae2017-04-03 10:33:18 -07003777 if (eob > 0) {
Yushin Chob7b60c52017-07-14 16:18:52 -07003778#if CONFIG_DIST_8X8
Yushin Cho55104332017-08-14 16:15:43 -07003779 if (x->using_dist_8x8 && plane == 0 && (bw < 8 && bh < 8)) {
Yushin Cho04749122017-05-25 14:19:07 -07003780 // Save sub8x8 luma decoded pixels
3781 // since 8x8 luma decoded pixels are not available for daala-dist
3782 // after recursive split of BLOCK_8x8 is done.
3783 const int pred_stride = block_size_wide[plane_bsize];
3784 const int pred_idx = (blk_row * pred_stride + blk_col)
3785 << tx_size_wide_log2[0];
3786 int16_t *decoded = &pd->pred[pred_idx];
3787 int i, j;
3788
Yushin Cho8ab875d2017-06-23 14:47:21 -07003789#if CONFIG_HIGHBITDEPTH
3790 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
3791 for (j = 0; j < bh; j++)
3792 for (i = 0; i < bw; i++)
3793 decoded[j * pred_stride + i] =
3794 CONVERT_TO_SHORTPTR(rec_buffer)[j * MAX_TX_SIZE + i];
3795 } else {
3796#endif
3797 for (j = 0; j < bh; j++)
3798 for (i = 0; i < bw; i++)
3799 decoded[j * pred_stride + i] = rec_buffer[j * MAX_TX_SIZE + i];
3800#if CONFIG_HIGHBITDEPTH
3801 }
3802#endif // CONFIG_HIGHBITDEPTH
Yushin Cho04749122017-05-25 14:19:07 -07003803 }
Yushin Chob7b60c52017-07-14 16:18:52 -07003804#endif // CONFIG_DIST_8X8
Yushin Cho75b01002017-06-21 13:43:57 -07003805 tmp = pixel_dist(cpi, x, plane, src, src_stride, rec_buffer, MAX_TX_SIZE,
3806 blk_row, blk_col, plane_bsize, txm_bsize);
Yaowu Xuc27fc142016-08-22 16:08:15 -07003807 }
Alexander Bokovc5ddf062017-10-17 16:41:46 -07003808 cur_dist = tmp * 16;
Jingning Han7eab9ff2017-07-06 10:12:54 -07003809 txb_coeff_cost = av1_cost_coeffs(cpi, x, plane, blk_row, blk_col, block,
3810 tx_size, scan_order, a, l, 0);
Alexander Bokovc5ddf062017-10-17 16:41:46 -07003811 cur_rate = txb_coeff_cost;
3812 cur_skip = (eob == 0);
3813
3814 // Save RD results for possible reuse in future.
3815 if (rd_info_array != NULL) {
3816 rd_info_array[tx_type].valid = 1;
3817 rd_info_array[tx_type].entropy_context = cur_joint_ctx;
3818 rd_info_array[tx_type].fast = fast;
3819 rd_info_array[tx_type].dist = cur_dist;
3820 rd_info_array[tx_type].rate = cur_rate;
Hui Su8c2b9132017-12-09 10:40:15 -08003821 rd_info_array[tx_type].eob = eob;
Alexander Bokovc5ddf062017-10-17 16:41:46 -07003822 }
3823
3824 rd_stats->dist += cur_dist;
3825 rd_stats->rate += cur_rate;
3826 rd_stats->skip &= cur_skip;
Jingning Han63cbf342016-11-09 15:37:48 -08003827
Angie Chiangd81fdb42016-11-03 12:20:58 -07003828#if CONFIG_RD_DEBUG
Angie Chiange94556b2016-11-09 10:59:30 -08003829 av1_update_txb_coeff_cost(rd_stats, plane, tx_size, blk_row, blk_col,
3830 txb_coeff_cost);
Fergus Simpson4063a682017-02-28 16:52:22 -08003831#endif // CONFIG_RD_DEBUG
Yaowu Xuc27fc142016-08-22 16:08:15 -07003832}
3833
// Recursively searches for the rate-distortion-optimal transform block
// partitioning of one transform block within a plane. Two codings are
// evaluated: (1) coding the block whole at 'tx_size', and (2) when depth and
// pruning flags allow, splitting into four sub-transforms and recursing.
// The cheaper coding (by RDCOST) is kept; its rate/dist/sse/skip are
// accumulated into *rd_stats and the chosen sizes written back into
// mbmi->inter_tx_size / mbmi->tx_size. *is_cost_valid is cleared when no
// coding with rd below ref_best_rd is found. 'rd_info_node' optionally
// caches per-tx-type RD results for reuse (may be NULL).
static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
                            int blk_col, int plane, int block, TX_SIZE tx_size,
                            int depth, BLOCK_SIZE plane_bsize,
                            ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl,
                            TXFM_CONTEXT *tx_above, TXFM_CONTEXT *tx_left,
                            RD_STATS *rd_stats, int64_t ref_best_rd,
                            int *is_cost_valid, int fast,
                            int tx_split_prune_flag,
                            TX_SIZE_RD_INFO_NODE *rd_info_node) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int tx_row = blk_row >> (1 - pd->subsampling_y);
  const int tx_col = blk_col >> (1 - pd->subsampling_x);
  // View into the per-mi transform-size grid, anchored at this block's
  // position so inter_tx_size[0][0] is the current block's entry.
  TX_SIZE(*const inter_tx_size)
  [MAX_MIB_SIZE] =
      (TX_SIZE(*)[MAX_MIB_SIZE]) & mbmi->inter_tx_size[tx_row][tx_col];
  const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
  const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
  const int bw = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
  int64_t this_rd = INT64_MAX;   // rd of coding the block whole
  ENTROPY_CONTEXT *pta = ta + blk_col;
  ENTROPY_CONTEXT *ptl = tl + blk_row;
  int ctx = txfm_partition_context(tx_above + blk_col, tx_left + blk_row,
                                   mbmi->sb_type, tx_size);
  int64_t sum_rd = INT64_MAX;    // rd of coding the block split in four
  int tmp_eob = 0;
  int zero_blk_rate;
  RD_STATS sum_rd_stats;
#if CONFIG_TXK_SEL
  TX_TYPE best_tx_type = TX_TYPES;
  int txk_idx = (blk_row << MAX_MIB_SIZE_LOG2) + blk_col;
#endif

  av1_init_rd_stats(&sum_rd_stats);

  assert(tx_size < TX_SIZES_ALL);

  if (ref_best_rd < 0) {
    *is_cost_valid = 0;
    return;
  }

  av1_init_rd_stats(rd_stats);

  // Blocks that fall entirely outside the visible frame cost nothing.
  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;

  // Rate of signalling an all-zero (skipped) transform block, used both as
  // an alternative coding below and as the zero_rate reference for callers.
#if CONFIG_LV_MAP
  TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
  TXB_CTX txb_ctx;
  get_txb_ctx(plane_bsize, tx_size, plane, pta, ptl, &txb_ctx);

  zero_blk_rate = x->coeff_costs[txs_ctx][get_plane_type(plane)]
                      .txb_skip_cost[txb_ctx.txb_skip_ctx][1];
#else
  TX_SIZE tx_size_ctx = get_txsize_entropy_ctx(tx_size);
  int coeff_ctx = get_entropy_context(tx_size, pta, ptl);
  zero_blk_rate =
      x->token_head_costs[tx_size_ctx][pd->plane_type][1][0][coeff_ctx][0];
#endif

  rd_stats->ref_rdcost = ref_best_rd;
  rd_stats->zero_rate = zero_blk_rate;
  // --- Option 1: code the whole block at this tx_size. ---
  if (cpi->common.tx_mode == TX_MODE_SELECT || tx_size == TX_4X4) {
    inter_tx_size[0][0] = tx_size;
    av1_tx_block_rd_b(
        cpi, x, tx_size, blk_row, blk_col, plane, block, plane_bsize, pta, ptl,
        rd_stats, fast,
        rd_info_node != NULL ? rd_info_node->rd_info_array : NULL);
    if (rd_stats->rate == INT_MAX) return;

    // Force the zero-coefficient coding when it is no worse in rd than the
    // full coding (not allowed in lossless mode).
    if ((RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) >=
             RDCOST(x->rdmult, zero_blk_rate, rd_stats->sse) ||
         rd_stats->skip == 1) &&
        !xd->lossless[mbmi->segment_id]) {
#if CONFIG_RD_DEBUG
      av1_update_txb_coeff_cost(rd_stats, plane, tx_size, blk_row, blk_col,
                                zero_blk_rate - rd_stats->rate);
#endif  // CONFIG_RD_DEBUG
      rd_stats->rate = zero_blk_rate;
      rd_stats->dist = rd_stats->sse;
      rd_stats->skip = 1;
      x->blk_skip[plane][blk_row * bw + blk_col] = 1;
      p->eobs[block] = 0;
#if CONFIG_TXK_SEL
      mbmi->txk_type[txk_idx] = DCT_DCT;
#endif
    } else {
      x->blk_skip[plane][blk_row * bw + blk_col] = 0;
      rd_stats->skip = 0;
    }

    // Add the cost of signalling "no split" when a split was possible.
    if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH)
      rd_stats->rate += x->txfm_partition_cost[ctx][0];
    this_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
    // Remember the whole-block entropy state so it can be restored if the
    // split path (evaluated next) overwrites it but loses.
#if CONFIG_LV_MAP
    tmp_eob = p->txb_entropy_ctx[block];
#else
    tmp_eob = p->eobs[block];
#endif

#if CONFIG_TXK_SEL
    best_tx_type = mbmi->txk_type[txk_idx];
#endif
  }

  // --- Option 2: split into four sub-transforms and recurse. ---
  if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH && tx_split_prune_flag == 0) {
    const TX_SIZE sub_txs = sub_tx_size_map[1][tx_size];
    const int bsw = tx_size_wide_unit[sub_txs];
    const int bsh = tx_size_high_unit[sub_txs];
    int sub_step = bsw * bsh;
    RD_STATS this_rd_stats;
    int this_cost_valid = 1;
    int64_t tmp_rd = 0;
#if CONFIG_DIST_8X8
    int sub8x8_eob[4] = { 0, 0, 0, 0 };
#endif
    // Cost of signalling the split decision itself.
    sum_rd_stats.rate = x->txfm_partition_cost[ctx][1];

    assert(tx_size < TX_SIZES_ALL);

    ref_best_rd = AOMMIN(this_rd, ref_best_rd);

    int blk_idx = 0;
    for (int r = 0; r < tx_size_high_unit[tx_size]; r += bsh) {
      for (int c = 0; c < tx_size_wide_unit[tx_size]; c += bsw, ++blk_idx) {
        const int offsetr = blk_row + r;
        const int offsetc = blk_col + c;
        if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
        assert(blk_idx < 4);
        select_tx_block(
            cpi, x, offsetr, offsetc, plane, block, sub_txs, depth + 1,
            plane_bsize, ta, tl, tx_above, tx_left, &this_rd_stats,
            ref_best_rd - tmp_rd, &this_cost_valid, fast, 0,
            (rd_info_node != NULL) ? rd_info_node->children[blk_idx] : NULL);

        // Early-exit checks are suppressed under dist-8x8 tuning, which
        // needs all four sub-block results below.
#if CONFIG_DIST_8X8
        if (!x->using_dist_8x8)
#endif
          if (!this_cost_valid) break;
#if CONFIG_DIST_8X8
        if (x->using_dist_8x8 && plane == 0 && tx_size == TX_8X8) {
          sub8x8_eob[2 * (r / bsh) + (c / bsw)] = p->eobs[block];
        }
#endif  // CONFIG_DIST_8X8
        av1_merge_rd_stats(&sum_rd_stats, &this_rd_stats);

        tmp_rd = RDCOST(x->rdmult, sum_rd_stats.rate, sum_rd_stats.dist);
#if CONFIG_DIST_8X8
        if (!x->using_dist_8x8)
#endif
          // Split already costs more than coding whole: stop accumulating.
          if (this_rd < tmp_rd) break;
        block += sub_step;
      }
    }
#if CONFIG_DIST_8X8
    // For dist-8x8 tuning at luma TX_8X8, recompute sse/dist of the split
    // coding with the 8x8 distortion metric over the reassembled pixels.
    if (x->using_dist_8x8 && this_cost_valid && plane == 0 &&
        tx_size == TX_8X8) {
      const int src_stride = p->src.stride;
      const int dst_stride = pd->dst.stride;

      const uint8_t *src =
          &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
      const uint8_t *dst =
          &pd->dst
               .buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];

      int64_t dist_8x8;
      int qindex = x->qindex;
      const int pred_stride = block_size_wide[plane_bsize];
      const int pred_idx = (blk_row * pred_stride + blk_col)
                           << tx_size_wide_log2[0];
      int16_t *pred = &pd->pred[pred_idx];
      int i, j;
      int row, col;

#if CONFIG_HIGHBITDEPTH
      uint8_t *pred8;
      DECLARE_ALIGNED(16, uint16_t, pred8_16[8 * 8]);
#else
      DECLARE_ALIGNED(16, uint8_t, pred8[8 * 8]);
#endif  // CONFIG_HIGHBITDEPTH

      dist_8x8 = av1_dist_8x8(cpi, x, src, src_stride, dst, dst_stride,
                              BLOCK_8X8, 8, 8, 8, 8, qindex) *
                 16;
      if (x->tune_metric == AOM_TUNE_PSNR && xd->bd == 8)
        assert(sum_rd_stats.sse == dist_8x8);
      sum_rd_stats.sse = dist_8x8;

#if CONFIG_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
        pred8 = CONVERT_TO_BYTEPTR(pred8_16);
      else
        pred8 = (uint8_t *)pred8_16;
#endif

      // Assemble an 8x8 reconstruction: sub-blocks with nonzero eob come
      // from the saved prediction buffer, zero-eob sub-blocks from dst.
#if CONFIG_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        for (row = 0; row < 2; ++row) {
          for (col = 0; col < 2; ++col) {
            int idx = row * 2 + col;
            int eob = sub8x8_eob[idx];

            if (eob > 0) {
              for (j = 0; j < 4; j++)
                for (i = 0; i < 4; i++)
                  CONVERT_TO_SHORTPTR(pred8)
                  [(row * 4 + j) * 8 + 4 * col + i] =
                      pred[(row * 4 + j) * pred_stride + 4 * col + i];
            } else {
              for (j = 0; j < 4; j++)
                for (i = 0; i < 4; i++)
                  CONVERT_TO_SHORTPTR(pred8)
                  [(row * 4 + j) * 8 + 4 * col + i] = CONVERT_TO_SHORTPTR(
                      dst)[(row * 4 + j) * dst_stride + 4 * col + i];
            }
          }
        }
      } else {
#endif
        for (row = 0; row < 2; ++row) {
          for (col = 0; col < 2; ++col) {
            int idx = row * 2 + col;
            int eob = sub8x8_eob[idx];

            if (eob > 0) {
              for (j = 0; j < 4; j++)
                for (i = 0; i < 4; i++)
                  pred8[(row * 4 + j) * 8 + 4 * col + i] =
                      (uint8_t)pred[(row * 4 + j) * pred_stride + 4 * col + i];
            } else {
              for (j = 0; j < 4; j++)
                for (i = 0; i < 4; i++)
                  pred8[(row * 4 + j) * 8 + 4 * col + i] =
                      dst[(row * 4 + j) * dst_stride + 4 * col + i];
            }
          }
        }
#if CONFIG_HIGHBITDEPTH
      }
#endif  // CONFIG_HIGHBITDEPTH
      dist_8x8 = av1_dist_8x8(cpi, x, src, src_stride, pred8, 8, BLOCK_8X8, 8,
                              8, 8, 8, qindex) *
                 16;
      if (x->tune_metric == AOM_TUNE_PSNR && xd->bd == 8)
        assert(sum_rd_stats.dist == dist_8x8);
      sum_rd_stats.dist = dist_8x8;
      tmp_rd = RDCOST(x->rdmult, sum_rd_stats.rate, sum_rd_stats.dist);
    }
#endif  // CONFIG_DIST_8X8
    if (this_cost_valid) sum_rd = tmp_rd;
  }

  // --- Pick the cheaper option and commit its state. ---
  if (this_rd < sum_rd) {
    int idx, idy;
    TX_SIZE tx_size_selected = tx_size;

    // Restore the whole-block entropy state saved before the split search.
#if CONFIG_LV_MAP
    p->txb_entropy_ctx[block] = tmp_eob;
#else
    p->eobs[block] = tmp_eob;
#endif

    av1_set_txb_context(x, plane, block, tx_size_selected, pta, ptl);

    txfm_partition_update(tx_above + blk_col, tx_left + blk_row, tx_size,
                          tx_size);
    inter_tx_size[0][0] = tx_size_selected;
    // Fill the covered mi grid entries with the selected size.
    for (idy = 0; idy < AOMMAX(1, tx_size_high_unit[tx_size] / 2); ++idy)
      for (idx = 0; idx < AOMMAX(1, tx_size_wide_unit[tx_size] / 2); ++idx)
        inter_tx_size[idy][idx] = tx_size_selected;
    mbmi->tx_size = tx_size_selected;
#if CONFIG_TXK_SEL
    mbmi->txk_type[txk_idx] = best_tx_type;
#endif
    if (this_rd == INT64_MAX) *is_cost_valid = 0;
    x->blk_skip[plane][blk_row * bw + blk_col] = rd_stats->skip;
  } else {
    *rd_stats = sum_rd_stats;
    if (sum_rd == INT64_MAX) *is_cost_valid = 0;
  }
}
4118
Debargha Mukherjeeedc73462017-10-31 15:13:32 -07004119static int get_search_init_depth(int mi_width, int mi_height,
4120 const SPEED_FEATURES *sf) {
4121 if (sf->tx_size_search_method == USE_LARGESTALL) return MAX_VARTX_DEPTH;
4122 return (mi_height != mi_width) ? sf->tx_size_search_init_depth_rect
4123 : sf->tx_size_search_init_depth_sqr;
4124}
4125
// Searches the luma (plane 0) transform-size partitioning of an inter block:
// walks the plane in maximum-transform-size steps, runs select_tx_block() on
// each step, and accumulates rate/distortion into *rd_stats. On failure, or
// when the accumulated rd exceeds ref_best_rd, *rd_stats is invalidated.
static void select_inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
                                   RD_STATS *rd_stats, BLOCK_SIZE bsize,
                                   int64_t ref_best_rd, int fast,
                                   int tx_split_prune_flag,
                                   TX_SIZE_RD_INFO_NODE *rd_info_tree) {
  MACROBLOCKD *const xd = &x->e_mbd;
  int is_cost_valid = 1;
  int64_t this_rd = 0;

  if (ref_best_rd < 0) is_cost_valid = 0;

  av1_init_rd_stats(rd_stats);

  if (is_cost_valid) {
    const struct macroblockd_plane *const pd = &xd->plane[0];
    const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
    const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
    const int mi_height = block_size_high[plane_bsize] >> tx_size_high_log2[0];
    const TX_SIZE max_tx_size = get_max_rect_tx_size(plane_bsize, 1);
    const int bh = tx_size_high_unit[max_tx_size];
    const int bw = tx_size_wide_unit[max_tx_size];
    int idx, idy;
    int block = 0;
    int step = tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size];
    // Local copies of the entropy / txfm-partition contexts so the recursive
    // search can mutate them without touching the shared xd state.
    ENTROPY_CONTEXT ctxa[2 * MAX_MIB_SIZE];
    ENTROPY_CONTEXT ctxl[2 * MAX_MIB_SIZE];
    TXFM_CONTEXT tx_above[MAX_MIB_SIZE * 2];
    TXFM_CONTEXT tx_left[MAX_MIB_SIZE * 2];

    RD_STATS pn_rd_stats;
    const int init_depth = get_search_init_depth(mi_width, mi_height, &cpi->sf);
    av1_init_rd_stats(&pn_rd_stats);

    av1_get_entropy_contexts(bsize, 0, pd, ctxa, ctxl);
    memcpy(tx_above, xd->above_txfm_context, sizeof(TXFM_CONTEXT) * mi_width);
    memcpy(tx_left, xd->left_txfm_context, sizeof(TXFM_CONTEXT) * mi_height);

    for (idy = 0; idy < mi_height; idy += bh) {
      for (idx = 0; idx < mi_width; idx += bw) {
        select_tx_block(cpi, x, idy, idx, 0, block, max_tx_size, init_depth,
                        plane_bsize, ctxa, ctxl, tx_above, tx_left,
                        &pn_rd_stats, ref_best_rd - this_rd, &is_cost_valid,
                        fast, tx_split_prune_flag, rd_info_tree);
        if (!is_cost_valid || pn_rd_stats.rate == INT_MAX) {
          av1_invalid_rd_stats(rd_stats);
          return;
        }
        av1_merge_rd_stats(rd_stats, &pn_rd_stats);
        // Track the running rd using the cheaper of the coded and all-zero
        // codings of each unit, tightening the budget passed to later units.
        this_rd +=
            AOMMIN(RDCOST(x->rdmult, pn_rd_stats.rate, pn_rd_stats.dist),
                   RDCOST(x->rdmult, pn_rd_stats.zero_rate, pn_rd_stats.sse));
        block += step;
        if (rd_info_tree != NULL) rd_info_tree += 1;
      }
    }
  }
  // If signalling the whole block as zero is cheaper, mark it skipped.
  int64_t zero_rd = RDCOST(x->rdmult, rd_stats->zero_rate, rd_stats->sse);
  this_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
  if (zero_rd < this_rd) {
    this_rd = zero_rd;
    rd_stats->skip = 1;
  }
  if (this_rd > ref_best_rd) is_cost_valid = 0;

  if (!is_cost_valid) {
    // reset cost value
    av1_invalid_rd_stats(rd_stats);
  }
}
4195
// Computes the luma RD cost of an inter block for a fixed transform type
// 'tx_type', letting the transform-size partitioning be chosen by
// select_inter_block_yrd(). Adds the tx-type signalling rate (when
// !CONFIG_TXK_SEL) and folds in skip-flag costs. Returns the total rd, or
// INT64_MAX when no valid coding was found.
static int64_t select_tx_size_fix_type(const AV1_COMP *cpi, MACROBLOCK *x,
                                       RD_STATS *rd_stats, BLOCK_SIZE bsize,
                                       int mi_row, int mi_col,
                                       int64_t ref_best_rd, TX_TYPE tx_type,
                                       int tx_split_prune_flag,
                                       TX_SIZE_RD_INFO_NODE *rd_info_tree) {
  // Fast mode: a cheaper first pass, refined below by inter_block_yrd().
  const int fast = cpi->sf.tx_size_search_method > USE_FULL_RD;
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const int is_inter = is_inter_block(mbmi);
  const int skip_ctx = av1_get_skip_context(xd);
  int s0 = x->skip_cost[skip_ctx][0];  // rate of skip flag == 0
  int s1 = x->skip_cost[skip_ctx][1];  // rate of skip flag == 1
  int64_t rd;
  int row, col;
  const int max_blocks_high = max_block_high(xd, bsize, 0);
  const int max_blocks_wide = max_block_wide(xd, bsize, 0);

  // TODO(debargha): enable this as a speed feature where the
  // select_inter_block_yrd() function above will use a simplified search
  // such as not using full optimize, but the inter_block_yrd() function
  // will use more complex search given that the transform partitions have
  // already been decided.

  (void)cm;
  (void)mi_row;
  (void)mi_col;

  mbmi->tx_type = tx_type;
  select_inter_block_yrd(cpi, x, rd_stats, bsize, ref_best_rd, fast,
                         tx_split_prune_flag, rd_info_tree);
  if (rd_stats->rate == INT_MAX) return INT64_MAX;

  // min_tx_size = smallest transform size chosen anywhere in the block.
  mbmi->min_tx_size = get_min_tx_size(mbmi->inter_tx_size[0][0]);
  for (row = 0; row < max_blocks_high / 2; ++row)
    for (col = 0; col < max_blocks_wide / 2; ++col)
      mbmi->min_tx_size = AOMMIN(
          mbmi->min_tx_size, get_min_tx_size(mbmi->inter_tx_size[row][col]));

  if (fast) {
    // Do a better (non-fast) search with tx sizes already decided.
    // Currently, trellis optimization is turned on only for this pass, and
    // the function below performs a more accurate rd cost calculation based
    // on that.
    if (!inter_block_yrd(cpi, x, rd_stats, bsize, ref_best_rd, 0))
      return INT64_MAX;
  }

  // Add the rate of signalling the extended transform type (signalled
  // per-block here; under CONFIG_TXK_SEL it is signalled per-tx-block
  // elsewhere, hence the guard).
#if !CONFIG_TXK_SEL
  if (get_ext_tx_types(mbmi->min_tx_size, bsize, is_inter,
                       cm->reduced_tx_set_used) > 1 &&
      !xd->lossless[xd->mi[0]->mbmi.segment_id]) {
    const int ext_tx_set = get_ext_tx_set(mbmi->min_tx_size, bsize, is_inter,
                                          cm->reduced_tx_set_used);
    if (is_inter) {
      if (ext_tx_set > 0)
        rd_stats->rate +=
            x->inter_tx_type_costs[ext_tx_set]
                                  [txsize_sqr_map[mbmi->min_tx_size]]
                                  [mbmi->tx_type];
    } else {
      if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX) {
#if CONFIG_FILTER_INTRA
        // Intra tx-type cost is conditioned on the (possibly filter-intra
        // derived) prediction direction.
        PREDICTION_MODE intra_dir;
        if (mbmi->filter_intra_mode_info.use_filter_intra_mode[0])
          intra_dir = fimode_to_intradir[mbmi->filter_intra_mode_info
                                             .filter_intra_mode[0]];
        else
          intra_dir = mbmi->mode;
        rd_stats->rate += x->intra_tx_type_costs[ext_tx_set][mbmi->min_tx_size]
                                                [intra_dir][mbmi->tx_type];
#else
        rd_stats->rate += x->intra_tx_type_costs[ext_tx_set][mbmi->min_tx_size]
                                                [mbmi->mode][mbmi->tx_type];
#endif
      }
    }
  }
#endif  // CONFIG_TXK_SEL

  if (rd_stats->skip)
    rd = RDCOST(x->rdmult, s1, rd_stats->sse);
  else
    rd = RDCOST(x->rdmult, rd_stats->rate + s0, rd_stats->dist);

  // Non-lossless inter blocks may also be coded as skip; take the cheaper.
  if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
      !(rd_stats->skip))
    rd = AOMMIN(rd, RDCOST(x->rdmult, s1, rd_stats->sse));

  return rd;
}
4288
// Finds rd cost for a y block, given the transform size partitions
//
// Recursively walks the TX partition tree rooted at (blk_row, blk_col):
// when the current tx_size equals the size recorded for this position in
// mbmi->inter_tx_size[][] the block is a leaf and its rate/distortion is
// computed directly; otherwise the costs of the sub-transform blocks one
// level down are accumulated.  Results are returned in *rd_stats; on a
// failed sub-search *rd_stats is marked invalid (rate == INT_MAX).
// `fast` is forwarded to av1_tx_block_rd_b to select a cheaper RD path.
static void tx_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
                         int blk_col, int plane, int block, TX_SIZE tx_size,
                         BLOCK_SIZE plane_bsize, int depth,
                         ENTROPY_CONTEXT *above_ctx, ENTROPY_CONTEXT *left_ctx,
                         TXFM_CONTEXT *tx_above, TXFM_CONTEXT *tx_left,
                         int64_t ref_best_rd, RD_STATS *rd_stats, int fast) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  struct macroblockd_plane *const pd = &xd->plane[plane];
  BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
  // Luma position mapped into the (possibly subsampled) plane's mi grid.
  const int tx_row = blk_row >> (1 - pd->subsampling_y);
  const int tx_col = blk_col >> (1 - pd->subsampling_x);
  TX_SIZE plane_tx_size;
  const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
  const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);

  assert(tx_size < TX_SIZES_ALL);

  // Blocks that fall outside the visible frame area carry no cost.
  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;

  // The TX size actually selected for this position; chroma derives its
  // size from the co-located luma choice via uv_txsize_lookup.
  plane_tx_size =
      plane ? uv_txsize_lookup[bsize][mbmi->inter_tx_size[tx_row][tx_col]][0][0]
            : mbmi->inter_tx_size[tx_row][tx_col];

  int ctx = txfm_partition_context(tx_above + blk_col, tx_left + blk_row,
                                   mbmi->sb_type, tx_size);

  av1_init_rd_stats(rd_stats);
  if (tx_size == plane_tx_size
#if DISABLE_VARTX_FOR_CHROMA
      || pd->subsampling_x || pd->subsampling_y
#endif  // DISABLE_VARTX_FOR_CHROMA
      ) {
    // Leaf: cost this transform block directly.
    ENTROPY_CONTEXT *ta = above_ctx + blk_col;
    ENTROPY_CONTEXT *tl = left_ctx + blk_row;
#if CONFIG_LV_MAP
    const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
    TXB_CTX txb_ctx;
    get_txb_ctx(plane_bsize, tx_size, plane, ta, tl, &txb_ctx);

    // Rate of signalling an all-zero (skipped) transform block.
    const int zero_blk_rate = x->coeff_costs[txs_ctx][get_plane_type(plane)]
                                  .txb_skip_cost[txb_ctx.txb_skip_ctx][1];
#else
    const int coeff_ctx = get_entropy_context(tx_size, ta, tl);
    const TX_SIZE tx_size_ctx = get_txsize_entropy_ctx(tx_size);
    const int zero_blk_rate =
        x->token_head_costs[tx_size_ctx][pd->plane_type][1][0][coeff_ctx][0];
#endif  // CONFIG_LV_MAP
    rd_stats->zero_rate = zero_blk_rate;
    rd_stats->ref_rdcost = ref_best_rd;
    av1_tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
                      plane_bsize, ta, tl, rd_stats, fast, NULL);
    const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
    // Force skip when coding the coefficients is not cheaper than coding
    // an all-zero block (rate vs. sse trade-off).
    if (RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) >=
            RDCOST(x->rdmult, zero_blk_rate, rd_stats->sse) ||
        rd_stats->skip == 1) {
      rd_stats->rate = zero_blk_rate;
      rd_stats->dist = rd_stats->sse;
      rd_stats->skip = 1;
      x->blk_skip[plane][blk_row * mi_width + blk_col] = 1;
      x->plane[plane].eobs[block] = 0;
#if CONFIG_LV_MAP
      x->plane[plane].txb_entropy_ctx[block] = 0;
#endif  // CONFIG_LV_MAP
    } else {
      rd_stats->skip = 0;
      x->blk_skip[plane][blk_row * mi_width + blk_col] = 0;
    }
    // Cost of signalling "no further split" at this node (only where a
    // split was possible).
    if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH)
      rd_stats->rate += x->txfm_partition_cost[ctx][0];
    av1_set_txb_context(x, plane, block, tx_size, ta, tl);
    txfm_partition_update(tx_above + blk_col, tx_left + blk_row, tx_size,
                          tx_size);
  } else {
    // Interior node: recurse into the sub-transform sizes one level down.
    // NOTE(review): index [1] appears to select the table variant that
    // permits rectangular splits — confirm against common_data.h.
    const TX_SIZE sub_txs = sub_tx_size_map[1][tx_size];
    const int bsw = tx_size_wide_unit[sub_txs];
    const int bsh = tx_size_high_unit[sub_txs];
    const int step = bsh * bsw;  // blocks advanced per sub-transform
    RD_STATS pn_rd_stats;
    int64_t this_rd = 0;
    assert(bsw > 0 && bsh > 0);

    for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) {
      for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) {
        const int offsetr = blk_row + row;
        const int offsetc = blk_col + col;

        if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;

        av1_init_rd_stats(&pn_rd_stats);
        // Tighten the RD budget by what has been spent so far.
        tx_block_yrd(cpi, x, offsetr, offsetc, plane, block, sub_txs,
                     plane_bsize, depth + 1, above_ctx, left_ctx, tx_above,
                     tx_left, ref_best_rd - this_rd, &pn_rd_stats, fast);
        if (pn_rd_stats.rate == INT_MAX) {
          av1_invalid_rd_stats(rd_stats);
          return;
        }
        av1_merge_rd_stats(rd_stats, &pn_rd_stats);
        this_rd += RDCOST(x->rdmult, pn_rd_stats.rate, pn_rd_stats.dist);
        block += step;
      }
    }

    // Cost of signalling the split decision at this node.
    if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH)
      rd_stats->rate += x->txfm_partition_cost[ctx][1];
  }
}
4397
// Return value 0: early termination triggered, no valid rd cost available;
//              1: rd cost values are valid.
//
// Computes the luma rate/distortion of an inter block for the TX sizes
// already recorded in mbmi->inter_tx_size[][], by tiling the block with
// the maximum variable-TX size and descending via tx_block_yrd().  At the
// end the whole-block "code everything as zero" cost is compared against
// the accumulated cost and rd_stats->skip is set if skipping is cheaper.
int inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_stats,
                    BLOCK_SIZE bsize, int64_t ref_best_rd, int fast) {
  MACROBLOCKD *const xd = &x->e_mbd;
  int is_cost_valid = 1;
  int64_t this_rd = 0;

  if (ref_best_rd < 0) is_cost_valid = 0;

  av1_init_rd_stats(rd_stats);

  if (is_cost_valid) {
    const struct macroblockd_plane *const pd = &xd->plane[0];
    const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
    const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
    const int mi_height = block_size_high[plane_bsize] >> tx_size_high_log2[0];
    const TX_SIZE max_tx_size = get_vartx_max_txsize(
        xd, plane_bsize, pd->subsampling_x || pd->subsampling_y);
    const int bh = tx_size_high_unit[max_tx_size];
    const int bw = tx_size_wide_unit[max_tx_size];
    const int init_depth = get_search_init_depth(mi_width, mi_height, &cpi->sf);
    int idx, idy;
    int block = 0;
    int step = tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size];
    // Local copies of the entropy/TX-partition contexts so the recursive
    // cost evaluation can mutate them without touching the real state.
    ENTROPY_CONTEXT ctxa[2 * MAX_MIB_SIZE];
    ENTROPY_CONTEXT ctxl[2 * MAX_MIB_SIZE];
    TXFM_CONTEXT tx_above[MAX_MIB_SIZE * 2];
    TXFM_CONTEXT tx_left[MAX_MIB_SIZE * 2];
    RD_STATS pn_rd_stats;

    av1_get_entropy_contexts(bsize, 0, pd, ctxa, ctxl);
    memcpy(tx_above, xd->above_txfm_context, sizeof(TXFM_CONTEXT) * mi_width);
    memcpy(tx_left, xd->left_txfm_context, sizeof(TXFM_CONTEXT) * mi_height);

    // Tile the block with max-size TX units and cost each tree.
    for (idy = 0; idy < mi_height; idy += bh) {
      for (idx = 0; idx < mi_width; idx += bw) {
        av1_init_rd_stats(&pn_rd_stats);
        tx_block_yrd(cpi, x, idy, idx, 0, block, max_tx_size, plane_bsize,
                     init_depth, ctxa, ctxl, tx_above, tx_left,
                     ref_best_rd - this_rd, &pn_rd_stats, fast);
        if (pn_rd_stats.rate == INT_MAX) {
          av1_invalid_rd_stats(rd_stats);
          return 0;
        }
        av1_merge_rd_stats(rd_stats, &pn_rd_stats);
        // Budget tracking uses the better of coding vs. zeroing each unit.
        this_rd +=
            AOMMIN(RDCOST(x->rdmult, pn_rd_stats.rate, pn_rd_stats.dist),
                   RDCOST(x->rdmult, pn_rd_stats.zero_rate, pn_rd_stats.sse));
        block += step;
      }
    }
  }
  // Whole-block skip decision: coding all coefficients as zero may beat
  // the accumulated per-unit decision.
  int64_t zero_rd = RDCOST(x->rdmult, rd_stats->zero_rate, rd_stats->sse);
  this_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
  if (zero_rd < this_rd) {
    this_rd = zero_rd;
    rd_stats->skip = 1;
  }
  if (this_rd > ref_best_rd) is_cost_valid = 0;

  if (!is_cost_valid) {
    // reset cost value
    av1_invalid_rd_stats(rd_stats);
  }
  return is_cost_valid;
}
4465
Hui Su1ddf2312017-08-19 15:21:34 -07004466static uint32_t get_block_residue_hash(MACROBLOCK *x, BLOCK_SIZE bsize) {
4467 const int rows = block_size_high[bsize];
4468 const int cols = block_size_wide[bsize];
Hui Su1ddf2312017-08-19 15:21:34 -07004469 const struct macroblock_plane *const p = &x->plane[0];
4470 const int16_t *diff = &p->src_diff[0];
Debargha Mukherjeefd65c8d2017-11-07 15:45:55 -08004471 uint16_t hash_data[MAX_SB_SQUARE];
4472 memcpy(hash_data, diff, sizeof(*hash_data) * rows * cols);
4473 return (av1_get_crc_value(&x->tx_rd_record.crc_calculator,
4474 (uint8_t *)hash_data, 2 * rows * cols)
Hui Su1ddf2312017-08-19 15:21:34 -07004475 << 7) +
4476 bsize;
4477}
4478
4479static void save_tx_rd_info(int n4, uint32_t hash, const MACROBLOCK *const x,
4480 const RD_STATS *const rd_stats,
Hui Su89ef4932017-11-28 10:54:31 -08004481 TX_RD_RECORD *tx_rd_record) {
4482 int index;
4483 if (tx_rd_record->num < RD_RECORD_BUFFER_LEN) {
4484 index =
4485 (tx_rd_record->index_start + tx_rd_record->num) % RD_RECORD_BUFFER_LEN;
4486 ++tx_rd_record->num;
4487 } else {
4488 index = tx_rd_record->index_start;
4489 tx_rd_record->index_start =
4490 (tx_rd_record->index_start + 1) % RD_RECORD_BUFFER_LEN;
4491 }
4492 TX_RD_INFO *const tx_rd_info = &tx_rd_record->tx_rd_info[index];
Hui Su1ddf2312017-08-19 15:21:34 -07004493 const MACROBLOCKD *const xd = &x->e_mbd;
4494 const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
4495 tx_rd_info->hash_value = hash;
4496 tx_rd_info->tx_type = mbmi->tx_type;
4497 tx_rd_info->tx_size = mbmi->tx_size;
Hui Su1ddf2312017-08-19 15:21:34 -07004498 tx_rd_info->min_tx_size = mbmi->min_tx_size;
4499 memcpy(tx_rd_info->blk_skip, x->blk_skip[0],
4500 sizeof(tx_rd_info->blk_skip[0]) * n4);
4501 for (int idy = 0; idy < xd->n8_h; ++idy)
4502 for (int idx = 0; idx < xd->n8_w; ++idx)
4503 tx_rd_info->inter_tx_size[idy][idx] = mbmi->inter_tx_size[idy][idx];
Hui Su1ddf2312017-08-19 15:21:34 -07004504#if CONFIG_TXK_SEL
4505 av1_copy(tx_rd_info->txk_type, mbmi->txk_type);
4506#endif // CONFIG_TXK_SEL
4507 tx_rd_info->rd_stats = *rd_stats;
4508}
4509
4510static void fetch_tx_rd_info(int n4, const TX_RD_INFO *const tx_rd_info,
4511 RD_STATS *const rd_stats, MACROBLOCK *const x) {
4512 MACROBLOCKD *const xd = &x->e_mbd;
4513 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
4514 mbmi->tx_type = tx_rd_info->tx_type;
4515 mbmi->tx_size = tx_rd_info->tx_size;
Hui Su1ddf2312017-08-19 15:21:34 -07004516 mbmi->min_tx_size = tx_rd_info->min_tx_size;
4517 memcpy(x->blk_skip[0], tx_rd_info->blk_skip,
4518 sizeof(tx_rd_info->blk_skip[0]) * n4);
4519 for (int idy = 0; idy < xd->n8_h; ++idy)
4520 for (int idx = 0; idx < xd->n8_w; ++idx)
4521 mbmi->inter_tx_size[idy][idx] = tx_rd_info->inter_tx_size[idy][idx];
Hui Su1ddf2312017-08-19 15:21:34 -07004522#if CONFIG_TXK_SEL
4523 av1_copy(mbmi->txk_type, tx_rd_info->txk_type);
4524#endif // CONFIG_TXK_SEL
4525 *rd_stats = tx_rd_info->rd_stats;
4526}
4527
Alexander Bokovc5ddf062017-10-17 16:41:46 -07004528static int find_tx_size_rd_info(TX_SIZE_RD_RECORD *cur_record,
4529 const uint32_t hash) {
4530 // Linear search through the circular buffer to find matching hash.
4531 int index;
4532 for (int i = cur_record->num - 1; i >= 0; i--) {
4533 index = (cur_record->index_start + i) % TX_SIZE_RD_RECORD_BUFFER_LEN;
4534 if (cur_record->hash_vals[index] == hash) return index;
4535 }
4536
4537 // If not found - add new RD info into the buffer and return its index
4538 if (cur_record->num < TX_SIZE_RD_RECORD_BUFFER_LEN) {
4539 index = (cur_record->index_start + cur_record->num) %
4540 TX_SIZE_RD_RECORD_BUFFER_LEN;
4541 cur_record->num++;
4542 } else {
4543 index = cur_record->index_start;
4544 cur_record->index_start =
4545 (cur_record->index_start + 1) % TX_SIZE_RD_RECORD_BUFFER_LEN;
4546 }
4547
4548 cur_record->hash_vals[index] = hash;
4549 av1_zero(cur_record->tx_rd_info[index]);
4550 return index;
4551}
4552
// Go through all TX blocks that could be used in TX size search, compute
// residual hash values for them and find matching RD info that stores previous
// RD search results for these TX blocks. The idea is to prevent repeated
// rate/distortion computations that happen because of the combination of
// partition and TX size search. The resulting RD info records are returned in
// the form of a quadtree for easier access in actual TX size search.
//
// Returns 0 (nothing cached) for non-square blocks or blocks whose largest
// square TX is below 8x8; otherwise fills dst_rd_info and returns 1.
static int find_tx_size_rd_records(MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row,
                                   int mi_col,
                                   TX_SIZE_RD_INFO_NODE *dst_rd_info) {
  // Per-TX-size record tables, indexed by (cur_tx_size - TX_8X8).
#if CONFIG_TX64X64
  TX_SIZE_RD_RECORD *rd_records_table[4] = { x->tx_size_rd_record_8X8,
                                             x->tx_size_rd_record_16X16,
                                             x->tx_size_rd_record_32X32,
                                             x->tx_size_rd_record_64X64 };
#else
  TX_SIZE_RD_RECORD *rd_records_table[3] = { x->tx_size_rd_record_8X8,
                                             x->tx_size_rd_record_16X16,
                                             x->tx_size_rd_record_32X32 };
#endif
  const TX_SIZE max_square_tx_size = max_txsize_lookup[bsize];
  const int bw = block_size_wide[bsize];
  const int bh = block_size_high[bsize];

  // Hashing is performed only for square TX sizes larger than TX_4X4
  if (max_square_tx_size < TX_8X8 || bw != bh) return 0;

  const int diff_stride = bw;
  const struct macroblock_plane *const p = &x->plane[0];
  const int16_t *diff = &p->src_diff[0];

  // Coordinates of the top-left corner of current block within the superblock
  // measured in pixels:
  const int mi_row_in_sb = (mi_row % MAX_MIB_SIZE) << MI_SIZE_LOG2;
  const int mi_col_in_sb = (mi_col % MAX_MIB_SIZE) << MI_SIZE_LOG2;
  int cur_rd_info_idx = 0;
  int cur_tx_depth = 0;
  // For each pixel, the dst_rd_info index of the enclosing parent node at
  // the previous depth (used to link the quadtree children).
  uint8_t parent_idx_buf[MAX_SB_SQUARE] = { 0 };

  // Walk the TX sizes top-down, one depth level per iteration.
  // NOTE(review): index [1] appears to select the rect-split table
  // variant — confirm against common_data.h.
  int cur_tx_size = max_txsize_rect_lookup[1][bsize];
  while (cur_tx_depth <= MAX_VARTX_DEPTH) {
    const BLOCK_SIZE cur_tx_bsize = txsize_to_bsize[cur_tx_size];
    const int cur_tx_bw = block_size_wide[cur_tx_bsize];
    const int cur_tx_bh = block_size_high[cur_tx_bsize];
    if (cur_tx_bw < 8 || cur_tx_bh < 8) break;

    for (int row = 0; row < bh; row += cur_tx_bh) {
      for (int col = 0; col < bw; col += cur_tx_bw) {
        if (cur_tx_bw != cur_tx_bh) {
          // Use dummy nodes for all rectangular transforms within the
          // TX size search tree.
          dst_rd_info[cur_rd_info_idx].rd_info_array = NULL;
        } else {
          // Get spatial location of this TX block within the superblock
          // (measured in cur_tx_bsize units).
          const int row_in_sb = (mi_row_in_sb + row) / cur_tx_bh;
          const int col_in_sb = (mi_col_in_sb + col) / cur_tx_bw;

          // Compute FNV-1a hash for this TX block.
          uint32_t hash = 2166136261;
          for (int i = 0; i < cur_tx_bh; i++) {
            const int16_t *cur_diff_row = diff + (row + i) * diff_stride + col;
            for (int j = 0; j < cur_tx_bw; j++) {
              // Residual values are folded byte-wise into the hash.
              hash = hash ^ clip_pixel(cur_diff_row[j] + 128);
              hash = (uint32_t)((int64_t)hash * 16777619);
            }
          }

          // Find corresponding RD info based on the hash value.
          const int rd_record_idx =
              row_in_sb * (MAX_MIB_SIZE >> (cur_tx_size + 1 - TX_8X8)) +
              col_in_sb;
          int idx = find_tx_size_rd_info(
              &rd_records_table[cur_tx_size - TX_8X8][rd_record_idx], hash);
          dst_rd_info[cur_rd_info_idx].rd_info_array =
              rd_records_table[cur_tx_size - TX_8X8][rd_record_idx]
                  .tx_rd_info[idx];
        }

        // Update the output quadtree RD info structure.
        av1_zero(dst_rd_info[cur_rd_info_idx].children);
        if (cur_tx_depth > 0) {
          // Link this node into its parent's children[] by quadrant.
          const int y_odd = (row / cur_tx_bh) % 2;
          const int x_odd = (col / cur_tx_bw) % 2;
          const int child_idx = y_odd ? (x_odd ? 3 : 2) : (x_odd ? 1 : 0);
          dst_rd_info[parent_idx_buf[row * bw + col]].children[child_idx] =
              &dst_rd_info[cur_rd_info_idx];
        }
        // Mark this node as the parent for the next (deeper) level.
        for (int i = row; i < row + cur_tx_bh; ++i)
          memset(parent_idx_buf + i * bw + col, cur_rd_info_idx, cur_tx_bw);
        ++cur_rd_info_idx;
      }
    }
    cur_tx_size = sub_tx_size_map[1][cur_tx_size];
    ++cur_tx_depth;
  }
  return 1;
}
4650
// Thresholds used by predict_skip_flag(), indexed [bd_idx][bsize] where
// bd_idx is 0/1/2 for bit depths 8/10/12.  A block is predicted as skip
// when its largest scaled quantized coefficient (100 * |coef| / q) stays
// below the entry for its block size; entries of 0 therefore disable the
// prediction for that block size.
static const uint32_t skip_pred_threshold[3][BLOCK_SIZES_ALL] = {
  {
      0, 0, 0, 50, 50, 50, 55, 47, 47, 53, 53, 53, 0, 0, 0, 0,
#if CONFIG_EXT_PARTITION
      0, 0, 0,
#endif
      50, 50, 55, 55, 53, 53,
#if CONFIG_EXT_PARTITION
      0, 0,
#endif
  },
  {
      0, 0, 0, 69, 69, 69, 67, 68, 68, 53, 53, 53, 0, 0, 0, 0,
#if CONFIG_EXT_PARTITION
      0, 0, 0,
#endif
      69, 69, 67, 67, 53, 53,
#if CONFIG_EXT_PARTITION
      0, 0,
#endif
  },
  {
      0, 0, 0, 70, 73, 73, 70, 73, 73, 58, 58, 58, 0, 0, 0, 0,
#if CONFIG_EXT_PARTITION
      0, 0, 0,
#endif
      70, 70, 70, 70, 58, 58,
#if CONFIG_EXT_PARTITION
      0, 0,
#endif
  }
};
4683
// Uses simple features on top of DCT coefficients to quickly predict
// whether optimal RD decision is to skip encoding the residual.
// The sse value is stored in dist.
//
// Two-stage test: (1) reject when the mean squared residual already
// exceeds a quantizer-derived threshold; (2) forward-transform each
// max-size TX tile with DCT_DCT and compare the largest scaled quantized
// coefficient against a per-bitdepth, per-bsize threshold table.
// Returns 1 when skip is predicted, 0 otherwise.
static int predict_skip_flag(MACROBLOCK *x, BLOCK_SIZE bsize, int64_t *dist) {
  const int max_tx_size =
      get_max_rect_tx_size(bsize, is_inter_block(&x->e_mbd.mi[0]->mbmi));
  const int tx_h = tx_size_high[max_tx_size];
  const int tx_w = tx_size_wide[max_tx_size];
  // Only TX sizes up to 16x16 are handled (DCT_coefs holds 32*32 but the
  // thresholds were tuned for <=16 — larger sizes bail out).
  if (tx_h > 16 || tx_w > 16) return 0;

  const int bw = block_size_wide[bsize];
  const int bh = block_size_high[bsize];
  const MACROBLOCKD *xd = &x->e_mbd;
  const uint32_t dc_q = (uint32_t)av1_dc_quant_QTX(x->qindex, 0, xd->bd);

  // Sum of squared residuals for the whole block; returned via *dist.
  *dist = pixel_diff_dist(x, 0, x->plane[0].src_diff, bw, 0, 0, bsize, bsize);
  const int64_t mse = *dist / bw / bh;
  // Normalized quantizer takes the transform upscaling factor (8 for tx size
  // smaller than 32) into account.
  const uint32_t normalized_dc_q = dc_q >> 3;
  const int64_t mse_thresh = (int64_t)normalized_dc_q * normalized_dc_q / 8;
  // Predict not to skip when mse is larger than threshold.
  if (mse > mse_thresh) return 0;

  DECLARE_ALIGNED(32, tran_low_t, DCT_coefs[32 * 32]);
  TxfmParam param;
  param.tx_type = DCT_DCT;
  param.tx_size = max_tx_size;
  param.bd = xd->bd;
  param.is_hbd = get_bitdepth_data_path_index(xd);
  param.lossless = 0;
  const struct macroblockd_plane *const pd = &xd->plane[0];
  const BLOCK_SIZE plane_bsize =
      get_plane_block_size(xd->mi[0]->mbmi.sb_type, pd);
  // TODO(sarahparker) This assumes reduced_tx_set_used == 0. I will do a
  // follow up refactor to make the actual value of reduced_tx_set_used
  // within this function.
  param.tx_set_type = get_ext_tx_set_type(param.tx_size, plane_bsize,
                                          is_inter_block(&xd->mi[0]->mbmi), 0);
  const uint32_t ac_q = (uint32_t)av1_ac_quant_QTX(x->qindex, 0, xd->bd);
  uint32_t max_quantized_coef = 0;
  const int16_t *src_diff = x->plane[0].src_diff;
  // Transform each tx_w x tx_h tile and track the largest coefficient
  // scaled by 100/q (DC and AC use their respective quantizers).
  for (int row = 0; row < bh; row += tx_h) {
    for (int col = 0; col < bw; col += tx_w) {
#if CONFIG_TXMG
      av1_highbd_fwd_txfm(src_diff + col, DCT_coefs, bw, &param);
#else   // CONFIG_TXMG
      if (param.is_hbd)
        av1_highbd_fwd_txfm(src_diff + col, DCT_coefs, bw, &param);
      else
        av1_fwd_txfm(src_diff + col, DCT_coefs, bw, &param);
#endif  // CONFIG_TXMG

      // Operating on TX domain, not pixels; we want the QTX quantizers
      for (int i = 0; i < tx_w * tx_h; ++i) {
        uint32_t cur_quantized_coef =
            (100 * (uint32_t)abs(DCT_coefs[i])) / (i ? ac_q : dc_q);
        if (cur_quantized_coef > max_quantized_coef)
          max_quantized_coef = cur_quantized_coef;
      }
    }
    src_diff += tx_h * bw;
  }
  // Map bit depth 8/10/12 to table row 0/1/2.
  const int bd_idx = (xd->bd == 8) ? 0 : ((xd->bd == 10) ? 1 : 2);
  return max_quantized_coef < skip_pred_threshold[bd_idx][bsize];
}
4750
// Used to set proper context for early termination with skip = 1.
//
// Fills mbmi / blk_skip / rd_stats as if the whole block were coded with
// a single max-size DCT_DCT transform and all-zero coefficients, and
// computes the corresponding rate.  `dist` is the pixel-domain SSE
// (from predict_skip_flag); it is scaled into the units used by rd_stats.
static void set_skip_flag(const AV1_COMP *cpi, MACROBLOCK *x,
                          RD_STATS *rd_stats, int bsize, int64_t dist) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const int n4 = bsize_to_num_blk(bsize);
  const TX_SIZE tx_size = get_max_rect_tx_size(bsize, is_inter_block(mbmi));
  mbmi->tx_type = DCT_DCT;
#if CONFIG_TXK_SEL
  memset(mbmi->txk_type, DCT_DCT,
         sizeof(mbmi->txk_type[0]) *
             (MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)));
#endif
  // One uniform (unsplit) TX size for every position in the block.
  for (int idy = 0; idy < xd->n8_h; ++idy)
    for (int idx = 0; idx < xd->n8_w; ++idx)
      mbmi->inter_tx_size[idy][idx] = tx_size;
  mbmi->tx_size = tx_size;
  mbmi->min_tx_size = get_min_tx_size(tx_size);
  memset(x->blk_skip[0], 1, sizeof(uint8_t) * n4);
  rd_stats->skip = 1;

  (void)cpi;

  // Rate.
  const int tx_size_ctx = get_txsize_entropy_ctx(tx_size);
  ENTROPY_CONTEXT ctxa[2 * MAX_MIB_SIZE];
  ENTROPY_CONTEXT ctxl[2 * MAX_MIB_SIZE];
  av1_get_entropy_contexts(bsize, 0, &xd->plane[0], ctxa, ctxl);
#if CONFIG_LV_MAP
  TXB_CTX txb_ctx;
  // Because plane is 0, plane_bsize equal to bsize
  get_txb_ctx(bsize, tx_size, 0, ctxa, ctxl, &txb_ctx);
  // Cost of signalling an all-zero transform block.
  int rate = x->coeff_costs[tx_size_ctx][PLANE_TYPE_Y]
                 .txb_skip_cost[txb_ctx.txb_skip_ctx][1];
#else
  int coeff_ctx = get_entropy_context(tx_size, ctxa, ctxl);
  int rate = x->token_head_costs[tx_size_ctx][PLANE_TYPE_Y][1][0][coeff_ctx][0];
#endif
  // Cost of signalling "no TX split" at the root, when a split exists.
  if (tx_size > TX_4X4) {
    int ctx = txfm_partition_context(
        xd->above_txfm_context, xd->left_txfm_context, mbmi->sb_type, tx_size);
    rate += x->txfm_partition_cost[ctx][0];
  }
#if !CONFIG_TXK_SEL
  // TX type signalling cost (inter, non-lossless, extended TX set only).
  const AV1_COMMON *cm = &cpi->common;
  const int ext_tx_set = get_ext_tx_set(max_txsize_lookup[bsize], bsize, 1,
                                        cm->reduced_tx_set_used);
  if (get_ext_tx_types(mbmi->min_tx_size, bsize, 1, cm->reduced_tx_set_used) >
          1 &&
      !xd->lossless[xd->mi[0]->mbmi.segment_id]) {
    if (ext_tx_set > 0)
      rate +=
          x->inter_tx_type_costs[ext_tx_set][txsize_sqr_map[mbmi->min_tx_size]]
                                [mbmi->tx_type];
  }
#endif  // CONFIG_TXK_SEL
  rd_stats->rate = rate;
#if CONFIG_HIGHBITDEPTH
  // Scale high-bitdepth SSE down to the 8-bit reference scale.
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
    dist = ROUND_POWER_OF_TWO(dist, (xd->bd - 8) * 2);
#endif  // CONFIG_HIGHBITDEPTH
  // NOTE(review): the <<4 presumably converts pixel-domain SSE to the
  // TX-domain distortion scale used by RDCOST — confirm against
  // av1_tx_block_rd_b.
  rd_stats->dist = rd_stats->sse = (dist << 4);
}
4814
Angie Chiangb5dda482016-11-02 16:19:58 -07004815static void select_tx_type_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
Yue Chen25dc0702017-10-18 23:36:06 -07004816 RD_STATS *rd_stats, BLOCK_SIZE bsize, int mi_row,
4817 int mi_col, int64_t ref_best_rd) {
Jingning Han2b0eeb12017-02-23 15:55:37 -08004818 const AV1_COMMON *cm = &cpi->common;
Yaowu Xuc27fc142016-08-22 16:08:15 -07004819 const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
4820 MACROBLOCKD *const xd = &x->e_mbd;
4821 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
4822 int64_t rd = INT64_MAX;
4823 int64_t best_rd = INT64_MAX;
4824 TX_TYPE tx_type, best_tx_type = DCT_DCT;
4825 const int is_inter = is_inter_block(mbmi);
4826 TX_SIZE best_tx_size[MAX_MIB_SIZE][MAX_MIB_SIZE];
Debargha Mukherjeee4e18fc2017-12-06 23:43:24 -08004827 TX_SIZE best_tx = max_txsize_rect_lookup[1][bsize];
Jingning Hane67b38a2016-11-04 10:30:00 -07004828 TX_SIZE best_min_tx_size = TX_SIZES_ALL;
Jingning Han9ca05b72017-01-03 14:41:36 -08004829 uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 8];
Jingning Hane3b81bc2017-06-23 11:43:52 -07004830 TX_TYPE txk_start = DCT_DCT;
4831#if CONFIG_TXK_SEL
4832 TX_TYPE txk_end = DCT_DCT + 1;
4833#else
4834 TX_TYPE txk_end = TX_TYPES;
4835#endif
Angie Chiangf1cb0752017-04-10 16:01:20 -07004836 const int n4 = bsize_to_num_blk(bsize);
Yaowu Xuc27fc142016-08-22 16:08:15 -07004837 int idx, idy;
4838 int prune = 0;
Sarah Parker90024e42017-10-06 16:50:47 -07004839 // Get the tx_size 1 level down
Debargha Mukherjeee4e18fc2017-12-06 23:43:24 -08004840 TX_SIZE min_tx_size = sub_tx_size_map[1][max_txsize_rect_lookup[1][bsize]];
Hui Suddbcde22017-09-18 17:22:02 -07004841 const TxSetType tx_set_type = get_ext_tx_set_type(
Sarah Parker90024e42017-10-06 16:50:47 -07004842 min_tx_size, bsize, is_inter, cm->reduced_tx_set_used);
Jingning Han3de53532017-12-07 13:40:32 -08004843 int within_border = mi_row >= xd->tile.mi_row_start &&
4844 (mi_row + mi_size_high[bsize] < xd->tile.mi_row_end) &&
4845 mi_col >= xd->tile.mi_col_start &&
4846 (mi_col + mi_size_wide[bsize] < xd->tile.mi_col_end);
Yaowu Xuc27fc142016-08-22 16:08:15 -07004847
Angie Chiangc0feea82016-11-03 15:36:18 -07004848 av1_invalid_rd_stats(rd_stats);
Yaowu Xuc27fc142016-08-22 16:08:15 -07004849
Hui Su1ddf2312017-08-19 15:21:34 -07004850 const uint32_t hash = get_block_residue_hash(x, bsize);
4851 TX_RD_RECORD *tx_rd_record = &x->tx_rd_record;
4852
Yue Chen25dc0702017-10-18 23:36:06 -07004853 if (ref_best_rd != INT64_MAX && within_border) {
Hui Su1ddf2312017-08-19 15:21:34 -07004854 for (int i = 0; i < tx_rd_record->num; ++i) {
4855 const int index = (tx_rd_record->index_start + i) % RD_RECORD_BUFFER_LEN;
4856 // If there is a match in the tx_rd_record, fetch the RD decision and
4857 // terminate early.
4858 if (tx_rd_record->tx_rd_info[index].hash_value == hash) {
4859 TX_RD_INFO *tx_rd_info = &tx_rd_record->tx_rd_info[index];
4860 fetch_tx_rd_info(n4, tx_rd_info, rd_stats, x);
4861 return;
4862 }
4863 }
4864 }
4865
Alexander Bokov80eedf22017-11-02 12:48:52 -07004866 // If we predict that skip is the optimal RD decision - set the respective
4867 // context and terminate early.
Hui Su3889c6d2017-12-04 17:02:44 -08004868 int64_t dist;
Alexander Bokov80eedf22017-11-02 12:48:52 -07004869 if (is_inter && cpi->sf.tx_type_search.use_skip_flag_prediction &&
Hui Su3889c6d2017-12-04 17:02:44 -08004870 predict_skip_flag(x, bsize, &dist)) {
4871 set_skip_flag(cpi, x, rd_stats, bsize, dist);
Hui Su89ef4932017-11-28 10:54:31 -08004872 // Save the RD search results into tx_rd_record.
4873 if (within_border) save_tx_rd_info(n4, hash, x, rd_stats, tx_rd_record);
Alexander Bokov80eedf22017-11-02 12:48:52 -07004874 return;
Alexander Bokov8829a242017-08-31 18:07:05 -07004875 }
4876
Alexander Bokovc5ddf062017-10-17 16:41:46 -07004877 // Precompute residual hashes and find existing or add new RD records to
4878 // store and reuse rate and distortion values to speed up TX size search.
4879 TX_SIZE_RD_INFO_NODE matched_rd_info[16 + 64 + 256];
4880 int found_rd_info = 0;
4881 if (ref_best_rd != INT64_MAX && within_border) {
4882 found_rd_info =
4883 find_tx_size_rd_records(x, bsize, mi_row, mi_col, matched_rd_info);
4884 }
4885
Alexander Bokov0c7eb102017-09-07 18:49:00 -07004886 if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE &&
4887 !x->use_default_inter_tx_type && !xd->lossless[mbmi->segment_id]) {
Alexander Bokov79a37242017-09-29 11:25:55 -07004888 prune = prune_tx(cpi, bsize, x, xd, tx_set_type,
4889 cpi->sf.tx_type_search.use_tx_size_pruning);
Alexander Bokov0c7eb102017-09-07 18:49:00 -07004890 }
Alexander Bokov8829a242017-08-31 18:07:05 -07004891
Rupert Swarbrickde2ea942017-10-09 15:21:21 +01004892 int found = 0;
4893
Alexander Bokov79a37242017-09-29 11:25:55 -07004894 int tx_split_prune_flag = 0;
4895 if (is_inter && cpi->sf.tx_type_search.prune_mode >= PRUNE_2D_ACCURATE)
4896 tx_split_prune_flag = ((prune >> TX_TYPES) & 1);
4897
Jingning Hane3b81bc2017-06-23 11:43:52 -07004898 for (tx_type = txk_start; tx_type < txk_end; ++tx_type) {
Angie Chiangb5dda482016-11-02 16:19:58 -07004899 RD_STATS this_rd_stats;
Angie Chiangc0feea82016-11-03 15:36:18 -07004900 av1_init_rd_stats(&this_rd_stats);
Hui Suddbcde22017-09-18 17:22:02 -07004901 if (!av1_ext_tx_used[tx_set_type][tx_type]) continue;
Sarah Parker90024e42017-10-06 16:50:47 -07004902 (void)prune;
Sebastien Alaiwan3bac9922017-11-02 12:34:41 +01004903 // TODO(sarahparker) This speed feature has been temporarily disabled
4904 // with ext-tx because it is not compatible with the current
4905 // search method. It will be fixed in a followup.
4906 /*
4907 if (is_inter) {
4908 if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE) {
4909 if (!do_tx_type_search(tx_type, prune,
4910 cpi->sf.tx_type_search.prune_mode))
4911 continue;
4912 }
4913 } else {
4914 if (!ALLOW_INTRA_EXT_TX && bsize >= BLOCK_8X8) {
4915 if (tx_type != intra_mode_to_tx_type_context[mbmi->mode]) continue;
4916 }
4917 }
4918 */
Yaowu Xuc27fc142016-08-22 16:08:15 -07004919 if (is_inter && x->use_default_inter_tx_type &&
4920 tx_type != get_default_tx_type(0, xd, 0, max_tx_size))
4921 continue;
4922
Jingning Hane67b38a2016-11-04 10:30:00 -07004923 if (xd->lossless[mbmi->segment_id])
4924 if (tx_type != DCT_DCT) continue;
4925
Debargha Mukherjee51666862017-10-24 14:29:13 -07004926 rd = select_tx_size_fix_type(cpi, x, &this_rd_stats, bsize, mi_row, mi_col,
Alexander Bokovc5ddf062017-10-17 16:41:46 -07004927 ref_best_rd, tx_type, tx_split_prune_flag,
4928 found_rd_info ? matched_rd_info : NULL);
Sarah Parker90024e42017-10-06 16:50:47 -07004929 // If the current tx_type is not included in the tx_set for the smallest
4930 // tx size found, then all vartx partitions were actually transformed with
4931 // DCT_DCT and we should avoid picking it.
4932 const TxSetType min_tx_set_type = get_ext_tx_set_type(
4933 mbmi->min_tx_size, bsize, is_inter, cm->reduced_tx_set_used);
4934 if (!av1_ext_tx_used[min_tx_set_type][tx_type]) continue;
Sarah Parker90024e42017-10-06 16:50:47 -07004935
Hui Suda816a12017-08-18 14:46:02 -07004936 ref_best_rd = AOMMIN(rd, ref_best_rd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07004937 if (rd < best_rd) {
4938 best_rd = rd;
Angie Chiangb5dda482016-11-02 16:19:58 -07004939 *rd_stats = this_rd_stats;
Yaowu Xuc27fc142016-08-22 16:08:15 -07004940 best_tx_type = mbmi->tx_type;
4941 best_tx = mbmi->tx_size;
Jingning Hane67b38a2016-11-04 10:30:00 -07004942 best_min_tx_size = mbmi->min_tx_size;
Yaowu Xuc27fc142016-08-22 16:08:15 -07004943 memcpy(best_blk_skip, x->blk_skip[0], sizeof(best_blk_skip[0]) * n4);
Rupert Swarbrickde2ea942017-10-09 15:21:21 +01004944 found = 1;
Yaowu Xuc27fc142016-08-22 16:08:15 -07004945 for (idy = 0; idy < xd->n8_h; ++idy)
4946 for (idx = 0; idx < xd->n8_w; ++idx)
4947 best_tx_size[idy][idx] = mbmi->inter_tx_size[idy][idx];
4948 }
4949 }
4950
Rupert Swarbrickde2ea942017-10-09 15:21:21 +01004951 // We should always find at least one candidate unless ref_best_rd is less
4952 // than INT64_MAX (in which case, all the calls to select_tx_size_fix_type
4953 // might have failed to find something better)
4954 assert(IMPLIES(!found, ref_best_rd != INT64_MAX));
4955 if (!found) return;
4956
4957 // We found a candidate transform to use. Copy our results from the "best"
4958 // array into mbmi.
Yaowu Xuc27fc142016-08-22 16:08:15 -07004959 mbmi->tx_type = best_tx_type;
4960 for (idy = 0; idy < xd->n8_h; ++idy)
4961 for (idx = 0; idx < xd->n8_w; ++idx)
4962 mbmi->inter_tx_size[idy][idx] = best_tx_size[idy][idx];
4963 mbmi->tx_size = best_tx;
Jingning Hane67b38a2016-11-04 10:30:00 -07004964 mbmi->min_tx_size = best_min_tx_size;
Yaowu Xuc27fc142016-08-22 16:08:15 -07004965 memcpy(x->blk_skip[0], best_blk_skip, sizeof(best_blk_skip[0]) * n4);
Hui Su1ddf2312017-08-19 15:21:34 -07004966
4967 // Save the RD search results into tx_rd_record.
Hui Su89ef4932017-11-28 10:54:31 -08004968 if (within_border) save_tx_rd_info(n4, hash, x, rd_stats, tx_rd_record);
Yaowu Xuc27fc142016-08-22 16:08:15 -07004969}
4970
// Recursively accumulate the rate-distortion stats of one transform block of
// an inter-coded block, following the variable-transform-size (var-tx)
// partition recorded in mbmi->inter_tx_size. Results are summed into
// *rd_stats; above_ctx/left_ctx entropy contexts are updated in place.
static void tx_block_rd(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
                        int blk_col, int plane, int block, TX_SIZE tx_size,
                        BLOCK_SIZE plane_bsize, ENTROPY_CONTEXT *above_ctx,
                        ENTROPY_CONTEXT *left_ctx, RD_STATS *rd_stats,
                        int fast) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  struct macroblockd_plane *const pd = &xd->plane[plane];
  BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
  // Map the plane-relative block position onto the luma-granularity
  // inter_tx_size grid. NOTE(review): the (1 - subsampling) shift assumes
  // subsampling values are only 0 or 1.
  const int tx_row = blk_row >> (1 - pd->subsampling_y);
  const int tx_col = blk_col >> (1 - pd->subsampling_x);
  TX_SIZE plane_tx_size;
  const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
  const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);

  assert(tx_size < TX_SIZES_ALL);

  // Skip positions that fall outside the visible part of the block.
  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;

  // Chroma derives its transform size from the co-located luma choice via
  // the lookup table; luma reads it directly.
  plane_tx_size =
      plane ? uv_txsize_lookup[bsize][mbmi->inter_tx_size[tx_row][tx_col]][0][0]
            : mbmi->inter_tx_size[tx_row][tx_col];

  // Leaf case: the recursion has reached the coded transform size (chroma is
  // always a leaf when var-tx is disabled for subsampled planes).
  if (tx_size == plane_tx_size
#if DISABLE_VARTX_FOR_CHROMA
      || pd->subsampling_x || pd->subsampling_y
#endif  // DISABLE_VARTX_FOR_CHROMA
      ) {
    ENTROPY_CONTEXT *ta = above_ctx + blk_col;
    ENTROPY_CONTEXT *tl = left_ctx + blk_row;
    av1_tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
                      plane_bsize, ta, tl, rd_stats, fast, NULL);
    av1_set_txb_context(x, plane, block, tx_size, ta, tl);
  } else {
    // Split case: recurse into the four (or two, for rectangular sizes)
    // sub-transforms one level down.
    const TX_SIZE sub_txs = sub_tx_size_map[1][tx_size];
    assert(IMPLIES(tx_size <= TX_4X4, sub_txs == tx_size));
    assert(IMPLIES(tx_size > TX_4X4, sub_txs < tx_size));
    const int bsw = tx_size_wide_unit[sub_txs];
    const int bsh = tx_size_high_unit[sub_txs];
    const int step = bsh * bsw;  // coefficient-block stride per sub-transform
    assert(bsw > 0 && bsh > 0);
    for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) {
      for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) {
        const int offsetr = blk_row + row;
        const int offsetc = blk_col + col;
        // Out-of-picture sub-blocks are skipped without advancing `block`,
        // matching how blocks are indexed elsewhere in the var-tx search.
        if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
        tx_block_rd(cpi, x, offsetr, offsetc, plane, block, sub_txs,
                    plane_bsize, above_ctx, left_ctx, rd_stats, fast);
        block += step;
      }
    }
  }
}
5024
// Compute the rate-distortion cost of the chroma (U and V) planes of an
// inter-coded block, walking each plane at its maximum var-tx transform size
// and accumulating per-transform costs via tx_block_rd().
// Return value 0: early termination triggered, no valid rd cost available;
//              1: rd cost values are valid.
int inter_block_uvrd(const AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_stats,
                     BLOCK_SIZE bsize, int64_t ref_best_rd, int fast) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int plane;
  int is_cost_valid = 1;
  int64_t this_rd = 0;

  // A negative budget means the caller already knows this path can't win.
  if (ref_best_rd < 0) is_cost_valid = 0;

  av1_init_rd_stats(rd_stats);

  // Nothing to do when chroma RD is skipped for this block.
  if (x->skip_chroma_rd) return is_cost_valid;
  // Promote sub-8x8 luma sizes to a chroma-codable size.
  const BLOCK_SIZE bsizec = scale_chroma_bsize(
      bsize, xd->plane[1].subsampling_x, xd->plane[1].subsampling_y);

  // Produce the chroma residuals before costing them.
  if (is_inter_block(mbmi) && is_cost_valid) {
    for (plane = 1; plane < MAX_MB_PLANE; ++plane)
      av1_subtract_plane(x, bsizec, plane);
  }

  if (is_cost_valid) {
    for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
      const struct macroblockd_plane *const pd = &xd->plane[plane];
      const BLOCK_SIZE plane_bsize = get_plane_block_size(bsizec, pd);
      const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
      const int mi_height =
          block_size_high[plane_bsize] >> tx_size_high_log2[0];
      TX_SIZE max_tx_size = get_vartx_max_txsize(
          xd, plane_bsize, pd->subsampling_x || pd->subsampling_y);
#if DISABLE_VARTX_FOR_CHROMA == 2
      // If the luma transform size is split at least one level, split the
      // chroma by one level. Otherwise use the largest possible transform size
      // for chroma.
      if (pd->subsampling_x || pd->subsampling_y) {
        const TX_SIZE l_max_tx_size = get_vartx_max_txsize(xd, bsizec, 0);
        const int is_split =
            (l_max_tx_size != mbmi->inter_tx_size[0][0] && bsize == bsizec &&
             txsize_to_bsize[l_max_tx_size] == bsizec);
        if (is_split) max_tx_size = sub_tx_size_map[1][max_tx_size];
      }
#endif  // DISABLE_VARTX_FOR_CHROMA == 2
      const int bh = tx_size_high_unit[max_tx_size];
      const int bw = tx_size_wide_unit[max_tx_size];
      int idx, idy;
      int block = 0;
      const int step = bh * bw;  // coefficient-block stride per transform
      ENTROPY_CONTEXT ta[2 * MAX_MIB_SIZE];
      ENTROPY_CONTEXT tl[2 * MAX_MIB_SIZE];
      RD_STATS pn_rd_stats;
      av1_init_rd_stats(&pn_rd_stats);
      av1_get_entropy_contexts(bsizec, 0, pd, ta, tl);

      // Cost every transform block of this plane at max_tx_size.
      for (idy = 0; idy < mi_height; idy += bh) {
        for (idx = 0; idx < mi_width; idx += bw) {
          tx_block_rd(cpi, x, idy, idx, plane, block, max_tx_size, plane_bsize,
                      ta, tl, &pn_rd_stats, fast);
          block += step;
        }
      }

      // INT_MAX rate marks a failed/aborted transform search.
      if (pn_rd_stats.rate == INT_MAX) {
        is_cost_valid = 0;
        break;
      }

      av1_merge_rd_stats(rd_stats, &pn_rd_stats);

      // Lower bound: the cheaper of coding the coefficients vs. skipping
      // (zero_rate/sse). Used only for early termination below.
      this_rd = AOMMIN(RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist),
                       RDCOST(x->rdmult, rd_stats->zero_rate, rd_stats->sse));

      // Early out once the running cost exceeds the caller's budget.
      if (this_rd > ref_best_rd) {
        is_cost_valid = 0;
        break;
      }
    }
  }

  if (!is_cost_valid) {
    // reset cost value
    av1_invalid_rd_stats(rd_stats);
  }

  return is_cost_valid;
}
Yaowu Xuc27fc142016-08-22 16:08:15 -07005112
// Search for the best chroma palette mode for an intra block: count distinct
// chroma colors, run 2-D (U,V) k-means for each candidate palette size, and
// keep the palette/color-map whose total RD cost beats *best_rd. On success,
// updates *best_mbmi, best_palette_color_map, *best_rd and the output
// rate/distortion/skip values; otherwise leaves them untouched.
static void rd_pick_palette_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
                                       int dc_mode_cost,
                                       uint8_t *best_palette_color_map,
                                       MB_MODE_INFO *const best_mbmi,
                                       int64_t *best_rd, int *rate,
                                       int *rate_tokenonly, int64_t *distortion,
                                       int *skippable) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  assert(!is_inter_block(mbmi));
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
  const BLOCK_SIZE bsize = mbmi->sb_type;
  assert(bsize >= BLOCK_8X8);
  int this_rate;
  int64_t this_rd;
  int colors_u, colors_v, colors;
  const int src_stride = x->plane[1].src.stride;
  const uint8_t *const src_u = x->plane[1].src.buf;
  const uint8_t *const src_v = x->plane[2].src.buf;
  uint8_t *const color_map = xd->plane[1].color_index_map;
  RD_STATS tokenonly_rd_stats;
  int plane_block_width, plane_block_height, rows, cols;
  av1_get_block_dimensions(bsize, 1, xd, &plane_block_width,
                           &plane_block_height, &rows, &cols);
  // Palette is only allowed up to a bounded block area.
  if (rows * cols > MAX_PALETTE_SQUARE) return;

  // Palette mode is signaled on top of DC prediction.
  mbmi->uv_mode = UV_DC_PRED;
#if CONFIG_FILTER_INTRA
  mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
#endif  // CONFIG_FILTER_INTRA

  int count_buf[1 << 12];  // Maximum (1 << 12) color levels.
#if CONFIG_HIGHBITDEPTH
  if (cpi->common.use_highbitdepth) {
    colors_u = av1_count_colors_highbd(src_u, src_stride, rows, cols,
                                       cpi->common.bit_depth, count_buf);
    colors_v = av1_count_colors_highbd(src_v, src_stride, rows, cols,
                                       cpi->common.bit_depth, count_buf);
  } else {
#endif  // CONFIG_HIGHBITDEPTH
    colors_u = av1_count_colors(src_u, src_stride, rows, cols, count_buf);
    colors_v = av1_count_colors(src_v, src_stride, rows, cols, count_buf);
#if CONFIG_HIGHBITDEPTH
  }
#endif  // CONFIG_HIGHBITDEPTH

#if CONFIG_PALETTE_DELTA_ENCODING
  // Colors already available from neighboring blocks; reused to cheapen
  // palette signaling.
  uint16_t color_cache[2 * PALETTE_MAX_SIZE];
  const int n_cache = av1_get_palette_cache(xd, 1, color_cache);
#endif  // CONFIG_PALETTE_DELTA_ENCODING

  // Gate the search on the busier of the two chroma planes.
  colors = colors_u > colors_v ? colors_u : colors_v;
  if (colors > 1 && colors <= 64) {
    aom_clear_system_state();  // float code follows; reset x87/SIMD state
    int r, c, n, i, j;
    const int max_itr = 50;  // k-means iteration cap
    float lb_u, ub_u, val_u;
    float lb_v, ub_v, val_v;
    float *const data = x->palette_buffer->kmeans_data_buf;
    float centroids[2 * PALETTE_MAX_SIZE];

#if CONFIG_HIGHBITDEPTH
    uint16_t *src_u16 = CONVERT_TO_SHORTPTR(src_u);
    uint16_t *src_v16 = CONVERT_TO_SHORTPTR(src_v);
    if (cpi->common.use_highbitdepth) {
      lb_u = src_u16[0];
      ub_u = src_u16[0];
      lb_v = src_v16[0];
      ub_v = src_v16[0];
    } else {
#endif  // CONFIG_HIGHBITDEPTH
      lb_u = src_u[0];
      ub_u = src_u[0];
      lb_v = src_v[0];
      ub_v = src_v[0];
#if CONFIG_HIGHBITDEPTH
    }
#endif  // CONFIG_HIGHBITDEPTH

    // Gather interleaved (U,V) samples and track per-channel min/max for
    // centroid seeding.
    for (r = 0; r < rows; ++r) {
      for (c = 0; c < cols; ++c) {
#if CONFIG_HIGHBITDEPTH
        if (cpi->common.use_highbitdepth) {
          val_u = src_u16[r * src_stride + c];
          val_v = src_v16[r * src_stride + c];
          data[(r * cols + c) * 2] = val_u;
          data[(r * cols + c) * 2 + 1] = val_v;
        } else {
#endif  // CONFIG_HIGHBITDEPTH
          val_u = src_u[r * src_stride + c];
          val_v = src_v[r * src_stride + c];
          data[(r * cols + c) * 2] = val_u;
          data[(r * cols + c) * 2 + 1] = val_v;
#if CONFIG_HIGHBITDEPTH
        }
#endif  // CONFIG_HIGHBITDEPTH
        if (val_u < lb_u)
          lb_u = val_u;
        else if (val_u > ub_u)
          ub_u = val_u;
        if (val_v < lb_v)
          lb_v = val_v;
        else if (val_v > ub_v)
          ub_v = val_v;
      }
    }

    // Try every palette size from min(colors, PALETTE_MAX_SIZE) down to 2.
    for (n = colors > PALETTE_MAX_SIZE ? PALETTE_MAX_SIZE : colors; n >= 2;
         --n) {
      // Seed centroids evenly across the observed [lb, ub] range.
      for (i = 0; i < n; ++i) {
        centroids[i * 2] = lb_u + (2 * i + 1) * (ub_u - lb_u) / n / 2;
        centroids[i * 2 + 1] = lb_v + (2 * i + 1) * (ub_v - lb_v) / n / 2;
      }
      av1_k_means(data, centroids, color_map, rows * cols, n, 2, max_itr);
#if CONFIG_PALETTE_DELTA_ENCODING
      optimize_palette_colors(color_cache, n_cache, n, 2, centroids);
      // Sort the U channel colors in ascending order.
      for (i = 0; i < 2 * (n - 1); i += 2) {
        int min_idx = i;
        float min_val = centroids[i];
        for (j = i + 2; j < 2 * n; j += 2)
          if (centroids[j] < min_val) min_val = centroids[j], min_idx = j;
        if (min_idx != i) {
          float temp_u = centroids[i], temp_v = centroids[i + 1];
          centroids[i] = centroids[min_idx];
          centroids[i + 1] = centroids[min_idx + 1];
          centroids[min_idx] = temp_u, centroids[min_idx + 1] = temp_v;
        }
      }
      // Re-map samples after the colors moved during optimization/sorting.
      av1_calc_indices(data, centroids, color_map, rows * cols, n, 2);
#endif  // CONFIG_PALETTE_DELTA_ENCODING
      extend_palette_color_map(color_map, cols, rows, plane_block_width,
                               plane_block_height);
      pmi->palette_size[1] = n;
      // Write back the quantized U (i==1) and V (i==2) palette entries.
      for (i = 1; i < 3; ++i) {
        for (j = 0; j < n; ++j) {
#if CONFIG_HIGHBITDEPTH
          if (cpi->common.use_highbitdepth)
            pmi->palette_colors[i * PALETTE_MAX_SIZE + j] = clip_pixel_highbd(
                (int)centroids[j * 2 + i - 1], cpi->common.bit_depth);
          else
#endif  // CONFIG_HIGHBITDEPTH
            pmi->palette_colors[i * PALETTE_MAX_SIZE + j] =
                clip_pixel((int)centroids[j * 2 + i - 1]);
        }
      }

      // Full RD evaluation: token cost + mode/palette-size/color signaling.
      super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
      if (tokenonly_rd_stats.rate == INT_MAX) continue;
      this_rate =
          tokenonly_rd_stats.rate + dc_mode_cost +
          x->palette_uv_size_cost[bsize - BLOCK_8X8][n - PALETTE_MIN_SIZE] +
          write_uniform_cost(n, color_map[0]) +
          x->palette_uv_mode_cost[pmi->palette_size[0] > 0][1];
      this_rate += av1_palette_color_cost_uv(pmi,
#if CONFIG_PALETTE_DELTA_ENCODING
                                             color_cache, n_cache,
#endif  // CONFIG_PALETTE_DELTA_ENCODING
                                             cpi->common.bit_depth);
      this_rate +=
          av1_cost_color_map(x, 1, 0, bsize, mbmi->tx_size, PALETTE_MAP);
      this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
      if (this_rd < *best_rd) {
        *best_rd = this_rd;
        *best_mbmi = *mbmi;
        memcpy(best_palette_color_map, color_map,
               plane_block_width * plane_block_height *
                   sizeof(best_palette_color_map[0]));
        *rate = this_rate;
        *distortion = tokenonly_rd_stats.dist;
        *rate_tokenonly = tokenonly_rd_stats.rate;
        *skippable = tokenonly_rd_stats.skip;
      }
    }
  }
  // Restore the winning color map into the working buffer for the caller.
  if (best_mbmi->palette_mode_info.palette_size[1] > 0) {
    memcpy(color_map, best_palette_color_map,
           plane_block_width * plane_block_height *
               sizeof(best_palette_color_map[0]));
  }
}
5294
#if CONFIG_EXT_INTRA
// Run RD calculation with the given chroma intra prediction angle, and return
// the RD cost. Update the best mode info if the RD cost is the best so far.
// best_rd_in is the pruning threshold passed to super_block_uvrd; a failed
// search returns INT64_MAX without touching the outputs.
static int64_t pick_intra_angle_routine_sbuv(
    const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
    int rate_overhead, int64_t best_rd_in, int *rate, RD_STATS *rd_stats,
    int *best_angle_delta, int64_t *best_rd) {
  MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
  assert(!is_inter_block(mbmi));
  int this_rate;
  int64_t this_rd;
  RD_STATS tokenonly_rd_stats;

  // Evaluate the current mbmi->angle_delta[1]; bail out if the token search
  // exceeds the budget.
  if (!super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd_in))
    return INT64_MAX;
  this_rate = tokenonly_rd_stats.rate + rate_overhead;
#if CONFIG_EXT_INTRA_MOD
  // Add the cost of signaling this angle delta for the directional uv mode.
  this_rate += x->angle_delta_cost[mbmi->uv_mode - V_PRED]
                                  [mbmi->angle_delta[1] + MAX_ANGLE_DELTA];
#endif  // CONFIG_EXT_INTRA_MOD
  this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
  if (this_rd < *best_rd) {
    *best_rd = this_rd;
    *best_angle_delta = mbmi->angle_delta[1];
    *rate = this_rate;
    rd_stats->rate = tokenonly_rd_stats.rate;
    rd_stats->dist = tokenonly_rd_stats.dist;
    rd_stats->skip = tokenonly_rd_stats.skip;
  }
  return this_rd;
}
5326
// With given chroma directional intra prediction mode, pick the best angle
// delta. Return true if a RD cost that is smaller than the input one is found.
// Two-pass search: even deltas first (recording costs in rd_cost[]), then odd
// deltas, skipped when both even neighbors already exceeded a threshold.
static int rd_pick_intra_angle_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
                                    BLOCK_SIZE bsize, int rate_overhead,
                                    int64_t best_rd, int *rate,
                                    RD_STATS *rd_stats) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  assert(!is_inter_block(mbmi));
  int i, angle_delta, best_angle_delta = 0;
  // rd_cost[2 * delta + i] holds the cost of delta with sign (1 - 2*i).
  int64_t this_rd, best_rd_in, rd_cost[2 * (MAX_ANGLE_DELTA + 2)];

  rd_stats->rate = INT_MAX;
  rd_stats->skip = 0;
  rd_stats->dist = INT64_MAX;
  for (i = 0; i < 2 * (MAX_ANGLE_DELTA + 2); ++i) rd_cost[i] = INT64_MAX;

  // Pass 1: even angle deltas, both signs. Delta 0 is evaluated once (the
  // inner `break`) and its cost mirrored into both sign slots.
  for (angle_delta = 0; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
    for (i = 0; i < 2; ++i) {
      // Allow a small slack over the current best; tighter for delta 0.
      best_rd_in = (best_rd == INT64_MAX)
                       ? INT64_MAX
                       : (best_rd + (best_rd >> ((angle_delta == 0) ? 3 : 5)));
      mbmi->angle_delta[1] = (1 - 2 * i) * angle_delta;
      this_rd = pick_intra_angle_routine_sbuv(cpi, x, bsize, rate_overhead,
                                              best_rd_in, rate, rd_stats,
                                              &best_angle_delta, &best_rd);
      rd_cost[2 * angle_delta + i] = this_rd;
      if (angle_delta == 0) {
        // If even delta 0 fails within the slack, no angle can win.
        if (this_rd == INT64_MAX) return 0;
        rd_cost[1] = this_rd;
        break;
      }
    }
  }

  assert(best_rd != INT64_MAX);
  // Pass 2: odd angle deltas, skipped when both even neighbors were already
  // clearly worse than the running best.
  for (angle_delta = 1; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
    int64_t rd_thresh;
    for (i = 0; i < 2; ++i) {
      int skip_search = 0;
      rd_thresh = best_rd + (best_rd >> 5);
      if (rd_cost[2 * (angle_delta + 1) + i] > rd_thresh &&
          rd_cost[2 * (angle_delta - 1) + i] > rd_thresh)
        skip_search = 1;
      if (!skip_search) {
        mbmi->angle_delta[1] = (1 - 2 * i) * angle_delta;
        pick_intra_angle_routine_sbuv(cpi, x, bsize, rate_overhead, best_rd,
                                      rate, rd_stats, &best_angle_delta,
                                      &best_rd);
      }
    }
  }

  mbmi->angle_delta[1] = best_angle_delta;
  return rd_stats->rate != INT_MAX;
}
#endif  // CONFIG_EXT_INTRA
5384
David Michael Barr2510f642017-07-11 23:39:20 +09005385#if CONFIG_CFL
David Michael Barr1f8d0952017-10-11 17:46:39 +09005386static void txfm_rd_in_plane_once(MACROBLOCK *const x,
5387 const AV1_COMP *const cpi, BLOCK_SIZE bsize,
David Michael Barr91345862017-10-14 22:20:36 +09005388 TX_SIZE tx_size, int plane, int64_t best_rd,
5389 int64_t *dist, int *rate) {
David Michael Barr1f8d0952017-10-11 17:46:39 +09005390 RD_STATS rd_stats;
5391 av1_init_rd_stats(&rd_stats);
David Michael Barr91345862017-10-14 22:20:36 +09005392 txfm_rd_in_plane(x, cpi, &rd_stats, best_rd, plane, bsize, tx_size,
David Michael Barr1f8d0952017-10-11 17:46:39 +09005393 cpi->sf.use_fast_coef_costing);
5394 *dist = rd_stats.dist;
5395 *rate = rd_stats.rate;
Luc Trudeau056d1f42017-09-15 17:38:14 -04005396}
David Michael Barr2510f642017-07-11 23:39:20 +09005397
David Michael Barr1f8d0952017-10-11 17:46:39 +09005398static int cfl_rd_pick_alpha(MACROBLOCK *const x, const AV1_COMP *const cpi,
David Michael Barr91345862017-10-14 22:20:36 +09005399 BLOCK_SIZE bsize, TX_SIZE tx_size,
5400 int64_t best_rd) {
David Michael Barr2510f642017-07-11 23:39:20 +09005401 MACROBLOCKD *const xd = &x->e_mbd;
David Michael Barr1f8d0952017-10-11 17:46:39 +09005402 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
5403 bsize = scale_chroma_bsize(bsize, xd->plane[AOM_PLANE_U].subsampling_x,
5404 xd->plane[AOM_PLANE_U].subsampling_y);
David Michael Barr2510f642017-07-11 23:39:20 +09005405
David Michael Barr1f8d0952017-10-11 17:46:39 +09005406 int rates[CFL_PRED_PLANES][CFL_MAGS_SIZE];
5407 int64_t dists[CFL_PRED_PLANES][CFL_MAGS_SIZE];
5408 mbmi->cfl_alpha_idx = 0;
5409 mbmi->cfl_alpha_signs = CFL_SIGN_ZERO * CFL_SIGNS + CFL_SIGN_POS - 1;
David Michael Barr91345862017-10-14 22:20:36 +09005410 txfm_rd_in_plane_once(x, cpi, bsize, tx_size, AOM_PLANE_U, best_rd,
David Michael Barr1f8d0952017-10-11 17:46:39 +09005411 &dists[CFL_PRED_U][0], &rates[CFL_PRED_U][0]);
5412 mbmi->cfl_alpha_signs = CFL_SIGN_POS * CFL_SIGNS + CFL_SIGN_ZERO - 1;
David Michael Barr91345862017-10-14 22:20:36 +09005413 txfm_rd_in_plane_once(x, cpi, bsize, tx_size, AOM_PLANE_V, best_rd,
David Michael Barr1f8d0952017-10-11 17:46:39 +09005414 &dists[CFL_PRED_V][0], &rates[CFL_PRED_V][0]);
David Michael Barr2510f642017-07-11 23:39:20 +09005415
David Michael Barrf6eaa152017-07-19 19:42:28 +09005416 for (int c = 0; c < CFL_ALPHABET_SIZE; c++) {
David Michael Barr1f8d0952017-10-11 17:46:39 +09005417 mbmi->cfl_alpha_idx = (c << CFL_ALPHABET_SIZE_LOG2) + c;
5418 for (int sign = CFL_SIGN_NEG; sign < CFL_SIGNS; sign++) {
5419 const int m = c * 2 + 1 + (sign == CFL_SIGN_NEG);
5420 mbmi->cfl_alpha_signs = sign * CFL_SIGNS + sign - 1;
David Michael Barr91345862017-10-14 22:20:36 +09005421 txfm_rd_in_plane_once(x, cpi, bsize, tx_size, AOM_PLANE_U, best_rd,
David Michael Barr1f8d0952017-10-11 17:46:39 +09005422 &dists[CFL_PRED_U][m], &rates[CFL_PRED_U][m]);
David Michael Barr91345862017-10-14 22:20:36 +09005423 txfm_rd_in_plane_once(x, cpi, bsize, tx_size, AOM_PLANE_V, best_rd,
David Michael Barr1f8d0952017-10-11 17:46:39 +09005424 &dists[CFL_PRED_V][m], &rates[CFL_PRED_V][m]);
5425 }
David Michael Barr2510f642017-07-11 23:39:20 +09005426 }
5427
Luc Trudeau4c5df102017-07-08 14:43:27 -04005428 int64_t dist;
David Michael Barr2510f642017-07-11 23:39:20 +09005429 int64_t cost;
David Michael Barrf6eaa152017-07-19 19:42:28 +09005430 int64_t best_cost = INT64_MAX;
David Michael Barr91345862017-10-14 22:20:36 +09005431 int best_rate_overhead = INT_MAX;
David Michael Barr1f8d0952017-10-11 17:46:39 +09005432#if CONFIG_DEBUG
David Michael Barr91345862017-10-14 22:20:36 +09005433 int best_rate = INT_MAX;
David Michael Barr1f8d0952017-10-11 17:46:39 +09005434#endif // CONFIG_DEBUG
David Michael Barr2510f642017-07-11 23:39:20 +09005435
David Michael Barr2510f642017-07-11 23:39:20 +09005436 int ind = 0;
David Michael Barrf6eaa152017-07-19 19:42:28 +09005437 int signs = 0;
David Michael Barr2510f642017-07-11 23:39:20 +09005438
David Michael Barrf6eaa152017-07-19 19:42:28 +09005439 for (int joint_sign = 0; joint_sign < CFL_JOINT_SIGNS; joint_sign++) {
5440 const int sign_u = CFL_SIGN_U(joint_sign);
5441 const int sign_v = CFL_SIGN_V(joint_sign);
5442 const int size_u = (sign_u == CFL_SIGN_ZERO) ? 1 : CFL_ALPHABET_SIZE;
5443 const int size_v = (sign_v == CFL_SIGN_ZERO) ? 1 : CFL_ALPHABET_SIZE;
5444 for (int u = 0; u < size_u; u++) {
David Michael Barr91345862017-10-14 22:20:36 +09005445 const int idx_u = ((sign_u == CFL_SIGN_ZERO) ? 0 : u * 2 + 1) +
5446 (sign_u == CFL_SIGN_NEG);
5447 if (rates[CFL_PRED_U][idx_u] == INT_MAX) continue;
David Michael Barrf6eaa152017-07-19 19:42:28 +09005448 for (int v = 0; v < size_v; v++) {
David Michael Barr91345862017-10-14 22:20:36 +09005449 const int idx_v = ((sign_v == CFL_SIGN_ZERO) ? 0 : v * 2 + 1) +
5450 (sign_v == CFL_SIGN_NEG);
5451 if (rates[CFL_PRED_V][idx_v] == INT_MAX) continue;
5452 dist = dists[CFL_PRED_U][idx_u] + dists[CFL_PRED_V][idx_v];
David Michael Barr1f8d0952017-10-11 17:46:39 +09005453 int rate_overhead = x->cfl_cost[joint_sign][CFL_PRED_U][u] +
5454 x->cfl_cost[joint_sign][CFL_PRED_V][v];
5455 int rate = x->intra_uv_mode_cost[mbmi->mode][UV_CFL_PRED] +
David Michael Barr91345862017-10-14 22:20:36 +09005456 rate_overhead + rates[CFL_PRED_U][idx_u] +
5457 rates[CFL_PRED_V][idx_v];
David Michael Barrf6eaa152017-07-19 19:42:28 +09005458 cost = RDCOST(x->rdmult, rate, dist);
David Michael Barr2510f642017-07-11 23:39:20 +09005459 if (cost < best_cost) {
5460 best_cost = cost;
David Michael Barr1f8d0952017-10-11 17:46:39 +09005461 best_rate_overhead = rate_overhead;
David Michael Barrf6eaa152017-07-19 19:42:28 +09005462 ind = (u << CFL_ALPHABET_SIZE_LOG2) + v;
5463 signs = joint_sign;
David Michael Barr1f8d0952017-10-11 17:46:39 +09005464#if CONFIG_DEBUG
5465 best_rate = rate;
5466#endif // CONFIG_DEBUG
David Michael Barr2510f642017-07-11 23:39:20 +09005467 }
5468 }
5469 }
5470 }
5471
5472 mbmi->cfl_alpha_idx = ind;
David Michael Barrf6eaa152017-07-19 19:42:28 +09005473 mbmi->cfl_alpha_signs = signs;
David Michael Barr1f8d0952017-10-11 17:46:39 +09005474#if CONFIG_DEBUG
Luc Trudeau1e84af52017-11-25 15:00:28 -05005475 xd->cfl.rate = best_rate;
David Michael Barr1f8d0952017-10-11 17:46:39 +09005476#endif // CONFIG_DEBUG
5477 return best_rate_overhead;
David Michael Barr2510f642017-07-11 23:39:20 +09005478}
5479#endif // CONFIG_CFL
5480
hui sueaddeee2017-05-30 12:19:38 -07005481static void init_sbuv_mode(MB_MODE_INFO *const mbmi) {
Luc Trudeaud6d9eee2017-07-12 12:36:50 -04005482 mbmi->uv_mode = UV_DC_PRED;
hui sueaddeee2017-05-30 12:19:38 -07005483 mbmi->palette_mode_info.palette_size[1] = 0;
hui sueaddeee2017-05-30 12:19:38 -07005484#if CONFIG_FILTER_INTRA
5485 mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
5486#endif // CONFIG_FILTER_INTRA
5487}
5488
// Exhaustive rate-distortion search over the chroma (UV) intra prediction
// modes for the current block. On return *mbmi holds the best mode found,
// and the chosen rate, token-only rate, distortion and skip flag are
// written through the output pointers. Returns the best RD cost; asserts
// that at least one mode was evaluated.
static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
                                       int *rate, int *rate_tokenonly,
                                       int64_t *distortion, int *skippable,
                                       BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  assert(!is_inter_block(mbmi));
  MB_MODE_INFO best_mbmi = *mbmi;
  int64_t best_rd = INT64_MAX, this_rd;
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
  const int try_palette =
      av1_allow_palette(cpi->common.allow_screen_content_tools, mbmi->sb_type);

  // Evaluate every UV intra mode permitted by the speed features.
  for (int mode_idx = 0; mode_idx < UV_INTRA_MODES; ++mode_idx) {
    int this_rate;
    RD_STATS tokenonly_rd_stats;
    UV_PREDICTION_MODE mode = uv_rd_search_mode_order[mode_idx];
#if CONFIG_EXT_INTRA
    const int is_directional_mode =
        av1_is_directional_mode(get_uv_mode(mode), mbmi->sb_type);
#endif  // CONFIG_EXT_INTRA
    // Skip modes masked out for this transform size by the speed features.
    if (!(cpi->sf.intra_uv_mode_mask[txsize_sqr_up_map[max_tx_size]] &
          (1 << mode)))
      continue;

    mbmi->uv_mode = mode;
#if CONFIG_CFL
    int cfl_alpha_rate = 0;
    if (mode == UV_CFL_PRED) {
      // CFL needs its alpha parameters chosen before the mode can be costed;
      // skip the mode entirely if no affordable alpha was found.
      if (!is_cfl_allowed(mbmi)) continue;
      assert(!is_directional_mode);
      const TX_SIZE uv_tx_size = av1_get_uv_tx_size(mbmi, &xd->plane[1]);
      cfl_alpha_rate = cfl_rd_pick_alpha(x, cpi, bsize, uv_tx_size, best_rd);
      if (cfl_alpha_rate == INT_MAX) continue;
    }
#endif
#if CONFIG_EXT_INTRA
    mbmi->angle_delta[1] = 0;
    if (is_directional_mode && av1_use_angle_delta(mbmi->sb_type)) {
      // Directional modes additionally search over the angle delta.
      const int rate_overhead = x->intra_uv_mode_cost[mbmi->mode][mode] +
#if CONFIG_EXT_INTRA_MOD
                                0;
#else
                                write_uniform_cost(2 * MAX_ANGLE_DELTA + 1, 0);
#endif  // CONFIG_EXT_INTRA_MOD
      if (!rd_pick_intra_angle_sbuv(cpi, x, bsize, rate_overhead, best_rd,
                                    &this_rate, &tokenonly_rd_stats))
        continue;
    } else {
#endif  // CONFIG_EXT_INTRA
      if (!super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd)) {
        continue;
      }
#if CONFIG_EXT_INTRA
    }
#endif  // CONFIG_EXT_INTRA
    this_rate =
        tokenonly_rd_stats.rate + x->intra_uv_mode_cost[mbmi->mode][mode];

#if CONFIG_CFL
    if (mode == UV_CFL_PRED) {
      assert(is_cfl_allowed(mbmi));
      this_rate += cfl_alpha_rate;
#if CONFIG_DEBUG
      assert(xd->cfl.rate == this_rate);
#endif  // CONFIG_DEBUG
    }
#endif
#if CONFIG_EXT_INTRA
    if (is_directional_mode && av1_use_angle_delta(mbmi->sb_type)) {
      // Add the cost of signalling the chosen angle delta.
#if CONFIG_EXT_INTRA_MOD
      this_rate += x->angle_delta_cost[mode - V_PRED]
                                      [mbmi->angle_delta[1] + MAX_ANGLE_DELTA];
#else
      this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
                                      MAX_ANGLE_DELTA + mbmi->angle_delta[1]);
#endif  // CONFIG_EXT_INTRA_MOD
    }
#endif  // CONFIG_EXT_INTRA

    // Cost of signalling "no UV palette"; the context depends on whether a
    // luma palette is in use.
    if (try_palette && mode == UV_DC_PRED)
      this_rate += x->palette_uv_mode_cost[pmi->palette_size[0] > 0][0];

    this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);

    if (this_rd < best_rd) {
      best_mbmi = *mbmi;
      best_rd = this_rd;
      *rate = this_rate;
      *rate_tokenonly = tokenonly_rd_stats.rate;
      *distortion = tokenonly_rd_stats.dist;
      *skippable = tokenonly_rd_stats.skip;
    }
  }

  // Palette search runs after the regular mode loop and may further improve
  // best_rd / best_mbmi in place.
  if (try_palette) {
    uint8_t *best_palette_color_map = x->palette_buffer->best_palette_color_map;
    rd_pick_palette_intra_sbuv(cpi, x,
                               x->intra_uv_mode_cost[mbmi->mode][UV_DC_PRED],
                               best_palette_color_map, &best_mbmi, &best_rd,
                               rate, rate_tokenonly, distortion, skippable);
  }

  *mbmi = best_mbmi;
  // Make sure we actually chose a mode
  assert(best_rd < INT64_MAX);
  return best_rd;
}
5597
// Driver for the chroma intra mode decision. Resets the UV mode state,
// short-circuits to DC with zero rate/distortion when chroma RD is skipped,
// and (under CFL) stores reconstructed luma samples before running the full
// UV mode search. The selected mode is returned through *mode_uv.
static void choose_intra_uv_mode(const AV1_COMP *const cpi, MACROBLOCK *const x,
                                 BLOCK_SIZE bsize, TX_SIZE max_tx_size,
                                 int *rate_uv, int *rate_uv_tokenonly,
                                 int64_t *dist_uv, int *skip_uv,
                                 UV_PREDICTION_MODE *mode_uv) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  // Use an estimated rd for uv_intra based on DC_PRED if the
  // appropriate speed flag is set.
  init_sbuv_mode(mbmi);
  if (x->skip_chroma_rd) {
    // No chroma RDO for this block: report a free DC prediction.
    *rate_uv = 0;
    *rate_uv_tokenonly = 0;
    *dist_uv = 0;
    *skip_uv = 1;
    *mode_uv = UV_DC_PRED;
    return;
  }
  bsize = scale_chroma_bsize(bsize, xd->plane[AOM_PLANE_U].subsampling_x,
                             xd->plane[AOM_PLANE_U].subsampling_y);
#if CONFIG_CFL
  // Only store reconstructed luma when there's chroma RDO. When there's no
  // chroma RDO, the reconstructed luma will be stored in encode_superblock().
  xd->cfl.store_y = !x->skip_chroma_rd;
  if (xd->cfl.store_y) {
    // Perform one extra call to txfm_rd_in_plane(), with the values chosen
    // during luma RDO, so we can store reconstructed luma values
    RD_STATS this_rd_stats;
    txfm_rd_in_plane(x, cpi, &this_rd_stats, INT64_MAX, AOM_PLANE_Y,
                     mbmi->sb_type, mbmi->tx_size,
                     cpi->sf.use_fast_coef_costing);
    xd->cfl.store_y = 0;
  }
#endif  // CONFIG_CFL
  rd_pick_intra_sbuv_mode(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
                          bsize, max_tx_size);
  *mode_uv = mbmi->uv_mode;
}
5636
Yue Chenb23d00a2017-07-28 17:01:21 -07005637static int cost_mv_ref(const MACROBLOCK *const x, PREDICTION_MODE mode,
Yaowu Xuc27fc142016-08-22 16:08:15 -07005638 int16_t mode_context) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07005639 if (is_inter_compound_mode(mode)) {
Yue Chenb23d00a2017-07-28 17:01:21 -07005640 return x
clang-format55ce9e02017-02-15 22:27:12 -08005641 ->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)];
Yaowu Xuc27fc142016-08-22 16:08:15 -07005642 }
David Barkercb03dc32017-04-07 13:05:09 +01005643
David Barkercb03dc32017-04-07 13:05:09 +01005644 int mode_cost = 0;
5645 int16_t mode_ctx = mode_context & NEWMV_CTX_MASK;
5646 int16_t is_all_zero_mv = mode_context & (1 << ALL_ZERO_FLAG_OFFSET);
5647
5648 assert(is_inter_mode(mode));
5649
5650 if (mode == NEWMV) {
Yue Chenb23d00a2017-07-28 17:01:21 -07005651 mode_cost = x->newmv_mode_cost[mode_ctx][0];
David Barkercb03dc32017-04-07 13:05:09 +01005652 return mode_cost;
5653 } else {
Yue Chenb23d00a2017-07-28 17:01:21 -07005654 mode_cost = x->newmv_mode_cost[mode_ctx][1];
Sarah Parker2b9ec2e2017-10-30 17:34:08 -07005655 mode_ctx = (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
David Barkercb03dc32017-04-07 13:05:09 +01005656
5657 if (is_all_zero_mv) return mode_cost;
5658
Sarah Parker2b9ec2e2017-10-30 17:34:08 -07005659 if (mode == GLOBALMV) {
Yue Chenb23d00a2017-07-28 17:01:21 -07005660 mode_cost += x->zeromv_mode_cost[mode_ctx][0];
David Barkercb03dc32017-04-07 13:05:09 +01005661 return mode_cost;
5662 } else {
Yue Chenb23d00a2017-07-28 17:01:21 -07005663 mode_cost += x->zeromv_mode_cost[mode_ctx][1];
David Barkercb03dc32017-04-07 13:05:09 +01005664 mode_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
5665
5666 if (mode_context & (1 << SKIP_NEARESTMV_OFFSET)) mode_ctx = 6;
5667 if (mode_context & (1 << SKIP_NEARMV_OFFSET)) mode_ctx = 7;
5668 if (mode_context & (1 << SKIP_NEARESTMV_SUB8X8_OFFSET)) mode_ctx = 8;
5669
Yue Chenb23d00a2017-07-28 17:01:21 -07005670 mode_cost += x->refmv_mode_cost[mode_ctx][mode != NEARESTMV];
David Barkercb03dc32017-04-07 13:05:09 +01005671 return mode_cost;
5672 }
5673 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07005674}
5675
Sarah Parker6fdc8532016-11-16 17:47:13 -08005676static int get_interinter_compound_type_bits(BLOCK_SIZE bsize,
5677 COMPOUND_TYPE comp_type) {
Debargha Mukherjeec5f735f2017-04-26 03:25:28 +00005678 (void)bsize;
Sarah Parker6fdc8532016-11-16 17:47:13 -08005679 switch (comp_type) {
5680 case COMPOUND_AVERAGE: return 0;
5681 case COMPOUND_WEDGE: return get_interinter_wedge_bits(bsize);
Sarah Parker569edda2016-12-14 14:57:38 -08005682 case COMPOUND_SEG: return 1;
Sarah Parker6fdc8532016-11-16 17:47:13 -08005683 default: assert(0); return 0;
5684 }
5685}
Sarah Parker6fdc8532016-11-16 17:47:13 -08005686
// Rate-distortion statistics for one sub-block / mode candidate of the
// segmented motion search.
typedef struct {
  int eobs;        // NOTE(review): presumably the end-of-block count — confirm
  int brate;       // total rate for the block
  int byrate;      // NOTE(review): presumably luma-only rate — confirm
  int64_t bdist;   // block distortion
  int64_t bsse;    // block sum of squared error
  int64_t brdcost; // combined RD cost for the block
  int_mv mvs[2];      // motion vectors, one per reference (compound uses both)
  int_mv pred_mv[2];  // predicted motion vectors
  int_mv ref_mv[2];   // reference motion vectors

  ENTROPY_CONTEXT ta[2];  // entropy context snapshot (ta/tl: above/left by
  ENTROPY_CONTEXT tl[2];  // codebase convention — verify)
} SEG_RDSTAT;
5701
// Best-so-far bookkeeping for the per-segment (sub8x8) mode search.
typedef struct {
  int_mv *ref_mv[2];  // reference MVs for up to two reference frames
  int_mv mvp;         // motion vector predictor

  int64_t segment_rd;  // best RD cost found so far for the segment
  int r;               // rate of the best candidate
  int64_t d;           // distortion of the best candidate
  int64_t sse;         // sum of squared error of the best candidate
  int segment_yrate;   // NOTE(review): presumably luma-only rate — confirm
  PREDICTION_MODE modes[4];  // best mode for each of up to 4 sub-blocks
  // Per sub-block, per (single + compound) inter mode statistics.
  SEG_RDSTAT rdstat[4][INTER_MODES + INTER_COMPOUND_MODES];
  int mvthresh;  // MV magnitude threshold used by the search
} BEST_SEG_INFO;
5715
Alex Converse0fa0f422017-04-24 12:51:14 -07005716static INLINE int mv_check_bounds(const MvLimits *mv_limits, const MV *mv) {
5717 return (mv->row >> 3) < mv_limits->row_min ||
5718 (mv->row >> 3) > mv_limits->row_max ||
5719 (mv->col >> 3) < mv_limits->col_min ||
5720 (mv->col >> 3) > mv_limits->col_max;
Yaowu Xuc27fc142016-08-22 16:08:15 -07005721}
5722
// Check if NEARESTMV/NEARMV/GLOBALMV is the cheapest way encode zero motion.
// Returns 0 when 'this_mode' would encode the (global-)zero motion vector
// at a strictly higher rate than another mode that produces the same MV,
// i.e. the caller can safely prune it; returns 1 otherwise.
// TODO(aconverse): Find out if this is still productive then clean up or remove
static int check_best_zero_mv(
    const AV1_COMP *const cpi, const MACROBLOCK *const x,
    const int16_t mode_context[TOTAL_REFS_PER_FRAME],
    const int16_t compound_mode_context[TOTAL_REFS_PER_FRAME],
    int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME], int this_mode,
    const MV_REFERENCE_FRAME ref_frames[2], const BLOCK_SIZE bsize, int block,
    int mi_row, int mi_col) {
  int_mv zeromv[2] = { {.as_int = 0 } };
  int comp_pred_mode = ref_frames[1] > INTRA_FRAME;
  (void)mi_row;
  (void)mi_col;
  (void)cpi;
  // For global-motion modes the "zero" MV is the global motion vector of
  // each reference, not literally (0, 0).
  if (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV) {
    for (int cur_frm = 0; cur_frm < 1 + comp_pred_mode; cur_frm++) {
      zeromv[cur_frm].as_int =
          gm_get_motion_vector(&cpi->common.global_motion[ref_frames[cur_frm]],
                               cpi->common.allow_high_precision_mv, bsize,
                               mi_col, mi_row, block
#if CONFIG_AMVR
                               ,
                               cpi->common.cur_frame_force_integer_mv
#endif
                               )
              .as_int;
    }
  }

  // Single-reference case: compare the mode-signalling costs c1/c2/c3 of
  // NEARMV/NEARESTMV/GLOBALMV when they would all code the same zero MV.
  if ((this_mode == NEARMV || this_mode == NEARESTMV ||
       this_mode == GLOBALMV) &&
      frame_mv[this_mode][ref_frames[0]].as_int == zeromv[0].as_int &&
      (ref_frames[1] <= INTRA_FRAME ||
       frame_mv[this_mode][ref_frames[1]].as_int == zeromv[1].as_int)) {
    int16_t rfc =
        av1_mode_context_analyzer(mode_context, ref_frames, bsize, block);
    int c1 = cost_mv_ref(x, NEARMV, rfc);
    int c2 = cost_mv_ref(x, NEARESTMV, rfc);
    int c3 = cost_mv_ref(x, GLOBALMV, rfc);

    if (this_mode == NEARMV) {
      if (c1 > c3) return 0;
    } else if (this_mode == NEARESTMV) {
      if (c2 > c3) return 0;
    } else {
      assert(this_mode == GLOBALMV);
      // GLOBALMV is prunable only when a cheaper near/nearest mode codes
      // the literal zero MV.
      if (ref_frames[1] <= INTRA_FRAME) {
        if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0) ||
            (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0))
          return 0;
      } else {
        if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0 &&
             frame_mv[NEARESTMV][ref_frames[1]].as_int == 0) ||
            (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0 &&
             frame_mv[NEARMV][ref_frames[1]].as_int == 0))
          return 0;
      }
    }
  } else if ((this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
              this_mode == GLOBAL_GLOBALMV) &&
             frame_mv[this_mode][ref_frames[0]].as_int == zeromv[0].as_int &&
             frame_mv[this_mode][ref_frames[1]].as_int == zeromv[1].as_int) {
    // Compound case: same pruning logic over the compound mode costs.
    int16_t rfc = compound_mode_context[ref_frames[0]];
    int c2 = cost_mv_ref(x, NEAREST_NEARESTMV, rfc);
    int c3 = cost_mv_ref(x, GLOBAL_GLOBALMV, rfc);
    int c5 = cost_mv_ref(x, NEAR_NEARMV, rfc);

    if (this_mode == NEAREST_NEARESTMV) {
      if (c2 > c3) return 0;
    } else if (this_mode == NEAR_NEARMV) {
      if (c5 > c3) return 0;
    } else {
      assert(this_mode == GLOBAL_GLOBALMV);
      if ((c3 >= c2 && frame_mv[NEAREST_NEARESTMV][ref_frames[0]].as_int == 0 &&
           frame_mv[NEAREST_NEARESTMV][ref_frames[1]].as_int == 0) ||
          (c3 >= c5 && frame_mv[NEAR_NEARMV][ref_frames[0]].as_int == 0 &&
           frame_mv[NEAR_NEARMV][ref_frames[1]].as_int == 0))
        return 0;
    }
  }
  return 1;
}
5805
// Iterative joint motion search for compound (two-reference) prediction.
// Alternately refines the MV of one reference while holding the other
// fixed: the prediction from the fixed reference is built into second_pred,
// then a small-range full-pel search and sub-pel refinement minimize the
// compound prediction error. Refined MVs are written back into frame_mv[]
// and the total MV signalling rate into *rate_mv.
static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
                                BLOCK_SIZE bsize, int_mv *frame_mv, int mi_row,
                                int mi_col, int_mv *ref_mv_sub8x8[2],
                                const uint8_t *mask, int mask_stride,
                                int *rate_mv, const int block) {
  const AV1_COMMON *const cm = &cpi->common;
  const int pw = block_size_wide[bsize];
  const int ph = block_size_high[bsize];
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  // This function should only ever be called for compound modes
  assert(has_second_ref(mbmi));
  const int refs[2] = { mbmi->ref_frame[0], mbmi->ref_frame[1] };
  int_mv ref_mv[2];
  int ite, ref;
  struct scale_factors sf;
  // ic and ir are the 4x4 coordinates of the sub8x8 at index "block"
  const int ic = block & 1;
  const int ir = (block - ic) >> 1;
  struct macroblockd_plane *const pd = &xd->plane[0];
  const int p_col = ((mi_col * MI_SIZE) >> pd->subsampling_x) + 4 * ic;
  const int p_row = ((mi_row * MI_SIZE) >> pd->subsampling_y) + 4 * ir;
  int is_global[2];
  for (ref = 0; ref < 2; ++ref) {
    const WarpedMotionParams *const wm =
        &xd->global_motion[xd->mi[0]->mbmi.ref_frame[ref]];
    is_global[ref] = is_global_mv_block(xd->mi[0], block, wm->wmtype);
  }

  // Do joint motion search in compound mode to get more accurate mv.
  struct buf_2d backup_yv12[2][MAX_MB_PLANE];
  int last_besterr[2] = { INT_MAX, INT_MAX };
  const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
    av1_get_scaled_ref_frame(cpi, refs[0]),
    av1_get_scaled_ref_frame(cpi, refs[1])
  };

// Prediction buffer from second frame.
#if CONFIG_HIGHBITDEPTH
  DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[MAX_SB_SQUARE]);
  uint8_t *second_pred;
#else   // CONFIG_HIGHBITDEPTH
  DECLARE_ALIGNED(16, uint8_t, second_pred[MAX_SB_SQUARE]);
#endif  // CONFIG_HIGHBITDEPTH
  (void)ref_mv_sub8x8;

  for (ref = 0; ref < 2; ++ref) {
    ref_mv[ref] = x->mbmi_ext->ref_mvs[refs[ref]][0];

    if (scaled_ref_frame[ref]) {
      int i;
      // Swap out the reference frame for a version that's been scaled to
      // match the resolution of the current frame, allowing the existing
      // motion search code to be used without additional modifications.
      for (i = 0; i < MAX_MB_PLANE; i++)
        backup_yv12[ref][i] = xd->plane[i].pre[ref];
      av1_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col,
                           NULL);
    }
  }

// Since we have scaled the reference frames to match the size of the current
// frame we must use a unit scaling factor during mode selection.
#if CONFIG_HIGHBITDEPTH
  av1_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
                                    cm->height, cm->use_highbitdepth);
#else
  av1_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
                                    cm->height);
#endif  // CONFIG_HIGHBITDEPTH

  // Allow joint search multiple times iteratively for each reference frame
  // and break out of the search loop if it couldn't find a better mv.
  for (ite = 0; ite < 4; ite++) {
    struct buf_2d ref_yv12[2];
    int bestsme = INT_MAX;
    int sadpb = x->sadperbit16;
    MV *const best_mv = &x->best_mv.as_mv;
    int search_range = 3;

    MvLimits tmp_mv_limits = x->mv_limits;
    int id = ite % 2;  // Even iterations search in the first reference frame,
                       // odd iterations search in the second. The predictor
                       // found for the 'other' reference frame is factored in.
    const int plane = 0;
    ConvolveParams conv_params = get_conv_params(!id, 0, plane);
#if CONFIG_JNT_COMP
    conv_params.use_jnt_comp_avg = 0;
#endif
    WarpTypesAllowed warp_types;
    warp_types.global_warp_allowed = is_global[!id];
    warp_types.local_warp_allowed = mbmi->motion_mode == WARPED_CAUSAL;

    // Initialized here because of compiler problem in Visual Studio.
    ref_yv12[0] = xd->plane[plane].pre[0];
    ref_yv12[1] = xd->plane[plane].pre[1];

// Get the prediction block from the 'other' reference frame.
#if CONFIG_JNT_COMP
    InterpFilters interp_filters = EIGHTTAP_REGULAR;
#endif  // CONFIG_JNT_COMP

#if CONFIG_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16);
      av1_highbd_build_inter_predictor(
          ref_yv12[!id].buf, ref_yv12[!id].stride, second_pred, pw,
          &frame_mv[refs[!id]].as_mv,
#if CONFIG_JNT_COMP
          &sf, pw, ph, 0, interp_filters,
#else
          &sf, pw, ph, 0, mbmi->interp_filters,
#endif  // CONFIG_JNT_COMP
          &warp_types, p_col, p_row, plane, MV_PRECISION_Q3, mi_col * MI_SIZE,
          mi_row * MI_SIZE, xd);
    } else {
      second_pred = (uint8_t *)second_pred_alloc_16;
#endif  // CONFIG_HIGHBITDEPTH
      av1_build_inter_predictor(ref_yv12[!id].buf, ref_yv12[!id].stride,
                                second_pred, pw, &frame_mv[refs[!id]].as_mv,
#if CONFIG_JNT_COMP
                                &sf, pw, ph, &conv_params, interp_filters,
#else
                                &sf, pw, ph, &conv_params, mbmi->interp_filters,
#endif  // CONFIG_JNT_COMP
                                &warp_types, p_col, p_row, plane, !id,
                                MV_PRECISION_Q3, mi_col * MI_SIZE,
                                mi_row * MI_SIZE, xd);
#if CONFIG_HIGHBITDEPTH
    }
#endif  // CONFIG_HIGHBITDEPTH

#if CONFIG_JNT_COMP
    const int order_idx = id != 0;
    av1_jnt_comp_weight_assign(cm, mbmi, order_idx, &xd->jcp_param.fwd_offset,
                               &xd->jcp_param.bck_offset,
                               &xd->jcp_param.use_jnt_comp_avg, 1);
#endif  // CONFIG_JNT_COMP

    // Do compound motion search on the current reference frame.
    if (id) xd->plane[plane].pre[0] = ref_yv12[id];
    av1_set_mv_search_range(&x->mv_limits, &ref_mv[id].as_mv);

    // Use the mv result from the single mode as mv predictor.
    *best_mv = frame_mv[refs[id]].as_mv;

    best_mv->col >>= 3;
    best_mv->row >>= 3;

    av1_set_mvcost(x, refs[id], id, mbmi->ref_mv_idx);

    // Small-range full-pixel motion search.
    bestsme = av1_refining_search_8p_c(x, sadpb, search_range,
                                       &cpi->fn_ptr[bsize], mask, mask_stride,
                                       id, &ref_mv[id].as_mv, second_pred);
    if (bestsme < INT_MAX) {
      // Re-measure the winner with the (masked) compound variance metric.
      if (mask)
        bestsme = av1_get_mvpred_mask_var(x, best_mv, &ref_mv[id].as_mv,
                                          second_pred, mask, mask_stride, id,
                                          &cpi->fn_ptr[bsize], 1);
      else
        bestsme = av1_get_mvpred_av_var(x, best_mv, &ref_mv[id].as_mv,
                                        second_pred, &cpi->fn_ptr[bsize], 1);
    }

    x->mv_limits = tmp_mv_limits;

#if CONFIG_AMVR
    if (cpi->common.cur_frame_force_integer_mv) {
      x->best_mv.as_mv.row *= 8;
      x->best_mv.as_mv.col *= 8;
    }
    if (bestsme < INT_MAX && cpi->common.cur_frame_force_integer_mv == 0)
#else
    if (bestsme < INT_MAX)
#endif
    {
      int dis; /* TODO: use dis in distortion calculation later. */
      unsigned int sse;
      // Sub-pel refinement around the full-pel winner.
      bestsme = cpi->find_fractional_mv_step(
          x, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv,
          x->errorperbit, &cpi->fn_ptr[bsize], 0,
          cpi->sf.mv.subpel_iters_per_step, NULL, x->nmvjointcost, x->mvcost,
          &dis, &sse, second_pred, mask, mask_stride, id, pw, ph,
          cpi->sf.use_upsampled_references);
    }

    // Restore the pointer to the first (possibly scaled) prediction buffer.
    if (id) xd->plane[plane].pre[0] = ref_yv12[0];

    // Keep the refined MV only if this iteration improved; otherwise the
    // search has converged and we stop early.
    if (bestsme < last_besterr[id]) {
      frame_mv[refs[id]].as_mv = *best_mv;
      last_besterr[id] = bestsme;
    } else {
      break;
    }
  }

  *rate_mv = 0;

  for (ref = 0; ref < 2; ++ref) {
    if (scaled_ref_frame[ref]) {
      // Restore the prediction frame pointers to their unscaled versions.
      int i;
      for (i = 0; i < MAX_MB_PLANE; i++)
        xd->plane[i].pre[ref] = backup_yv12[ref][i];
    }

    av1_set_mvcost(x, refs[ref], ref, mbmi->ref_mv_idx);

    *rate_mv += av1_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
                                &x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv,
                                x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
  }
}
6022
// Estimate the rate (bit cost, in the encoder's internal cost units as
// produced by av1_cost_bit) of signalling each possible reference-frame
// selection for the current block, given its spatial context.
//
// Outputs:
//   ref_costs_single - per-frame cost of coding each single reference
//                      (indexed by MV_REFERENCE_FRAME).
//   ref_costs_comp   - cost of each compound reference choice. With
//                      CONFIG_EXT_COMP_REFS this is a 2-D table indexed by
//                      the two references of the pair; otherwise a 1-D table.
//   comp_mode_p      - probability used to choose between single and
//                      compound prediction (128 = neutral when the choice
//                      is not signalled).
//
// The per-frame costs mirror the reference-frame coding tree used by the
// bitstream writer: base_cost (the inter-vs-intra bit) plus one
// av1_cost_bit() term for every binary decision on the path to that frame.
static void estimate_ref_frame_costs(
    const AV1_COMMON *cm, const MACROBLOCKD *xd, const MACROBLOCK *x,
    int segment_id, unsigned int *ref_costs_single,
#if CONFIG_EXT_COMP_REFS
    unsigned int (*ref_costs_comp)[TOTAL_REFS_PER_FRAME],
#else
    unsigned int *ref_costs_comp,
#endif  // CONFIG_EXT_COMP_REFS
    aom_prob *comp_mode_p) {
  int seg_ref_active =
      segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
  if (seg_ref_active) {
    // The segment dictates the reference frame, so no reference bits are
    // coded: every choice is free and the compound-mode prob is neutral.
    memset(ref_costs_single, 0,
           TOTAL_REFS_PER_FRAME * sizeof(*ref_costs_single));
#if CONFIG_EXT_COMP_REFS
    int ref_frame;
    for (ref_frame = 0; ref_frame < TOTAL_REFS_PER_FRAME; ++ref_frame)
      memset(ref_costs_comp[ref_frame], 0,
             TOTAL_REFS_PER_FRAME * sizeof((*ref_costs_comp)[0]));
#else
    memset(ref_costs_comp, 0, TOTAL_REFS_PER_FRAME * sizeof(*ref_costs_comp));
#endif  // CONFIG_EXT_COMP_REFS

    *comp_mode_p = 128;
  } else {
    int intra_inter_ctx = av1_get_intra_inter_context(xd);

    if (cm->reference_mode == REFERENCE_MODE_SELECT) {
      *comp_mode_p = av1_get_reference_mode_prob(cm, xd);
    } else {
      *comp_mode_p = 128;
    }

    // Cost of signalling "intra" vs "inter" in this context; the inter bit
    // is the base cost shared by every reference-frame choice below.
    ref_costs_single[INTRA_FRAME] = x->intra_inter_cost[intra_inter_ctx][0];

    unsigned int base_cost = x->intra_inter_cost[intra_inter_ctx][1];

#if !CONFIG_REF_ADAPT
    if (cm->reference_mode != COMPOUND_REFERENCE) {
#endif  // !CONFIG_REF_ADAPT
      // Probabilities for each node of the single-reference coding tree.
      aom_prob ref_single_p1 = av1_get_pred_prob_single_ref_p1(cm, xd);
      aom_prob ref_single_p2 = av1_get_pred_prob_single_ref_p2(cm, xd);
      aom_prob ref_single_p3 = av1_get_pred_prob_single_ref_p3(cm, xd);
      aom_prob ref_single_p4 = av1_get_pred_prob_single_ref_p4(cm, xd);
      aom_prob ref_single_p5 = av1_get_pred_prob_single_ref_p5(cm, xd);
      aom_prob ref_single_p6 = av1_get_pred_prob_single_ref_p6(cm, xd);

      ref_costs_single[LAST_FRAME] = ref_costs_single[LAST2_FRAME] =
          ref_costs_single[LAST3_FRAME] = ref_costs_single[BWDREF_FRAME] =
              ref_costs_single[ALTREF2_FRAME] = ref_costs_single[GOLDEN_FRAME] =
                  ref_costs_single[ALTREF_FRAME] = base_cost;

      // p1: forward group (LAST/LAST2/LAST3/GOLDEN = 0) vs backward group
      // (BWDREF/ALTREF2/ALTREF = 1).
      ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p1, 0);
      ref_costs_single[LAST2_FRAME] += av1_cost_bit(ref_single_p1, 0);
      ref_costs_single[LAST3_FRAME] += av1_cost_bit(ref_single_p1, 0);
      ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p1, 0);
      ref_costs_single[BWDREF_FRAME] += av1_cost_bit(ref_single_p1, 1);
      ref_costs_single[ALTREF2_FRAME] += av1_cost_bit(ref_single_p1, 1);
      ref_costs_single[ALTREF_FRAME] += av1_cost_bit(ref_single_p1, 1);

      // p3 splits the forward group: {LAST, LAST2} vs {LAST3, GOLDEN}.
      ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p3, 0);
      ref_costs_single[LAST2_FRAME] += av1_cost_bit(ref_single_p3, 0);
      ref_costs_single[LAST3_FRAME] += av1_cost_bit(ref_single_p3, 1);
      ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p3, 1);

      // p2 splits the backward group: {BWDREF, ALTREF2} vs ALTREF.
      ref_costs_single[BWDREF_FRAME] += av1_cost_bit(ref_single_p2, 0);
      ref_costs_single[ALTREF2_FRAME] += av1_cost_bit(ref_single_p2, 0);
      ref_costs_single[ALTREF_FRAME] += av1_cost_bit(ref_single_p2, 1);

      // Leaf decisions: p4 LAST vs LAST2, p5 LAST3 vs GOLDEN,
      // p6 BWDREF vs ALTREF2.
      ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p4, 0);
      ref_costs_single[LAST2_FRAME] += av1_cost_bit(ref_single_p4, 1);

      ref_costs_single[LAST3_FRAME] += av1_cost_bit(ref_single_p5, 0);
      ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p5, 1);

      ref_costs_single[BWDREF_FRAME] += av1_cost_bit(ref_single_p6, 0);
      ref_costs_single[ALTREF2_FRAME] += av1_cost_bit(ref_single_p6, 1);
#if !CONFIG_REF_ADAPT
    } else {
      // Compound-only mode: single references are not signalled; use a flat
      // nominal cost (512 appears to stand for one full bit — TODO confirm).
      ref_costs_single[LAST_FRAME] = 512;
      ref_costs_single[LAST2_FRAME] = 512;
      ref_costs_single[LAST3_FRAME] = 512;
      ref_costs_single[BWDREF_FRAME] = 512;
      ref_costs_single[ALTREF2_FRAME] = 512;
      ref_costs_single[GOLDEN_FRAME] = 512;
      ref_costs_single[ALTREF_FRAME] = 512;
    }
#endif  // !CONFIG_REF_ADAPT

    if (cm->reference_mode != SINGLE_REFERENCE) {
      // Probabilities for the compound reference coding tree.
      aom_prob ref_comp_p = av1_get_pred_prob_comp_ref_p(cm, xd);
      aom_prob ref_comp_p1 = av1_get_pred_prob_comp_ref_p1(cm, xd);
      aom_prob ref_comp_p2 = av1_get_pred_prob_comp_ref_p2(cm, xd);
      aom_prob bwdref_comp_p = av1_get_pred_prob_comp_bwdref_p(cm, xd);
      aom_prob bwdref_comp_p1 = av1_get_pred_prob_comp_bwdref_p1(cm, xd);

#if CONFIG_EXT_COMP_REFS
      aom_prob comp_ref_type_p = av1_get_comp_reference_type_prob(cm, xd);
      // Per-frame cost contributions for bidirectional (fwd+bwd) pairs; the
      // pair cost is the sum of its two halves.
      unsigned int ref_bicomp_costs[TOTAL_REFS_PER_FRAME] = { 0 };

      // Forward half carries base_cost plus the "bidirectional" type bit;
      // the backward half starts at zero so it is not double-counted.
      ref_bicomp_costs[LAST_FRAME] = ref_bicomp_costs[LAST2_FRAME] =
          ref_bicomp_costs[LAST3_FRAME] = ref_bicomp_costs[GOLDEN_FRAME] =
              base_cost + av1_cost_bit(comp_ref_type_p, 1);
      ref_bicomp_costs[BWDREF_FRAME] = ref_bicomp_costs[ALTREF2_FRAME] = 0;
      ref_bicomp_costs[ALTREF_FRAME] = 0;

      ref_bicomp_costs[LAST_FRAME] += av1_cost_bit(ref_comp_p, 0);
      ref_bicomp_costs[LAST2_FRAME] += av1_cost_bit(ref_comp_p, 0);
      ref_bicomp_costs[LAST3_FRAME] += av1_cost_bit(ref_comp_p, 1);
      ref_bicomp_costs[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p, 1);

      ref_bicomp_costs[LAST_FRAME] += av1_cost_bit(ref_comp_p1, 0);
      ref_bicomp_costs[LAST2_FRAME] += av1_cost_bit(ref_comp_p1, 1);

      ref_bicomp_costs[LAST3_FRAME] += av1_cost_bit(ref_comp_p2, 0);
      ref_bicomp_costs[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p2, 1);

      ref_bicomp_costs[BWDREF_FRAME] += av1_cost_bit(bwdref_comp_p, 0);
      ref_bicomp_costs[ALTREF2_FRAME] += av1_cost_bit(bwdref_comp_p, 0);
      ref_bicomp_costs[ALTREF_FRAME] += av1_cost_bit(bwdref_comp_p, 1);

      ref_bicomp_costs[BWDREF_FRAME] += av1_cost_bit(bwdref_comp_p1, 0);
      ref_bicomp_costs[ALTREF2_FRAME] += av1_cost_bit(bwdref_comp_p1, 1);

      // Combine the halves for every (forward, backward) pair.
      int ref0, ref1;
      for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
        for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1) {
          ref_costs_comp[ref0][ref1] =
              ref_bicomp_costs[ref0] + ref_bicomp_costs[ref1];
        }
      }

      // Unidirectional compound pairs (both refs from the same side) are
      // coded with their own small tree.
      aom_prob uni_comp_ref_p = av1_get_pred_prob_uni_comp_ref_p(cm, xd);
      aom_prob uni_comp_ref_p1 = av1_get_pred_prob_uni_comp_ref_p1(cm, xd);
      aom_prob uni_comp_ref_p2 = av1_get_pred_prob_uni_comp_ref_p2(cm, xd);

      ref_costs_comp[LAST_FRAME][LAST2_FRAME] =
          base_cost + av1_cost_bit(comp_ref_type_p, 0) +
          av1_cost_bit(uni_comp_ref_p, 0) + av1_cost_bit(uni_comp_ref_p1, 0);
      ref_costs_comp[LAST_FRAME][LAST3_FRAME] =
          base_cost + av1_cost_bit(comp_ref_type_p, 0) +
          av1_cost_bit(uni_comp_ref_p, 0) + av1_cost_bit(uni_comp_ref_p1, 1) +
          av1_cost_bit(uni_comp_ref_p2, 0);
      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] =
          base_cost + av1_cost_bit(comp_ref_type_p, 0) +
          av1_cost_bit(uni_comp_ref_p, 0) + av1_cost_bit(uni_comp_ref_p1, 1) +
          av1_cost_bit(uni_comp_ref_p2, 1);

      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] =
          base_cost + av1_cost_bit(comp_ref_type_p, 0) +
          av1_cost_bit(uni_comp_ref_p, 1);

#else   // !CONFIG_EXT_COMP_REFS

      // Forward refs carry the base cost; backward refs contribute only
      // their own tree bits (the pair cost is formed by the caller).
      ref_costs_comp[LAST_FRAME] = ref_costs_comp[LAST2_FRAME] =
          ref_costs_comp[LAST3_FRAME] = ref_costs_comp[GOLDEN_FRAME] =
              base_cost;

      ref_costs_comp[BWDREF_FRAME] = ref_costs_comp[ALTREF2_FRAME] =
          ref_costs_comp[ALTREF_FRAME] = 0;

      ref_costs_comp[LAST_FRAME] += av1_cost_bit(ref_comp_p, 0);
      ref_costs_comp[LAST2_FRAME] += av1_cost_bit(ref_comp_p, 0);
      ref_costs_comp[LAST3_FRAME] += av1_cost_bit(ref_comp_p, 1);
      ref_costs_comp[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p, 1);

      ref_costs_comp[LAST_FRAME] += av1_cost_bit(ref_comp_p1, 0);
      ref_costs_comp[LAST2_FRAME] += av1_cost_bit(ref_comp_p1, 1);

      ref_costs_comp[LAST3_FRAME] += av1_cost_bit(ref_comp_p2, 0);
      ref_costs_comp[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p2, 1);

      // NOTE(zoeliu): BWDREF and ALTREF each add an extra cost by coding 1
      // more bit.
      ref_costs_comp[BWDREF_FRAME] += av1_cost_bit(bwdref_comp_p, 0);
      ref_costs_comp[ALTREF2_FRAME] += av1_cost_bit(bwdref_comp_p, 0);
      ref_costs_comp[ALTREF_FRAME] += av1_cost_bit(bwdref_comp_p, 1);

      ref_costs_comp[BWDREF_FRAME] += av1_cost_bit(bwdref_comp_p1, 0);
      ref_costs_comp[ALTREF2_FRAME] += av1_cost_bit(bwdref_comp_p1, 1);
#endif  // CONFIG_EXT_COMP_REFS
    } else {
      // Compound prediction unavailable: fill with the flat nominal cost so
      // any accidental use is penalized uniformly.
#if CONFIG_EXT_COMP_REFS
      int ref0, ref1;
      for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
        for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1)
          ref_costs_comp[ref0][ref1] = 512;
      }
      ref_costs_comp[LAST_FRAME][LAST2_FRAME] = 512;
      ref_costs_comp[LAST_FRAME][LAST3_FRAME] = 512;
      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = 512;
      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = 512;
#else   // !CONFIG_EXT_COMP_REFS
      ref_costs_comp[LAST_FRAME] = 512;
      ref_costs_comp[LAST2_FRAME] = 512;
      ref_costs_comp[LAST3_FRAME] = 512;
      ref_costs_comp[BWDREF_FRAME] = 512;
      ref_costs_comp[ALTREF2_FRAME] = 512;
      ref_costs_comp[ALTREF_FRAME] = 512;
      ref_costs_comp[GOLDEN_FRAME] = 512;
#endif  // CONFIG_EXT_COMP_REFS
    }
  }
}
6227
6228static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
6229 int mode_index,
6230 int64_t comp_pred_diff[REFERENCE_MODES],
6231 int skippable) {
6232 MACROBLOCKD *const xd = &x->e_mbd;
6233
6234 // Take a snapshot of the coding context so it can be
6235 // restored if we decide to encode this way
6236 ctx->skip = x->skip;
6237 ctx->skippable = skippable;
6238 ctx->best_mode_index = mode_index;
6239 ctx->mic = *xd->mi[0];
6240 ctx->mbmi_ext = *x->mbmi_ext;
6241 ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
6242 ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE];
6243 ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
6244}
6245
clang-format55ce9e02017-02-15 22:27:12 -08006246static void setup_buffer_inter(
6247 const AV1_COMP *const cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
6248 BLOCK_SIZE block_size, int mi_row, int mi_col,
6249 int_mv frame_nearest_mv[TOTAL_REFS_PER_FRAME],
6250 int_mv frame_near_mv[TOTAL_REFS_PER_FRAME],
6251 struct buf_2d yv12_mb[TOTAL_REFS_PER_FRAME][MAX_MB_PLANE]) {
Yaowu Xuf883b422016-08-30 14:01:10 -07006252 const AV1_COMMON *cm = &cpi->common;
Yaowu Xuc27fc142016-08-22 16:08:15 -07006253 const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
6254 MACROBLOCKD *const xd = &x->e_mbd;
6255 MODE_INFO *const mi = xd->mi[0];
6256 int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
6257 const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
6258 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
6259
6260 assert(yv12 != NULL);
6261
6262 // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
6263 // use the UV scaling factors.
Yaowu Xuf883b422016-08-30 14:01:10 -07006264 av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf);
Yaowu Xuc27fc142016-08-22 16:08:15 -07006265
6266 // Gets an initial list of candidate vectors from neighbours and orders them
Sebastien Alaiwane140c502017-04-27 09:52:34 +02006267 av1_find_mv_refs(cm, xd, mi, ref_frame, &mbmi_ext->ref_mv_count[ref_frame],
6268 mbmi_ext->ref_mv_stack[ref_frame],
Sebastien Alaiwan0bdea0d2017-10-02 15:15:05 +02006269 mbmi_ext->compound_mode_context, candidates, mi_row, mi_col,
6270 NULL, NULL, mbmi_ext->mode_context);
Yaowu Xuc27fc142016-08-22 16:08:15 -07006271
RogerZhou3b635242017-09-19 10:06:46 -07006272// Candidate refinement carried out at encoder and decoder
6273#if CONFIG_AMVR
6274 av1_find_best_ref_mvs(cm->allow_high_precision_mv, candidates,
6275 &frame_nearest_mv[ref_frame], &frame_near_mv[ref_frame],
RogerZhou10a03802017-10-26 11:49:48 -07006276 cm->cur_frame_force_integer_mv);
RogerZhou3b635242017-09-19 10:06:46 -07006277#else
Yaowu Xuf883b422016-08-30 14:01:10 -07006278 av1_find_best_ref_mvs(cm->allow_high_precision_mv, candidates,
6279 &frame_nearest_mv[ref_frame],
6280 &frame_near_mv[ref_frame]);
RogerZhou3b635242017-09-19 10:06:46 -07006281#endif
Debargha Mukherjee6ea917e2017-10-19 09:31:29 -07006282 // Further refinement that is encode side only to test the top few candidates
6283 // in full and choose the best as the centre point for subsequent searches.
6284 // The current implementation doesn't support scaling.
Jingning Han271bb2c2016-12-14 12:34:46 -08006285 av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, ref_frame,
6286 block_size);
Yaowu Xuc27fc142016-08-22 16:08:15 -07006287}
6288
// Run a single-reference motion search (full-pel then sub-pel) for the
// current block and write the bit cost of the winning motion vector into
// *rate_mv. The winning MV itself is left in x->best_mv; for
// SIMPLE_TRANSLATION with adaptive search it is also cached in
// x->pred_mv[ref]. 'ref_idx' selects which of the block's (up to two)
// references to search.
static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
                                 BLOCK_SIZE bsize, int mi_row, int mi_col,
                                 int ref_idx, int *rate_mv) {
  MACROBLOCKD *xd = &x->e_mbd;
  const AV1_COMMON *cm = &cpi->common;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0, 0, 0, 0 } };
  int bestsme = INT_MAX;
  int step_param;
  int sadpb = x->sadperbit16;
  MV mvp_full;
  int ref = mbmi->ref_frame[ref_idx];
  MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;

  // Saved so the (possibly narrowed) search range can be restored afterwards.
  MvLimits tmp_mv_limits = x->mv_limits;
  int cost_list[5];

  const YV12_BUFFER_CONFIG *scaled_ref_frame =
      av1_get_scaled_ref_frame(cpi, ref);

  // Candidate start points for the full-pel search; one is picked below via
  // x->mv_best_ref_index[ref].
  MV pred_mv[3];
  pred_mv[0] = x->mbmi_ext->ref_mvs[ref][0].as_mv;
  pred_mv[1] = x->mbmi_ext->ref_mvs[ref][1].as_mv;
  pred_mv[2] = x->pred_mv[ref];

  if (scaled_ref_frame) {
    int i;
    // Swap out the reference frame for a version that's been scaled to
    // match the resolution of the current frame, allowing the existing
    // motion search code to be used without additional modifications.
    for (i = 0; i < MAX_MB_PLANE; i++)
      backup_yv12[i] = xd->plane[i].pre[ref_idx];

    av1_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL);
  }

  av1_set_mv_search_range(&x->mv_limits, &ref_mv);

  av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx);

  // Work out the size of the first step in the mv step search.
  // 0 here is maximum length first step. 1 is AOMMAX >> 1 etc.
  if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
    // Take the weighted average of the step_params based on the last frame's
    // max mv magnitude and that based on the best ref mvs of the current
    // block for the given reference.
    step_param =
        (av1_init_search_range(x->max_mv_context[ref]) + cpi->mv_step_param) /
        2;
  } else {
    step_param = cpi->mv_step_param;
  }

  if (cpi->sf.adaptive_motion_search && bsize < cm->sb_size) {
    // Smaller blocks start with a coarser (larger) first step.
    int boffset =
        2 * (b_width_log2_lookup[cm->sb_size] -
             AOMMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
    step_param = AOMMAX(step_param, boffset);
  }

  if (cpi->sf.adaptive_motion_search) {
    int bwl = b_width_log2_lookup[bsize];
    int bhl = b_height_log2_lookup[bsize];
    // Normalized prediction SAD; low values suggest a good predictor, so a
    // coarser search suffices.
    int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);

    if (tlevel < 5) {
      step_param += 2;
      step_param = AOMMIN(step_param, MAX_MVSEARCH_STEPS - 1);
    }

    // prev_mv_sad is not setup for dynamically scaled frames.
    if (cpi->oxcf.resize_mode != RESIZE_RANDOM) {
      int i;
      for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
        // If another reference predicts much better, skip the search for
        // this one entirely (early return). The pre-plane pointers must be
        // restored first when a scaled reference was swapped in.
        if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
          x->pred_mv[ref].row = 0;
          x->pred_mv[ref].col = 0;
          x->best_mv.as_int = INVALID_MV;

          if (scaled_ref_frame) {
            int j;
            for (j = 0; j < MAX_MB_PLANE; ++j)
              xd->plane[j].pre[ref_idx] = backup_yv12[j];
          }
          return;
        }
      }
    }
  }

  av1_set_mv_search_range(&x->mv_limits, &ref_mv);

  // Choose the full-pel start point (in 1/8-pel units, converted below).
  if (mbmi->motion_mode != SIMPLE_TRANSLATION)
    mvp_full = mbmi->mv[0].as_mv;
  else
    mvp_full = pred_mv[x->mv_best_ref_index[ref]];

  mvp_full.col >>= 3;
  mvp_full.row >>= 3;

  x->best_mv.as_int = x->second_best_mv.as_int = INVALID_MV;

  // Full-pixel stage: the search routine depends on the motion mode.
  switch (mbmi->motion_mode) {
    case SIMPLE_TRANSLATION:
#if CONFIG_HASH_ME
      bestsme = av1_full_pixel_search(cpi, x, bsize, &mvp_full, step_param,
                                      sadpb, cond_cost_list(cpi, cost_list),
                                      &ref_mv, INT_MAX, 1, (MI_SIZE * mi_col),
                                      (MI_SIZE * mi_row), 0);
#else
      bestsme = av1_full_pixel_search(cpi, x, bsize, &mvp_full, step_param,
                                      sadpb, cond_cost_list(cpi, cost_list),
                                      &ref_mv, INT_MAX, 1);
#endif
      break;
    case OBMC_CAUSAL:
      bestsme = av1_obmc_full_pixel_diamond(
          cpi, x, &mvp_full, step_param, sadpb,
          MAX_MVSEARCH_STEPS - 1 - step_param, 1, &cpi->fn_ptr[bsize], &ref_mv,
          &(x->best_mv.as_mv), 0);
      break;
    default: assert(0 && "Invalid motion mode!\n");
  }

  x->mv_limits = tmp_mv_limits;

#if CONFIG_AMVR
  // With forced integer MVs, scale the full-pel result to 1/8-pel units and
  // skip the sub-pel stage.
  if (cpi->common.cur_frame_force_integer_mv) {
    x->best_mv.as_mv.row *= 8;
    x->best_mv.as_mv.col *= 8;
  }
  if (bestsme < INT_MAX && cpi->common.cur_frame_force_integer_mv == 0) {
#else
  if (bestsme < INT_MAX) {
#endif
    // Sub-pixel refinement stage.
    int dis; /* TODO: use dis in distortion calculation later. */
    switch (mbmi->motion_mode) {
      case SIMPLE_TRANSLATION:
        if (cpi->sf.use_upsampled_references) {
          int best_mv_var;
          // Optionally also refine from the second-best full-pel MV and keep
          // whichever refinement yields the lower variance.
          const int try_second = x->second_best_mv.as_int != INVALID_MV &&
                                 x->second_best_mv.as_int != x->best_mv.as_int;
          const int pw = block_size_wide[bsize];
          const int ph = block_size_high[bsize];

          best_mv_var = cpi->find_fractional_mv_step(
              x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
              &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
              cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
              x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, NULL,
              0, 0, pw, ph, 1);

          if (try_second) {
            // Bounds (in 1/8-pel) the second candidate must respect.
            const int minc =
                AOMMAX(x->mv_limits.col_min * 8, ref_mv.col - MV_MAX);
            const int maxc =
                AOMMIN(x->mv_limits.col_max * 8, ref_mv.col + MV_MAX);
            const int minr =
                AOMMAX(x->mv_limits.row_min * 8, ref_mv.row - MV_MAX);
            const int maxr =
                AOMMIN(x->mv_limits.row_max * 8, ref_mv.row + MV_MAX);
            int this_var;
            MV best_mv = x->best_mv.as_mv;

            x->best_mv = x->second_best_mv;
            if (x->best_mv.as_mv.row * 8 <= maxr &&
                x->best_mv.as_mv.row * 8 >= minr &&
                x->best_mv.as_mv.col * 8 <= maxc &&
                x->best_mv.as_mv.col * 8 >= minc) {
              this_var = cpi->find_fractional_mv_step(
                  x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
                  &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
                  cpi->sf.mv.subpel_iters_per_step,
                  cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost,
                  &dis, &x->pred_sse[ref], NULL, NULL, 0, 0, pw, ph, 1);
              if (this_var < best_mv_var) best_mv = x->best_mv.as_mv;
              x->best_mv.as_mv = best_mv;
            }
          }
        } else {
          cpi->find_fractional_mv_step(
              x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
              &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
              cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
              x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, NULL,
              0, 0, 0, 0, 0);
        }
        break;
      case OBMC_CAUSAL:
        av1_find_best_obmc_sub_pixel_tree_up(
            x, &x->best_mv.as_mv, &ref_mv, cm->allow_high_precision_mv,
            x->errorperbit, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
            cpi->sf.mv.subpel_iters_per_step, x->nmvjointcost, x->mvcost, &dis,
            &x->pred_sse[ref], 0, cpi->sf.use_upsampled_references);
        break;
      default: assert(0 && "Invalid motion mode!\n");
    }
  }
  *rate_mv = av1_mv_bit_cost(&x->best_mv.as_mv, &ref_mv, x->nmvjointcost,
                             x->mvcost, MV_COST_WEIGHT);

  // Cache the winner as a predictor for later blocks.
  if (cpi->sf.adaptive_motion_search && mbmi->motion_mode == SIMPLE_TRANSLATION)
    x->pred_mv[ref] = x->best_mv.as_mv;

  if (scaled_ref_frame) {
    int i;
    // Restore the prediction-plane pointers to their unscaled versions.
    for (i = 0; i < MAX_MB_PLANE; i++)
      xd->plane[i].pre[ref_idx] = backup_yv12[i];
  }
}
6499
David Barkerac37fa32016-12-02 12:30:21 +00006500static INLINE void restore_dst_buf(MACROBLOCKD *xd, BUFFER_SET dst) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07006501 int i;
6502 for (i = 0; i < MAX_MB_PLANE; i++) {
David Barkerac37fa32016-12-02 12:30:21 +00006503 xd->plane[i].dst.buf = dst.plane[i];
6504 xd->plane[i].dst.stride = dst.stride[i];
Yaowu Xuc27fc142016-08-22 16:08:15 -07006505 }
6506}
6507
David Barker8dd9b572017-05-12 16:31:38 +01006508static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x,
David Barkerf19f35f2017-05-22 16:33:22 +01006509 BLOCK_SIZE bsize, const MV *other_mv,
David Barker8dd9b572017-05-12 16:31:38 +01006510 int mi_row, int mi_col, const int block,
6511 int ref_idx, uint8_t *second_pred) {
6512 const AV1_COMMON *const cm = &cpi->common;
6513 const int pw = block_size_wide[bsize];
6514 const int ph = block_size_high[bsize];
Yaowu Xuc27fc142016-08-22 16:08:15 -07006515 MACROBLOCKD *xd = &x->e_mbd;
Yaowu Xuc27fc142016-08-22 16:08:15 -07006516 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
David Barker8dd9b572017-05-12 16:31:38 +01006517 const int other_ref = mbmi->ref_frame[!ref_idx];
David Barker8dd9b572017-05-12 16:31:38 +01006518 struct scale_factors sf;
David Barker8dd9b572017-05-12 16:31:38 +01006519 struct macroblockd_plane *const pd = &xd->plane[0];
James Zern89a015b2017-08-08 12:39:00 -04006520 // ic and ir are the 4x4 coordinates of the sub8x8 at index "block"
David Barker8dd9b572017-05-12 16:31:38 +01006521 const int ic = block & 1;
6522 const int ir = (block - ic) >> 1;
6523 const int p_col = ((mi_col * MI_SIZE) >> pd->subsampling_x) + 4 * ic;
6524 const int p_row = ((mi_row * MI_SIZE) >> pd->subsampling_y) + 4 * ir;
Luc Trudeauf3bf8b12017-12-08 14:38:41 -05006525 const WarpedMotionParams *const wm = &xd->global_motion[other_ref];
David Barker8dd9b572017-05-12 16:31:38 +01006526 int is_global = is_global_mv_block(xd->mi[0], block, wm->wmtype);
Yaowu Xuc27fc142016-08-22 16:08:15 -07006527
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01006528 // This function should only ever be called for compound modes
David Barker8dd9b572017-05-12 16:31:38 +01006529 assert(has_second_ref(mbmi));
Yaowu Xuc27fc142016-08-22 16:08:15 -07006530
David Barker8dd9b572017-05-12 16:31:38 +01006531 struct buf_2d backup_yv12[MAX_MB_PLANE];
6532 const YV12_BUFFER_CONFIG *const scaled_ref_frame =
6533 av1_get_scaled_ref_frame(cpi, other_ref);
Yaowu Xuc27fc142016-08-22 16:08:15 -07006534
6535 if (scaled_ref_frame) {
David Barker8dd9b572017-05-12 16:31:38 +01006536 int i;
6537 // Swap out the reference frame for a version that's been scaled to
6538 // match the resolution of the current frame, allowing the existing
6539 // motion search code to be used without additional modifications.
6540 for (i = 0; i < MAX_MB_PLANE; i++)
6541 backup_yv12[i] = xd->plane[i].pre[!ref_idx];
6542 av1_setup_pre_planes(xd, !ref_idx, scaled_ref_frame, mi_row, mi_col, NULL);
6543 }
6544
6545// Since we have scaled the reference frames to match the size of the current
6546// frame we must use a unit scaling factor during mode selection.
6547#if CONFIG_HIGHBITDEPTH
6548 av1_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
6549 cm->height, cm->use_highbitdepth);
6550#else
6551 av1_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
6552 cm->height);
6553#endif // CONFIG_HIGHBITDEPTH
6554
6555 struct buf_2d ref_yv12;
6556
6557 const int plane = 0;
David Barkere64d51a2017-06-09 14:52:42 +01006558 ConvolveParams conv_params = get_conv_params(!ref_idx, 0, plane);
David Barker8dd9b572017-05-12 16:31:38 +01006559 WarpTypesAllowed warp_types;
David Barker8dd9b572017-05-12 16:31:38 +01006560 warp_types.global_warp_allowed = is_global;
David Barker8dd9b572017-05-12 16:31:38 +01006561 warp_types.local_warp_allowed = mbmi->motion_mode == WARPED_CAUSAL;
David Barker8dd9b572017-05-12 16:31:38 +01006562
6563 // Initialized here because of compiler problem in Visual Studio.
6564 ref_yv12 = xd->plane[plane].pre[!ref_idx];
6565
6566// Get the prediction block from the 'other' reference frame.
6567#if CONFIG_HIGHBITDEPTH
6568 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
6569 av1_highbd_build_inter_predictor(
David Barkerf19f35f2017-05-22 16:33:22 +01006570 ref_yv12.buf, ref_yv12.stride, second_pred, pw, other_mv, &sf, pw, ph,
Sebastien Alaiwan48795802017-10-30 12:07:13 +01006571 0, mbmi->interp_filters, &warp_types, p_col, p_row, plane,
6572 MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd);
David Barker8dd9b572017-05-12 16:31:38 +01006573 } else {
6574#endif // CONFIG_HIGHBITDEPTH
6575 av1_build_inter_predictor(
David Barkerf19f35f2017-05-22 16:33:22 +01006576 ref_yv12.buf, ref_yv12.stride, second_pred, pw, other_mv, &sf, pw, ph,
Sebastien Alaiwan48795802017-10-30 12:07:13 +01006577 &conv_params, mbmi->interp_filters, &warp_types, p_col, p_row, plane,
6578 !ref_idx, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd);
David Barker8dd9b572017-05-12 16:31:38 +01006579#if CONFIG_HIGHBITDEPTH
6580 }
6581#endif // CONFIG_HIGHBITDEPTH
6582
Cheng Chenefc55fd2017-10-10 12:08:28 -07006583#if CONFIG_JNT_COMP
Cheng Chenf78632e2017-10-20 15:30:51 -07006584 av1_jnt_comp_weight_assign(cm, mbmi, 0, &xd->jcp_param.fwd_offset,
Cheng Chen8263f802017-11-14 15:50:00 -08006585 &xd->jcp_param.bck_offset,
6586 &xd->jcp_param.use_jnt_comp_avg, 1);
Cheng Chenefc55fd2017-10-10 12:08:28 -07006587#endif // CONFIG_JNT_COMP
6588
David Barker8dd9b572017-05-12 16:31:38 +01006589 if (scaled_ref_frame) {
6590 // Restore the prediction frame pointers to their unscaled versions.
6591 int i;
6592 for (i = 0; i < MAX_MB_PLANE; i++)
6593 xd->plane[i].pre[!ref_idx] = backup_yv12[i];
6594 }
6595}
6596
// Search for the best mv for one component of a compound,
// given that the other component is fixed.
//
// On entry *this_mv holds the starting MV (taken from the single-reference
// result) for the component selected by ref_idx; on success it is
// overwritten with the refined MV.  second_pred is the already-built
// prediction for the other component; mask/mask_stride (mask may be NULL)
// describe the compound mask.  *rate_mv receives the signalling cost of the
// final MV relative to its predictor.
static void compound_single_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
                                          BLOCK_SIZE bsize, MV *this_mv,
                                          int mi_row, int mi_col,
                                          const uint8_t *second_pred,
                                          const uint8_t *mask, int mask_stride,
                                          int *rate_mv, int ref_idx) {
  const int pw = block_size_wide[bsize];
  const int ph = block_size_high[bsize];
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const int ref = mbmi->ref_frame[ref_idx];
  // MV predictor: the first reference MV for this reference frame.
  int_mv ref_mv = x->mbmi_ext->ref_mvs[ref][0];
  struct macroblockd_plane *const pd = &xd->plane[0];

  struct buf_2d backup_yv12[MAX_MB_PLANE];
  const YV12_BUFFER_CONFIG *const scaled_ref_frame =
      av1_get_scaled_ref_frame(cpi, ref);

  // Check that this is either an interinter or an interintra block
  assert(has_second_ref(mbmi) ||
         (ref_idx == 0 && mbmi->ref_frame[1] == INTRA_FRAME));

  if (scaled_ref_frame) {
    int i;
    // Swap out the reference frame for a version that's been scaled to
    // match the resolution of the current frame, allowing the existing
    // motion search code to be used without additional modifications.
    for (i = 0; i < MAX_MB_PLANE; i++)
      backup_yv12[i] = xd->plane[i].pre[ref_idx];
    av1_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL);
  }

  struct buf_2d orig_yv12;
  int bestsme = INT_MAX;
  int sadpb = x->sadperbit16;
  MV *const best_mv = &x->best_mv.as_mv;
  int search_range = 3;

  // Save the MV limits so they can be restored after the search.
  MvLimits tmp_mv_limits = x->mv_limits;

  // Initialized here because of compiler problem in Visual Studio.
  if (ref_idx) {
    // The search routines read plane 0's pre[0] buffer, so temporarily
    // point it at the buffer of the reference being searched.
    orig_yv12 = pd->pre[0];
    pd->pre[0] = pd->pre[ref_idx];
  }

  // Do compound motion search on the current reference frame.
  av1_set_mv_search_range(&x->mv_limits, &ref_mv.as_mv);

  // Use the mv result from the single mode as mv predictor.
  *best_mv = *this_mv;

  // Convert from 1/8-pel to full-pel units for the integer search.
  best_mv->col >>= 3;
  best_mv->row >>= 3;

  av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx);

  // Small-range full-pixel motion search.
  bestsme = av1_refining_search_8p_c(x, sadpb, search_range,
                                     &cpi->fn_ptr[bsize], mask, mask_stride,
                                     ref_idx, &ref_mv.as_mv, second_pred);
  if (bestsme < INT_MAX) {
    // Re-score the full-pel winner with the (masked) compound-average
    // variance so it is comparable with the fractional refinement below.
    if (mask)
      bestsme =
          av1_get_mvpred_mask_var(x, best_mv, &ref_mv.as_mv, second_pred, mask,
                                  mask_stride, ref_idx, &cpi->fn_ptr[bsize], 1);
    else
      bestsme = av1_get_mvpred_av_var(x, best_mv, &ref_mv.as_mv, second_pred,
                                      &cpi->fn_ptr[bsize], 1);
  }

  x->mv_limits = tmp_mv_limits;

#if CONFIG_AMVR
  if (cpi->common.cur_frame_force_integer_mv) {
    // Integer-MV frames: scale the full-pel result back to 1/8-pel units
    // and skip the fractional refinement below.
    x->best_mv.as_mv.row *= 8;
    x->best_mv.as_mv.col *= 8;
  }
  if (bestsme < INT_MAX && cpi->common.cur_frame_force_integer_mv == 0) {
#else
  if (bestsme < INT_MAX) {
#endif
    int dis; /* TODO: use dis in distortion calculation later. */
    unsigned int sse;
    // Sub-pixel refinement of the full-pel result.
    bestsme = cpi->find_fractional_mv_step(
        x, &ref_mv.as_mv, cpi->common.allow_high_precision_mv, x->errorperbit,
        &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL,
        x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, mask, mask_stride,
        ref_idx, pw, ph, cpi->sf.use_upsampled_references);
  }

  // Restore the pointer to the first (possibly scaled) prediction buffer.
  if (ref_idx) pd->pre[0] = orig_yv12;

  // Only commit the result if the search produced a usable error value.
  if (bestsme < INT_MAX) *this_mv = *best_mv;

  *rate_mv = 0;

  if (scaled_ref_frame) {
    // Restore the prediction frame pointers to their unscaled versions.
    int i;
    for (i = 0; i < MAX_MB_PLANE; i++)
      xd->plane[i].pre[ref_idx] = backup_yv12[i];
  }

  av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx);
  // Cost of signalling the final MV relative to its predictor.
  *rate_mv += av1_mv_bit_cost(this_mv, &ref_mv.as_mv, x->nmvjointcost,
                              x->mvcost, MV_COST_WEIGHT);
}
6708
David Barker8dd9b572017-05-12 16:31:38 +01006709// Wrapper for compound_single_motion_search, for the common case
6710// where the second prediction is also an inter mode.
6711static void compound_single_motion_search_interinter(
6712 const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int_mv *frame_mv,
David Barkerf19f35f2017-05-22 16:33:22 +01006713 int mi_row, int mi_col, const uint8_t *mask, int mask_stride, int *rate_mv,
6714 const int block, int ref_idx) {
6715 MACROBLOCKD *xd = &x->e_mbd;
6716 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
6717
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01006718 // This function should only ever be called for compound modes
David Barkerf19f35f2017-05-22 16:33:22 +01006719 assert(has_second_ref(mbmi));
David Barker8dd9b572017-05-12 16:31:38 +01006720
6721// Prediction buffer from second frame.
6722#if CONFIG_HIGHBITDEPTH
David Barker8dd9b572017-05-12 16:31:38 +01006723 DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[MAX_SB_SQUARE]);
6724 uint8_t *second_pred;
6725 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
6726 second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16);
6727 else
6728 second_pred = (uint8_t *)second_pred_alloc_16;
6729#else
6730 DECLARE_ALIGNED(16, uint8_t, second_pred[MAX_SB_SQUARE]);
6731#endif // CONFIG_HIGHBITDEPTH
6732
David Barkerf19f35f2017-05-22 16:33:22 +01006733 MV *this_mv = &frame_mv[mbmi->ref_frame[ref_idx]].as_mv;
6734 const MV *other_mv = &frame_mv[mbmi->ref_frame[!ref_idx]].as_mv;
6735
6736 build_second_inter_pred(cpi, x, bsize, other_mv, mi_row, mi_col, block,
David Barker8dd9b572017-05-12 16:31:38 +01006737 ref_idx, second_pred);
6738
David Barkerf19f35f2017-05-22 16:33:22 +01006739 compound_single_motion_search(cpi, x, bsize, this_mv, mi_row, mi_col,
Timothy B. Terriberry5d24b6f2017-06-15 13:39:35 -07006740 second_pred, mask, mask_stride, rate_mv,
David Barkerf19f35f2017-05-22 16:33:22 +01006741 ref_idx);
David Barker8dd9b572017-05-12 16:31:38 +01006742}
6743
Sarah Parker6fdc8532016-11-16 17:47:13 -08006744static void do_masked_motion_search_indexed(
David Barkerc155e012017-05-11 13:54:54 +01006745 const AV1_COMP *const cpi, MACROBLOCK *x, const int_mv *const cur_mv,
Sarah Parkerb9f757c2017-01-06 17:12:24 -08006746 const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE bsize,
Zoe Liu7f24e1b2017-03-17 17:42:05 -07006747 int mi_row, int mi_col, int_mv *tmp_mv, int *rate_mv, int which) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07006748 // NOTE: which values: 0 - 0 only, 1 - 1 only, 2 - both
6749 MACROBLOCKD *xd = &x->e_mbd;
6750 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
6751 BLOCK_SIZE sb_type = mbmi->sb_type;
6752 const uint8_t *mask;
Jingning Hanae5cfde2016-11-30 12:01:44 -08006753 const int mask_stride = block_size_wide[bsize];
Sarah Parker569edda2016-12-14 14:57:38 -08006754
Sarah Parkerb9f757c2017-01-06 17:12:24 -08006755 mask = av1_get_compound_type_mask(comp_data, sb_type);
Yaowu Xuc27fc142016-08-22 16:08:15 -07006756
David Barker8dd9b572017-05-12 16:31:38 +01006757 int_mv frame_mv[TOTAL_REFS_PER_FRAME];
6758 MV_REFERENCE_FRAME rf[2] = { mbmi->ref_frame[0], mbmi->ref_frame[1] };
Yaowu Xuc27fc142016-08-22 16:08:15 -07006759
David Barker8dd9b572017-05-12 16:31:38 +01006760 frame_mv[rf[0]].as_int = cur_mv[0].as_int;
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01006761 frame_mv[rf[1]].as_int = cur_mv[1].as_int;
David Barkerf19f35f2017-05-22 16:33:22 +01006762 if (which == 0 || which == 1) {
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01006763 compound_single_motion_search_interinter(cpi, x, bsize, frame_mv, mi_row,
6764 mi_col, mask, mask_stride, rate_mv,
6765 0, which);
David Barkerf19f35f2017-05-22 16:33:22 +01006766 } else if (which == 2) {
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01006767 joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, mask,
6768 mask_stride, rate_mv, 0);
Yaowu Xuc27fc142016-08-22 16:08:15 -07006769 }
David Barker8dd9b572017-05-12 16:31:38 +01006770 tmp_mv[0].as_int = frame_mv[rf[0]].as_int;
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01006771 tmp_mv[1].as_int = frame_mv[rf[1]].as_int;
Yaowu Xuc27fc142016-08-22 16:08:15 -07006772}
Yaowu Xuc27fc142016-08-22 16:08:15 -07006773
James Zern89a015b2017-08-08 12:39:00 -04006774// In some situations we want to discount the apparent cost of a new motion
Yaowu Xuc27fc142016-08-22 16:08:15 -07006775// vector. Where there is a subtle motion field and especially where there is
6776// low spatial complexity then it can be hard to cover the cost of a new motion
6777// vector in a single block, even if that motion vector reduces distortion.
6778// However, once established that vector may be usable through the nearest and
6779// near mv modes to reduce distortion in subsequent blocks and also improve
6780// visual quality.
Urvang Joshi52648442016-10-13 17:27:51 -07006781static int discount_newmv_test(const AV1_COMP *const cpi, int this_mode,
Yaowu Xuc27fc142016-08-22 16:08:15 -07006782 int_mv this_mv,
6783 int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME],
6784 int ref_frame) {
6785 return (!cpi->rc.is_src_frame_alt_ref && (this_mode == NEWMV) &&
6786 (this_mv.as_int != 0) &&
6787 ((mode_mv[NEARESTMV][ref_frame].as_int == 0) ||
6788 (mode_mv[NEARESTMV][ref_frame].as_int == INVALID_MV)) &&
6789 ((mode_mv[NEARMV][ref_frame].as_int == 0) ||
6790 (mode_mv[NEARMV][ref_frame].as_int == INVALID_MV)));
6791}
6792
Yaowu Xu671f2bd2016-09-30 15:07:57 -07006793#define LEFT_TOP_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
6794#define RIGHT_BOTTOM_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
Yaowu Xuc27fc142016-08-22 16:08:15 -07006795
6796// TODO(jingning): this mv clamping function should be block size dependent.
6797static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
6798 clamp_mv(mv, xd->mb_to_left_edge - LEFT_TOP_MARGIN,
6799 xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
6800 xd->mb_to_top_edge - LEFT_TOP_MARGIN,
6801 xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
6802}
6803
Yaowu Xuf883b422016-08-30 14:01:10 -07006804static int estimate_wedge_sign(const AV1_COMP *cpi, const MACROBLOCK *x,
Yaowu Xuc27fc142016-08-22 16:08:15 -07006805 const BLOCK_SIZE bsize, const uint8_t *pred0,
6806 int stride0, const uint8_t *pred1, int stride1) {
6807 const struct macroblock_plane *const p = &x->plane[0];
6808 const uint8_t *src = p->src.buf;
6809 int src_stride = p->src.stride;
6810 const int f_index = bsize - BLOCK_8X8;
Jingning Han61418bb2017-01-23 17:12:48 -08006811 const int bw = block_size_wide[bsize];
6812 const int bh = block_size_high[bsize];
Yue Chenf03907a2017-05-31 12:04:04 -07006813 uint32_t esq[2][4];
Yaowu Xuc27fc142016-08-22 16:08:15 -07006814 int64_t tl, br;
6815
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02006816#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07006817 if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
6818 pred0 = CONVERT_TO_BYTEPTR(pred0);
6819 pred1 = CONVERT_TO_BYTEPTR(pred1);
6820 }
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02006821#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07006822
Yue Chenf03907a2017-05-31 12:04:04 -07006823 cpi->fn_ptr[f_index].vf(src, src_stride, pred0, stride0, &esq[0][0]);
6824 cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, pred0 + bw / 2, stride0,
6825 &esq[0][1]);
6826 cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride, src_stride,
6827 pred0 + bh / 2 * stride0, stride0, &esq[0][2]);
6828 cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride + bw / 2, src_stride,
6829 pred0 + bh / 2 * stride0 + bw / 2, stride0,
6830 &esq[0][3]);
6831 cpi->fn_ptr[f_index].vf(src, src_stride, pred1, stride1, &esq[1][0]);
6832 cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, pred1 + bw / 2, stride1,
6833 &esq[1][1]);
6834 cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride, src_stride,
6835 pred1 + bh / 2 * stride1, stride0, &esq[1][2]);
6836 cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride + bw / 2, src_stride,
6837 pred1 + bh / 2 * stride1 + bw / 2, stride0,
6838 &esq[1][3]);
Yaowu Xuc27fc142016-08-22 16:08:15 -07006839
6840 tl = (int64_t)(esq[0][0] + esq[0][1] + esq[0][2]) -
6841 (int64_t)(esq[1][0] + esq[1][1] + esq[1][2]);
6842 br = (int64_t)(esq[1][3] + esq[1][1] + esq[1][2]) -
6843 (int64_t)(esq[0][3] + esq[0][1] + esq[0][2]);
6844 return (tl + br > 0);
6845}
Yaowu Xuc27fc142016-08-22 16:08:15 -07006846
6847#if !CONFIG_DUAL_FILTER
James Zern7b9407a2016-05-18 23:48:05 -07006848static InterpFilter predict_interp_filter(
Yaowu Xuf883b422016-08-30 14:01:10 -07006849 const AV1_COMP *cpi, const MACROBLOCK *x, const BLOCK_SIZE bsize,
Yaowu Xuc27fc142016-08-22 16:08:15 -07006850 const int mi_row, const int mi_col,
James Zern7b9407a2016-05-18 23:48:05 -07006851 InterpFilter (*single_filter)[TOTAL_REFS_PER_FRAME]) {
6852 InterpFilter best_filter = SWITCHABLE;
Yaowu Xuf883b422016-08-30 14:01:10 -07006853 const AV1_COMMON *cm = &cpi->common;
Yaowu Xuc27fc142016-08-22 16:08:15 -07006854 const MACROBLOCKD *xd = &x->e_mbd;
6855 int bsl = mi_width_log2_lookup[bsize];
6856 int pred_filter_search =
6857 cpi->sf.cb_pred_filter_search
6858 ? (((mi_row + mi_col) >> bsl) +
6859 get_chessboard_index(cm->current_video_frame)) &
6860 0x1
6861 : 0;
6862 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
6863 const int is_comp_pred = has_second_ref(mbmi);
6864 const int this_mode = mbmi->mode;
6865 int refs[2] = { mbmi->ref_frame[0],
6866 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
Yaowu Xuc27fc142016-08-22 16:08:15 -07006867 if (pred_filter_search) {
James Zern7b9407a2016-05-18 23:48:05 -07006868 InterpFilter af = SWITCHABLE, lf = SWITCHABLE;
Rupert Swarbrick27e90292017-09-28 17:46:50 +01006869 if (xd->up_available)
6870 af = av1_extract_interp_filter(
6871 xd->mi[-xd->mi_stride]->mbmi.interp_filters, 0);
6872 if (xd->left_available)
6873 lf = av1_extract_interp_filter(xd->mi[-1]->mbmi.interp_filters, 0);
Yaowu Xuc27fc142016-08-22 16:08:15 -07006874
Zoe Liu7f24e1b2017-03-17 17:42:05 -07006875 if ((this_mode != NEWMV && this_mode != NEW_NEWMV) || (af == lf))
Yaowu Xuc27fc142016-08-22 16:08:15 -07006876 best_filter = af;
6877 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07006878 if (is_comp_pred) {
6879 if (cpi->sf.adaptive_mode_search) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07006880 switch (this_mode) {
6881 case NEAREST_NEARESTMV:
6882 if (single_filter[NEARESTMV][refs[0]] ==
6883 single_filter[NEARESTMV][refs[1]])
6884 best_filter = single_filter[NEARESTMV][refs[0]];
6885 break;
Yaowu Xuc27fc142016-08-22 16:08:15 -07006886 case NEAR_NEARMV:
6887 if (single_filter[NEARMV][refs[0]] == single_filter[NEARMV][refs[1]])
6888 best_filter = single_filter[NEARMV][refs[0]];
6889 break;
Sarah Parker2b9ec2e2017-10-30 17:34:08 -07006890 case GLOBAL_GLOBALMV:
6891 if (single_filter[GLOBALMV][refs[0]] ==
6892 single_filter[GLOBALMV][refs[1]])
6893 best_filter = single_filter[GLOBALMV][refs[0]];
Yaowu Xuc27fc142016-08-22 16:08:15 -07006894 break;
6895 case NEW_NEWMV:
6896 if (single_filter[NEWMV][refs[0]] == single_filter[NEWMV][refs[1]])
6897 best_filter = single_filter[NEWMV][refs[0]];
6898 break;
6899 case NEAREST_NEWMV:
6900 if (single_filter[NEARESTMV][refs[0]] ==
6901 single_filter[NEWMV][refs[1]])
6902 best_filter = single_filter[NEARESTMV][refs[0]];
6903 break;
6904 case NEAR_NEWMV:
6905 if (single_filter[NEARMV][refs[0]] == single_filter[NEWMV][refs[1]])
6906 best_filter = single_filter[NEARMV][refs[0]];
6907 break;
6908 case NEW_NEARESTMV:
6909 if (single_filter[NEWMV][refs[0]] ==
6910 single_filter[NEARESTMV][refs[1]])
6911 best_filter = single_filter[NEWMV][refs[0]];
6912 break;
6913 case NEW_NEARMV:
6914 if (single_filter[NEWMV][refs[0]] == single_filter[NEARMV][refs[1]])
6915 best_filter = single_filter[NEWMV][refs[0]];
6916 break;
6917 default:
6918 if (single_filter[this_mode][refs[0]] ==
6919 single_filter[this_mode][refs[1]])
6920 best_filter = single_filter[this_mode][refs[0]];
6921 break;
6922 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07006923 }
6924 }
Angie Chiang75c22092016-10-25 12:19:16 -07006925 if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
6926 best_filter = EIGHTTAP_REGULAR;
Yaowu Xuc27fc142016-08-22 16:08:15 -07006927 }
6928 return best_filter;
6929}
Fergus Simpson4063a682017-02-28 16:52:22 -08006930#endif // !CONFIG_DUAL_FILTER
Yaowu Xuc27fc142016-08-22 16:08:15 -07006931
Yaowu Xuc27fc142016-08-22 16:08:15 -07006932// Choose the best wedge index and sign
Yaowu Xuf883b422016-08-30 14:01:10 -07006933static int64_t pick_wedge(const AV1_COMP *const cpi, const MACROBLOCK *const x,
Yaowu Xuc27fc142016-08-22 16:08:15 -07006934 const BLOCK_SIZE bsize, const uint8_t *const p0,
6935 const uint8_t *const p1, int *const best_wedge_sign,
6936 int *const best_wedge_index) {
6937 const MACROBLOCKD *const xd = &x->e_mbd;
6938 const struct buf_2d *const src = &x->plane[0].src;
Jingning Hanae5cfde2016-11-30 12:01:44 -08006939 const int bw = block_size_wide[bsize];
6940 const int bh = block_size_high[bsize];
Yaowu Xuc27fc142016-08-22 16:08:15 -07006941 const int N = bw * bh;
6942 int rate;
6943 int64_t dist;
6944 int64_t rd, best_rd = INT64_MAX;
6945 int wedge_index;
6946 int wedge_sign;
6947 int wedge_types = (1 << get_wedge_bits_lookup(bsize));
6948 const uint8_t *mask;
6949 uint64_t sse;
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02006950#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07006951 const int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
6952 const int bd_round = hbd ? (xd->bd - 8) * 2 : 0;
6953#else
6954 const int bd_round = 0;
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02006955#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07006956
6957 DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]);
6958 DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
6959 DECLARE_ALIGNED(32, int16_t, d10[MAX_SB_SQUARE]);
6960 DECLARE_ALIGNED(32, int16_t, ds[MAX_SB_SQUARE]);
6961
6962 int64_t sign_limit;
6963
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02006964#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07006965 if (hbd) {
Yaowu Xuf883b422016-08-30 14:01:10 -07006966 aom_highbd_subtract_block(bh, bw, r0, bw, src->buf, src->stride,
Yaowu Xuc27fc142016-08-22 16:08:15 -07006967 CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
Yaowu Xuf883b422016-08-30 14:01:10 -07006968 aom_highbd_subtract_block(bh, bw, r1, bw, src->buf, src->stride,
Yaowu Xuc27fc142016-08-22 16:08:15 -07006969 CONVERT_TO_BYTEPTR(p1), bw, xd->bd);
Yaowu Xuf883b422016-08-30 14:01:10 -07006970 aom_highbd_subtract_block(bh, bw, d10, bw, CONVERT_TO_BYTEPTR(p1), bw,
Yaowu Xuc27fc142016-08-22 16:08:15 -07006971 CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
6972 } else // NOLINT
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02006973#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07006974 {
Yaowu Xuf883b422016-08-30 14:01:10 -07006975 aom_subtract_block(bh, bw, r0, bw, src->buf, src->stride, p0, bw);
6976 aom_subtract_block(bh, bw, r1, bw, src->buf, src->stride, p1, bw);
6977 aom_subtract_block(bh, bw, d10, bw, p1, bw, p0, bw);
Yaowu Xuc27fc142016-08-22 16:08:15 -07006978 }
6979
Yaowu Xuf883b422016-08-30 14:01:10 -07006980 sign_limit = ((int64_t)aom_sum_squares_i16(r0, N) -
6981 (int64_t)aom_sum_squares_i16(r1, N)) *
Yaowu Xuc27fc142016-08-22 16:08:15 -07006982 (1 << WEDGE_WEIGHT_BITS) / 2;
6983
Jingning Han61418bb2017-01-23 17:12:48 -08006984 if (N < 64)
6985 av1_wedge_compute_delta_squares_c(ds, r0, r1, N);
6986 else
6987 av1_wedge_compute_delta_squares(ds, r0, r1, N);
Yaowu Xuc27fc142016-08-22 16:08:15 -07006988
6989 for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
Yaowu Xuf883b422016-08-30 14:01:10 -07006990 mask = av1_get_contiguous_soft_mask(wedge_index, 0, bsize);
Jingning Han61418bb2017-01-23 17:12:48 -08006991
6992 // TODO(jingning): Make sse2 functions support N = 16 case
6993 if (N < 64)
6994 wedge_sign = av1_wedge_sign_from_residuals_c(ds, mask, N, sign_limit);
6995 else
6996 wedge_sign = av1_wedge_sign_from_residuals(ds, mask, N, sign_limit);
Yaowu Xuc27fc142016-08-22 16:08:15 -07006997
Yaowu Xuf883b422016-08-30 14:01:10 -07006998 mask = av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
Jingning Han61418bb2017-01-23 17:12:48 -08006999 if (N < 64)
7000 sse = av1_wedge_sse_from_residuals_c(r1, d10, mask, N);
7001 else
7002 sse = av1_wedge_sse_from_residuals(r1, d10, mask, N);
Yaowu Xuc27fc142016-08-22 16:08:15 -07007003 sse = ROUND_POWER_OF_TWO(sse, bd_round);
7004
7005 model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist);
Urvang Joshi70006e42017-06-14 16:08:55 -07007006 rd = RDCOST(x->rdmult, rate, dist);
Yaowu Xuc27fc142016-08-22 16:08:15 -07007007
7008 if (rd < best_rd) {
7009 *best_wedge_index = wedge_index;
7010 *best_wedge_sign = wedge_sign;
7011 best_rd = rd;
7012 }
7013 }
7014
7015 return best_rd;
7016}
7017
// Choose the best wedge index the specified sign
//
// Like pick_wedge(), but the wedge sign is given by the caller, so only the
// residual of p1 and the predictor difference (p1 - p0) are needed.  p0/p1
// are the two candidate predictors with stride equal to the block width.
// Writes the winning index to *best_wedge_index and returns its RD cost.
static int64_t pick_wedge_fixed_sign(
    const AV1_COMP *const cpi, const MACROBLOCK *const x,
    const BLOCK_SIZE bsize, const uint8_t *const p0, const uint8_t *const p1,
    const int wedge_sign, int *const best_wedge_index) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const struct buf_2d *const src = &x->plane[0].src;
  const int bw = block_size_wide[bsize];
  const int bh = block_size_high[bsize];
  const int N = bw * bh;  // number of pixels in the block
  int rate;
  int64_t dist;
  int64_t rd, best_rd = INT64_MAX;
  int wedge_index;
  int wedge_types = (1 << get_wedge_bits_lookup(bsize));
  const uint8_t *mask;
  uint64_t sse;
#if CONFIG_HIGHBITDEPTH
  const int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
  // Scale SSE back to an 8-bit-equivalent range for high bit depth.
  const int bd_round = hbd ? (xd->bd - 8) * 2 : 0;
#else
  const int bd_round = 0;
#endif  // CONFIG_HIGHBITDEPTH

  // Residual buffers: r1 = src - p1, d10 = p1 - p0.
  DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(32, int16_t, d10[MAX_SB_SQUARE]);

#if CONFIG_HIGHBITDEPTH
  if (hbd) {
    aom_highbd_subtract_block(bh, bw, r1, bw, src->buf, src->stride,
                              CONVERT_TO_BYTEPTR(p1), bw, xd->bd);
    aom_highbd_subtract_block(bh, bw, d10, bw, CONVERT_TO_BYTEPTR(p1), bw,
                              CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
  } else  // NOLINT
#endif  // CONFIG_HIGHBITDEPTH
  {
    aom_subtract_block(bh, bw, r1, bw, src->buf, src->stride, p1, bw);
    aom_subtract_block(bh, bw, d10, bw, p1, bw, p0, bw);
  }

  // Try every wedge index with the given sign; keep the best in RD terms.
  for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
    mask = av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
    // The vectorized kernel does not support small N; fall back to C.
    if (N < 64)
      sse = av1_wedge_sse_from_residuals_c(r1, d10, mask, N);
    else
      sse = av1_wedge_sse_from_residuals(r1, d10, mask, N);
    sse = ROUND_POWER_OF_TWO(sse, bd_round);

    model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist);
    rd = RDCOST(x->rdmult, rate, dist);

    if (rd < best_rd) {
      *best_wedge_index = wedge_index;
      best_rd = rd;
    }
  }

  return best_rd;
}
7077
Yaowu Xuf883b422016-08-30 14:01:10 -07007078static int64_t pick_interinter_wedge(const AV1_COMP *const cpi,
Sarah Parker2d0e9b72017-05-04 01:34:16 +00007079 MACROBLOCK *const x,
Yaowu Xuc27fc142016-08-22 16:08:15 -07007080 const BLOCK_SIZE bsize,
7081 const uint8_t *const p0,
7082 const uint8_t *const p1) {
Sarah Parker2d0e9b72017-05-04 01:34:16 +00007083 MACROBLOCKD *const xd = &x->e_mbd;
Yaowu Xuc27fc142016-08-22 16:08:15 -07007084 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
Jingning Hanae5cfde2016-11-30 12:01:44 -08007085 const int bw = block_size_wide[bsize];
Yaowu Xuc27fc142016-08-22 16:08:15 -07007086
7087 int64_t rd;
7088 int wedge_index = -1;
7089 int wedge_sign = 0;
7090
Sarah Parker42d96102017-01-31 21:05:27 -08007091 assert(is_interinter_compound_used(COMPOUND_WEDGE, bsize));
Debargha Mukherjee9e2c7a62017-05-23 21:18:42 -07007092 assert(cpi->common.allow_masked_compound);
Yaowu Xuc27fc142016-08-22 16:08:15 -07007093
7094 if (cpi->sf.fast_wedge_sign_estimate) {
7095 wedge_sign = estimate_wedge_sign(cpi, x, bsize, p0, bw, p1, bw);
7096 rd = pick_wedge_fixed_sign(cpi, x, bsize, p0, p1, wedge_sign, &wedge_index);
7097 } else {
7098 rd = pick_wedge(cpi, x, bsize, p0, p1, &wedge_sign, &wedge_index);
7099 }
7100
Sarah Parker2d0e9b72017-05-04 01:34:16 +00007101 mbmi->wedge_sign = wedge_sign;
7102 mbmi->wedge_index = wedge_index;
Yaowu Xuc27fc142016-08-22 16:08:15 -07007103 return rd;
7104}
7105
// Searches for the best SEG_MASK_TYPE for a COMPOUND_SEG inter-inter
// prediction. For each candidate mask type it builds the mask from the two
// single-reference predictors p0/p1, models the RD cost of the masked
// compound residual, and keeps the cheapest. On return, the winning type is
// stored in mbmi->mask_type and the corresponding mask has been rebuilt in
// xd->seg_mask. Returns the best modeled RD cost (distortion/rate estimate
// only; no entropy coding is performed here).
static int64_t pick_interinter_seg(const AV1_COMP *const cpi,
                                   MACROBLOCK *const x, const BLOCK_SIZE bsize,
                                   const uint8_t *const p0,
                                   const uint8_t *const p1) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const struct buf_2d *const src = &x->plane[0].src;
  const int bw = block_size_wide[bsize];
  const int bh = block_size_high[bsize];
  const int N = bw * bh;  // number of pixels in the block
  int rate;
  uint64_t sse;
  int64_t dist;
  int64_t rd0;
  SEG_MASK_TYPE cur_mask_type;
  int64_t best_rd = INT64_MAX;
  SEG_MASK_TYPE best_mask_type = 0;
#if CONFIG_HIGHBITDEPTH
  const int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
  // High-bitdepth SSE values are scaled down to 8-bit range before RD
  // modeling: 2 bits of shift per extra bit of depth.
  const int bd_round = hbd ? (xd->bd - 8) * 2 : 0;
#else
  const int bd_round = 0;
#endif  // CONFIG_HIGHBITDEPTH
  // r0/r1: residuals of src against p0/p1; d10: difference p1 - p0.
  DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(32, int16_t, d10[MAX_SB_SQUARE]);

#if CONFIG_HIGHBITDEPTH
  if (hbd) {
    aom_highbd_subtract_block(bh, bw, r0, bw, src->buf, src->stride,
                              CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
    aom_highbd_subtract_block(bh, bw, r1, bw, src->buf, src->stride,
                              CONVERT_TO_BYTEPTR(p1), bw, xd->bd);
    aom_highbd_subtract_block(bh, bw, d10, bw, CONVERT_TO_BYTEPTR(p1), bw,
                              CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
  } else  // NOLINT
#endif  // CONFIG_HIGHBITDEPTH
  {
    aom_subtract_block(bh, bw, r0, bw, src->buf, src->stride, p0, bw);
    aom_subtract_block(bh, bw, r1, bw, src->buf, src->stride, p1, bw);
    aom_subtract_block(bh, bw, d10, bw, p1, bw, p0, bw);
  }

  // try each mask type and its inverse
  for (cur_mask_type = 0; cur_mask_type < SEG_MASK_TYPES; cur_mask_type++) {
// build mask and inverse
#if CONFIG_HIGHBITDEPTH
    if (hbd)
      build_compound_seg_mask_highbd(
          xd->seg_mask, cur_mask_type, CONVERT_TO_BYTEPTR(p0), bw,
          CONVERT_TO_BYTEPTR(p1), bw, bsize, bh, bw, xd->bd);
    else
#endif  // CONFIG_HIGHBITDEPTH
      build_compound_seg_mask(xd->seg_mask, cur_mask_type, p0, bw, p1, bw,
                              bsize, bh, bw);

    // compute rd for mask
    // NOTE(review): av1_wedge_sse_from_residuals() appears to derive the SSE
    // of the masked blend from r1 and d10 without rebuilding the predictor —
    // confirm against its definition.
    sse = av1_wedge_sse_from_residuals(r1, d10, xd->seg_mask, N);
    sse = ROUND_POWER_OF_TWO(sse, bd_round);

    model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist);
    rd0 = RDCOST(x->rdmult, rate, dist);

    if (rd0 < best_rd) {
      best_mask_type = cur_mask_type;
      best_rd = rd0;
    }
  }

  // make final mask
  // The loop above leaves the last candidate's mask in xd->seg_mask, so the
  // winning mask must be rebuilt before returning.
  mbmi->mask_type = best_mask_type;
#if CONFIG_HIGHBITDEPTH
  if (hbd)
    build_compound_seg_mask_highbd(
        xd->seg_mask, mbmi->mask_type, CONVERT_TO_BYTEPTR(p0), bw,
        CONVERT_TO_BYTEPTR(p1), bw, bsize, bh, bw, xd->bd);
  else
#endif  // CONFIG_HIGHBITDEPTH
    build_compound_seg_mask(xd->seg_mask, mbmi->mask_type, p0, bw, p1, bw,
                            bsize, bh, bw);

  return best_rd;
}
Sarah Parker569edda2016-12-14 14:57:38 -08007189
Yaowu Xuf883b422016-08-30 14:01:10 -07007190static int64_t pick_interintra_wedge(const AV1_COMP *const cpi,
Yaowu Xuc27fc142016-08-22 16:08:15 -07007191 const MACROBLOCK *const x,
7192 const BLOCK_SIZE bsize,
7193 const uint8_t *const p0,
7194 const uint8_t *const p1) {
7195 const MACROBLOCKD *const xd = &x->e_mbd;
7196 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
7197
7198 int64_t rd;
7199 int wedge_index = -1;
7200
7201 assert(is_interintra_wedge_used(bsize));
Debargha Mukherjee9e2c7a62017-05-23 21:18:42 -07007202 assert(cpi->common.allow_interintra_compound);
Yaowu Xuc27fc142016-08-22 16:08:15 -07007203
7204 rd = pick_wedge_fixed_sign(cpi, x, bsize, p0, p1, 0, &wedge_index);
7205
7206 mbmi->interintra_wedge_sign = 0;
7207 mbmi->interintra_wedge_index = wedge_index;
7208 return rd;
7209}
Sarah Parker6fdc8532016-11-16 17:47:13 -08007210
Sarah Parker2d0e9b72017-05-04 01:34:16 +00007211static int64_t pick_interinter_mask(const AV1_COMP *const cpi, MACROBLOCK *x,
Sarah Parkerddcea392017-04-25 15:57:22 -07007212 const BLOCK_SIZE bsize,
7213 const uint8_t *const p0,
7214 const uint8_t *const p1) {
7215 const COMPOUND_TYPE compound_type =
Sarah Parker2d0e9b72017-05-04 01:34:16 +00007216 x->e_mbd.mi[0]->mbmi.interinter_compound_type;
Sarah Parkerddcea392017-04-25 15:57:22 -07007217 switch (compound_type) {
Sarah Parkerddcea392017-04-25 15:57:22 -07007218 case COMPOUND_WEDGE: return pick_interinter_wedge(cpi, x, bsize, p0, p1);
Sarah Parkerddcea392017-04-25 15:57:22 -07007219 case COMPOUND_SEG: return pick_interinter_seg(cpi, x, bsize, p0, p1);
Sarah Parkerddcea392017-04-25 15:57:22 -07007220 default: assert(0); return 0;
7221 }
7222}
7223
David Barkerc155e012017-05-11 13:54:54 +01007224static int interinter_compound_motion_search(
7225 const AV1_COMP *const cpi, MACROBLOCK *x, const int_mv *const cur_mv,
7226 const BLOCK_SIZE bsize, const int this_mode, int mi_row, int mi_col) {
Sarah Parker2d0e9b72017-05-04 01:34:16 +00007227 MACROBLOCKD *const xd = &x->e_mbd;
Sarah Parker6fdc8532016-11-16 17:47:13 -08007228 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
7229 int_mv tmp_mv[2];
David Barkerc155e012017-05-11 13:54:54 +01007230 int tmp_rate_mv = 0;
Sarah Parker2d0e9b72017-05-04 01:34:16 +00007231 const INTERINTER_COMPOUND_DATA compound_data = {
Debargha Mukherjee371968c2017-10-29 12:30:04 -07007232 mbmi->wedge_index, mbmi->wedge_sign, mbmi->mask_type, xd->seg_mask,
Sarah Parker2d0e9b72017-05-04 01:34:16 +00007233 mbmi->interinter_compound_type
7234 };
Zoe Liu85b66462017-04-20 14:28:19 -07007235
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01007236 if (this_mode == NEW_NEWMV) {
David Barkerc155e012017-05-11 13:54:54 +01007237 do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize,
7238 mi_row, mi_col, tmp_mv, &tmp_rate_mv, 2);
Sarah Parker6fdc8532016-11-16 17:47:13 -08007239 mbmi->mv[0].as_int = tmp_mv[0].as_int;
7240 mbmi->mv[1].as_int = tmp_mv[1].as_int;
7241 } else if (this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV) {
David Barkerc155e012017-05-11 13:54:54 +01007242 do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize,
7243 mi_row, mi_col, tmp_mv, &tmp_rate_mv, 0);
Sarah Parker6fdc8532016-11-16 17:47:13 -08007244 mbmi->mv[0].as_int = tmp_mv[0].as_int;
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01007245 } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
David Barkerc155e012017-05-11 13:54:54 +01007246 do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize,
7247 mi_row, mi_col, tmp_mv, &tmp_rate_mv, 1);
Sarah Parker6fdc8532016-11-16 17:47:13 -08007248 mbmi->mv[1].as_int = tmp_mv[1].as_int;
7249 }
7250 return tmp_rate_mv;
7251}
7252
// Builds the compound (masked) predictor for the current compound type,
// optionally refines its motion, and returns the RD cost of the resulting
// luma prediction. rs2 is the rate of signaling the compound mode, rate_mv
// the rate of the unrefined MVs. On return *out_rate_mv holds the MV rate
// actually used (refined or original) and the block's predictor/MVs reflect
// the chosen configuration.
static int64_t build_and_cost_compound_type(
    const AV1_COMP *const cpi, MACROBLOCK *x, const int_mv *const cur_mv,
    const BLOCK_SIZE bsize, const int this_mode, int rs2, int rate_mv,
    BUFFER_SET *ctx, int *out_rate_mv, uint8_t **preds0, uint8_t **preds1,
    int *strides, int mi_row, int mi_col) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int rate_sum;
  int64_t dist_sum;
  int64_t best_rd_cur = INT64_MAX;
  int64_t rd = INT64_MAX;
  int tmp_skip_txfm_sb;
  int64_t tmp_skip_sse_sb;
  const COMPOUND_TYPE compound_type = mbmi->interinter_compound_type;

  // RD estimate of the best mask with the original (unrefined) MVs.
  best_rd_cur = pick_interinter_mask(cpi, x, bsize, *preds0, *preds1);
  best_rd_cur += RDCOST(x->rdmult, rs2 + rate_mv, 0);

  if (have_newmv_in_inter_mode(this_mode) &&
      use_masked_motion_search(compound_type)) {
    // Refine the NEWMV component(s) against the chosen mask, then compare the
    // refined prediction's modeled RD against the unrefined estimate.
    *out_rate_mv = interinter_compound_motion_search(cpi, x, cur_mv, bsize,
                                                     this_mode, mi_row, mi_col);
    av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, ctx, bsize);
    model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
                    &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
    rd = RDCOST(x->rdmult, rs2 + *out_rate_mv + rate_sum, dist_sum);
    if (rd >= best_rd_cur) {
      // Refinement did not help: restore the original MVs and rebuild the
      // predictor from the cached single-reference buffers.
      mbmi->mv[0].as_int = cur_mv[0].as_int;
      mbmi->mv[1].as_int = cur_mv[1].as_int;
      *out_rate_mv = rate_mv;
      av1_build_wedge_inter_predictor_from_buf(xd, bsize, 0, 0, preds0, strides,
                                               preds1, strides);
    }
    // Final cost uses a transform-based luma RD estimate, not the model.
    av1_subtract_plane(x, bsize, 0);
    rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
                             &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
    if (rd != INT64_MAX)
      rd = RDCOST(x->rdmult, rs2 + *out_rate_mv + rate_sum, dist_sum);
    best_rd_cur = rd;

  } else {
    // No masked motion refinement: cost the compound predictor directly.
    av1_build_wedge_inter_predictor_from_buf(xd, bsize, 0, 0, preds0, strides,
                                             preds1, strides);
    av1_subtract_plane(x, bsize, 0);
    rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
                             &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
    if (rd != INT64_MAX)
      rd = RDCOST(x->rdmult, rs2 + rate_mv + rate_sum, dist_sum);
    best_rd_cur = rd;
  }
  return best_rd_cur;
}
Yaowu Xuc27fc142016-08-22 16:08:15 -07007306
// Argument bundle threaded through the inter-mode search helpers so state
// found while evaluating one mode/reference can be reused by later ones.
typedef struct {
  // Inter prediction buffers and respective strides
  uint8_t *above_pred_buf[MAX_MB_PLANE];
  int above_pred_stride[MAX_MB_PLANE];
  uint8_t *left_pred_buf[MAX_MB_PLANE];
  int left_pred_stride[MAX_MB_PLANE];
  // Best single-reference NEWMV found so far, indexed by reference frame.
  int_mv *single_newmv;
  // Pointer to array of motion vectors to use for each ref and their rates
  // Should point to first of 2 arrays in 2D array
  int *single_newmv_rate;
  // Pointer to array of predicted rate-distortion
  // Should point to first of 2 arrays in 2D array
  int64_t (*modelled_rd)[TOTAL_REFS_PER_FRAME];
  // Interpolation filter chosen per (mode, reference) during the
  // single-reference search.
  InterpFilter single_filter[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
} HandleInterModeArgs;
7322
// Performs the NEWMV motion search for the current mode and fills in the
// searched motion vector(s) and their rate cost. For compound modes the
// NEWMV component(s) are seeded from the cached single-reference results and
// optionally jointly refined; for single-reference modes a fresh search runs
// (or the cached result is reused for inter-intra). Returns INT64_MAX when
// the single-reference search yields an invalid MV, otherwise 0.
static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
                            const BLOCK_SIZE bsize,
                            int_mv (*const mode_mv)[TOTAL_REFS_PER_FRAME],
                            const int mi_row, const int mi_col,
                            int *const rate_mv, int_mv *const single_newmv,
                            HandleInterModeArgs *const args) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;
  const int is_comp_interintra_pred = (mbmi->ref_frame[1] == INTRA_FRAME);
  int_mv *const frame_mv = mode_mv[this_mode];
  const int refs[2] = { mbmi->ref_frame[0],
                        mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
  int i;

  (void)args;

  if (is_comp_pred) {
    // Seed both compound MVs from the cached single-reference NEWMV results.
    for (i = 0; i < 2; ++i) {
      single_newmv[refs[i]].as_int = args->single_newmv[refs[i]].as_int;
    }

    if (this_mode == NEW_NEWMV) {
      frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
      frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;

      if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
        // Large enough block: refine both MVs jointly.
        joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, NULL,
                            0, rate_mv, 0);
      } else {
        // Otherwise just price the seeded MVs against their references.
        *rate_mv = 0;
        for (i = 0; i < 2; ++i) {
          av1_set_mvcost(x, refs[i], i, mbmi->ref_mv_idx);
          *rate_mv += av1_mv_bit_cost(
              &frame_mv[refs[i]].as_mv, &mbmi_ext->ref_mvs[refs[i]][0].as_mv,
              x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
        }
      }
    } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
      // Only the second MV is NEW; the first is fixed by the companion mode.
      frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
      if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
        frame_mv[refs[0]].as_int =
            mode_mv[compound_ref0_mode(this_mode)][refs[0]].as_int;
        compound_single_motion_search_interinter(
            cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, 0, rate_mv, 0, 1);
      } else {
        av1_set_mvcost(x, refs[1], 1, mbmi->ref_mv_idx);
        *rate_mv = av1_mv_bit_cost(&frame_mv[refs[1]].as_mv,
                                   &mbmi_ext->ref_mvs[refs[1]][0].as_mv,
                                   x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
      }
    } else {
      // Mirror case: only the first MV is NEW.
      assert(this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV);
      frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
      if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
        frame_mv[refs[1]].as_int =
            mode_mv[compound_ref1_mode(this_mode)][refs[1]].as_int;
        compound_single_motion_search_interinter(
            cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, 0, rate_mv, 0, 0);
      } else {
        av1_set_mvcost(x, refs[0], 0, mbmi->ref_mv_idx);
        *rate_mv = av1_mv_bit_cost(&frame_mv[refs[0]].as_mv,
                                   &mbmi_ext->ref_mvs[refs[0]][0].as_mv,
                                   x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
      }
    }
  } else {
    if (is_comp_interintra_pred) {
      // Inter-intra reuses the cached single-reference search result.
      x->best_mv = args->single_newmv[refs[0]];
      *rate_mv = args->single_newmv_rate[refs[0]];
    } else {
      // Fresh single-reference search; cache the result for later modes.
      single_motion_search(cpi, x, bsize, mi_row, mi_col, 0, rate_mv);
      args->single_newmv[refs[0]] = x->best_mv;
      args->single_newmv_rate[refs[0]] = *rate_mv;
    }

    if (x->best_mv.as_int == INVALID_MV) return INT64_MAX;

    frame_mv[refs[0]] = x->best_mv;
    xd->mi[0]->bmi[0].as_mv[0] = x->best_mv;

    // Estimate the rate implications of a new mv but discount this
    // under certain circumstances where we want to help initiate a weak
    // motion field, where the distortion gain for a single block may not
    // be enough to overcome the cost of a new mv.
    if (discount_newmv_test(cpi, this_mode, x->best_mv, mode_mv, refs[0])) {
      *rate_mv = AOMMAX(*rate_mv / NEW_MV_DISCOUNT_FACTOR, 1);
    }
  }

  return 0;
}
7417
// Searches for the best interpolation filter(s) for the current inter block.
// First costs the default filter assignment, then — when the frame-level
// filter is SWITCHABLE and a search is worthwhile — evaluates the candidate
// filter set, keeping the best in mbmi->interp_filters and its prediction in
// the destination buffer. Outputs: *rd (best modeled RD), *switchable_rate
// (rate of signaling the chosen filters), *skip_txfm_sb / *skip_sse_sb
// (model skip decisions). Always returns 0.
int64_t interpolation_filter_search(
    MACROBLOCK *const x, const AV1_COMP *const cpi, BLOCK_SIZE bsize,
    int mi_row, int mi_col, const BUFFER_SET *const tmp_dst,
    BUFFER_SET *const orig_dst,
    InterpFilter (*const single_filter)[TOTAL_REFS_PER_FRAME],
    int64_t *const rd, int *const switchable_rate, int *const skip_txfm_sb,
    int64_t *const skip_sse_sb) {
  const AV1_COMMON *cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int i;
  int tmp_rate;
  int64_t tmp_dist;

  (void)single_filter;

  InterpFilter assign_filter = SWITCHABLE;

  if (cm->interp_filter == SWITCHABLE) {
#if !CONFIG_DUAL_FILTER
    // Without dual filters, try to predict the filter from context when the
    // MV actually needs interpolation.
    assign_filter = av1_is_interp_needed(xd)
                        ? predict_interp_filter(cpi, x, bsize, mi_row, mi_col,
                                                single_filter)
                        : cm->interp_filter;
#endif  // !CONFIG_DUAL_FILTER
  } else {
    assign_filter = cm->interp_filter;
  }

  set_default_interp_filters(mbmi, assign_filter);

  // Baseline cost with the default filter assignment.
  *switchable_rate = av1_get_switchable_rate(cm, x, xd);
  av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
  model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate, &tmp_dist,
                  skip_txfm_sb, skip_sse_sb);
  *rd = RDCOST(x->rdmult, *switchable_rate + tmp_rate, tmp_dist);

  if (assign_filter == SWITCHABLE) {
    // do interp_filter search
    if (av1_is_interp_needed(xd) && av1_is_interp_search_needed(xd)) {
#if CONFIG_DUAL_FILTER
      const int filter_set_size = DUAL_FILTER_SET_SIZE;
#else
      const int filter_set_size = SWITCHABLE_FILTERS;
#endif  // CONFIG_DUAL_FILTER
      // best_in_temp toggles which of tmp_dst/orig_dst currently holds the
      // best prediction; the other buffer is used as scratch.
      int best_in_temp = 0;
      InterpFilters best_filters = mbmi->interp_filters;
      restore_dst_buf(xd, *tmp_dst);

#if CONFIG_DUAL_FILTER  // Speed feature use_fast_interpolation_filter_search
      if (cpi->sf.use_fast_interpolation_filter_search) {
        int tmp_skip_sb = 0;
        int64_t tmp_skip_sse = INT64_MAX;
        int tmp_rs;
        int64_t tmp_rd;

        // default to (R,R): EIGHTTAP_REGULARxEIGHTTAP_REGULAR
        int best_dual_mode = 0;
        // Find best of {R}x{R,Sm,Sh}
        // EIGHTTAP_REGULAR mode is calculated beforehand
        for (i = 1; i < SWITCHABLE_FILTERS; ++i) {
          tmp_skip_sb = 0;
          tmp_skip_sse = INT64_MAX;

          mbmi->interp_filters =
              av1_make_interp_filters(filter_sets[i][0], filter_sets[i][1]);

          tmp_rs = av1_get_switchable_rate(cm, x, xd);
          av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst,
                                        bsize);
          model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
                          &tmp_dist, &tmp_skip_sb, &tmp_skip_sse);
          tmp_rd = RDCOST(x->rdmult, tmp_rs + tmp_rate, tmp_dist);

          if (tmp_rd < *rd) {
            best_dual_mode = i;

            *rd = tmp_rd;
            *switchable_rate = av1_get_switchable_rate(cm, x, xd);
            best_filters = mbmi->interp_filters;
            *skip_txfm_sb = tmp_skip_sb;
            *skip_sse_sb = tmp_skip_sse;
            best_in_temp = !best_in_temp;
            if (best_in_temp) {
              restore_dst_buf(xd, *orig_dst);
            } else {
              restore_dst_buf(xd, *tmp_dst);
            }
          }
        }

        // From best of horizontal EIGHTTAP_REGULAR modes, check vertical modes
        for (i = best_dual_mode + SWITCHABLE_FILTERS; i < filter_set_size;
             i += SWITCHABLE_FILTERS) {
          tmp_skip_sb = 0;
          tmp_skip_sse = INT64_MAX;

          mbmi->interp_filters =
              av1_make_interp_filters(filter_sets[i][0], filter_sets[i][1]);

          tmp_rs = av1_get_switchable_rate(cm, x, xd);
          av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst,
                                        bsize);
          model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
                          &tmp_dist, &tmp_skip_sb, &tmp_skip_sse);
          tmp_rd = RDCOST(x->rdmult, tmp_rs + tmp_rate, tmp_dist);

          if (tmp_rd < *rd) {
            *rd = tmp_rd;
            *switchable_rate = av1_get_switchable_rate(cm, x, xd);
            best_filters = mbmi->interp_filters;
            *skip_txfm_sb = tmp_skip_sb;
            *skip_sse_sb = tmp_skip_sse;
            best_in_temp = !best_in_temp;
            if (best_in_temp) {
              restore_dst_buf(xd, *orig_dst);
            } else {
              restore_dst_buf(xd, *tmp_dst);
            }
          }
        }
      } else {
#endif  // CONFIG_DUAL_FILTER Speed feature use_fast_interpolation_filter_search
        // Exhaustive search over the full filter set.
        // EIGHTTAP_REGULAR mode is calculated beforehand
        for (i = 1; i < filter_set_size; ++i) {
          int tmp_skip_sb = 0;
          int64_t tmp_skip_sse = INT64_MAX;
          int tmp_rs;
          int64_t tmp_rd;
#if CONFIG_DUAL_FILTER
          mbmi->interp_filters =
              av1_make_interp_filters(filter_sets[i][0], filter_sets[i][1]);
#else
          mbmi->interp_filters = av1_broadcast_interp_filter((InterpFilter)i);
#endif  // CONFIG_DUAL_FILTER
          tmp_rs = av1_get_switchable_rate(cm, x, xd);
          av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst,
                                        bsize);
          model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
                          &tmp_dist, &tmp_skip_sb, &tmp_skip_sse);
          tmp_rd = RDCOST(x->rdmult, tmp_rs + tmp_rate, tmp_dist);

          if (tmp_rd < *rd) {
            *rd = tmp_rd;
            *switchable_rate = av1_get_switchable_rate(cm, x, xd);
            best_filters = mbmi->interp_filters;
            *skip_txfm_sb = tmp_skip_sb;
            *skip_sse_sb = tmp_skip_sse;
            best_in_temp = !best_in_temp;
            if (best_in_temp) {
              restore_dst_buf(xd, *orig_dst);
            } else {
              restore_dst_buf(xd, *tmp_dst);
            }
          }
        }
#if CONFIG_DUAL_FILTER  // Speed feature use_fast_interpolation_filter_search
      }
#endif  // CONFIG_DUAL_FILTER Speed feature use_fast_interpolation_filter_search

      // Leave the best prediction in the active dst buffer and commit the
      // winning filters.
      if (best_in_temp) {
        restore_dst_buf(xd, *tmp_dst);
      } else {
        restore_dst_buf(xd, *orig_dst);
      }
      mbmi->interp_filters = best_filters;
    } else {
      assert(mbmi->interp_filters ==
             av1_broadcast_interp_filter(EIGHTTAP_REGULAR));
    }
  }

  return 0;
}
7592
#if CONFIG_DUAL_FILTER
// Zeroes out the signalled interpolation filter in each direction whose MV
// has no subpel component (no interpolation happens there), falling back to
// EIGHTTAP_REGULAR for that direction.
static InterpFilters condition_interp_filters_on_mv(
    InterpFilters interp_filters, const MACROBLOCKD *xd) {
  const int subpel0 = has_subpel_mv_component(xd->mi[0], xd, 0);
  const int subpel1 = has_subpel_mv_component(xd->mi[0], xd, 1);
  const InterpFilter f0 = subpel0
                              ? av1_extract_interp_filter(interp_filters, 0)
                              : EIGHTTAP_REGULAR;
  const InterpFilter f1 = subpel1
                              ? av1_extract_interp_filter(interp_filters, 1)
                              : EIGHTTAP_REGULAR;
  return av1_make_interp_filters(f0, f1);
}
#endif
7605
#if CONFIG_EXT_WARPED_MOTION
// Returns 1 when the current single-reference block carries the global-motion
// zero MV under a non-GLOBALMV mode while the ALL_ZERO context flag is set —
// i.e. a configuration the caller should skip; returns 0 otherwise.
static int handle_zero_mv(const AV1_COMMON *const cm, MACROBLOCK *const x,
                          BLOCK_SIZE bsize, int mi_col, int mi_row) {
  MACROBLOCKD *xd = &x->e_mbd;
  MODE_INFO *mi = xd->mi[0];
  MB_MODE_INFO *mbmi = &mi->mbmi;
  int skip = 0;

  // Handle the special case of 0 MV.
  // Only single-reference inter blocks are considered.
  if (mbmi->ref_frame[0] > INTRA_FRAME && mbmi->ref_frame[1] <= INTRA_FRAME) {
    int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
    int16_t mode_ctx = x->mbmi_ext->mode_context[ref_frame_type];
    if (mode_ctx & (1 << ALL_ZERO_FLAG_OFFSET)) {
      int_mv zeromv;
      const MV_REFERENCE_FRAME ref = mbmi->ref_frame[0];
      // The "zero" MV is the global-motion vector for this reference.
      zeromv.as_int = gm_get_motion_vector(&cm->global_motion[ref],
                                           cm->allow_high_precision_mv, bsize,
                                           mi_col, mi_row, 0
#if CONFIG_AMVR
                                           ,
                                           cm->cur_frame_force_integer_mv
#endif
                                           )
                          .as_int;
      if (mbmi->mv[0].as_int == zeromv.as_int && mbmi->mode != GLOBALMV) {
        skip = 1;
      }
    }
  }
  return skip;
}
#endif  // CONFIG_EXT_WARPED_MOTION
7638
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007639// TODO(afergs): Refactor the MBMI references in here - there's four
7640// TODO(afergs): Refactor optional args - add them to a struct or remove
7641static int64_t motion_mode_rd(
7642 const AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize,
7643 RD_STATS *rd_stats, RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv,
7644 int *disable_skip, int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME], int mi_row,
7645 int mi_col, HandleInterModeArgs *const args, const int64_t ref_best_rd,
7646 const int *refs, int rate_mv,
Wei-Ting Lin85a8f702017-06-22 13:55:15 -07007647 // only used when WARPED_MOTION is on?
Sebastien Alaiwan0bdea0d2017-10-02 15:15:05 +02007648 int_mv *const single_newmv, int rate2_bmc_nocoeff,
Sebastien Alaiwan1bc94fc2017-10-31 10:25:17 +01007649 MB_MODE_INFO *best_bmc_mbmi, int rate_mv_bmc, int rs, int *skip_txfm_sb,
7650 int64_t *skip_sse_sb, BUFFER_SET *orig_dst) {
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007651 const AV1_COMMON *const cm = &cpi->common;
7652 MACROBLOCKD *xd = &x->e_mbd;
7653 MODE_INFO *mi = xd->mi[0];
7654 MB_MODE_INFO *mbmi = &mi->mbmi;
7655 const int is_comp_pred = has_second_ref(mbmi);
7656 const PREDICTION_MODE this_mode = mbmi->mode;
7657
7658 (void)mode_mv;
7659 (void)mi_row;
7660 (void)mi_col;
7661 (void)args;
7662 (void)refs;
7663 (void)rate_mv;
7664 (void)is_comp_pred;
7665 (void)this_mode;
7666
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007667 MOTION_MODE motion_mode, last_motion_mode_allowed;
7668 int rate2_nocoeff = 0, best_xskip, best_disable_skip = 0;
7669 RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
7670 MB_MODE_INFO base_mbmi, best_mbmi;
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007671 uint8_t best_blk_skip[MAX_MB_PLANE][MAX_MIB_SIZE * MAX_MIB_SIZE * 4];
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007672
Yunqing Wang97d6a372017-10-09 14:15:15 -07007673#if CONFIG_EXT_WARPED_MOTION
Yunqing Wang1bc82862017-06-28 15:49:48 -07007674 int pts0[SAMPLES_ARRAY_SIZE], pts_inref0[SAMPLES_ARRAY_SIZE];
Yunqing Wang876a8b02017-11-13 17:13:27 -08007675 int pts_mv0[SAMPLES_ARRAY_SIZE], pts_wm[SAMPLES_ARRAY_SIZE];
Yunqing Wang1bc82862017-06-28 15:49:48 -07007676 int total_samples;
Yunqing Wang3afbf3f2017-11-21 20:16:18 -08007677 int best_cand = -1;
Yunqing Wang1bc82862017-06-28 15:49:48 -07007678#else
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007679 int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
Yunqing Wang97d6a372017-10-09 14:15:15 -07007680#endif // CONFIG_EXT_WARPED_MOTION
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007681
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007682 av1_invalid_rd_stats(&best_rd_stats);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007683
7684 if (cm->interp_filter == SWITCHABLE) rd_stats->rate += rs;
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007685 aom_clear_system_state();
Yunqing Wang97d6a372017-10-09 14:15:15 -07007686#if CONFIG_EXT_WARPED_MOTION
Yunqing Wang1bc82862017-06-28 15:49:48 -07007687 mbmi->num_proj_ref[0] =
Yunqing Wang876a8b02017-11-13 17:13:27 -08007688 findSamples(cm, xd, mi_row, mi_col, pts0, pts_inref0, pts_mv0, pts_wm);
Yunqing Wang1bc82862017-06-28 15:49:48 -07007689 total_samples = mbmi->num_proj_ref[0];
Yunqing Wang3afbf3f2017-11-21 20:16:18 -08007690
7691 // Find a warped neighbor.
7692 int cand;
7693 int best_weight = 0;
7694
7695 // if (this_mode == NEARESTMV)
7696 for (cand = 0; cand < mbmi->num_proj_ref[0]; cand++) {
7697 if (pts_wm[cand * 2 + 1] > best_weight) {
7698 best_weight = pts_wm[cand * 2 + 1];
7699 best_cand = cand;
7700 }
7701 }
7702 mbmi->wm_ctx = best_cand;
7703 best_bmc_mbmi->wm_ctx = mbmi->wm_ctx;
Yunqing Wang1bc82862017-06-28 15:49:48 -07007704#else
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007705 mbmi->num_proj_ref[0] = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref);
Yunqing Wang97d6a372017-10-09 14:15:15 -07007706#endif // CONFIG_EXT_WARPED_MOTION
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007707 best_bmc_mbmi->num_proj_ref[0] = mbmi->num_proj_ref[0];
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007708 rate2_nocoeff = rd_stats->rate;
Sebastien Alaiwan1f56b8e2017-10-31 17:37:16 +01007709 last_motion_mode_allowed = motion_mode_allowed(0, xd->global_motion, xd, mi);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007710 base_mbmi = *mbmi;
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007711
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007712 int64_t best_rd = INT64_MAX;
7713 for (motion_mode = SIMPLE_TRANSLATION;
7714 motion_mode <= last_motion_mode_allowed; motion_mode++) {
7715 int64_t tmp_rd = INT64_MAX;
7716 int tmp_rate;
7717 int64_t tmp_dist;
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007718 int tmp_rate2 =
7719 motion_mode != SIMPLE_TRANSLATION ? rate2_bmc_nocoeff : rate2_nocoeff;
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007720
7721 *mbmi = base_mbmi;
7722 mbmi->motion_mode = motion_mode;
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007723 if (mbmi->motion_mode == OBMC_CAUSAL) {
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007724 *mbmi = *best_bmc_mbmi;
7725 mbmi->motion_mode = OBMC_CAUSAL;
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01007726 if (!is_comp_pred && have_newmv_in_inter_mode(this_mode)) {
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007727 int tmp_rate_mv = 0;
7728
Sebastien Alaiwan0bdea0d2017-10-02 15:15:05 +02007729 single_motion_search(cpi, x, bsize, mi_row, mi_col, 0, &tmp_rate_mv);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007730 mbmi->mv[0].as_int = x->best_mv.as_int;
7731 if (discount_newmv_test(cpi, this_mode, mbmi->mv[0], mode_mv,
7732 refs[0])) {
7733 tmp_rate_mv = AOMMAX((tmp_rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
7734 }
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007735 tmp_rate2 = rate2_bmc_nocoeff - rate_mv_bmc + tmp_rate_mv;
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007736#if CONFIG_DUAL_FILTER
Rupert Swarbrick27e90292017-09-28 17:46:50 +01007737 mbmi->interp_filters =
7738 condition_interp_filters_on_mv(mbmi->interp_filters, xd);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007739#endif // CONFIG_DUAL_FILTER
Jingning Hanc44009c2017-05-06 11:36:49 -07007740 av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007741 } else {
Jingning Hanc44009c2017-05-06 11:36:49 -07007742 av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007743 }
7744 av1_build_obmc_inter_prediction(
7745 cm, xd, mi_row, mi_col, args->above_pred_buf, args->above_pred_stride,
7746 args->left_pred_buf, args->left_pred_stride);
7747 model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
7748 &tmp_dist, skip_txfm_sb, skip_sse_sb);
7749 }
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007750
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007751 if (mbmi->motion_mode == WARPED_CAUSAL) {
Yunqing Wang97d6a372017-10-09 14:15:15 -07007752#if CONFIG_EXT_WARPED_MOTION
Yunqing Wang1bc82862017-06-28 15:49:48 -07007753 int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
Yunqing Wang97d6a372017-10-09 14:15:15 -07007754#endif // CONFIG_EXT_WARPED_MOTION
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007755 *mbmi = *best_bmc_mbmi;
7756 mbmi->motion_mode = WARPED_CAUSAL;
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007757 mbmi->wm_params[0].wmtype = DEFAULT_WMTYPE;
Rupert Swarbrick27e90292017-09-28 17:46:50 +01007758 mbmi->interp_filters = av1_broadcast_interp_filter(
7759 av1_unswitchable_filter(cm->interp_filter));
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007760
Yunqing Wang97d6a372017-10-09 14:15:15 -07007761#if CONFIG_EXT_WARPED_MOTION
Yunqing Wang3afbf3f2017-11-21 20:16:18 -08007762 if (this_mode == NEARESTMV && best_cand != -1) {
Yunqing Wang876a8b02017-11-13 17:13:27 -08007763 MODE_INFO *best_mi = xd->mi[pts_wm[2 * best_cand]];
7764 assert(best_mi->mbmi.motion_mode == WARPED_CAUSAL);
7765 mbmi->wm_params[0] = best_mi->mbmi.wm_params[0];
7766
7767 // Handle the special case of 0 MV.
7768 if (handle_zero_mv(cm, x, bsize, mi_col, mi_row)) continue;
Yunqing Wang68f3ccd2017-05-23 14:43:54 -07007769
Jingning Hanc44009c2017-05-06 11:36:49 -07007770 av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007771 model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
7772 &tmp_dist, skip_txfm_sb, skip_sse_sb);
7773 } else {
Yunqing Wang876a8b02017-11-13 17:13:27 -08007774 memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
7775 memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
7776 // Rank the samples by motion vector difference
7777 if (mbmi->num_proj_ref[0] > 1) {
7778 mbmi->num_proj_ref[0] = sortSamples(pts_mv0, &mbmi->mv[0].as_mv, pts,
7779 pts_inref, mbmi->num_proj_ref[0]);
7780 best_bmc_mbmi->num_proj_ref[0] = mbmi->num_proj_ref[0];
7781 }
7782#endif // CONFIG_EXT_WARPED_MOTION
7783
7784 if (!find_projection(mbmi->num_proj_ref[0], pts, pts_inref, bsize,
7785 mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col,
7786 &mbmi->wm_params[0], mi_row, mi_col)) {
7787 // Refine MV for NEWMV mode
7788 if (!is_comp_pred && have_newmv_in_inter_mode(this_mode)) {
7789 int tmp_rate_mv = 0;
7790 const int_mv mv0 = mbmi->mv[0];
Luc Trudeauf3bf8b12017-12-08 14:38:41 -05007791 const WarpedMotionParams wm_params0 = mbmi->wm_params[0];
Yunqing Wang876a8b02017-11-13 17:13:27 -08007792#if CONFIG_EXT_WARPED_MOTION
7793 int num_proj_ref0 = mbmi->num_proj_ref[0];
7794
7795 // Refine MV in a small range.
7796 av1_refine_warped_mv(cpi, x, bsize, mi_row, mi_col, pts0,
7797 pts_inref0, pts_mv0, total_samples);
7798#else
7799 // Refine MV in a small range.
7800 av1_refine_warped_mv(cpi, x, bsize, mi_row, mi_col, pts, pts_inref);
7801#endif // CONFIG_EXT_WARPED_MOTION
7802
7803 // Keep the refined MV and WM parameters.
7804 if (mv0.as_int != mbmi->mv[0].as_int) {
7805 const int ref = refs[0];
7806 const MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
7807
7808 tmp_rate_mv =
7809 av1_mv_bit_cost(&mbmi->mv[0].as_mv, &ref_mv, x->nmvjointcost,
7810 x->mvcost, MV_COST_WEIGHT);
7811
7812 if (cpi->sf.adaptive_motion_search)
7813 x->pred_mv[ref] = mbmi->mv[0].as_mv;
7814
7815 single_newmv[ref] = mbmi->mv[0];
7816
7817 if (discount_newmv_test(cpi, this_mode, mbmi->mv[0], mode_mv,
7818 refs[0])) {
7819 tmp_rate_mv = AOMMAX((tmp_rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
7820 }
7821#if CONFIG_EXT_WARPED_MOTION
7822 best_bmc_mbmi->num_proj_ref[0] = mbmi->num_proj_ref[0];
7823#endif // CONFIG_EXT_WARPED_MOTION
7824 tmp_rate2 = rate2_bmc_nocoeff - rate_mv_bmc + tmp_rate_mv;
7825#if CONFIG_DUAL_FILTER
7826 mbmi->interp_filters =
7827 condition_interp_filters_on_mv(mbmi->interp_filters, xd);
7828#endif // CONFIG_DUAL_FILTER
7829 } else {
7830 // Restore the old MV and WM parameters.
7831 mbmi->mv[0] = mv0;
7832 mbmi->wm_params[0] = wm_params0;
7833#if CONFIG_EXT_WARPED_MOTION
7834 mbmi->num_proj_ref[0] = num_proj_ref0;
7835#endif // CONFIG_EXT_WARPED_MOTION
7836 }
7837 }
7838
7839#if CONFIG_EXT_WARPED_MOTION
7840 // Handle the special case of 0 MV.
7841 if (handle_zero_mv(cm, x, bsize, mi_col, mi_row)) continue;
7842#endif // CONFIG_EXT_WARPED_MOTION
7843
7844 av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
7845 model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
7846 &tmp_dist, skip_txfm_sb, skip_sse_sb);
7847 } else {
7848 continue;
7849 }
7850#if CONFIG_EXT_WARPED_MOTION
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007851 }
Yunqing Wang876a8b02017-11-13 17:13:27 -08007852#endif // CONFIG_EXT_WARPED_MOTION
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007853 }
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007854 x->skip = 0;
7855
7856 rd_stats->dist = 0;
7857 rd_stats->sse = 0;
7858 rd_stats->skip = 1;
7859 rd_stats->rate = tmp_rate2;
7860 if (last_motion_mode_allowed > SIMPLE_TRANSLATION) {
Yunqing Wang3afbf3f2017-11-21 20:16:18 -08007861 if (last_motion_mode_allowed == WARPED_CAUSAL) {
7862#if CONFIG_EXT_WARPED_MOTION
7863 int wm_ctx = 0;
7864 if (mbmi->wm_ctx != -1) {
7865 wm_ctx = 1;
7866 if (mbmi->mode == NEARESTMV) wm_ctx = 2;
7867 }
7868
7869 rd_stats->rate += x->motion_mode_cost[wm_ctx][bsize][mbmi->motion_mode];
7870#else
Yue Chenbdc8dab2017-07-26 12:05:47 -07007871 rd_stats->rate += x->motion_mode_cost[bsize][mbmi->motion_mode];
Yunqing Wang3afbf3f2017-11-21 20:16:18 -08007872#endif // CONFIG_EXT_WARPED_MOTION
7873 } else {
Yue Chenb23d00a2017-07-28 17:01:21 -07007874 rd_stats->rate += x->motion_mode_cost1[bsize][mbmi->motion_mode];
Yunqing Wang3afbf3f2017-11-21 20:16:18 -08007875 }
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007876 }
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007877 if (mbmi->motion_mode == WARPED_CAUSAL) {
7878 rd_stats->rate -= rs;
7879 }
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007880 if (!*skip_txfm_sb) {
7881 int64_t rdcosty = INT64_MAX;
7882 int is_cost_valid_uv = 0;
7883
7884 // cost and distortion
7885 av1_subtract_plane(x, bsize, 0);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007886 if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) {
Debargha Mukherjee51666862017-10-24 14:29:13 -07007887 // Motion mode
Yue Chen25dc0702017-10-18 23:36:06 -07007888 select_tx_type_yrd(cpi, x, rd_stats_y, bsize, mi_row, mi_col,
7889 ref_best_rd);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007890 } else {
7891 int idx, idy;
7892 super_block_yrd(cpi, x, rd_stats_y, bsize, ref_best_rd);
7893 for (idy = 0; idy < xd->n8_h; ++idy)
7894 for (idx = 0; idx < xd->n8_w; ++idx)
7895 mbmi->inter_tx_size[idy][idx] = mbmi->tx_size;
7896 memset(x->blk_skip[0], rd_stats_y->skip,
7897 sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
7898 }
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007899
7900 if (rd_stats_y->rate == INT_MAX) {
7901 av1_invalid_rd_stats(rd_stats);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007902 if (mbmi->motion_mode != SIMPLE_TRANSLATION) {
7903 continue;
7904 } else {
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007905 restore_dst_buf(xd, *orig_dst);
7906 return INT64_MAX;
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007907 }
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007908 }
7909
7910 av1_merge_rd_stats(rd_stats, rd_stats_y);
7911
Urvang Joshi70006e42017-06-14 16:08:55 -07007912 rdcosty = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
7913 rdcosty = AOMMIN(rdcosty, RDCOST(x->rdmult, 0, rd_stats->sse));
Sebastien Alaiwanfb838772017-10-24 12:02:54 +02007914 /* clang-format off */
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007915 is_cost_valid_uv =
Debargha Mukherjee51666862017-10-24 14:29:13 -07007916 inter_block_uvrd(cpi, x, rd_stats_uv, bsize, ref_best_rd - rdcosty,
7917 0);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007918 if (!is_cost_valid_uv) {
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007919 continue;
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007920 }
7921 /* clang-format on */
7922 av1_merge_rd_stats(rd_stats, rd_stats_uv);
7923#if CONFIG_RD_DEBUG
7924 // record transform block coefficient cost
7925 // TODO(angiebird): So far rd_debug tool only detects discrepancy of
7926 // coefficient cost. Therefore, it is fine to copy rd_stats into mbmi
7927 // here because we already collect the coefficient cost. Move this part to
7928 // other place when we need to compare non-coefficient cost.
7929 mbmi->rd_stats = *rd_stats;
7930#endif // CONFIG_RD_DEBUG
Zoe Liu1eed2df2017-10-16 17:13:15 -07007931 const int skip_ctx = av1_get_skip_context(xd);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007932 if (rd_stats->skip) {
7933 rd_stats->rate -= rd_stats_uv->rate + rd_stats_y->rate;
7934 rd_stats_y->rate = 0;
7935 rd_stats_uv->rate = 0;
Zoe Liu1eed2df2017-10-16 17:13:15 -07007936 rd_stats->rate += x->skip_cost[skip_ctx][1];
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007937 mbmi->skip = 0;
7938 // here mbmi->skip temporarily plays a role as what this_skip2 does
7939 } else if (!xd->lossless[mbmi->segment_id] &&
Urvang Joshi70006e42017-06-14 16:08:55 -07007940 (RDCOST(x->rdmult,
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007941 rd_stats_y->rate + rd_stats_uv->rate +
Zoe Liu1eed2df2017-10-16 17:13:15 -07007942 x->skip_cost[skip_ctx][0],
7943 rd_stats->dist) >= RDCOST(x->rdmult,
7944 x->skip_cost[skip_ctx][1],
7945 rd_stats->sse))) {
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007946 rd_stats->rate -= rd_stats_uv->rate + rd_stats_y->rate;
Zoe Liu1eed2df2017-10-16 17:13:15 -07007947 rd_stats->rate += x->skip_cost[skip_ctx][1];
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007948 rd_stats->dist = rd_stats->sse;
7949 rd_stats_y->rate = 0;
7950 rd_stats_uv->rate = 0;
7951 mbmi->skip = 1;
7952 } else {
Zoe Liu1eed2df2017-10-16 17:13:15 -07007953 rd_stats->rate += x->skip_cost[skip_ctx][0];
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007954 mbmi->skip = 0;
7955 }
7956 *disable_skip = 0;
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007957 } else {
7958 x->skip = 1;
7959 *disable_skip = 1;
7960 mbmi->tx_size = tx_size_from_tx_mode(bsize, cm->tx_mode, 1);
7961
Sebastien Alaiwan1bc94fc2017-10-31 10:25:17 +01007962 // The cost of skip bit needs to be added.
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007963 mbmi->skip = 0;
Zoe Liu1eed2df2017-10-16 17:13:15 -07007964 rd_stats->rate += x->skip_cost[av1_get_skip_context(xd)][1];
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007965
7966 rd_stats->dist = *skip_sse_sb;
7967 rd_stats->sse = *skip_sse_sb;
7968 rd_stats_y->rate = 0;
7969 rd_stats_uv->rate = 0;
7970 rd_stats->skip = 1;
7971 }
7972
Sarah Parker2b9ec2e2017-10-30 17:34:08 -07007973 if (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV) {
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007974 if (is_nontrans_global_motion(xd)) {
7975 rd_stats->rate -= rs;
Rupert Swarbrick27e90292017-09-28 17:46:50 +01007976 mbmi->interp_filters = av1_broadcast_interp_filter(
7977 av1_unswitchable_filter(cm->interp_filter));
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007978 }
7979 }
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007980
Urvang Joshi70006e42017-06-14 16:08:55 -07007981 tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007982 if (mbmi->motion_mode == SIMPLE_TRANSLATION || (tmp_rd < best_rd)) {
7983 best_mbmi = *mbmi;
7984 best_rd = tmp_rd;
7985 best_rd_stats = *rd_stats;
7986 best_rd_stats_y = *rd_stats_y;
7987 best_rd_stats_uv = *rd_stats_uv;
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007988 for (int i = 0; i < MAX_MB_PLANE; ++i)
7989 memcpy(best_blk_skip[i], x->blk_skip[i],
7990 sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007991 best_xskip = x->skip;
7992 best_disable_skip = *disable_skip;
7993 }
7994 }
7995
7996 if (best_rd == INT64_MAX) {
7997 av1_invalid_rd_stats(rd_stats);
7998 restore_dst_buf(xd, *orig_dst);
7999 return INT64_MAX;
8000 }
8001 *mbmi = best_mbmi;
8002 *rd_stats = best_rd_stats;
8003 *rd_stats_y = best_rd_stats_y;
8004 *rd_stats_uv = best_rd_stats_uv;
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08008005 for (int i = 0; i < MAX_MB_PLANE; ++i)
8006 memcpy(x->blk_skip[i], best_blk_skip[i],
8007 sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08008008 x->skip = best_xskip;
8009 *disable_skip = best_disable_skip;
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08008010
8011 restore_dst_buf(xd, *orig_dst);
8012 return 0;
8013}
8014
Zoe Liuf40a9572017-10-13 12:37:19 -07008015#if CONFIG_EXT_SKIP
8016static int64_t skip_mode_rd(const AV1_COMP *const cpi, MACROBLOCK *const x,
8017 BLOCK_SIZE bsize, int mi_row, int mi_col,
8018 BUFFER_SET *const orig_dst) {
8019 const AV1_COMMON *cm = &cpi->common;
8020 MACROBLOCKD *const xd = &x->e_mbd;
8021 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
8022
Zoe Liuf40a9572017-10-13 12:37:19 -07008023 av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
8024
8025 int64_t total_sse = 0;
Zoe Liuf40a9572017-10-13 12:37:19 -07008026 for (int plane = 0; plane < MAX_MB_PLANE; ++plane) {
8027 const struct macroblock_plane *const p = &x->plane[plane];
8028 const struct macroblockd_plane *const pd = &xd->plane[plane];
8029 const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
8030 const int bw = block_size_wide[plane_bsize];
8031 const int bh = block_size_high[plane_bsize];
8032
8033 av1_subtract_plane(x, bsize, plane);
8034 int64_t sse = aom_sum_squares_2d_i16(p->src_diff, bw, bw, bh);
8035 sse = sse << 4;
8036 total_sse += sse;
8037 }
8038 x->skip_mode_dist = x->skip_mode_sse = total_sse;
8039 x->skip_mode_rate = 0;
8040 x->skip_mode_rdcost = RDCOST(x->rdmult, x->skip_mode_rate, x->skip_mode_dist);
8041
8042 // Save the ref frames / motion vectors
8043 x->skip_mode_ref_frame[0] = mbmi->ref_frame[0];
8044 x->skip_mode_ref_frame[1] = mbmi->ref_frame[1];
8045 x->skip_mode_mv[0].as_int = mbmi->mv[0].as_int;
8046 x->skip_mode_mv[1].as_int = mbmi->mv[1].as_int;
8047
8048 // Save the mode index
8049 x->skip_mode_index = x->skip_mode_index_candidate;
8050
8051 restore_dst_buf(xd, *orig_dst);
8052 return 0;
8053}
8054#endif // CONFIG_EXT_SKIP
8055
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01008056static int64_t handle_inter_mode(
8057 const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
8058 RD_STATS *rd_stats, RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv,
8059 int *disable_skip, int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME], int mi_row,
8060 int mi_col, HandleInterModeArgs *args, const int64_t ref_best_rd) {
Urvang Joshi52648442016-10-13 17:27:51 -07008061 const AV1_COMMON *cm = &cpi->common;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008062 MACROBLOCKD *xd = &x->e_mbd;
Sarah Parker19234cc2017-03-10 16:43:25 -08008063 MODE_INFO *mi = xd->mi[0];
8064 MB_MODE_INFO *mbmi = &mi->mbmi;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008065 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
8066 const int is_comp_pred = has_second_ref(mbmi);
8067 const int this_mode = mbmi->mode;
8068 int_mv *frame_mv = mode_mv[this_mode];
8069 int i;
8070 int refs[2] = { mbmi->ref_frame[0],
8071 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
8072 int_mv cur_mv[2];
8073 int rate_mv = 0;
Angie Chiang75c22092016-10-25 12:19:16 -07008074 int pred_exists = 1;
Jingning Hanae5cfde2016-11-30 12:01:44 -08008075 const int bw = block_size_wide[bsize];
Yaowu Xuc27fc142016-08-22 16:08:15 -07008076 int_mv single_newmv[TOTAL_REFS_PER_FRAME];
Yue Chenb23d00a2017-07-28 17:01:21 -07008077 const int *const interintra_mode_cost =
8078 x->interintra_mode_cost[size_group_lookup[bsize]];
Yaowu Xuc27fc142016-08-22 16:08:15 -07008079 const int is_comp_interintra_pred = (mbmi->ref_frame[1] == INTRA_FRAME);
Yaowu Xuf883b422016-08-30 14:01:10 -07008080 uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02008081#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07008082 DECLARE_ALIGNED(16, uint8_t, tmp_buf_[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
8083#else
8084 DECLARE_ALIGNED(16, uint8_t, tmp_buf_[MAX_MB_PLANE * MAX_SB_SQUARE]);
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02008085#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07008086 uint8_t *tmp_buf;
8087
Yaowu Xuc27fc142016-08-22 16:08:15 -07008088 int rate2_bmc_nocoeff;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008089 MB_MODE_INFO best_bmc_mbmi;
Yue Chen69f18e12016-09-08 14:48:15 -07008090 int rate_mv_bmc;
Angie Chiang75c22092016-10-25 12:19:16 -07008091 int64_t rd = INT64_MAX;
David Barkerac37fa32016-12-02 12:30:21 +00008092 BUFFER_SET orig_dst, tmp_dst;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008093 int rs = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008094
8095 int skip_txfm_sb = 0;
8096 int64_t skip_sse_sb = INT64_MAX;
Yaowu Xub0d0d002016-11-22 09:26:43 -08008097 int16_t mode_ctx;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008098
Yue Chen5e606542017-05-24 17:03:17 -07008099 int compmode_interintra_cost = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008100 mbmi->use_wedge_interintra = 0;
Yue Chen5e606542017-05-24 17:03:17 -07008101 int compmode_interinter_cost = 0;
Sarah Parker2d0e9b72017-05-04 01:34:16 +00008102 mbmi->interinter_compound_type = COMPOUND_AVERAGE;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008103
Debargha Mukherjee9e2c7a62017-05-23 21:18:42 -07008104 if (!cm->allow_interintra_compound && is_comp_interintra_pred)
8105 return INT64_MAX;
Debargha Mukherjee9e2c7a62017-05-23 21:18:42 -07008106
Yaowu Xuc27fc142016-08-22 16:08:15 -07008107 // is_comp_interintra_pred implies !is_comp_pred
8108 assert(!is_comp_interintra_pred || (!is_comp_pred));
8109 // is_comp_interintra_pred implies is_interintra_allowed(mbmi->sb_type)
8110 assert(!is_comp_interintra_pred || is_interintra_allowed(mbmi));
Yaowu Xuc27fc142016-08-22 16:08:15 -07008111
Yaowu Xuc27fc142016-08-22 16:08:15 -07008112 if (is_comp_pred)
8113 mode_ctx = mbmi_ext->compound_mode_context[refs[0]];
8114 else
Yaowu Xuf883b422016-08-30 14:01:10 -07008115 mode_ctx = av1_mode_context_analyzer(mbmi_ext->mode_context,
8116 mbmi->ref_frame, bsize, -1);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008117
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02008118#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07008119 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
8120 tmp_buf = CONVERT_TO_BYTEPTR(tmp_buf_);
8121 else
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02008122#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07008123 tmp_buf = tmp_buf_;
David Barkerb8069f92016-11-18 14:49:56 +00008124 // Make sure that we didn't leave the plane destination buffers set
8125 // to tmp_buf at the end of the last iteration
8126 assert(xd->plane[0].dst.buf != tmp_buf);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008127
Yue Chen69f18e12016-09-08 14:48:15 -07008128 mbmi->num_proj_ref[0] = 0;
8129 mbmi->num_proj_ref[1] = 0;
Yue Chen69f18e12016-09-08 14:48:15 -07008130
Yaowu Xuc27fc142016-08-22 16:08:15 -07008131 if (is_comp_pred) {
8132 if (frame_mv[refs[0]].as_int == INVALID_MV ||
8133 frame_mv[refs[1]].as_int == INVALID_MV)
8134 return INT64_MAX;
8135 }
8136
Yue Chene9638cc2016-10-10 12:37:54 -07008137 mbmi->motion_mode = SIMPLE_TRANSLATION;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008138 if (have_newmv_in_inter_mode(this_mode)) {
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01008139 const int64_t ret_val = handle_newmv(cpi, x, bsize, mode_mv, mi_row, mi_col,
8140 &rate_mv, single_newmv, args);
Fergus Simpson45509632017-02-22 15:30:50 -08008141 if (ret_val != 0)
8142 return ret_val;
8143 else
8144 rd_stats->rate += rate_mv;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008145 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07008146 for (i = 0; i < is_comp_pred + 1; ++i) {
8147 cur_mv[i] = frame_mv[refs[i]];
Zoe Liued29ea22017-04-20 16:48:15 -07008148 // Clip "next_nearest" so that it does not extend to far out of image
8149 if (this_mode != NEWMV) clamp_mv2(&cur_mv[i].as_mv, xd);
Alex Converse0fa0f422017-04-24 12:51:14 -07008150 if (mv_check_bounds(&x->mv_limits, &cur_mv[i].as_mv)) return INT64_MAX;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008151 mbmi->mv[i].as_int = cur_mv[i].as_int;
8152 }
8153
Cheng Chen0a7f2f52017-10-10 15:16:09 -07008154#if CONFIG_JNT_COMP
Cheng Chenb09e55c2017-11-10 12:09:19 -08008155 if (is_comp_pred) {
Cheng Chen2ef24ea2017-11-29 12:22:24 -08008156 if (mbmi->compound_idx == 0) {
Cheng Chen5a881722017-11-30 17:05:10 -08008157 int masked_compound_used = is_any_masked_compound_used(bsize);
8158 masked_compound_used = masked_compound_used && cm->allow_masked_compound;
8159
8160 if (masked_compound_used) {
8161 const int comp_group_idx_ctx = get_comp_group_idx_context(xd);
8162 rd_stats->rate += x->comp_group_idx_cost[comp_group_idx_ctx][0];
8163 }
Cheng Chen2ef24ea2017-11-29 12:22:24 -08008164
8165 const int comp_index_ctx = get_comp_index_context(cm, xd);
Cheng Chen5a881722017-11-30 17:05:10 -08008166 rd_stats->rate += x->comp_idx_cost[comp_index_ctx][mbmi->compound_idx];
Cheng Chen2ef24ea2017-11-29 12:22:24 -08008167 }
Cheng Chen0a7f2f52017-10-10 15:16:09 -07008168 }
8169#endif // CONFIG_JNT_COMP
8170
Sebastien Alaiwan0bdea0d2017-10-02 15:15:05 +02008171 if (this_mode == NEAREST_NEARESTMV) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07008172 if (mbmi_ext->ref_mv_count[ref_frame_type] > 0) {
8173 cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv;
8174 cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
8175
8176 for (i = 0; i < 2; ++i) {
8177 clamp_mv2(&cur_mv[i].as_mv, xd);
Alex Converse0fa0f422017-04-24 12:51:14 -07008178 if (mv_check_bounds(&x->mv_limits, &cur_mv[i].as_mv)) return INT64_MAX;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008179 mbmi->mv[i].as_int = cur_mv[i].as_int;
8180 }
8181 }
8182 }
8183
Yaowu Xuc27fc142016-08-22 16:08:15 -07008184 if (mbmi_ext->ref_mv_count[ref_frame_type] > 0) {
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01008185 if (this_mode == NEAREST_NEWMV) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07008186 cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv;
8187
RogerZhou3b635242017-09-19 10:06:46 -07008188#if CONFIG_AMVR
8189 lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv,
RogerZhou10a03802017-10-26 11:49:48 -07008190 cm->cur_frame_force_integer_mv);
RogerZhou3b635242017-09-19 10:06:46 -07008191#else
Yaowu Xuc27fc142016-08-22 16:08:15 -07008192 lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv);
RogerZhou3b635242017-09-19 10:06:46 -07008193#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -07008194 clamp_mv2(&cur_mv[0].as_mv, xd);
Alex Converse0fa0f422017-04-24 12:51:14 -07008195 if (mv_check_bounds(&x->mv_limits, &cur_mv[0].as_mv)) return INT64_MAX;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008196 mbmi->mv[0].as_int = cur_mv[0].as_int;
8197 }
8198
Debargha Mukherjeebb6e1342017-04-17 16:05:04 -07008199 if (this_mode == NEW_NEARESTMV) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07008200 cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
8201
RogerZhou3b635242017-09-19 10:06:46 -07008202#if CONFIG_AMVR
8203 lower_mv_precision(&cur_mv[1].as_mv, cm->allow_high_precision_mv,
RogerZhou10a03802017-10-26 11:49:48 -07008204 cm->cur_frame_force_integer_mv);
RogerZhou3b635242017-09-19 10:06:46 -07008205#else
Yaowu Xuc27fc142016-08-22 16:08:15 -07008206 lower_mv_precision(&cur_mv[1].as_mv, cm->allow_high_precision_mv);
RogerZhou3b635242017-09-19 10:06:46 -07008207#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -07008208 clamp_mv2(&cur_mv[1].as_mv, xd);
Alex Converse0fa0f422017-04-24 12:51:14 -07008209 if (mv_check_bounds(&x->mv_limits, &cur_mv[1].as_mv)) return INT64_MAX;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008210 mbmi->mv[1].as_int = cur_mv[1].as_int;
8211 }
8212 }
8213
8214 if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
David Barker404b2e82017-03-27 13:07:47 +01008215 int ref_mv_idx = mbmi->ref_mv_idx + 1;
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01008216 if (this_mode == NEAR_NEWMV || this_mode == NEAR_NEARMV) {
David Barker404b2e82017-03-27 13:07:47 +01008217 cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008218
RogerZhou3b635242017-09-19 10:06:46 -07008219#if CONFIG_AMVR
8220 lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv,
RogerZhou10a03802017-10-26 11:49:48 -07008221 cm->cur_frame_force_integer_mv);
RogerZhou3b635242017-09-19 10:06:46 -07008222#else
Yaowu Xuc27fc142016-08-22 16:08:15 -07008223 lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv);
RogerZhou3b635242017-09-19 10:06:46 -07008224#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -07008225 clamp_mv2(&cur_mv[0].as_mv, xd);
Alex Converse0fa0f422017-04-24 12:51:14 -07008226 if (mv_check_bounds(&x->mv_limits, &cur_mv[0].as_mv)) return INT64_MAX;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008227 mbmi->mv[0].as_int = cur_mv[0].as_int;
8228 }
8229
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01008230 if (this_mode == NEW_NEARMV || this_mode == NEAR_NEARMV) {
8231 cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008232
RogerZhou3b635242017-09-19 10:06:46 -07008233#if CONFIG_AMVR
8234 lower_mv_precision(&cur_mv[1].as_mv, cm->allow_high_precision_mv,
RogerZhou10a03802017-10-26 11:49:48 -07008235 cm->cur_frame_force_integer_mv);
RogerZhou3b635242017-09-19 10:06:46 -07008236#else
Yaowu Xuc27fc142016-08-22 16:08:15 -07008237 lower_mv_precision(&cur_mv[1].as_mv, cm->allow_high_precision_mv);
RogerZhou3b635242017-09-19 10:06:46 -07008238#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -07008239 clamp_mv2(&cur_mv[1].as_mv, xd);
Alex Converse0fa0f422017-04-24 12:51:14 -07008240 if (mv_check_bounds(&x->mv_limits, &cur_mv[1].as_mv)) return INT64_MAX;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008241 mbmi->mv[1].as_int = cur_mv[1].as_int;
8242 }
8243 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07008244
8245 // do first prediction into the destination buffer. Do the next
8246 // prediction into a temporary buffer. Then keep track of which one
8247 // of these currently holds the best predictor, and use the other
8248 // one for future predictions. In the end, copy from tmp_buf to
8249 // dst if necessary.
8250 for (i = 0; i < MAX_MB_PLANE; i++) {
David Barkerac37fa32016-12-02 12:30:21 +00008251 tmp_dst.plane[i] = tmp_buf + i * MAX_SB_SQUARE;
8252 tmp_dst.stride[i] = MAX_SB_SIZE;
Angie Chiang75c22092016-10-25 12:19:16 -07008253 }
8254 for (i = 0; i < MAX_MB_PLANE; i++) {
David Barkerac37fa32016-12-02 12:30:21 +00008255 orig_dst.plane[i] = xd->plane[i].dst.buf;
8256 orig_dst.stride[i] = xd->plane[i].dst.stride;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008257 }
8258
8259 // We don't include the cost of the second reference here, because there
8260 // are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
8261 // words if you present them in that order, the second one is always known
8262 // if the first is known.
8263 //
8264 // Under some circumstances we discount the cost of new mv mode to encourage
8265 // initiation of a motion field.
8266 if (discount_newmv_test(cpi, this_mode, frame_mv[refs[0]], mode_mv,
8267 refs[0])) {
Yue Chenb23d00a2017-07-28 17:01:21 -07008268 rd_stats->rate += AOMMIN(
8269 cost_mv_ref(x, this_mode, mode_ctx),
8270 cost_mv_ref(x, is_comp_pred ? NEAREST_NEARESTMV : NEARESTMV, mode_ctx));
Yaowu Xuc27fc142016-08-22 16:08:15 -07008271 } else {
Yue Chenb23d00a2017-07-28 17:01:21 -07008272 rd_stats->rate += cost_mv_ref(x, this_mode, mode_ctx);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008273 }
8274
Urvang Joshi70006e42017-06-14 16:08:55 -07008275 if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd &&
Sebastien Alaiwan0bdea0d2017-10-02 15:15:05 +02008276 mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV)
Yaowu Xuc27fc142016-08-22 16:08:15 -07008277 return INT64_MAX;
8278
Fergus Simpsonde18e2b2017-03-01 20:12:34 -08008279 int64_t ret_val = interpolation_filter_search(
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -08008280 x, cpi, bsize, mi_row, mi_col, &tmp_dst, &orig_dst, args->single_filter,
8281 &rd, &rs, &skip_txfm_sb, &skip_sse_sb);
Fergus Simpsonde18e2b2017-03-01 20:12:34 -08008282 if (ret_val != 0) return ret_val;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008283
Yaowu Xuc27fc142016-08-22 16:08:15 -07008284 best_bmc_mbmi = *mbmi;
Angie Chiang76159122016-11-09 12:13:22 -08008285 rate2_bmc_nocoeff = rd_stats->rate;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008286 if (cm->interp_filter == SWITCHABLE) rate2_bmc_nocoeff += rs;
Yue Chen69f18e12016-09-08 14:48:15 -07008287 rate_mv_bmc = rate_mv;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008288
Cheng Chen3392c432017-10-23 15:58:23 -07008289#if CONFIG_JNT_COMP
8290 if (is_comp_pred && mbmi->compound_idx)
8291#else
Yushin Cho67dda512017-07-25 14:58:22 -07008292 if (is_comp_pred)
Cheng Chen3392c432017-10-23 15:58:23 -07008293#endif // CONFIG_JNT_COMP
Yushin Cho67dda512017-07-25 14:58:22 -07008294 {
Urvang Joshi368fbc92016-10-17 16:31:34 -07008295 int rate_sum, rs2;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008296 int64_t dist_sum;
Sarah Parker6fdc8532016-11-16 17:47:13 -08008297 int64_t best_rd_compound = INT64_MAX, best_rd_cur = INT64_MAX;
8298 INTERINTER_COMPOUND_DATA best_compound_data;
8299 int_mv best_mv[2];
8300 int best_tmp_rate_mv = rate_mv;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008301 int tmp_skip_txfm_sb;
8302 int64_t tmp_skip_sse_sb;
Yaowu Xu5e8007f2017-06-28 12:39:18 -07008303 DECLARE_ALIGNED(16, uint8_t, pred0[2 * MAX_SB_SQUARE]);
8304 DECLARE_ALIGNED(16, uint8_t, pred1[2 * MAX_SB_SQUARE]);
Sarah Parker6fdc8532016-11-16 17:47:13 -08008305 uint8_t *preds0[1] = { pred0 };
8306 uint8_t *preds1[1] = { pred1 };
8307 int strides[1] = { bw };
Sarah Parker2e604882017-01-17 17:31:25 -08008308 int tmp_rate_mv;
Sarah Parker42d96102017-01-31 21:05:27 -08008309 int masked_compound_used = is_any_masked_compound_used(bsize);
Debargha Mukherjee9e2c7a62017-05-23 21:18:42 -07008310 masked_compound_used = masked_compound_used && cm->allow_masked_compound;
Sarah Parker6fdc8532016-11-16 17:47:13 -08008311 COMPOUND_TYPE cur_type;
Yue Chena4245512017-08-31 11:58:08 -07008312 int best_compmode_interinter_cost = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008313
Sarah Parker6fdc8532016-11-16 17:47:13 -08008314 best_mv[0].as_int = cur_mv[0].as_int;
8315 best_mv[1].as_int = cur_mv[1].as_int;
Sarah Parker2d0e9b72017-05-04 01:34:16 +00008316 memset(&best_compound_data, 0, sizeof(best_compound_data));
Sarah Parker2d0e9b72017-05-04 01:34:16 +00008317 uint8_t tmp_mask_buf[2 * MAX_SB_SQUARE];
8318 best_compound_data.seg_mask = tmp_mask_buf;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008319
Sarah Parker42d96102017-01-31 21:05:27 -08008320 if (masked_compound_used) {
Sarah Parker6fdc8532016-11-16 17:47:13 -08008321 // get inter predictors to use for masked compound modes
Yaowu Xuf883b422016-08-30 14:01:10 -07008322 av1_build_inter_predictors_for_planes_single_buf(
Yaowu Xuc27fc142016-08-22 16:08:15 -07008323 xd, bsize, 0, 0, mi_row, mi_col, 0, preds0, strides);
Yaowu Xuf883b422016-08-30 14:01:10 -07008324 av1_build_inter_predictors_for_planes_single_buf(
Yaowu Xuc27fc142016-08-22 16:08:15 -07008325 xd, bsize, 0, 0, mi_row, mi_col, 1, preds1, strides);
Sarah Parker6fdc8532016-11-16 17:47:13 -08008326 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07008327
Sarah Parker6fdc8532016-11-16 17:47:13 -08008328 for (cur_type = COMPOUND_AVERAGE; cur_type < COMPOUND_TYPES; cur_type++) {
Debargha Mukherjee9e2c7a62017-05-23 21:18:42 -07008329 if (cur_type != COMPOUND_AVERAGE && !masked_compound_used) break;
Yue Chene2518252017-06-05 12:36:46 -07008330 if (!is_interinter_compound_used(cur_type, bsize)) continue;
Sarah Parker2e604882017-01-17 17:31:25 -08008331 tmp_rate_mv = rate_mv;
Sarah Parker6fdc8532016-11-16 17:47:13 -08008332 best_rd_cur = INT64_MAX;
Sarah Parker2d0e9b72017-05-04 01:34:16 +00008333 mbmi->interinter_compound_type = cur_type;
Cheng Chen2ef24ea2017-11-29 12:22:24 -08008334#if CONFIG_JNT_COMP
Cheng Chen5a881722017-11-30 17:05:10 -08008335 const int comp_group_idx_ctx = get_comp_group_idx_context(xd);
8336 int masked_type_cost = 0;
8337 if (masked_compound_used) {
8338 if (cur_type == COMPOUND_AVERAGE) {
8339 masked_type_cost += x->comp_group_idx_cost[comp_group_idx_ctx][0];
Cheng Chen2ef24ea2017-11-29 12:22:24 -08008340
Cheng Chen5a881722017-11-30 17:05:10 -08008341 const int comp_index_ctx = get_comp_index_context(cm, xd);
8342 masked_type_cost += x->comp_idx_cost[comp_index_ctx][1];
8343 } else {
8344 masked_type_cost += x->comp_group_idx_cost[comp_group_idx_ctx][1];
Cheng Chen2ef24ea2017-11-29 12:22:24 -08008345
Cheng Chen5a881722017-11-30 17:05:10 -08008346 masked_type_cost +=
8347 x->compound_type_cost[bsize][mbmi->interinter_compound_type - 1];
Cheng Chen2ef24ea2017-11-29 12:22:24 -08008348 }
Cheng Chen5a881722017-11-30 17:05:10 -08008349 } else {
8350 const int comp_index_ctx = get_comp_index_context(cm, xd);
8351 masked_type_cost += x->comp_idx_cost[comp_index_ctx][1];
Cheng Chen2ef24ea2017-11-29 12:22:24 -08008352 }
Cheng Chen5a881722017-11-30 17:05:10 -08008353 rs2 = av1_cost_literal(get_interinter_compound_type_bits(
8354 bsize, mbmi->interinter_compound_type)) +
8355 masked_type_cost;
Cheng Chen2ef24ea2017-11-29 12:22:24 -08008356#else
Sarah Parker680b9b12017-08-16 18:55:34 -07008357 int masked_type_cost = 0;
8358 if (masked_compound_used) {
Sarah Parker680b9b12017-08-16 18:55:34 -07008359 if (!is_interinter_compound_used(COMPOUND_WEDGE, bsize))
8360 masked_type_cost += av1_cost_literal(1);
8361 else
Sarah Parker680b9b12017-08-16 18:55:34 -07008362 masked_type_cost +=
Yue Chena4245512017-08-31 11:58:08 -07008363 x->compound_type_cost[bsize][mbmi->interinter_compound_type];
Sarah Parker680b9b12017-08-16 18:55:34 -07008364 }
Sarah Parker6fdc8532016-11-16 17:47:13 -08008365 rs2 = av1_cost_literal(get_interinter_compound_type_bits(
Sarah Parker2d0e9b72017-05-04 01:34:16 +00008366 bsize, mbmi->interinter_compound_type)) +
Sarah Parker680b9b12017-08-16 18:55:34 -07008367 masked_type_cost;
Cheng Chen2ef24ea2017-11-29 12:22:24 -08008368#endif // CONFIG_JNT_COMP
Yaowu Xuc27fc142016-08-22 16:08:15 -07008369
Sarah Parker6fdc8532016-11-16 17:47:13 -08008370 switch (cur_type) {
8371 case COMPOUND_AVERAGE:
Jingning Hanc44009c2017-05-06 11:36:49 -07008372 av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, &orig_dst,
8373 bsize);
Sarah Parker6fdc8532016-11-16 17:47:13 -08008374 av1_subtract_plane(x, bsize, 0);
8375 rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
8376 &tmp_skip_txfm_sb, &tmp_skip_sse_sb,
8377 INT64_MAX);
8378 if (rd != INT64_MAX)
Urvang Joshi70006e42017-06-14 16:08:55 -07008379 best_rd_cur = RDCOST(x->rdmult, rs2 + rate_mv + rate_sum, dist_sum);
Sarah Parker2e604882017-01-17 17:31:25 -08008380 best_rd_compound = best_rd_cur;
Sarah Parker6fdc8532016-11-16 17:47:13 -08008381 break;
8382 case COMPOUND_WEDGE:
Sarah Parker6fdc8532016-11-16 17:47:13 -08008383 if (x->source_variance > cpi->sf.disable_wedge_search_var_thresh &&
8384 best_rd_compound / 3 < ref_best_rd) {
Sarah Parkerddcea392017-04-25 15:57:22 -07008385 best_rd_cur = build_and_cost_compound_type(
David Barkerac37fa32016-12-02 12:30:21 +00008386 cpi, x, cur_mv, bsize, this_mode, rs2, rate_mv, &orig_dst,
8387 &tmp_rate_mv, preds0, preds1, strides, mi_row, mi_col);
Sarah Parker6fdc8532016-11-16 17:47:13 -08008388 }
8389 break;
Sarah Parker569edda2016-12-14 14:57:38 -08008390 case COMPOUND_SEG:
Sarah Parker569edda2016-12-14 14:57:38 -08008391 if (x->source_variance > cpi->sf.disable_wedge_search_var_thresh &&
8392 best_rd_compound / 3 < ref_best_rd) {
Sarah Parkerddcea392017-04-25 15:57:22 -07008393 best_rd_cur = build_and_cost_compound_type(
Sarah Parker569edda2016-12-14 14:57:38 -08008394 cpi, x, cur_mv, bsize, this_mode, rs2, rate_mv, &orig_dst,
8395 &tmp_rate_mv, preds0, preds1, strides, mi_row, mi_col);
Sarah Parker569edda2016-12-14 14:57:38 -08008396 }
8397 break;
Sarah Parker6fdc8532016-11-16 17:47:13 -08008398 default: assert(0); return 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008399 }
Sarah Parker2e604882017-01-17 17:31:25 -08008400
8401 if (best_rd_cur < best_rd_compound) {
8402 best_rd_compound = best_rd_cur;
Sarah Parker2d0e9b72017-05-04 01:34:16 +00008403 best_compound_data.wedge_index = mbmi->wedge_index;
8404 best_compound_data.wedge_sign = mbmi->wedge_sign;
Sarah Parker2d0e9b72017-05-04 01:34:16 +00008405 best_compound_data.mask_type = mbmi->mask_type;
8406 memcpy(best_compound_data.seg_mask, xd->seg_mask,
8407 2 * MAX_SB_SQUARE * sizeof(uint8_t));
Sarah Parker2d0e9b72017-05-04 01:34:16 +00008408 best_compound_data.interinter_compound_type =
8409 mbmi->interinter_compound_type;
Yue Chena4245512017-08-31 11:58:08 -07008410 best_compmode_interinter_cost = rs2;
Sarah Parker2e604882017-01-17 17:31:25 -08008411 if (have_newmv_in_inter_mode(this_mode)) {
8412 if (use_masked_motion_search(cur_type)) {
8413 best_tmp_rate_mv = tmp_rate_mv;
8414 best_mv[0].as_int = mbmi->mv[0].as_int;
8415 best_mv[1].as_int = mbmi->mv[1].as_int;
8416 } else {
8417 best_mv[0].as_int = cur_mv[0].as_int;
8418 best_mv[1].as_int = cur_mv[1].as_int;
8419 }
8420 }
8421 }
8422 // reset to original mvs for next iteration
8423 mbmi->mv[0].as_int = cur_mv[0].as_int;
8424 mbmi->mv[1].as_int = cur_mv[1].as_int;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008425 }
Sarah Parker2d0e9b72017-05-04 01:34:16 +00008426 mbmi->wedge_index = best_compound_data.wedge_index;
8427 mbmi->wedge_sign = best_compound_data.wedge_sign;
Sarah Parker2d0e9b72017-05-04 01:34:16 +00008428 mbmi->mask_type = best_compound_data.mask_type;
8429 memcpy(xd->seg_mask, best_compound_data.seg_mask,
8430 2 * MAX_SB_SQUARE * sizeof(uint8_t));
Sarah Parker2d0e9b72017-05-04 01:34:16 +00008431 mbmi->interinter_compound_type =
8432 best_compound_data.interinter_compound_type;
Sarah Parker6fdc8532016-11-16 17:47:13 -08008433 if (have_newmv_in_inter_mode(this_mode)) {
8434 mbmi->mv[0].as_int = best_mv[0].as_int;
8435 mbmi->mv[1].as_int = best_mv[1].as_int;
8436 xd->mi[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int;
8437 xd->mi[0]->bmi[0].as_mv[1].as_int = mbmi->mv[1].as_int;
Sarah Parker2d0e9b72017-05-04 01:34:16 +00008438 if (use_masked_motion_search(mbmi->interinter_compound_type)) {
Sarah Parker6fdc8532016-11-16 17:47:13 -08008439 rd_stats->rate += best_tmp_rate_mv - rate_mv;
8440 rate_mv = best_tmp_rate_mv;
8441 }
8442 }
8443
8444 if (ref_best_rd < INT64_MAX && best_rd_compound / 3 > ref_best_rd) {
David Barkerac37fa32016-12-02 12:30:21 +00008445 restore_dst_buf(xd, orig_dst);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008446 return INT64_MAX;
David Barkerb8069f92016-11-18 14:49:56 +00008447 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07008448
8449 pred_exists = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008450
Yue Chena4245512017-08-31 11:58:08 -07008451 compmode_interinter_cost = best_compmode_interinter_cost;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008452 }
8453
8454 if (is_comp_interintra_pred) {
8455 INTERINTRA_MODE best_interintra_mode = II_DC_PRED;
8456 int64_t best_interintra_rd = INT64_MAX;
8457 int rmode, rate_sum;
8458 int64_t dist_sum;
8459 int j;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008460 int tmp_rate_mv = 0;
8461 int tmp_skip_txfm_sb;
8462 int64_t tmp_skip_sse_sb;
8463 DECLARE_ALIGNED(16, uint8_t, intrapred_[2 * MAX_SB_SQUARE]);
8464 uint8_t *intrapred;
8465
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02008466#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07008467 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
8468 intrapred = CONVERT_TO_BYTEPTR(intrapred_);
8469 else
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02008470#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07008471 intrapred = intrapred_;
8472
Emil Keyder01770b32017-01-20 18:03:11 -05008473 mbmi->ref_frame[1] = NONE_FRAME;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008474 for (j = 0; j < MAX_MB_PLANE; j++) {
8475 xd->plane[j].dst.buf = tmp_buf + j * MAX_SB_SQUARE;
8476 xd->plane[j].dst.stride = bw;
8477 }
Debargha Mukherjeead8be032017-05-09 15:28:45 -07008478 av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, &orig_dst, bsize);
David Barkerac37fa32016-12-02 12:30:21 +00008479 restore_dst_buf(xd, orig_dst);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008480 mbmi->ref_frame[1] = INTRA_FRAME;
8481 mbmi->use_wedge_interintra = 0;
8482
8483 for (j = 0; j < INTERINTRA_MODES; ++j) {
8484 mbmi->interintra_mode = (INTERINTRA_MODE)j;
8485 rmode = interintra_mode_cost[mbmi->interintra_mode];
David Barker761b1ac2017-09-25 11:23:03 +01008486 av1_build_intra_predictors_for_interintra(cm, xd, bsize, 0, &orig_dst,
David Barkerac37fa32016-12-02 12:30:21 +00008487 intrapred, bw);
Yaowu Xuf883b422016-08-30 14:01:10 -07008488 av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008489 model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
8490 &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
Urvang Joshi70006e42017-06-14 16:08:55 -07008491 rd = RDCOST(x->rdmult, tmp_rate_mv + rate_sum + rmode, dist_sum);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008492 if (rd < best_interintra_rd) {
8493 best_interintra_rd = rd;
8494 best_interintra_mode = mbmi->interintra_mode;
8495 }
8496 }
8497 mbmi->interintra_mode = best_interintra_mode;
8498 rmode = interintra_mode_cost[mbmi->interintra_mode];
David Barker761b1ac2017-09-25 11:23:03 +01008499 av1_build_intra_predictors_for_interintra(cm, xd, bsize, 0, &orig_dst,
David Barkerac37fa32016-12-02 12:30:21 +00008500 intrapred, bw);
Yaowu Xuf883b422016-08-30 14:01:10 -07008501 av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
8502 av1_subtract_plane(x, bsize, 0);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008503 rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
8504 &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
8505 if (rd != INT64_MAX)
Urvang Joshi70006e42017-06-14 16:08:55 -07008506 rd = RDCOST(x->rdmult, rate_mv + rmode + rate_sum, dist_sum);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008507 best_interintra_rd = rd;
8508
8509 if (ref_best_rd < INT64_MAX && best_interintra_rd > 2 * ref_best_rd) {
David Barkerb8069f92016-11-18 14:49:56 +00008510 // Don't need to call restore_dst_buf here
Yaowu Xuc27fc142016-08-22 16:08:15 -07008511 return INT64_MAX;
8512 }
8513 if (is_interintra_wedge_used(bsize)) {
Debargha Mukherjeec5f735f2017-04-26 03:25:28 +00008514 int64_t best_interintra_rd_nowedge = INT64_MAX;
8515 int64_t best_interintra_rd_wedge = INT64_MAX;
8516 int_mv tmp_mv;
Yue Cheneaf128a2017-10-16 17:01:36 -07008517 int rwedge = x->wedge_interintra_cost[bsize][0];
Yaowu Xuc27fc142016-08-22 16:08:15 -07008518 if (rd != INT64_MAX)
Urvang Joshi70006e42017-06-14 16:08:55 -07008519 rd = RDCOST(x->rdmult, rmode + rate_mv + rwedge + rate_sum, dist_sum);
Yue Chenf03907a2017-05-31 12:04:04 -07008520 best_interintra_rd_nowedge = best_interintra_rd;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008521
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08008522 // Disable wedge search if source variance is small
Yaowu Xuc27fc142016-08-22 16:08:15 -07008523 if (x->source_variance > cpi->sf.disable_wedge_search_var_thresh) {
8524 mbmi->use_wedge_interintra = 1;
8525
Yaowu Xuf883b422016-08-30 14:01:10 -07008526 rwedge = av1_cost_literal(get_interintra_wedge_bits(bsize)) +
Yue Cheneaf128a2017-10-16 17:01:36 -07008527 x->wedge_interintra_cost[bsize][1];
Yaowu Xuc27fc142016-08-22 16:08:15 -07008528
8529 best_interintra_rd_wedge =
8530 pick_interintra_wedge(cpi, x, bsize, intrapred_, tmp_buf_);
8531
8532 best_interintra_rd_wedge +=
Urvang Joshi70006e42017-06-14 16:08:55 -07008533 RDCOST(x->rdmult, rmode + rate_mv + rwedge, 0);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008534 // Refine motion vector.
8535 if (have_newmv_in_inter_mode(this_mode)) {
8536 // get negative of mask
Yaowu Xuf883b422016-08-30 14:01:10 -07008537 const uint8_t *mask = av1_get_contiguous_soft_mask(
Yaowu Xuc27fc142016-08-22 16:08:15 -07008538 mbmi->interintra_wedge_index, 1, bsize);
David Barkerf19f35f2017-05-22 16:33:22 +01008539 tmp_mv.as_int = x->mbmi_ext->ref_mvs[refs[0]][0].as_int;
8540 compound_single_motion_search(cpi, x, bsize, &tmp_mv.as_mv, mi_row,
8541 mi_col, intrapred, mask, bw,
Timothy B. Terriberry5d24b6f2017-06-15 13:39:35 -07008542 &tmp_rate_mv, 0);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008543 mbmi->mv[0].as_int = tmp_mv.as_int;
Debargha Mukherjeead8be032017-05-09 15:28:45 -07008544 av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, &orig_dst,
8545 bsize);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008546 model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
8547 &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
Urvang Joshi70006e42017-06-14 16:08:55 -07008548 rd = RDCOST(x->rdmult, rmode + tmp_rate_mv + rwedge + rate_sum,
8549 dist_sum);
Yue Chenf03907a2017-05-31 12:04:04 -07008550 if (rd >= best_interintra_rd_wedge) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07008551 tmp_mv.as_int = cur_mv[0].as_int;
8552 tmp_rate_mv = rate_mv;
8553 }
8554 } else {
8555 tmp_mv.as_int = cur_mv[0].as_int;
8556 tmp_rate_mv = rate_mv;
Yaowu Xuf883b422016-08-30 14:01:10 -07008557 av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008558 }
8559 // Evaluate closer to true rd
Yaowu Xuf883b422016-08-30 14:01:10 -07008560 av1_subtract_plane(x, bsize, 0);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008561 rd =
8562 estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
8563 &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
8564 if (rd != INT64_MAX)
Urvang Joshi70006e42017-06-14 16:08:55 -07008565 rd = RDCOST(x->rdmult, rmode + tmp_rate_mv + rwedge + rate_sum,
8566 dist_sum);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008567 best_interintra_rd_wedge = rd;
8568 if (best_interintra_rd_wedge < best_interintra_rd_nowedge) {
8569 mbmi->use_wedge_interintra = 1;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008570 mbmi->mv[0].as_int = tmp_mv.as_int;
Angie Chiang76159122016-11-09 12:13:22 -08008571 rd_stats->rate += tmp_rate_mv - rate_mv;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008572 rate_mv = tmp_rate_mv;
8573 } else {
8574 mbmi->use_wedge_interintra = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008575 mbmi->mv[0].as_int = cur_mv[0].as_int;
8576 }
8577 } else {
8578 mbmi->use_wedge_interintra = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008579 }
8580 }
8581
8582 pred_exists = 0;
Yue Cheneaf128a2017-10-16 17:01:36 -07008583 compmode_interintra_cost = x->interintra_cost[size_group_lookup[bsize]][1] +
8584 interintra_mode_cost[mbmi->interintra_mode];
Yaowu Xuc27fc142016-08-22 16:08:15 -07008585 if (is_interintra_wedge_used(bsize)) {
Yue Cheneaf128a2017-10-16 17:01:36 -07008586 compmode_interintra_cost +=
8587 x->wedge_interintra_cost[bsize][mbmi->use_wedge_interintra];
Yaowu Xuc27fc142016-08-22 16:08:15 -07008588 if (mbmi->use_wedge_interintra) {
Yue Chen5e606542017-05-24 17:03:17 -07008589 compmode_interintra_cost +=
Yaowu Xuf883b422016-08-30 14:01:10 -07008590 av1_cost_literal(get_interintra_wedge_bits(bsize));
Yaowu Xuc27fc142016-08-22 16:08:15 -07008591 }
8592 }
8593 } else if (is_interintra_allowed(mbmi)) {
Yue Cheneaf128a2017-10-16 17:01:36 -07008594 compmode_interintra_cost = x->interintra_cost[size_group_lookup[bsize]][0];
Yaowu Xuc27fc142016-08-22 16:08:15 -07008595 }
8596
Angie Chiang75c22092016-10-25 12:19:16 -07008597 if (pred_exists == 0) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07008598 int tmp_rate;
8599 int64_t tmp_dist;
Jingning Hanc44009c2017-05-06 11:36:49 -07008600 av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, &orig_dst, bsize);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008601 model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
8602 &tmp_dist, &skip_txfm_sb, &skip_sse_sb);
Urvang Joshi70006e42017-06-14 16:08:55 -07008603 rd = RDCOST(x->rdmult, rs + tmp_rate, tmp_dist);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008604 }
8605
Fergus Simpson3424c2d2017-03-09 11:48:15 -08008606 if (!is_comp_pred)
Rupert Swarbrick27e90292017-09-28 17:46:50 +01008607 args->single_filter[this_mode][refs[0]] =
8608 av1_extract_interp_filter(mbmi->interp_filters, 0);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008609
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -08008610 if (args->modelled_rd != NULL) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07008611 if (is_comp_pred) {
8612 const int mode0 = compound_ref0_mode(this_mode);
8613 const int mode1 = compound_ref1_mode(this_mode);
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -08008614 const int64_t mrd = AOMMIN(args->modelled_rd[mode0][refs[0]],
8615 args->modelled_rd[mode1][refs[1]]);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008616 if (rd / 4 * 3 > mrd && ref_best_rd < INT64_MAX) {
David Barkerac37fa32016-12-02 12:30:21 +00008617 restore_dst_buf(xd, orig_dst);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008618 return INT64_MAX;
8619 }
8620 } else if (!is_comp_interintra_pred) {
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -08008621 args->modelled_rd[this_mode][refs[0]] = rd;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008622 }
8623 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07008624
8625 if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
8626 // if current pred_error modeled rd is substantially more than the best
8627 // so far, do not bother doing full rd
8628 if (rd / 2 > ref_best_rd) {
David Barkerac37fa32016-12-02 12:30:21 +00008629 restore_dst_buf(xd, orig_dst);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008630 return INT64_MAX;
8631 }
8632 }
8633
Yue Chen5e606542017-05-24 17:03:17 -07008634 rd_stats->rate += compmode_interintra_cost;
Yue Chen5e606542017-05-24 17:03:17 -07008635 rate2_bmc_nocoeff += compmode_interintra_cost;
Yue Chen5e606542017-05-24 17:03:17 -07008636 rd_stats->rate += compmode_interinter_cost;
Yue Chen5e606542017-05-24 17:03:17 -07008637
Sebastien Alaiwan1bc94fc2017-10-31 10:25:17 +01008638 ret_val = motion_mode_rd(cpi, x, bsize, rd_stats, rd_stats_y, rd_stats_uv,
8639 disable_skip, mode_mv, mi_row, mi_col, args,
8640 ref_best_rd, refs, rate_mv, single_newmv,
8641 rate2_bmc_nocoeff, &best_bmc_mbmi, rate_mv_bmc, rs,
8642 &skip_txfm_sb, &skip_sse_sb, &orig_dst);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08008643 if (ret_val != 0) return ret_val;
Angie Chiang76159122016-11-09 12:13:22 -08008644
Yaowu Xuc27fc142016-08-22 16:08:15 -07008645 return 0; // The rate-distortion cost will be re-calculated by caller.
8646}
8647
Alex Converse28744302017-04-13 14:46:22 -07008648#if CONFIG_INTRABC
// Rate-distortion search for the intra block copy (IntraBC) mode of one
// superblock/partition. Runs a full-pixel displacement-vector (DV) search over
// the already-coded area (above the current superblock row, then to the left
// within the row), evaluates the best candidate in both no-skip and skip
// variants, and keeps whichever beats the incoming best_rd.
//
// On success, *mbmi (via xd->mi[0]) is left holding the winning mode info,
// *rd_cost the winning RD stats, and x->skip the winning skip flag.
// Returns the (possibly improved) best RD cost; returns INT64_MAX untouched
// when IntraBC is not allowed for this block size / frame.
static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
                                       RD_STATS *rd_cost, BLOCK_SIZE bsize,
                                       int64_t best_rd) {
  const AV1_COMMON *const cm = &cpi->common;
  if (!av1_allow_intrabc(bsize, cm)) return INT64_MAX;

  MACROBLOCKD *const xd = &x->e_mbd;
  const TileInfo *tile = &xd->tile;
  MODE_INFO *const mi = xd->mi[0];
  // Recover mi_row/mi_col from the distance to the frame edges
  // (mb_to_top_edge/mb_to_left_edge are in 1/8-pel units).
  const int mi_row = -xd->mb_to_top_edge / (8 * MI_SIZE);
  const int mi_col = -xd->mb_to_left_edge / (8 * MI_SIZE);
  const int w = block_size_wide[bsize];
  const int h = block_size_high[bsize];
  const int sb_row = mi_row >> cm->mib_size_log2;
  const int sb_col = mi_col >> cm->mib_size_log2;

  // Build the reference-MV list for INTRA_FRAME so a DV predictor can be
  // derived the same way regular MV predictors are.
  MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  MV_REFERENCE_FRAME ref_frame = INTRA_FRAME;
  int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
  av1_find_mv_refs(cm, xd, mi, ref_frame, &mbmi_ext->ref_mv_count[ref_frame],
                   mbmi_ext->ref_mv_stack[ref_frame],
                   mbmi_ext->compound_mode_context, candidates, mi_row, mi_col,
                   NULL, NULL, mbmi_ext->mode_context);

  int_mv nearestmv, nearmv;
#if CONFIG_AMVR
  av1_find_best_ref_mvs(0, candidates, &nearestmv, &nearmv, 0);
#else
  av1_find_best_ref_mvs(0, candidates, &nearestmv, &nearmv);
#endif

  // Prefer the nearest predictor; fall back to near, then to a default
  // reference DV when both are zero.
  int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
  if (dv_ref.as_int == 0) av1_find_ref_dv(&dv_ref, mi_row, mi_col);
  // Ref DV should not have sub-pel.
  assert((dv_ref.as_mv.col & 7) == 0);
  assert((dv_ref.as_mv.row & 7) == 0);
  mbmi_ext->ref_mvs[INTRA_FRAME][0] = dv_ref;

  // Point the prediction source at the current (partially coded) frame so the
  // inter-prediction machinery copies from already-reconstructed pixels.
  struct buf_2d yv12_mb[MAX_MB_PLANE];
  av1_setup_pred_block(xd, yv12_mb, xd->cur_buf, mi_row, mi_col, NULL, NULL);
  for (int i = 0; i < MAX_MB_PLANE; ++i) {
    xd->plane[i].pre[0] = yv12_mb[i];
  }

  enum IntrabcMotionDirection {
    IBC_MOTION_ABOVE,
    IBC_MOTION_LEFT,
    IBC_MOTION_DIRECTIONS
  };

  MB_MODE_INFO *mbmi = &mi->mbmi;
  MB_MODE_INFO best_mbmi = *mbmi;
  RD_STATS best_rdcost = *rd_cost;
  int best_skip = x->skip;

  // Search two legal DV regions in turn: everything above the current
  // superblock row, then the already-coded area to the left within the row.
  for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE;
       dir < IBC_MOTION_DIRECTIONS; ++dir) {
    const MvLimits tmp_mv_limits = x->mv_limits;  // restored after the search
    switch (dir) {
      case IBC_MOTION_ABOVE:
        x->mv_limits.col_min = (tile->mi_col_start - mi_col) * MI_SIZE;
        x->mv_limits.col_max = (tile->mi_col_end - mi_col) * MI_SIZE - w;
        x->mv_limits.row_min = (tile->mi_row_start - mi_row) * MI_SIZE;
        x->mv_limits.row_max = (sb_row * cm->mib_size - mi_row) * MI_SIZE - h;
        break;
      case IBC_MOTION_LEFT:
        x->mv_limits.col_min = (tile->mi_col_start - mi_col) * MI_SIZE;
        x->mv_limits.col_max = (sb_col * cm->mib_size - mi_col) * MI_SIZE - w;
        // TODO(aconverse@google.com): Minimize the overlap between above and
        // left areas.
        x->mv_limits.row_min = (tile->mi_row_start - mi_row) * MI_SIZE;
        int bottom_coded_mi_edge =
            AOMMIN((sb_row + 1) * cm->mib_size, tile->mi_row_end);
        x->mv_limits.row_max = (bottom_coded_mi_edge - mi_row) * MI_SIZE - h;
        break;
      default: assert(0);
    }
    // The region limits must only shrink the pre-existing search range.
    assert(x->mv_limits.col_min >= tmp_mv_limits.col_min);
    assert(x->mv_limits.col_max <= tmp_mv_limits.col_max);
    assert(x->mv_limits.row_min >= tmp_mv_limits.row_min);
    assert(x->mv_limits.row_max <= tmp_mv_limits.row_max);
    av1_set_mv_search_range(&x->mv_limits, &dv_ref.as_mv);

    // Empty region (e.g. first superblock row/column): nothing to search.
    if (x->mv_limits.col_max < x->mv_limits.col_min ||
        x->mv_limits.row_max < x->mv_limits.row_min) {
      x->mv_limits = tmp_mv_limits;
      continue;
    }

    int step_param = cpi->mv_step_param;
    MV mvp_full = dv_ref.as_mv;
    mvp_full.col >>= 3;  // predictor from 1/8-pel to full-pel units
    mvp_full.row >>= 3;
    int sadpb = x->sadperbit16;
    int cost_list[5];
#if CONFIG_HASH_ME
    int bestsme = av1_full_pixel_search(
        cpi, x, bsize, &mvp_full, step_param, sadpb,
        cond_cost_list(cpi, cost_list), &dv_ref.as_mv, INT_MAX, 1,
        (MI_SIZE * mi_col), (MI_SIZE * mi_row), 1);
#else
    int bestsme = av1_full_pixel_search(cpi, x, bsize, &mvp_full, step_param,
                                        sadpb, cond_cost_list(cpi, cost_list),
                                        &dv_ref.as_mv, INT_MAX, 1);
#endif

    x->mv_limits = tmp_mv_limits;
    if (bestsme == INT_MAX) continue;
    mvp_full = x->best_mv.as_mv;
    // Convert the full-pel search result back to 1/8-pel DV units.
    MV dv = {.row = mvp_full.row * 8, .col = mvp_full.col * 8 };
    if (mv_check_bounds(&x->mv_limits, &dv)) continue;
    if (!av1_is_dv_valid(dv, tile, mi_row, mi_col, bsize, cm->mib_size_log2))
      continue;

    // DV should not have sub-pel.
    assert((dv.col & 7) == 0);
    assert((dv.row & 7) == 0);
    // Configure mbmi as an IntraBC block: DC_PRED + BILINEAR filter is the
    // fixed signaling used here; the DV rides in mv[0].
    memset(&mbmi->palette_mode_info, 0, sizeof(mbmi->palette_mode_info));
    mbmi->use_intrabc = 1;
    mbmi->mode = DC_PRED;
    mbmi->uv_mode = UV_DC_PRED;
    mbmi->mv[0].as_mv = dv;
    mbmi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
    mbmi->skip = 0;
    x->skip = 0;
    av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);

    int *dvcost[2] = { (int *)&cpi->dv_cost[0][MV_MAX],
                       (int *)&cpi->dv_cost[1][MV_MAX] };
    // TODO(aconverse@google.com): The full motion field defining discount
    // in MV_COST_WEIGHT is too large. Explore other values.
    int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, cpi->dv_joint_cost,
                                  dvcost, MV_COST_WEIGHT_SUB);
    const int rate_mode = x->intrabc_cost[1];
    RD_STATS rd_stats, rd_stats_uv;
    av1_subtract_plane(x, bsize, 0);
    if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) {
      // Intrabc
      select_tx_type_yrd(cpi, x, &rd_stats, bsize, mi_row, mi_col, INT64_MAX);
    } else {
      // Fixed tx size: propagate it to every 8x8 unit of the block.
      int idx, idy;
      super_block_yrd(cpi, x, &rd_stats, bsize, INT64_MAX);
      for (idy = 0; idy < xd->n8_h; ++idy)
        for (idx = 0; idx < xd->n8_w; ++idx)
          mbmi->inter_tx_size[idy][idx] = mbmi->tx_size;
      memset(x->blk_skip[0], rd_stats.skip,
             sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
    }
    super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
    av1_merge_rd_stats(&rd_stats, &rd_stats_uv);
#if CONFIG_RD_DEBUG
    mbmi->rd_stats = rd_stats;
#endif

    const int skip_ctx = av1_get_skip_context(xd);

    // Evaluate the candidate without the skip flag (coefficients coded)...
    RD_STATS rdc_noskip;
    av1_init_rd_stats(&rdc_noskip);
    rdc_noskip.rate =
        rate_mode + rate_mv + rd_stats.rate + x->skip_cost[skip_ctx][0];
    rdc_noskip.dist = rd_stats.dist;
    rdc_noskip.rdcost = RDCOST(x->rdmult, rdc_noskip.rate, rdc_noskip.dist);
    if (rdc_noskip.rdcost < best_rd) {
      best_rd = rdc_noskip.rdcost;
      best_mbmi = *mbmi;
      best_skip = x->skip;
      best_rdcost = rdc_noskip;
    }

    // ...and with skip (no coefficients; distortion becomes the SSE).
    x->skip = 1;
    mbmi->skip = 1;
    RD_STATS rdc_skip;
    av1_init_rd_stats(&rdc_skip);
    rdc_skip.rate = rate_mode + rate_mv + x->skip_cost[skip_ctx][1];
    rdc_skip.dist = rd_stats.sse;
    rdc_skip.rdcost = RDCOST(x->rdmult, rdc_skip.rate, rdc_skip.dist);
    if (rdc_skip.rdcost < best_rd) {
      best_rd = rdc_skip.rdcost;
      best_mbmi = *mbmi;
      best_skip = x->skip;
      best_rdcost = rdc_skip;
    }
  }
  // Commit the overall winner (which may be the caller's original state when
  // no candidate beat the incoming best_rd).
  *mbmi = best_mbmi;
  *rd_cost = best_rdcost;
  x->skip = best_skip;
  return best_rd;
}
8837#endif // CONFIG_INTRABC
8838
// Picks the best intra coding mode for one superblock/partition: luma mode
// first, then (unless chroma RD is skipped) the chroma mode, combining both
// into *rd_cost. With CONFIG_INTRABC it additionally lets the IntraBC search
// compete against the regular intra result. On success the chosen mode info
// is copied into ctx; on failure rd_cost->rate is left at INT_MAX.
void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
                               RD_STATS *rd_cost, BLOCK_SIZE bsize,
                               PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  struct macroblockd_plane *const pd = xd->plane;
  int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
  int y_skip = 0, uv_skip = 0;
  int64_t dist_y = 0, dist_uv = 0;
  TX_SIZE max_uv_tx_size;

  (void)cm;  // only used under some config combinations

  ctx->skip = 0;
  mbmi->ref_frame[0] = INTRA_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
#if CONFIG_INTRABC
  mbmi->use_intrabc = 0;
  mbmi->mv[0].as_int = 0;
#endif  // CONFIG_INTRABC

  const int64_t intra_yrd = rd_pick_intra_sby_mode(
      cpi, x, &rate_y, &rate_y_tokenonly, &dist_y, &y_skip, bsize, best_rd);

  // Only bother with chroma if the luma search beat the incoming best_rd.
  if (intra_yrd < best_rd) {
#if CONFIG_CFL
    // Only store reconstructed luma when there's chroma RDO. When there's no
    // chroma RDO, the reconstructed luma will be stored in encode_superblock().
    xd->cfl.store_y = !x->skip_chroma_rd;
    if (xd->cfl.store_y) {
      // Perform one extra call to txfm_rd_in_plane(), with the values chosen
      // during luma RDO, so we can store reconstructed luma values
      RD_STATS this_rd_stats;
      txfm_rd_in_plane(x, cpi, &this_rd_stats, INT64_MAX, AOM_PLANE_Y,
                       mbmi->sb_type, mbmi->tx_size,
                       cpi->sf.use_fast_coef_costing);
      xd->cfl.store_y = 0;
    }
#endif  // CONFIG_CFL
    // Chroma tx size is bounded by the luma tx size and the subsampling.
    max_uv_tx_size = uv_txsize_lookup[bsize][mbmi->tx_size][pd[1].subsampling_x]
                                     [pd[1].subsampling_y];
    init_sbuv_mode(mbmi);
    if (!x->skip_chroma_rd)
      rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, &dist_uv,
                              &uv_skip, bsize, max_uv_tx_size);

    if (y_skip && (uv_skip || x->skip_chroma_rd)) {
      // Whole block is skipped: drop the token rates and pay the skip flag.
      rd_cost->rate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
                      x->skip_cost[av1_get_skip_context(xd)][1];
      rd_cost->dist = dist_y + dist_uv;
    } else {
      rd_cost->rate =
          rate_y + rate_uv + x->skip_cost[av1_get_skip_context(xd)][0];
      rd_cost->dist = dist_y + dist_uv;
    }
    rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
  } else {
    rd_cost->rate = INT_MAX;  // signal that no intra mode beat best_rd
  }

#if CONFIG_INTRABC
  // Let IntraBC compete against the regular intra result (if any).
  if (rd_cost->rate != INT_MAX && rd_cost->rdcost < best_rd)
    best_rd = rd_cost->rdcost;
  if (rd_pick_intrabc_mode_sb(cpi, x, rd_cost, bsize, best_rd) < best_rd) {
    ctx->skip = x->skip;  // FIXME where is the proper place to set this?!
    assert(rd_cost->rate != INT_MAX);
  }
#endif
  if (rd_cost->rate == INT_MAX) return;

  // Save the winning mode info for the caller's encoding context.
  ctx->mic = *xd->mi[0];
  ctx->mbmi_ext = *x->mbmi_ext;
}
8913
Yaowu Xuc27fc142016-08-22 16:08:15 -07008914// Do we have an internal image edge (e.g. formatting bars).
Urvang Joshi52648442016-10-13 17:27:51 -07008915int av1_internal_image_edge(const AV1_COMP *cpi) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07008916 return (cpi->oxcf.pass == 2) &&
8917 ((cpi->twopass.this_frame_stats.inactive_zone_rows > 0) ||
8918 (cpi->twopass.this_frame_stats.inactive_zone_cols > 0));
8919}
8920
8921// Checks to see if a super block is on a horizontal image edge.
8922// In most cases this is the "real" edge unless there are formatting
8923// bars embedded in the stream.
Urvang Joshi52648442016-10-13 17:27:51 -07008924int av1_active_h_edge(const AV1_COMP *cpi, int mi_row, int mi_step) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07008925 int top_edge = 0;
8926 int bottom_edge = cpi->common.mi_rows;
8927 int is_active_h_edge = 0;
8928
8929 // For two pass account for any formatting bars detected.
8930 if (cpi->oxcf.pass == 2) {
Urvang Joshi52648442016-10-13 17:27:51 -07008931 const TWO_PASS *const twopass = &cpi->twopass;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008932
8933 // The inactive region is specified in MBs not mi units.
8934 // The image edge is in the following MB row.
8935 top_edge += (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
8936
8937 bottom_edge -= (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
Yaowu Xuf883b422016-08-30 14:01:10 -07008938 bottom_edge = AOMMAX(top_edge, bottom_edge);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008939 }
8940
8941 if (((top_edge >= mi_row) && (top_edge < (mi_row + mi_step))) ||
8942 ((bottom_edge >= mi_row) && (bottom_edge < (mi_row + mi_step)))) {
8943 is_active_h_edge = 1;
8944 }
8945 return is_active_h_edge;
8946}
8947
8948// Checks to see if a super block is on a vertical image edge.
8949// In most cases this is the "real" edge unless there are formatting
8950// bars embedded in the stream.
Urvang Joshi52648442016-10-13 17:27:51 -07008951int av1_active_v_edge(const AV1_COMP *cpi, int mi_col, int mi_step) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07008952 int left_edge = 0;
8953 int right_edge = cpi->common.mi_cols;
8954 int is_active_v_edge = 0;
8955
8956 // For two pass account for any formatting bars detected.
8957 if (cpi->oxcf.pass == 2) {
Urvang Joshi52648442016-10-13 17:27:51 -07008958 const TWO_PASS *const twopass = &cpi->twopass;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008959
8960 // The inactive region is specified in MBs not mi units.
8961 // The image edge is in the following MB row.
8962 left_edge += (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
8963
8964 right_edge -= (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
Yaowu Xuf883b422016-08-30 14:01:10 -07008965 right_edge = AOMMAX(left_edge, right_edge);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008966 }
8967
8968 if (((left_edge >= mi_col) && (left_edge < (mi_col + mi_step))) ||
8969 ((right_edge >= mi_col) && (right_edge < (mi_col + mi_step)))) {
8970 is_active_v_edge = 1;
8971 }
8972 return is_active_v_edge;
8973}
8974
8975// Checks to see if a super block is at the edge of the active image.
8976// In most cases this is the "real" edge unless there are formatting
8977// bars embedded in the stream.
Urvang Joshi52648442016-10-13 17:27:51 -07008978int av1_active_edge_sb(const AV1_COMP *cpi, int mi_row, int mi_col) {
Yaowu Xuf883b422016-08-30 14:01:10 -07008979 return av1_active_h_edge(cpi, mi_row, cpi->common.mib_size) ||
8980 av1_active_v_edge(cpi, mi_col, cpi->common.mib_size);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008981}
8982
Urvang Joshi52648442016-10-13 17:27:51 -07008983static void restore_uv_color_map(const AV1_COMP *const cpi, MACROBLOCK *x) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07008984 MACROBLOCKD *const xd = &x->e_mbd;
8985 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
8986 PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
8987 const BLOCK_SIZE bsize = mbmi->sb_type;
Urvang Joshic9e71d42017-08-09 18:58:33 -07008988 assert(bsize >= BLOCK_8X8);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008989 int src_stride = x->plane[1].src.stride;
8990 const uint8_t *const src_u = x->plane[1].src.buf;
8991 const uint8_t *const src_v = x->plane[2].src.buf;
8992 float *const data = x->palette_buffer->kmeans_data_buf;
8993 float centroids[2 * PALETTE_MAX_SIZE];
8994 uint8_t *const color_map = xd->plane[1].color_index_map;
8995 int r, c;
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02008996#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07008997 const uint16_t *const src_u16 = CONVERT_TO_SHORTPTR(src_u);
8998 const uint16_t *const src_v16 = CONVERT_TO_SHORTPTR(src_v);
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02008999#endif // CONFIG_HIGHBITDEPTH
Urvang Joshi56ba91b2017-01-10 13:22:09 -08009000 int plane_block_width, plane_block_height, rows, cols;
9001 av1_get_block_dimensions(bsize, 1, xd, &plane_block_width,
9002 &plane_block_height, &rows, &cols);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009003 (void)cpi;
9004
9005 for (r = 0; r < rows; ++r) {
9006 for (c = 0; c < cols; ++c) {
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02009007#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07009008 if (cpi->common.use_highbitdepth) {
9009 data[(r * cols + c) * 2] = src_u16[r * src_stride + c];
9010 data[(r * cols + c) * 2 + 1] = src_v16[r * src_stride + c];
9011 } else {
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02009012#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07009013 data[(r * cols + c) * 2] = src_u[r * src_stride + c];
9014 data[(r * cols + c) * 2 + 1] = src_v[r * src_stride + c];
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02009015#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07009016 }
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02009017#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07009018 }
9019 }
9020
9021 for (r = 1; r < 3; ++r) {
9022 for (c = 0; c < pmi->palette_size[1]; ++c) {
9023 centroids[c * 2 + r - 1] = pmi->palette_colors[r * PALETTE_MAX_SIZE + c];
9024 }
9025 }
9026
Yaowu Xuf883b422016-08-30 14:01:10 -07009027 av1_calc_indices(data, centroids, color_map, rows * cols,
9028 pmi->palette_size[1], 2);
Urvang Joshi56ba91b2017-01-10 13:22:09 -08009029 extend_palette_color_map(color_map, cols, rows, plane_block_width,
9030 plane_block_height);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009031}
9032
Yaowu Xuf883b422016-08-30 14:01:10 -07009033static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x,
9034 const MACROBLOCKD *xd, int mi_row,
9035 int mi_col, const uint8_t *above,
9036 int above_stride, const uint8_t *left,
Yue Chene9638cc2016-10-10 12:37:54 -07009037 int left_stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009038
Zoe Liuf40a9572017-10-13 12:37:19 -07009039#if CONFIG_EXT_SKIP
// Estimates the rate-distortion cost of coding the current block in
// skip_mode (CONFIG_EXT_SKIP): compound NEAREST_NEARESTMV prediction from
// the frame-level skip-mode reference pair (cm->ref_frame_idx_0/1), with
// the block flagged as skipped (mbmi->skip_mode = mbmi->skip = 1).
//
// Scans the mode search order for the single candidate whose mode and
// reference pair match skip_mode, fills in mbmi for it, and calls
// skip_mode_rd() to evaluate the cost. At most one candidate is evaluated:
// the loop breaks on the first match, or earlier if the required motion
// vectors are invalid or out of bounds (in which case
// x->skip_mode_rdcost is left at INT64_MAX as a "not viable" sentinel).
static void estimate_skip_mode_rdcost(
    const AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *const x,
    BLOCK_SIZE bsize, int mi_row, int mi_col,
    int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME],
    struct buf_2d yv12_mb[TOTAL_REFS_PER_FRAME][MAX_MB_PLANE]) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;

  int *mode_map = tile_data->mode_map[bsize];
  // Maps each reference frame to its encoder availability flag bit.
  static const int flag_list[TOTAL_REFS_PER_FRAME] = { 0,
                                                       AOM_LAST_FLAG,
                                                       AOM_LAST2_FLAG,
                                                       AOM_LAST3_FLAG,
                                                       AOM_GOLD_FLAG,
                                                       AOM_BWD_FLAG,
                                                       AOM_ALT2_FLAG,
                                                       AOM_ALT_FLAG };
  int i;

  for (int midx = 0; midx < MAX_MODES; ++midx) {
    const int mode_index = mode_map[midx];
    // Remember which search-order entry is being considered, so the caller
    // can map the skip-mode decision back to a mode index.
    x->skip_mode_index_candidate = mode_index;

    const MV_REFERENCE_FRAME ref_frame =
        av1_mode_order[mode_index].ref_frame[0];
    const MV_REFERENCE_FRAME second_ref_frame =
        av1_mode_order[mode_index].ref_frame[1];
    const int comp_pred = second_ref_frame > INTRA_FRAME;

    // skip_mode is compound-prediction only.
    if (!comp_pred) continue;

    const PREDICTION_MODE this_mode = av1_mode_order[mode_index].mode;

    // Both references must be available this frame.
    if (!(cpi->ref_frame_flags & flag_list[ref_frame])) continue;
    if (comp_pred && !(cpi->ref_frame_flags & flag_list[second_ref_frame]))
      continue;
    // Check whether current refs/mode align with skip_mode
    if (!(ref_frame == (LAST_FRAME + cm->ref_frame_idx_0) &&
          second_ref_frame == (LAST_FRAME + cm->ref_frame_idx_1) &&
          this_mode == NEAREST_NEARESTMV)) {
      continue;
    }

    // Derive the compound NEAREST_NEARESTMV motion vectors from the
    // per-reference NEAREST candidates.
    frame_mv[this_mode][ref_frame].as_int =
        frame_mv[compound_ref0_mode(this_mode)][ref_frame].as_int;
    frame_mv[this_mode][second_ref_frame].as_int =
        frame_mv[compound_ref1_mode(this_mode)][second_ref_frame].as_int;

    // skip_mode needs both MVs; give up entirely if either is invalid.
    if (frame_mv[this_mode][ref_frame].as_int == INVALID_MV ||
        frame_mv[this_mode][second_ref_frame].as_int == INVALID_MV)
      break;

    mbmi->mode = this_mode;
    mbmi->uv_mode = UV_DC_PRED;
    mbmi->ref_frame[0] = ref_frame;
    mbmi->ref_frame[1] = second_ref_frame;

    // Obtain NEAREST_NEARESTMV.
    {
      for (i = 0; i < 2; ++i) {
        int_mv cur_mv = frame_mv[mbmi->mode][mbmi->ref_frame[i]];
        clamp_mv2(&cur_mv.as_mv, xd);
        if (mv_check_bounds(&x->mv_limits, &cur_mv.as_mv)) {
          // Mark skip_mode as not viable for this block.
          x->skip_mode_rdcost = INT64_MAX;
          break;
        }
        mbmi->mv[i].as_int = cur_mv.as_int;
      }
      if (x->skip_mode_rdcost == INT64_MAX) break;

      // If a reference MV stack exists for this compound pair, prefer the
      // MV pair at the top of the stack over the frame_mv candidates.
      const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
      if (mbmi_ext->ref_mv_count[ref_frame_type] > 0) {
        for (i = 0; i < 2; ++i) {
          int_mv cur_mv =
              (i == 0) ? mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv
                       : mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
          clamp_mv2(&cur_mv.as_mv, xd);
          if (mv_check_bounds(&x->mv_limits, &cur_mv.as_mv)) {
            x->skip_mode_rdcost = INT64_MAX;
            break;
          }
          mbmi->mv[i].as_int = cur_mv.as_int;
        }
        if (x->skip_mode_rdcost == INT64_MAX) break;
      }
    }

    // Force the remaining mbmi fields to the defaults implied by skip_mode
    // signaling: no filter-intra, no inter-intra, average compound,
    // simple translation, default interpolation filters.
#if CONFIG_FILTER_INTRA
    mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
    mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
#endif  // CONFIG_FILTER_INTRA
    mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
#if CONFIG_JNT_COMP
    mbmi->comp_group_idx = 0;
    mbmi->compound_idx = 1;
#endif  // CONFIG_JNT_COMP
    mbmi->interinter_compound_type = COMPOUND_AVERAGE;
    mbmi->motion_mode = SIMPLE_TRANSLATION;
    mbmi->ref_mv_idx = 0;
    mbmi->skip_mode = mbmi->skip = 1;

    set_default_interp_filters(mbmi, cm->interp_filter);

    // Point the prediction planes at the two reference buffers.
    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
    for (i = 0; i < MAX_MB_PLANE; i++) {
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
      xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
    }

    // Save the current destination buffers so skip_mode_rd() can predict
    // into them and they can be identified/restored afterwards.
    BUFFER_SET orig_dst;
    for (i = 0; i < MAX_MB_PLANE; i++) {
      orig_dst.plane[i] = xd->plane[i].dst.buf;
      orig_dst.stride[i] = xd->plane[i].dst.stride;
    }

    // Obtain the rdcost for skip_mode.
    skip_mode_rd(cpi, x, bsize, mi_row, mi_col, &orig_dst);
    break;
  }
}
9162#endif // CONFIG_EXT_SKIP
9163
Urvang Joshi52648442016-10-13 17:27:51 -07009164void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
Yaowu Xuf883b422016-08-30 14:01:10 -07009165 MACROBLOCK *x, int mi_row, int mi_col,
Sebastien Alaiwan0cf54d42017-10-16 16:10:04 +02009166 RD_STATS *rd_cost, BLOCK_SIZE bsize,
9167 PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far) {
Urvang Joshi52648442016-10-13 17:27:51 -07009168 const AV1_COMMON *const cm = &cpi->common;
9169 const RD_OPT *const rd_opt = &cpi->rd;
9170 const SPEED_FEATURES *const sf = &cpi->sf;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009171 MACROBLOCKD *const xd = &x->e_mbd;
9172 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
Hui Sue87fb232017-10-05 15:00:15 -07009173 const int try_palette =
9174 av1_allow_palette(cm->allow_screen_content_tools, mbmi->sb_type);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009175 PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
9176 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
9177 const struct segmentation *const seg = &cm->seg;
9178 PREDICTION_MODE this_mode;
9179 MV_REFERENCE_FRAME ref_frame, second_ref_frame;
9180 unsigned char segment_id = mbmi->segment_id;
9181 int comp_pred, i, k;
9182 int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
9183 struct buf_2d yv12_mb[TOTAL_REFS_PER_FRAME][MAX_MB_PLANE];
Yaowu Xuc27fc142016-08-22 16:08:15 -07009184 int_mv single_newmv[TOTAL_REFS_PER_FRAME] = { { 0 } };
Zoe Liu7f24e1b2017-03-17 17:42:05 -07009185 int single_newmv_rate[TOTAL_REFS_PER_FRAME] = { 0 };
9186 int64_t modelled_rd[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
Sebastien Alaiwan365e6442017-10-16 11:35:00 +02009187 static const int flag_list[TOTAL_REFS_PER_FRAME] = { 0,
9188 AOM_LAST_FLAG,
9189 AOM_LAST2_FLAG,
9190 AOM_LAST3_FLAG,
9191 AOM_GOLD_FLAG,
9192 AOM_BWD_FLAG,
9193 AOM_ALT2_FLAG,
9194 AOM_ALT_FLAG };
Yaowu Xuc27fc142016-08-22 16:08:15 -07009195 int64_t best_rd = best_rd_so_far;
9196 int best_rate_y = INT_MAX, best_rate_uv = INT_MAX;
9197 int64_t best_pred_diff[REFERENCE_MODES];
9198 int64_t best_pred_rd[REFERENCE_MODES];
9199 MB_MODE_INFO best_mbmode;
Zoe Liu1eed2df2017-10-16 17:13:15 -07009200 const int skip_ctx = av1_get_skip_context(xd);
9201 int rate_skip0 = x->skip_cost[skip_ctx][0];
9202 int rate_skip1 = x->skip_cost[skip_ctx][1];
Yaowu Xuc27fc142016-08-22 16:08:15 -07009203 int best_mode_skippable = 0;
9204 int midx, best_mode_index = -1;
9205 unsigned int ref_costs_single[TOTAL_REFS_PER_FRAME];
Zoe Liuc082bbc2017-05-17 13:31:37 -07009206#if CONFIG_EXT_COMP_REFS
9207 unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME][TOTAL_REFS_PER_FRAME];
9208#else
Yaowu Xuc27fc142016-08-22 16:08:15 -07009209 unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME];
Zoe Liuc082bbc2017-05-17 13:31:37 -07009210#endif // CONFIG_EXT_COMP_REFS
Yaowu Xuf883b422016-08-30 14:01:10 -07009211 aom_prob comp_mode_p;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009212 int64_t best_intra_rd = INT64_MAX;
9213 unsigned int best_pred_sse = UINT_MAX;
9214 PREDICTION_MODE best_intra_mode = DC_PRED;
Urvang Joshifeb925f2016-12-05 10:37:29 -08009215 int rate_uv_intra[TX_SIZES_ALL], rate_uv_tokenonly[TX_SIZES_ALL];
9216 int64_t dist_uvs[TX_SIZES_ALL];
9217 int skip_uvs[TX_SIZES_ALL];
Luc Trudeaud6d9eee2017-07-12 12:36:50 -04009218 UV_PREDICTION_MODE mode_uv[TX_SIZES_ALL];
Urvang Joshifeb925f2016-12-05 10:37:29 -08009219 PALETTE_MODE_INFO pmi_uv[TX_SIZES_ALL];
Yaowu Xuc27fc142016-08-22 16:08:15 -07009220#if CONFIG_EXT_INTRA
Urvang Joshifeb925f2016-12-05 10:37:29 -08009221 int8_t uv_angle_delta[TX_SIZES_ALL];
Yaowu Xuc27fc142016-08-22 16:08:15 -07009222 int is_directional_mode, angle_stats_ready = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009223 uint8_t directional_mode_skip_mask[INTRA_MODES];
9224#endif // CONFIG_EXT_INTRA
Yaowu Xuf883b422016-08-30 14:01:10 -07009225 const int intra_cost_penalty = av1_get_intra_cost_penalty(
Yaowu Xuc27fc142016-08-22 16:08:15 -07009226 cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
Yue Chenb23d00a2017-07-28 17:01:21 -07009227 const int *const intra_mode_cost = x->mbmode_cost[size_group_lookup[bsize]];
Yaowu Xuc27fc142016-08-22 16:08:15 -07009228 int best_skip2 = 0;
Zoe Liu97ad0582017-02-09 10:51:00 -08009229 uint16_t ref_frame_skip_mask[2] = { 0 };
Yaowu Xuc27fc142016-08-22 16:08:15 -07009230 uint32_t mode_skip_mask[TOTAL_REFS_PER_FRAME] = { 0 };
9231 MV_REFERENCE_FRAME best_single_inter_ref = LAST_FRAME;
9232 int64_t best_single_inter_rd = INT64_MAX;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009233 int mode_skip_start = sf->mode_skip_start + 1;
9234 const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];
9235 const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
9236 int64_t mode_threshold[MAX_MODES];
9237 int *mode_map = tile_data->mode_map[bsize];
9238 const int mode_search_skip_flags = sf->mode_search_skip_flags;
Yushin Cho77bba8d2016-11-04 16:36:56 -07009239
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -08009240 HandleInterModeArgs args = {
Sebastien Alaiwan1bc94fc2017-10-31 10:25:17 +01009241 { NULL }, { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
9242 { NULL }, { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
9243 NULL, NULL,
9244 NULL, { { 0 } },
Fergus Simpson073c6f32017-02-17 12:13:48 -08009245 };
9246
Jingning Hanae5cfde2016-11-30 12:01:44 -08009247 const int rows = block_size_high[bsize];
9248 const int cols = block_size_wide[bsize];
Urvang Joshib100db72016-10-12 16:28:56 -07009249 int palette_ctx = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009250 const MODE_INFO *above_mi = xd->above_mi;
9251 const MODE_INFO *left_mi = xd->left_mi;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009252 int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
9253 int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
9254 int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
9255 int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
Yaowu Xuc27fc142016-08-22 16:08:15 -07009256
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02009257#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07009258 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
9259 int len = sizeof(uint16_t);
Jingning Hand064cf02017-06-01 10:00:39 -07009260 args.above_pred_buf[0] = CONVERT_TO_BYTEPTR(x->above_pred_buf);
9261 args.above_pred_buf[1] =
9262 CONVERT_TO_BYTEPTR(x->above_pred_buf + MAX_SB_SQUARE * len);
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -08009263 args.above_pred_buf[2] =
Jingning Hand064cf02017-06-01 10:00:39 -07009264 CONVERT_TO_BYTEPTR(x->above_pred_buf + 2 * MAX_SB_SQUARE * len);
9265 args.left_pred_buf[0] = CONVERT_TO_BYTEPTR(x->left_pred_buf);
9266 args.left_pred_buf[1] =
9267 CONVERT_TO_BYTEPTR(x->left_pred_buf + MAX_SB_SQUARE * len);
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -08009268 args.left_pred_buf[2] =
Jingning Hand064cf02017-06-01 10:00:39 -07009269 CONVERT_TO_BYTEPTR(x->left_pred_buf + 2 * MAX_SB_SQUARE * len);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009270 } else {
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02009271#endif // CONFIG_HIGHBITDEPTH
Jingning Hand064cf02017-06-01 10:00:39 -07009272 args.above_pred_buf[0] = x->above_pred_buf;
9273 args.above_pred_buf[1] = x->above_pred_buf + MAX_SB_SQUARE;
9274 args.above_pred_buf[2] = x->above_pred_buf + 2 * MAX_SB_SQUARE;
9275 args.left_pred_buf[0] = x->left_pred_buf;
9276 args.left_pred_buf[1] = x->left_pred_buf + MAX_SB_SQUARE;
9277 args.left_pred_buf[2] = x->left_pred_buf + 2 * MAX_SB_SQUARE;
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02009278#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07009279 }
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02009280#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07009281
Yaowu Xuf883b422016-08-30 14:01:10 -07009282 av1_zero(best_mbmode);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009283
Urvang Joshib100db72016-10-12 16:28:56 -07009284 av1_zero(pmi_uv);
hui su9bc1d8d2017-03-24 12:36:03 -07009285 if (try_palette) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07009286 if (above_mi)
9287 palette_ctx += (above_mi->mbmi.palette_mode_info.palette_size[0] > 0);
9288 if (left_mi)
9289 palette_ctx += (left_mi->mbmi.palette_mode_info.palette_size[0] > 0);
9290 }
9291
Yue Chen170678a2017-10-17 13:43:10 -07009292 estimate_ref_frame_costs(cm, xd, x, segment_id, ref_costs_single,
9293 ref_costs_comp, &comp_mode_p);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009294
9295 for (i = 0; i < REFERENCE_MODES; ++i) best_pred_rd[i] = INT64_MAX;
Urvang Joshifeb925f2016-12-05 10:37:29 -08009296 for (i = 0; i < TX_SIZES_ALL; i++) rate_uv_intra[i] = INT_MAX;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009297 for (i = 0; i < TOTAL_REFS_PER_FRAME; ++i) x->pred_sse[i] = INT_MAX;
9298 for (i = 0; i < MB_MODE_COUNT; ++i) {
9299 for (k = 0; k < TOTAL_REFS_PER_FRAME; ++k) {
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -08009300 args.single_filter[i][k] = SWITCHABLE;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009301 }
9302 }
9303
9304 rd_cost->rate = INT_MAX;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009305
9306 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
9307 x->pred_mv_sad[ref_frame] = INT_MAX;
9308 x->mbmi_ext->mode_context[ref_frame] = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009309 x->mbmi_ext->compound_mode_context[ref_frame] = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009310 if (cpi->ref_frame_flags & flag_list[ref_frame]) {
9311 assert(get_ref_frame_buffer(cpi, ref_frame) != NULL);
9312 setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
9313 frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
9314 }
9315 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
Sarah Parker2b9ec2e2017-10-30 17:34:08 -07009316 frame_mv[GLOBALMV][ref_frame].as_int =
Sarah Parkerae7c4582017-02-28 16:30:30 -08009317 gm_get_motion_vector(&cm->global_motion[ref_frame],
Debargha Mukherjeefebb59c2017-03-02 12:23:45 -08009318 cm->allow_high_precision_mv, bsize, mi_col, mi_row,
RogerZhou3b635242017-09-19 10:06:46 -07009319 0
9320#if CONFIG_AMVR
9321 ,
RogerZhou10a03802017-10-26 11:49:48 -07009322 cm->cur_frame_force_integer_mv
RogerZhou3b635242017-09-19 10:06:46 -07009323#endif
9324 )
David Barkercdcac6d2016-12-01 17:04:16 +00009325 .as_int;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009326 frame_mv[NEW_NEWMV][ref_frame].as_int = INVALID_MV;
Sarah Parker2b9ec2e2017-10-30 17:34:08 -07009327 frame_mv[GLOBAL_GLOBALMV][ref_frame].as_int =
Sarah Parkerae7c4582017-02-28 16:30:30 -08009328 gm_get_motion_vector(&cm->global_motion[ref_frame],
Debargha Mukherjeefebb59c2017-03-02 12:23:45 -08009329 cm->allow_high_precision_mv, bsize, mi_col, mi_row,
RogerZhou3b635242017-09-19 10:06:46 -07009330 0
9331#if CONFIG_AMVR
9332 ,
RogerZhou10a03802017-10-26 11:49:48 -07009333 cm->cur_frame_force_integer_mv
RogerZhou3b635242017-09-19 10:06:46 -07009334#endif
9335 )
Sarah Parkerc2d38712017-01-24 15:15:41 -08009336 .as_int;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009337 }
9338
Yaowu Xuc27fc142016-08-22 16:08:15 -07009339 for (; ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
9340 MODE_INFO *const mi = xd->mi[0];
9341 int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
9342 x->mbmi_ext->mode_context[ref_frame] = 0;
Yaowu Xuf883b422016-08-30 14:01:10 -07009343 av1_find_mv_refs(cm, xd, mi, ref_frame, &mbmi_ext->ref_mv_count[ref_frame],
9344 mbmi_ext->ref_mv_stack[ref_frame],
Sebastien Alaiwan0bdea0d2017-10-02 15:15:05 +02009345 mbmi_ext->compound_mode_context, candidates, mi_row,
9346 mi_col, NULL, NULL, mbmi_ext->mode_context);
Jingning Han731af492016-11-17 11:53:23 -08009347 if (mbmi_ext->ref_mv_count[ref_frame] < 2) {
9348 MV_REFERENCE_FRAME rf[2];
9349 av1_set_ref_frame(rf, ref_frame);
David Barkercdcac6d2016-12-01 17:04:16 +00009350 if (mbmi_ext->ref_mvs[rf[0]][0].as_int !=
Sarah Parker2b9ec2e2017-10-30 17:34:08 -07009351 frame_mv[GLOBALMV][rf[0]].as_int ||
David Barkercdcac6d2016-12-01 17:04:16 +00009352 mbmi_ext->ref_mvs[rf[0]][1].as_int !=
Sarah Parker2b9ec2e2017-10-30 17:34:08 -07009353 frame_mv[GLOBALMV][rf[0]].as_int ||
David Barkercdcac6d2016-12-01 17:04:16 +00009354 mbmi_ext->ref_mvs[rf[1]][0].as_int !=
Sarah Parker2b9ec2e2017-10-30 17:34:08 -07009355 frame_mv[GLOBALMV][rf[1]].as_int ||
9356 mbmi_ext->ref_mvs[rf[1]][1].as_int !=
9357 frame_mv[GLOBALMV][rf[1]].as_int)
Jingning Han731af492016-11-17 11:53:23 -08009358 mbmi_ext->mode_context[ref_frame] &= ~(1 << ALL_ZERO_FLAG_OFFSET);
9359 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07009360 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07009361
Yue Chen5329a2b2017-02-28 17:33:00 +08009362 av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col);
Jingning Hanad586b92017-05-23 10:24:57 -07009363
Yue Chenf7ba6472017-04-19 11:08:58 -07009364 if (check_num_overlappable_neighbors(mbmi) &&
9365 is_motion_variation_allowed_bsize(bsize)) {
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -08009366 av1_build_prediction_by_above_preds(cm, xd, mi_row, mi_col,
9367 args.above_pred_buf, dst_width1,
9368 dst_height1, args.above_pred_stride);
Yue Chen5329a2b2017-02-28 17:33:00 +08009369 av1_build_prediction_by_left_preds(cm, xd, mi_row, mi_col,
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -08009370 args.left_pred_buf, dst_width2,
9371 dst_height2, args.left_pred_stride);
Jingning Han91d9a792017-04-18 12:01:52 -07009372 av1_setup_dst_planes(xd->plane, bsize, get_frame_new_buffer(cm), mi_row,
9373 mi_col);
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -08009374 calc_target_weighted_pred(cm, x, xd, mi_row, mi_col, args.above_pred_buf[0],
9375 args.above_pred_stride[0], args.left_pred_buf[0],
9376 args.left_pred_stride[0]);
Yue Chen5329a2b2017-02-28 17:33:00 +08009377 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07009378
9379 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
9380 if (!(cpi->ref_frame_flags & flag_list[ref_frame])) {
Zoe Liue9b15e22017-07-19 15:53:01 -07009381 // Skip checking missing references in both single and compound reference
9382 // modes. Note that a mode will be skipped iff both reference frames
9383 // are masked out.
Zoe Liuc082bbc2017-05-17 13:31:37 -07009384 ref_frame_skip_mask[0] |= (1 << ref_frame);
9385 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009386 } else {
9387 for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
9388 // Skip fixed mv modes for poor references
9389 if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) {
9390 mode_skip_mask[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
9391 break;
9392 }
9393 }
9394 }
9395 // If the segment reference frame feature is enabled....
9396 // then do nothing if the current ref frame is not allowed..
9397 if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
9398 get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
9399 ref_frame_skip_mask[0] |= (1 << ref_frame);
9400 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
9401 }
9402 }
9403
9404 // Disable this drop out case if the ref frame
9405 // segment level feature is enabled for this segment. This is to
9406 // prevent the possibility that we end up unable to pick any mode.
9407 if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
Sarah Parker2b9ec2e2017-10-30 17:34:08 -07009408 // Only consider GLOBALMV/ALTREF_FRAME for alt ref frame,
Yaowu Xuc27fc142016-08-22 16:08:15 -07009409 // unless ARNR filtering is enabled in which case we want
9410 // an unfiltered alternative. We allow near/nearest as well
9411 // because they may result in zero-zero MVs but be cheaper.
9412 if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
Sarah Parkere5299862016-08-16 14:57:37 -07009413 int_mv zeromv;
Sebastien Alaiwan365e6442017-10-16 11:35:00 +02009414 ref_frame_skip_mask[0] = (1 << LAST_FRAME) | (1 << LAST2_FRAME) |
9415 (1 << LAST3_FRAME) | (1 << BWDREF_FRAME) |
9416 (1 << ALTREF2_FRAME) | (1 << GOLDEN_FRAME);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009417 ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
9418 // TODO(zoeliu): To further explore whether following needs to be done for
9419 // BWDREF_FRAME as well.
9420 mode_skip_mask[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
David Barkercdcac6d2016-12-01 17:04:16 +00009421 zeromv.as_int = gm_get_motion_vector(&cm->global_motion[ALTREF_FRAME],
Sarah Parkerae7c4582017-02-28 16:30:30 -08009422 cm->allow_high_precision_mv, bsize,
RogerZhou3b635242017-09-19 10:06:46 -07009423 mi_col, mi_row, 0
9424#if CONFIG_AMVR
9425 ,
RogerZhou10a03802017-10-26 11:49:48 -07009426 cm->cur_frame_force_integer_mv
RogerZhou3b635242017-09-19 10:06:46 -07009427#endif
9428 )
David Barkercdcac6d2016-12-01 17:04:16 +00009429 .as_int;
Sarah Parkere5299862016-08-16 14:57:37 -07009430 if (frame_mv[NEARMV][ALTREF_FRAME].as_int != zeromv.as_int)
Yaowu Xuc27fc142016-08-22 16:08:15 -07009431 mode_skip_mask[ALTREF_FRAME] |= (1 << NEARMV);
Sarah Parkere5299862016-08-16 14:57:37 -07009432 if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != zeromv.as_int)
Yaowu Xuc27fc142016-08-22 16:08:15 -07009433 mode_skip_mask[ALTREF_FRAME] |= (1 << NEARESTMV);
Sarah Parkere5299862016-08-16 14:57:37 -07009434 if (frame_mv[NEAREST_NEARESTMV][ALTREF_FRAME].as_int != zeromv.as_int)
Yaowu Xuc27fc142016-08-22 16:08:15 -07009435 mode_skip_mask[ALTREF_FRAME] |= (1 << NEAREST_NEARESTMV);
Sarah Parkere5299862016-08-16 14:57:37 -07009436 if (frame_mv[NEAR_NEARMV][ALTREF_FRAME].as_int != zeromv.as_int)
Yaowu Xuc27fc142016-08-22 16:08:15 -07009437 mode_skip_mask[ALTREF_FRAME] |= (1 << NEAR_NEARMV);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009438 }
9439 }
9440
9441 if (cpi->rc.is_src_frame_alt_ref) {
9442 if (sf->alt_ref_search_fp) {
9443 assert(cpi->ref_frame_flags & flag_list[ALTREF_FRAME]);
9444 mode_skip_mask[ALTREF_FRAME] = 0;
9445 ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME);
9446 ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
9447 }
9448 }
9449
9450 if (sf->alt_ref_search_fp)
9451 if (!cm->show_frame && x->pred_mv_sad[GOLDEN_FRAME] < INT_MAX)
9452 if (x->pred_mv_sad[ALTREF_FRAME] > (x->pred_mv_sad[GOLDEN_FRAME] << 1))
9453 mode_skip_mask[ALTREF_FRAME] |= INTER_ALL;
9454
9455 if (sf->adaptive_mode_search) {
9456 if (cm->show_frame && !cpi->rc.is_src_frame_alt_ref &&
9457 cpi->rc.frames_since_golden >= 3)
Yaowu Xu36bad472017-05-16 18:29:53 -07009458 if ((x->pred_mv_sad[GOLDEN_FRAME] >> 1) > x->pred_mv_sad[LAST_FRAME])
Yaowu Xuc27fc142016-08-22 16:08:15 -07009459 mode_skip_mask[GOLDEN_FRAME] |= INTER_ALL;
9460 }
9461
9462 if (bsize > sf->max_intra_bsize) {
9463 ref_frame_skip_mask[0] |= (1 << INTRA_FRAME);
9464 ref_frame_skip_mask[1] |= (1 << INTRA_FRAME);
9465 }
9466
9467 mode_skip_mask[INTRA_FRAME] |=
9468 ~(sf->intra_y_mode_mask[max_txsize_lookup[bsize]]);
9469
9470 for (i = 0; i <= LAST_NEW_MV_INDEX; ++i) mode_threshold[i] = 0;
9471 for (i = LAST_NEW_MV_INDEX + 1; i < MAX_MODES; ++i)
9472 mode_threshold[i] = ((int64_t)rd_threshes[i] * rd_thresh_freq_fact[i]) >> 5;
9473
9474 midx = sf->schedule_mode_search ? mode_skip_start : 0;
9475 while (midx > 4) {
9476 uint8_t end_pos = 0;
9477 for (i = 5; i < midx; ++i) {
9478 if (mode_threshold[mode_map[i - 1]] > mode_threshold[mode_map[i]]) {
9479 uint8_t tmp = mode_map[i];
9480 mode_map[i] = mode_map[i - 1];
9481 mode_map[i - 1] = tmp;
9482 end_pos = i;
9483 }
9484 }
9485 midx = end_pos;
9486 }
9487
9488 if (cpi->sf.tx_type_search.fast_intra_tx_type_search)
9489 x->use_default_intra_tx_type = 1;
9490 else
9491 x->use_default_intra_tx_type = 0;
9492
9493 if (cpi->sf.tx_type_search.fast_inter_tx_type_search)
9494 x->use_default_inter_tx_type = 1;
9495 else
9496 x->use_default_inter_tx_type = 0;
Yushin Chod0b77ac2017-10-20 17:33:16 -07009497
Yaowu Xuc27fc142016-08-22 16:08:15 -07009498 for (i = 0; i < MB_MODE_COUNT; ++i)
9499 for (ref_frame = 0; ref_frame < TOTAL_REFS_PER_FRAME; ++ref_frame)
9500 modelled_rd[i][ref_frame] = INT64_MAX;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009501
Zoe Liuf40a9572017-10-13 12:37:19 -07009502#if CONFIG_EXT_SKIP
9503 x->skip_mode_rdcost = -1;
9504 x->skip_mode_index = -1;
9505#endif // CONFIG_EXT_SKIP
9506
Yaowu Xuc27fc142016-08-22 16:08:15 -07009507 for (midx = 0; midx < MAX_MODES; ++midx) {
9508 int mode_index;
9509 int mode_excluded = 0;
9510 int64_t this_rd = INT64_MAX;
9511 int disable_skip = 0;
9512 int compmode_cost = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009513 int rate2 = 0, rate_y = 0, rate_uv = 0;
9514 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
9515 int skippable = 0;
9516 int this_skip2 = 0;
9517 int64_t total_sse = INT64_MAX;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009518 uint8_t ref_frame_type;
Yushin Chod0b77ac2017-10-20 17:33:16 -07009519
Yaowu Xuc27fc142016-08-22 16:08:15 -07009520 mode_index = mode_map[midx];
Zoe Liuf40a9572017-10-13 12:37:19 -07009521#if CONFIG_EXT_SKIP
9522 x->skip_mode_index_candidate = mode_index;
9523#endif // CONFIG_EXT_SKIP
Yaowu Xuf883b422016-08-30 14:01:10 -07009524 this_mode = av1_mode_order[mode_index].mode;
9525 ref_frame = av1_mode_order[mode_index].ref_frame[0];
9526 second_ref_frame = av1_mode_order[mode_index].ref_frame[1];
Yaowu Xu4306b6e2016-09-27 12:55:32 -07009527 mbmi->ref_mv_idx = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009528
Yaowu Xuc27fc142016-08-22 16:08:15 -07009529 if (ref_frame > INTRA_FRAME && second_ref_frame == INTRA_FRAME) {
9530 // Mode must by compatible
Debargha Mukherjee37f6fe62017-02-10 21:44:13 -08009531 if (!is_interintra_allowed_mode(this_mode)) continue;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009532 if (!is_interintra_allowed_bsize(bsize)) continue;
9533 }
9534
9535 if (is_inter_compound_mode(this_mode)) {
9536 frame_mv[this_mode][ref_frame].as_int =
9537 frame_mv[compound_ref0_mode(this_mode)][ref_frame].as_int;
9538 frame_mv[this_mode][second_ref_frame].as_int =
9539 frame_mv[compound_ref1_mode(this_mode)][second_ref_frame].as_int;
9540 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07009541
9542 // Look at the reference frame of the best mode so far and set the
9543 // skip mask to look at a subset of the remaining modes.
9544 if (midx == mode_skip_start && best_mode_index >= 0) {
9545 switch (best_mbmode.ref_frame[0]) {
9546 case INTRA_FRAME: break;
9547 case LAST_FRAME:
9548 ref_frame_skip_mask[0] |= LAST_FRAME_MODE_MASK;
9549 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
9550 break;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009551 case LAST2_FRAME:
9552 ref_frame_skip_mask[0] |= LAST2_FRAME_MODE_MASK;
9553 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
9554 break;
9555 case LAST3_FRAME:
9556 ref_frame_skip_mask[0] |= LAST3_FRAME_MODE_MASK;
9557 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
9558 break;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009559 case GOLDEN_FRAME:
9560 ref_frame_skip_mask[0] |= GOLDEN_FRAME_MODE_MASK;
9561 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
9562 break;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009563 case BWDREF_FRAME:
9564 ref_frame_skip_mask[0] |= BWDREF_FRAME_MODE_MASK;
9565 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
9566 break;
Zoe Liue9b15e22017-07-19 15:53:01 -07009567 case ALTREF2_FRAME:
9568 ref_frame_skip_mask[0] |= ALTREF2_FRAME_MODE_MASK;
9569 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
9570 break;
Sebastien Alaiwan365e6442017-10-16 11:35:00 +02009571 case ALTREF_FRAME:
9572 ref_frame_skip_mask[0] |= ALTREF_FRAME_MODE_MASK;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009573 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009574 break;
Emil Keyder01770b32017-01-20 18:03:11 -05009575 case NONE_FRAME:
Yaowu Xuc27fc142016-08-22 16:08:15 -07009576 case TOTAL_REFS_PER_FRAME:
9577 assert(0 && "Invalid Reference frame");
9578 break;
9579 }
9580 }
9581
9582 if ((ref_frame_skip_mask[0] & (1 << ref_frame)) &&
Yaowu Xuf883b422016-08-30 14:01:10 -07009583 (ref_frame_skip_mask[1] & (1 << AOMMAX(0, second_ref_frame))))
Yaowu Xuc27fc142016-08-22 16:08:15 -07009584 continue;
9585
9586 if (mode_skip_mask[ref_frame] & (1 << this_mode)) continue;
9587
9588 // Test best rd so far against threshold for trying this mode.
9589 if (best_mode_skippable && sf->schedule_mode_search)
9590 mode_threshold[mode_index] <<= 1;
9591
9592 if (best_rd < mode_threshold[mode_index]) continue;
9593
Yunqing Wangff4fa062017-04-21 10:56:08 -07009594 // This is only used in motion vector unit test.
9595 if (cpi->oxcf.motion_vector_unit_test && ref_frame == INTRA_FRAME) continue;
9596
Zoe Liuc01dddb2017-11-07 08:44:06 -08009597#if !CONFIG_EXT_COMP_REFS // Changes LL bitstream
Arild Fuldseth (arilfuld)3f429082017-04-28 15:54:28 +02009598 if (cpi->oxcf.pass == 0) {
9599 // Complexity-compression trade-offs
9600 // if (ref_frame == ALTREF_FRAME) continue;
9601 // if (ref_frame == BWDREF_FRAME) continue;
9602 if (second_ref_frame == ALTREF_FRAME) continue;
9603 // if (second_ref_frame == BWDREF_FRAME) continue;
9604 }
Zoe Liuc01dddb2017-11-07 08:44:06 -08009605#endif // !CONFIG_EXT_COMP_REFS
Yaowu Xuc27fc142016-08-22 16:08:15 -07009606 comp_pred = second_ref_frame > INTRA_FRAME;
9607 if (comp_pred) {
9608 if (!cpi->allow_comp_inter_inter) continue;
9609
9610 // Skip compound inter modes if ARF is not available.
9611 if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) continue;
9612
9613 // Do not allow compound prediction if the segment level reference frame
9614 // feature is in use as in this case there can only be one reference.
9615 if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) continue;
9616
9617 if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
9618 best_mode_index >= 0 && best_mbmode.ref_frame[0] == INTRA_FRAME)
9619 continue;
9620
9621 mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
9622 } else {
Zoe Liud4d8b862017-12-06 10:56:01 -08009623#if CONFIG_REF_ADAPT
9624 if (ref_frame != INTRA_FRAME) mode_excluded = 0;
9625#else
Yaowu Xuc27fc142016-08-22 16:08:15 -07009626 if (ref_frame != INTRA_FRAME)
9627 mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
Zoe Liud4d8b862017-12-06 10:56:01 -08009628#endif // CONFIG_REF_ADAPT
Yaowu Xuc27fc142016-08-22 16:08:15 -07009629 }
9630
9631 if (ref_frame == INTRA_FRAME) {
9632 if (sf->adaptive_mode_search)
9633 if ((x->source_variance << num_pels_log2_lookup[bsize]) > best_pred_sse)
9634 continue;
9635
9636 if (this_mode != DC_PRED) {
9637 // Disable intra modes other than DC_PRED for blocks with low variance
9638 // Threshold for intra skipping based on source variance
9639 // TODO(debargha): Specialize the threshold for super block sizes
9640 const unsigned int skip_intra_var_thresh = 64;
9641 if ((mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
9642 x->source_variance < skip_intra_var_thresh)
9643 continue;
9644 // Only search the oblique modes if the best so far is
9645 // one of the neighboring directional modes
9646 if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
Urvang Joshi96d1c0a2017-10-10 13:15:32 -07009647 (this_mode >= D45_PRED && this_mode <= PAETH_PRED)) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07009648 if (best_mode_index >= 0 && best_mbmode.ref_frame[0] > INTRA_FRAME)
9649 continue;
9650 }
9651 if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
9652 if (conditional_skipintra(this_mode, best_intra_mode)) continue;
9653 }
9654 }
David Barkercf3d0b02016-11-10 10:14:49 +00009655 } else if (cm->global_motion[ref_frame].wmtype == IDENTITY &&
Sarah Parkere5299862016-08-16 14:57:37 -07009656 (!comp_pred ||
David Barkercf3d0b02016-11-10 10:14:49 +00009657 cm->global_motion[second_ref_frame].wmtype == IDENTITY)) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07009658 const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, second_ref_frame };
Yue Chenb23d00a2017-07-28 17:01:21 -07009659 if (!check_best_zero_mv(cpi, x, mbmi_ext->mode_context,
Sebastien Alaiwan0bdea0d2017-10-02 15:15:05 +02009660 mbmi_ext->compound_mode_context, frame_mv,
9661 this_mode, ref_frames, bsize, -1, mi_row, mi_col))
Yaowu Xuc27fc142016-08-22 16:08:15 -07009662 continue;
9663 }
9664
9665 mbmi->mode = this_mode;
Luc Trudeaud6d9eee2017-07-12 12:36:50 -04009666 mbmi->uv_mode = UV_DC_PRED;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009667 mbmi->ref_frame[0] = ref_frame;
9668 mbmi->ref_frame[1] = second_ref_frame;
9669 pmi->palette_size[0] = 0;
9670 pmi->palette_size[1] = 0;
hui su5db97432016-10-14 16:10:14 -07009671#if CONFIG_FILTER_INTRA
9672 mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
9673 mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
9674#endif // CONFIG_FILTER_INTRA
Yaowu Xuc27fc142016-08-22 16:08:15 -07009675 // Evaluate all sub-pel filters irrespective of whether we can use
9676 // them for this frame.
Debargha Mukherjee0df711f2017-05-02 16:00:20 -07009677
9678 set_default_interp_filters(mbmi, cm->interp_filter);
9679
Yaowu Xuc27fc142016-08-22 16:08:15 -07009680 mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
Yue Chencb60b182016-10-13 15:18:22 -07009681 mbmi->motion_mode = SIMPLE_TRANSLATION;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009682
9683 x->skip = 0;
9684 set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
9685
9686 // Select prediction reference frames.
9687 for (i = 0; i < MAX_MB_PLANE; i++) {
9688 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
9689 if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
9690 }
9691
Debargha Mukherjeecb603792016-10-04 13:10:23 -07009692 mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009693
Jingning Hancf842ad2017-10-25 12:02:30 -07009694#if CONFIG_FRAME_MARKER
9695 if (sf->selective_ref_frame) {
Debargha Mukherjee06b40cc2017-11-02 13:39:39 -07009696 if (sf->selective_ref_frame == 2) {
9697 if (mbmi->ref_frame[0] == ALTREF2_FRAME ||
9698 mbmi->ref_frame[1] == ALTREF2_FRAME)
9699 if (cm->cur_frame->alt2_frame_offset < cm->frame_offset) continue;
9700 if (mbmi->ref_frame[0] == BWDREF_FRAME ||
9701 mbmi->ref_frame[1] == BWDREF_FRAME)
9702 if (cm->cur_frame->bwd_frame_offset < cm->frame_offset) continue;
9703 }
Jingning Hancf842ad2017-10-25 12:02:30 -07009704 if (mbmi->ref_frame[0] == LAST3_FRAME ||
9705 mbmi->ref_frame[1] == LAST3_FRAME)
9706 if (cm->cur_frame->lst3_frame_offset <= cm->cur_frame->gld_frame_offset)
9707 continue;
9708 if (mbmi->ref_frame[0] == LAST2_FRAME ||
9709 mbmi->ref_frame[1] == LAST2_FRAME)
9710 if (cm->cur_frame->lst2_frame_offset <= cm->cur_frame->gld_frame_offset)
9711 continue;
9712 }
Zoe Liu77fb5be2017-11-02 14:36:19 -07009713
9714 // One-sided compound is used only when all reference frames are one-sided.
9715 if (sf->selective_ref_frame && comp_pred && !cpi->all_one_sided_refs) {
9716 unsigned int ref_offsets[2];
9717 for (i = 0; i < 2; ++i) {
9718 const int buf_idx = cm->frame_refs[mbmi->ref_frame[i] - LAST_FRAME].idx;
9719 assert(buf_idx >= 0);
9720 ref_offsets[i] = cm->buffer_pool->frame_bufs[buf_idx].cur_frame_offset;
9721 }
9722 if ((ref_offsets[0] <= cm->frame_offset &&
9723 ref_offsets[1] <= cm->frame_offset) ||
9724 (ref_offsets[0] > cm->frame_offset &&
9725 ref_offsets[1] > cm->frame_offset))
9726 continue;
9727 }
9728#endif // CONFIG_FRAME_MARKER
Jingning Hancf842ad2017-10-25 12:02:30 -07009729
Yaowu Xuc27fc142016-08-22 16:08:15 -07009730 if (ref_frame == INTRA_FRAME) {
Angie Chiang0e9a2e92016-11-08 09:45:40 -08009731 RD_STATS rd_stats_y;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009732 TX_SIZE uv_tx;
9733 struct macroblockd_plane *const pd = &xd->plane[1];
9734#if CONFIG_EXT_INTRA
hui su45dc5972016-12-08 17:42:50 -08009735 is_directional_mode = av1_is_directional_mode(mbmi->mode, bsize);
Joe Young830d4ce2017-05-30 17:48:13 -07009736 if (is_directional_mode && av1_use_angle_delta(bsize)) {
hui su45dc5972016-12-08 17:42:50 -08009737 int rate_dummy;
hui su9a416f52017-01-13 11:37:53 -08009738 int64_t model_rd = INT64_MAX;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009739 if (!angle_stats_ready) {
9740 const int src_stride = x->plane[0].src.stride;
9741 const uint8_t *src = x->plane[0].src.buf;
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02009742#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07009743 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
hui su9cc10652017-04-27 17:22:07 -07009744 highbd_angle_estimation(src, src_stride, rows, cols, bsize,
Yaowu Xuc27fc142016-08-22 16:08:15 -07009745 directional_mode_skip_mask);
9746 else
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02009747#endif // CONFIG_HIGHBITDEPTH
hui su9cc10652017-04-27 17:22:07 -07009748 angle_estimation(src, src_stride, rows, cols, bsize,
Yaowu Xuc27fc142016-08-22 16:08:15 -07009749 directional_mode_skip_mask);
9750 angle_stats_ready = 1;
9751 }
9752 if (directional_mode_skip_mask[mbmi->mode]) continue;
hui su45dc5972016-12-08 17:42:50 -08009753 rd_stats_y.rate = INT_MAX;
Yue Chenb0f808b2017-04-26 11:55:14 -07009754 rd_pick_intra_angle_sby(cpi, x, &rate_dummy, &rd_stats_y, bsize,
9755 intra_mode_cost[mbmi->mode], best_rd,
9756 &model_rd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009757 } else {
9758 mbmi->angle_delta[0] = 0;
Angie Chiang0e9a2e92016-11-08 09:45:40 -08009759 super_block_yrd(cpi, x, &rd_stats_y, bsize, best_rd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009760 }
9761#else
Angie Chiang0e9a2e92016-11-08 09:45:40 -08009762 super_block_yrd(cpi, x, &rd_stats_y, bsize, best_rd);
hui su45dc5972016-12-08 17:42:50 -08009763#endif // CONFIG_EXT_INTRA
Angie Chiang0e9a2e92016-11-08 09:45:40 -08009764 rate_y = rd_stats_y.rate;
9765 distortion_y = rd_stats_y.dist;
9766 skippable = rd_stats_y.skip;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009767
hui su5db97432016-10-14 16:10:14 -07009768#if CONFIG_FILTER_INTRA
Yue Chen18f6c152017-11-06 11:23:47 -08009769 if (mbmi->mode == DC_PRED && !xd->lossless[mbmi->segment_id] &&
9770 av1_filter_intra_allowed_bsize(mbmi->sb_type)) {
Yue Chen57b8ff62017-10-10 23:37:31 -07009771 RD_STATS rd_stats_y_fi;
9772 int filter_intra_selected_flag = 0;
9773 TX_SIZE best_tx_size = mbmi->tx_size;
9774 TX_TYPE best_tx_type = mbmi->tx_type;
9775 FILTER_INTRA_MODE best_fi_mode = FILTER_DC_PRED;
Yue Chen95e13e22017-11-01 23:56:35 -07009776 int64_t best_rd_tmp = INT64_MAX;
9777 if (rate_y != INT_MAX &&
9778 av1_filter_intra_allowed_txsize(best_tx_size)) {
Yue Chen4eba69b2017-11-09 22:37:35 -08009779 best_rd_tmp = RDCOST(x->rdmult,
9780 rate_y + x->filter_intra_cost[mbmi->tx_size][0] +
9781 intra_mode_cost[mbmi->mode],
9782 distortion_y);
Yue Chen95e13e22017-11-01 23:56:35 -07009783 }
Yue Chen57b8ff62017-10-10 23:37:31 -07009784
9785 mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 1;
9786 for (FILTER_INTRA_MODE fi_mode = FILTER_DC_PRED;
9787 fi_mode < FILTER_INTRA_MODES; ++fi_mode) {
9788 int this_rate_tmp;
9789 int64_t this_rd_tmp;
9790 mbmi->filter_intra_mode_info.filter_intra_mode[0] = fi_mode;
9791
9792 super_block_yrd(cpi, x, &rd_stats_y_fi, bsize, best_rd);
9793 if (rd_stats_y_fi.rate == INT_MAX) continue;
9794
Yue Chen4eba69b2017-11-09 22:37:35 -08009795 this_rate_tmp = rd_stats_y_fi.rate +
9796 x->filter_intra_cost[mbmi->tx_size][1] +
9797 x->filter_intra_mode_cost[0][fi_mode] +
9798 intra_mode_cost[mbmi->mode];
Yue Chen57b8ff62017-10-10 23:37:31 -07009799 this_rd_tmp = RDCOST(x->rdmult, this_rate_tmp, rd_stats_y_fi.dist);
9800
9801 if (this_rd_tmp < best_rd_tmp) {
9802 best_tx_size = mbmi->tx_size;
9803 best_tx_type = mbmi->tx_type;
9804 best_fi_mode = fi_mode;
9805 rd_stats_y = rd_stats_y_fi;
9806 rate_y = rd_stats_y_fi.rate;
9807 distortion_y = rd_stats_y_fi.dist;
9808 skippable = rd_stats_y_fi.skip;
9809 filter_intra_selected_flag = 1;
9810 best_rd_tmp = this_rd_tmp;
9811 }
9812 }
9813
9814 mbmi->tx_size = best_tx_size;
9815 mbmi->tx_type = best_tx_type;
9816 if (filter_intra_selected_flag) {
9817 mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 1;
9818 mbmi->filter_intra_mode_info.filter_intra_mode[0] = best_fi_mode;
9819 } else {
9820 mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
9821 }
9822 }
9823#endif
9824
9825 if (rate_y == INT_MAX) continue;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009826
Debargha Mukherjee2f123402016-08-30 17:43:38 -07009827 uv_tx = uv_txsize_lookup[bsize][mbmi->tx_size][pd->subsampling_x]
9828 [pd->subsampling_y];
Yaowu Xuc27fc142016-08-22 16:08:15 -07009829 if (rate_uv_intra[uv_tx] == INT_MAX) {
Luc Trudeau9d4cbb82017-07-27 17:01:32 -04009830 choose_intra_uv_mode(cpi, x, bsize, uv_tx, &rate_uv_intra[uv_tx],
Urvang Joshi368fbc92016-10-17 16:31:34 -07009831 &rate_uv_tokenonly[uv_tx], &dist_uvs[uv_tx],
9832 &skip_uvs[uv_tx], &mode_uv[uv_tx]);
hui su9bc1d8d2017-03-24 12:36:03 -07009833 if (try_palette) pmi_uv[uv_tx] = *pmi;
Urvang Joshib100db72016-10-12 16:28:56 -07009834
Yaowu Xuc27fc142016-08-22 16:08:15 -07009835#if CONFIG_EXT_INTRA
Yaowu Xuc27fc142016-08-22 16:08:15 -07009836 uv_angle_delta[uv_tx] = mbmi->angle_delta[1];
9837#endif // CONFIG_EXT_INTRA
9838 }
9839
9840 rate_uv = rate_uv_tokenonly[uv_tx];
Urvang Joshi368fbc92016-10-17 16:31:34 -07009841 distortion_uv = dist_uvs[uv_tx];
9842 skippable = skippable && skip_uvs[uv_tx];
Yaowu Xuc27fc142016-08-22 16:08:15 -07009843 mbmi->uv_mode = mode_uv[uv_tx];
hui su9bc1d8d2017-03-24 12:36:03 -07009844 if (try_palette) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07009845 pmi->palette_size[1] = pmi_uv[uv_tx].palette_size[1];
9846 memcpy(pmi->palette_colors + PALETTE_MAX_SIZE,
9847 pmi_uv[uv_tx].palette_colors + PALETTE_MAX_SIZE,
9848 2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0]));
9849 }
Urvang Joshib100db72016-10-12 16:28:56 -07009850
Yaowu Xuc27fc142016-08-22 16:08:15 -07009851#if CONFIG_EXT_INTRA
9852 mbmi->angle_delta[1] = uv_angle_delta[uv_tx];
Yaowu Xuc27fc142016-08-22 16:08:15 -07009853#endif // CONFIG_EXT_INTRA
9854
Jingning Han36fe3202017-02-20 22:31:49 -08009855 rate2 = rate_y + intra_mode_cost[mbmi->mode];
9856 if (!x->skip_chroma_rd)
Yue Chenb23d00a2017-07-28 17:01:21 -07009857 rate2 += rate_uv + x->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode];
Jingning Han36fe3202017-02-20 22:31:49 -08009858
hui su9bc1d8d2017-03-24 12:36:03 -07009859 if (try_palette && mbmi->mode == DC_PRED) {
Yue Chendab2ca92017-10-16 17:48:48 -07009860 rate2 += x->palette_y_mode_cost[bsize - BLOCK_8X8][palette_ctx][0];
hui su9bc1d8d2017-03-24 12:36:03 -07009861 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07009862
Rupert Swarbrickfcff0b22017-10-05 09:26:04 +01009863 if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(bsize)) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07009864 // super_block_yrd above includes the cost of the tx_size in the
9865 // tokenonly rate, but for intra blocks, tx_size is always coded
9866 // (prediction granularity), so we account for it in the full rate,
9867 // not the tokenonly rate.
Yue Chen3dd03e32017-10-17 15:39:52 -07009868 rate_y -= tx_size_cost(cm, x, bsize, mbmi->tx_size);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009869 }
9870#if CONFIG_EXT_INTRA
9871 if (is_directional_mode) {
Joe Young830d4ce2017-05-30 17:48:13 -07009872 if (av1_use_angle_delta(bsize)) {
Joe Young3ca43bf2017-10-06 15:12:46 -07009873#if CONFIG_EXT_INTRA_MOD
9874 rate2 += x->angle_delta_cost[mbmi->mode - V_PRED]
9875 [mbmi->angle_delta[0] + MAX_ANGLE_DELTA];
9876#else
Joe Young830d4ce2017-05-30 17:48:13 -07009877 rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
9878 MAX_ANGLE_DELTA + mbmi->angle_delta[0]);
Joe Young3ca43bf2017-10-06 15:12:46 -07009879#endif // CONFIG_EXT_INTRA_MOD
Joe Young830d4ce2017-05-30 17:48:13 -07009880 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07009881 }
Luc Trudeauf24a35f2017-09-11 11:56:33 -04009882 if (av1_is_directional_mode(get_uv_mode(mbmi->uv_mode), bsize) &&
Joe Young830d4ce2017-05-30 17:48:13 -07009883 av1_use_angle_delta(bsize)) {
Joe Young3ca43bf2017-10-06 15:12:46 -07009884#if CONFIG_EXT_INTRA_MOD
9885 rate2 += x->angle_delta_cost[mbmi->uv_mode - V_PRED]
9886 [mbmi->angle_delta[1] + MAX_ANGLE_DELTA];
9887#else
hui su0a6731f2017-04-26 15:23:47 -07009888 rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
9889 MAX_ANGLE_DELTA + mbmi->angle_delta[1]);
Joe Young3ca43bf2017-10-06 15:12:46 -07009890#endif // CONFIG_EXT_INTRA_MOD
Yaowu Xuc27fc142016-08-22 16:08:15 -07009891 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07009892#endif // CONFIG_EXT_INTRA
hui su5db97432016-10-14 16:10:14 -07009893#if CONFIG_FILTER_INTRA
Yue Chen4eba69b2017-11-09 22:37:35 -08009894 if (mbmi->mode == DC_PRED &&
9895 av1_filter_intra_allowed_txsize(mbmi->tx_size)) {
hui su5db97432016-10-14 16:10:14 -07009896 rate2 +=
Yue Chen4eba69b2017-11-09 22:37:35 -08009897 x->filter_intra_cost[mbmi->tx_size][mbmi->filter_intra_mode_info
9898 .use_filter_intra_mode[0]];
hui su5db97432016-10-14 16:10:14 -07009899 if (mbmi->filter_intra_mode_info.use_filter_intra_mode[0]) {
Yue Chen57b8ff62017-10-10 23:37:31 -07009900 rate2 += x->filter_intra_mode_cost[0][mbmi->filter_intra_mode_info
9901 .filter_intra_mode[0]];
hui su5db97432016-10-14 16:10:14 -07009902 }
9903 }
hui su5db97432016-10-14 16:10:14 -07009904#endif // CONFIG_FILTER_INTRA
Urvang Joshi96d1c0a2017-10-10 13:15:32 -07009905 if (mbmi->mode != DC_PRED && mbmi->mode != PAETH_PRED)
Yaowu Xuc27fc142016-08-22 16:08:15 -07009906 rate2 += intra_cost_penalty;
9907 distortion2 = distortion_y + distortion_uv;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009908 } else {
Yaowu Xuc27fc142016-08-22 16:08:15 -07009909 int_mv backup_ref_mv[2];
9910
Debargha Mukherjee0f248c42017-09-07 12:40:18 -07009911 if (!is_comp_ref_allowed(bsize) && mbmi->ref_frame[1] > INTRA_FRAME)
9912 continue;
Jingning Hanc41a5492017-02-24 11:18:52 -08009913
Yaowu Xuc27fc142016-08-22 16:08:15 -07009914 backup_ref_mv[0] = mbmi_ext->ref_mvs[ref_frame][0];
9915 if (comp_pred) backup_ref_mv[1] = mbmi_ext->ref_mvs[second_ref_frame][0];
Yaowu Xuc27fc142016-08-22 16:08:15 -07009916 if (second_ref_frame == INTRA_FRAME) {
9917 if (best_single_inter_ref != ref_frame) continue;
Debargha Mukherjeecb603792016-10-04 13:10:23 -07009918 mbmi->interintra_mode = intra_to_interintra_mode[best_intra_mode];
hui su5db97432016-10-14 16:10:14 -07009919// TODO(debargha|geza.lore):
9920// Should we use ext_intra modes for interintra?
Yaowu Xuc27fc142016-08-22 16:08:15 -07009921#if CONFIG_EXT_INTRA
Yaowu Xuc27fc142016-08-22 16:08:15 -07009922 mbmi->angle_delta[0] = 0;
9923 mbmi->angle_delta[1] = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009924#endif // CONFIG_EXT_INTRA
hui su5db97432016-10-14 16:10:14 -07009925#if CONFIG_FILTER_INTRA
9926 mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
9927 mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
9928#endif // CONFIG_FILTER_INTRA
Yaowu Xuc27fc142016-08-22 16:08:15 -07009929 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07009930 mbmi->ref_mv_idx = 0;
Yaowu Xuf883b422016-08-30 14:01:10 -07009931 ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009932
David Barker404b2e82017-03-27 13:07:47 +01009933 if (comp_pred) {
9934 if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
David Barker3dfba992017-04-03 16:10:09 +01009935 int ref_mv_idx = 0;
9936 // Special case: NEAR_NEWMV and NEW_NEARMV modes use
9937 // 1 + mbmi->ref_mv_idx (like NEARMV) instead of
9938 // mbmi->ref_mv_idx (like NEWMV)
9939 if (mbmi->mode == NEAR_NEWMV || mbmi->mode == NEW_NEARMV)
9940 ref_mv_idx = 1;
9941
9942 if (compound_ref0_mode(mbmi->mode) == NEWMV) {
9943 int_mv this_mv =
9944 mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
David Barker404b2e82017-03-27 13:07:47 +01009945 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
9946 xd->n8_h << MI_SIZE_LOG2, xd);
9947 mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv;
9948 }
David Barker3dfba992017-04-03 16:10:09 +01009949 if (compound_ref1_mode(mbmi->mode) == NEWMV) {
9950 int_mv this_mv =
9951 mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
David Barker404b2e82017-03-27 13:07:47 +01009952 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
9953 xd->n8_h << MI_SIZE_LOG2, xd);
9954 mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0] = this_mv;
9955 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07009956 }
David Barker404b2e82017-03-27 13:07:47 +01009957 } else {
Zoe Liu1157d502017-04-30 07:57:14 -07009958 if (mbmi->mode == NEWMV && mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
David Barker404b2e82017-03-27 13:07:47 +01009959 int ref;
9960 for (ref = 0; ref < 1 + comp_pred; ++ref) {
9961 int_mv this_mv =
9962 (ref == 0) ? mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv
9963 : mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
9964 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
9965 xd->n8_h << MI_SIZE_LOG2, xd);
9966 mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0] = this_mv;
9967 }
9968 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07009969 }
Cheng Chenca6958c2017-10-10 14:00:50 -07009970#if CONFIG_JNT_COMP
9971 {
9972 int cum_rate = rate2;
9973 MB_MODE_INFO backup_mbmi = *mbmi;
9974
9975 int_mv backup_frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
9976 int_mv backup_single_newmv[TOTAL_REFS_PER_FRAME];
9977 int backup_single_newmv_rate[TOTAL_REFS_PER_FRAME];
9978 int64_t backup_modelled_rd[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
9979
9980 memcpy(backup_frame_mv, frame_mv, sizeof(frame_mv));
9981 memcpy(backup_single_newmv, single_newmv, sizeof(single_newmv));
9982 memcpy(backup_single_newmv_rate, single_newmv_rate,
9983 sizeof(single_newmv_rate));
9984 memcpy(backup_modelled_rd, modelled_rd, sizeof(modelled_rd));
9985
9986 InterpFilters backup_interp_filters = mbmi->interp_filters;
9987
9988 for (int comp_idx = 0; comp_idx < 1 + has_second_ref(mbmi);
9989 ++comp_idx) {
9990 RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
9991 av1_init_rd_stats(&rd_stats);
9992 av1_init_rd_stats(&rd_stats_y);
9993 av1_init_rd_stats(&rd_stats_uv);
9994 rd_stats.rate = cum_rate;
9995
9996 memcpy(frame_mv, backup_frame_mv, sizeof(frame_mv));
9997 memcpy(single_newmv, backup_single_newmv, sizeof(single_newmv));
9998 memcpy(single_newmv_rate, backup_single_newmv_rate,
9999 sizeof(single_newmv_rate));
10000 memcpy(modelled_rd, backup_modelled_rd, sizeof(modelled_rd));
10001
10002 mbmi->interp_filters = backup_interp_filters;
10003
10004 int dummy_disable_skip = 0;
10005
10006 // Point to variables that are maintained between loop iterations
10007 args.single_newmv = single_newmv;
10008 args.single_newmv_rate = single_newmv_rate;
10009 args.modelled_rd = modelled_rd;
10010 mbmi->compound_idx = comp_idx;
10011
10012 int64_t tmp_rd = handle_inter_mode(
10013 cpi, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv,
10014 &dummy_disable_skip, frame_mv, mi_row, mi_col, &args, best_rd);
10015
10016 if (tmp_rd < INT64_MAX) {
10017 if (RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist) <
10018 RDCOST(x->rdmult, 0, rd_stats.sse))
10019 tmp_rd =
10020 RDCOST(x->rdmult, rd_stats.rate + x->skip_cost[skip_ctx][0],
10021 rd_stats.dist);
10022 else
10023 tmp_rd = RDCOST(x->rdmult,
10024 rd_stats.rate + x->skip_cost[skip_ctx][1] -
10025 rd_stats_y.rate - rd_stats_uv.rate,
10026 rd_stats.sse);
10027 }
10028
10029 if (tmp_rd < this_rd) {
10030 this_rd = tmp_rd;
10031 rate2 = rd_stats.rate;
10032 skippable = rd_stats.skip;
10033 distortion2 = rd_stats.dist;
10034 total_sse = rd_stats.sse;
10035 rate_y = rd_stats_y.rate;
10036 rate_uv = rd_stats_uv.rate;
10037 disable_skip = dummy_disable_skip;
10038 backup_mbmi = *mbmi;
Jingning Han5d0320f2017-11-07 08:49:16 -080010039 for (i = 0; i < MAX_MB_PLANE; ++i)
10040 memcpy(x->blk_skip_drl[i], x->blk_skip[i],
10041 sizeof(uint8_t) * ctx->num_4x4_blk);
Cheng Chenca6958c2017-10-10 14:00:50 -070010042 }
10043 }
10044 *mbmi = backup_mbmi;
Jingning Han5d0320f2017-11-07 08:49:16 -080010045 for (i = 0; i < MAX_MB_PLANE; ++i)
10046 memcpy(x->blk_skip[i], x->blk_skip_drl[i],
10047 sizeof(uint8_t) * ctx->num_4x4_blk);
Cheng Chenca6958c2017-10-10 14:00:50 -070010048 }
Sebastien Alaiwan34d55662017-11-15 09:36:03 +010010049#else // CONFIG_JNT_COMP
Angie Chiang76159122016-11-09 12:13:22 -080010050 {
10051 RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
10052 av1_init_rd_stats(&rd_stats);
10053 rd_stats.rate = rate2;
Fergus Simpson073c6f32017-02-17 12:13:48 -080010054
Zoe Liu7f24e1b2017-03-17 17:42:05 -070010055 // Point to variables that are maintained between loop iterations
10056 args.single_newmv = single_newmv;
Zoe Liu7f24e1b2017-03-17 17:42:05 -070010057 args.single_newmv_rate = single_newmv_rate;
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -080010058 args.modelled_rd = modelled_rd;
Fergus Simpson3424c2d2017-03-09 11:48:15 -080010059 this_rd = handle_inter_mode(cpi, x, bsize, &rd_stats, &rd_stats_y,
10060 &rd_stats_uv, &disable_skip, frame_mv,
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -080010061 mi_row, mi_col, &args, best_rd);
Angie Chiang76159122016-11-09 12:13:22 -080010062 rate2 = rd_stats.rate;
10063 skippable = rd_stats.skip;
10064 distortion2 = rd_stats.dist;
10065 total_sse = rd_stats.sse;
10066 rate_y = rd_stats_y.rate;
10067 rate_uv = rd_stats_uv.rate;
10068 }
Cheng Chenca6958c2017-10-10 14:00:50 -070010069#endif // CONFIG_JNT_COMP
Yaowu Xuc27fc142016-08-22 16:08:15 -070010070
Sebastien Alaiwan34d55662017-11-15 09:36:03 +010010071 // TODO(jingning): This needs some refactoring to improve code quality
10072 // and reduce redundant steps.
David Barker3dfba992017-04-03 16:10:09 +010010073 if ((have_nearmv_in_inter_mode(mbmi->mode) &&
David Barker404b2e82017-03-27 13:07:47 +010010074 mbmi_ext->ref_mv_count[ref_frame_type] > 2) ||
10075 ((mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) &&
Sebastien Alaiwan34d55662017-11-15 09:36:03 +010010076 mbmi_ext->ref_mv_count[ref_frame_type] > 1)) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070010077 int_mv backup_mv = frame_mv[NEARMV][ref_frame];
10078 MB_MODE_INFO backup_mbmi = *mbmi;
10079 int backup_skip = x->skip;
10080 int64_t tmp_ref_rd = this_rd;
10081 int ref_idx;
10082
Sebastien Alaiwan0bdea0d2017-10-02 15:15:05 +020010083 // TODO(jingning): This should be deprecated shortly.
David Barker3dfba992017-04-03 16:10:09 +010010084 int idx_offset = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010085 int ref_set =
Yaowu Xuf883b422016-08-30 14:01:10 -070010086 AOMMIN(2, mbmi_ext->ref_mv_count[ref_frame_type] - 1 - idx_offset);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010087
10088 uint8_t drl_ctx =
Yaowu Xuf883b422016-08-30 14:01:10 -070010089 av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx_offset);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010090 // Dummy
10091 int_mv backup_fmv[2];
10092 backup_fmv[0] = frame_mv[NEWMV][ref_frame];
10093 if (comp_pred) backup_fmv[1] = frame_mv[NEWMV][second_ref_frame];
10094
Yue Chenb23d00a2017-07-28 17:01:21 -070010095 rate2 += (rate2 < INT_MAX ? x->drl_mode_cost0[drl_ctx][0] : 0);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010096
10097 if (this_rd < INT64_MAX) {
Urvang Joshi70006e42017-06-14 16:08:55 -070010098 if (RDCOST(x->rdmult, rate_y + rate_uv, distortion2) <
10099 RDCOST(x->rdmult, 0, total_sse))
10100 tmp_ref_rd = RDCOST(
Zoe Liu1eed2df2017-10-16 17:13:15 -070010101 x->rdmult, rate2 + x->skip_cost[av1_get_skip_context(xd)][0],
Urvang Joshi70006e42017-06-14 16:08:55 -070010102 distortion2);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010103 else
clang-format4eafefe2017-09-04 12:51:20 -070010104 tmp_ref_rd =
10105 RDCOST(x->rdmult,
Zoe Liu1eed2df2017-10-16 17:13:15 -070010106 rate2 + x->skip_cost[av1_get_skip_context(xd)][1] -
clang-format4eafefe2017-09-04 12:51:20 -070010107 rate_y - rate_uv,
10108 total_sse);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010109 }
Yaowu Xuc27fc142016-08-22 16:08:15 -070010110 for (i = 0; i < MAX_MB_PLANE; ++i)
10111 memcpy(x->blk_skip_drl[i], x->blk_skip[i],
10112 sizeof(uint8_t) * ctx->num_4x4_blk);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010113
Cheng Chenca6958c2017-10-10 14:00:50 -070010114#if CONFIG_JNT_COMP
10115 for (int sidx = 0; sidx < ref_set * (1 + has_second_ref(mbmi)); ++sidx)
10116#else
10117 for (ref_idx = 0; ref_idx < ref_set; ++ref_idx)
10118#endif // CONFIG_JNT_COMP
10119 {
Yaowu Xuc27fc142016-08-22 16:08:15 -070010120 int64_t tmp_alt_rd = INT64_MAX;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010121 int dummy_disable_skip = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010122 int_mv cur_mv;
Angie Chiang76159122016-11-09 12:13:22 -080010123 RD_STATS tmp_rd_stats, tmp_rd_stats_y, tmp_rd_stats_uv;
Cheng Chenca6958c2017-10-10 14:00:50 -070010124#if CONFIG_JNT_COMP
10125 ref_idx = sidx;
10126 if (has_second_ref(mbmi)) ref_idx /= 2;
10127 mbmi->compound_idx = sidx % 2;
10128#endif // CONFIG_JNT_COMP
Yaowu Xuc27fc142016-08-22 16:08:15 -070010129
Yaowu Xu5bfbfdf2016-11-22 16:43:34 -080010130 av1_invalid_rd_stats(&tmp_rd_stats);
Yushin Choc0f6bf22017-06-09 16:08:02 -070010131
Jingning Han52617b22017-04-11 12:50:08 -070010132 x->skip = 0;
Yaowu Xu5bfbfdf2016-11-22 16:43:34 -080010133
Yaowu Xuc27fc142016-08-22 16:08:15 -070010134 mbmi->ref_mv_idx = 1 + ref_idx;
10135
David Barker3dfba992017-04-03 16:10:09 +010010136 if (comp_pred) {
10137 int ref_mv_idx = mbmi->ref_mv_idx;
10138 // Special case: NEAR_NEWMV and NEW_NEARMV modes use
10139 // 1 + mbmi->ref_mv_idx (like NEARMV) instead of
10140 // mbmi->ref_mv_idx (like NEWMV)
10141 if (mbmi->mode == NEAR_NEWMV || mbmi->mode == NEW_NEARMV)
10142 ref_mv_idx = 1 + mbmi->ref_mv_idx;
10143
10144 if (compound_ref0_mode(mbmi->mode) == NEWMV) {
10145 int_mv this_mv =
10146 mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
10147 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
10148 xd->n8_h << MI_SIZE_LOG2, xd);
10149 mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv;
10150 } else if (compound_ref0_mode(mbmi->mode) == NEARESTMV) {
10151 int_mv this_mv =
10152 mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv;
10153 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
10154 xd->n8_h << MI_SIZE_LOG2, xd);
10155 mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv;
10156 }
10157
10158 if (compound_ref1_mode(mbmi->mode) == NEWMV) {
10159 int_mv this_mv =
10160 mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
10161 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
10162 xd->n8_h << MI_SIZE_LOG2, xd);
10163 mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0] = this_mv;
10164 } else if (compound_ref1_mode(mbmi->mode) == NEARESTMV) {
10165 int_mv this_mv =
10166 mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
10167 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
10168 xd->n8_h << MI_SIZE_LOG2, xd);
10169 mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0] = this_mv;
10170 }
10171 } else {
Jingning Hanc3ef32a2017-12-04 09:56:53 -080010172 int_mv this_mv =
10173 mbmi_ext->ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
10174 .this_mv;
10175 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
10176 xd->n8_h << MI_SIZE_LOG2, xd);
10177 mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010178 }
10179
10180 cur_mv =
10181 mbmi_ext->ref_mv_stack[ref_frame][mbmi->ref_mv_idx + idx_offset]
10182 .this_mv;
10183 clamp_mv2(&cur_mv.as_mv, xd);
10184
Alex Converse0fa0f422017-04-24 12:51:14 -070010185 if (!mv_check_bounds(&x->mv_limits, &cur_mv.as_mv)) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070010186 int_mv dummy_single_newmv[TOTAL_REFS_PER_FRAME] = { { 0 } };
Zoe Liu7f24e1b2017-03-17 17:42:05 -070010187 int dummy_single_newmv_rate[TOTAL_REFS_PER_FRAME] = { 0 };
Yaowu Xuc27fc142016-08-22 16:08:15 -070010188
10189 frame_mv[NEARMV][ref_frame] = cur_mv;
Angie Chiang76159122016-11-09 12:13:22 -080010190 av1_init_rd_stats(&tmp_rd_stats);
Yushin Cho8e75e8b2017-09-12 16:33:28 -070010191
Zoe Liu7f24e1b2017-03-17 17:42:05 -070010192 // Point to variables that are not maintained between iterations
10193 args.single_newmv = dummy_single_newmv;
Zoe Liu7f24e1b2017-03-17 17:42:05 -070010194 args.single_newmv_rate = dummy_single_newmv_rate;
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -080010195 args.modelled_rd = NULL;
Sebastien Alaiwan34d55662017-11-15 09:36:03 +010010196 tmp_alt_rd = handle_inter_mode(
10197 cpi, x, bsize, &tmp_rd_stats, &tmp_rd_stats_y, &tmp_rd_stats_uv,
10198 &dummy_disable_skip, frame_mv, mi_row, mi_col, &args, best_rd);
Zoe Liu7f24e1b2017-03-17 17:42:05 -070010199 // Prevent pointers from escaping local scope
10200 args.single_newmv = NULL;
Zoe Liu7f24e1b2017-03-17 17:42:05 -070010201 args.single_newmv_rate = NULL;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010202 }
10203
10204 for (i = 0; i < mbmi->ref_mv_idx; ++i) {
10205 uint8_t drl1_ctx = 0;
Yaowu Xuf883b422016-08-30 14:01:10 -070010206 drl1_ctx = av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type],
10207 i + idx_offset);
Angie Chiang76159122016-11-09 12:13:22 -080010208 tmp_rd_stats.rate +=
Yue Chenb23d00a2017-07-28 17:01:21 -070010209 (tmp_rd_stats.rate < INT_MAX ? x->drl_mode_cost0[drl1_ctx][1]
Angie Chiang76159122016-11-09 12:13:22 -080010210 : 0);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010211 }
10212
10213 if (mbmi_ext->ref_mv_count[ref_frame_type] >
10214 mbmi->ref_mv_idx + idx_offset + 1 &&
10215 ref_idx < ref_set - 1) {
10216 uint8_t drl1_ctx =
Yaowu Xuf883b422016-08-30 14:01:10 -070010217 av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type],
10218 mbmi->ref_mv_idx + idx_offset);
Yaowu Xu83ed6fe2016-11-22 11:15:29 -080010219 tmp_rd_stats.rate +=
Yue Chenb23d00a2017-07-28 17:01:21 -070010220 (tmp_rd_stats.rate < INT_MAX ? x->drl_mode_cost0[drl1_ctx][0]
Yaowu Xu83ed6fe2016-11-22 11:15:29 -080010221 : 0);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010222 }
10223
10224 if (tmp_alt_rd < INT64_MAX) {
Urvang Joshi70006e42017-06-14 16:08:55 -070010225 tmp_alt_rd =
10226 RDCOST(x->rdmult, tmp_rd_stats.rate, tmp_rd_stats.dist);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010227 }
10228
10229 if (tmp_ref_rd > tmp_alt_rd) {
Angie Chiang76159122016-11-09 12:13:22 -080010230 rate2 = tmp_rd_stats.rate;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010231 disable_skip = dummy_disable_skip;
Angie Chiang76159122016-11-09 12:13:22 -080010232 distortion2 = tmp_rd_stats.dist;
10233 skippable = tmp_rd_stats.skip;
10234 rate_y = tmp_rd_stats_y.rate;
10235 rate_uv = tmp_rd_stats_uv.rate;
10236 total_sse = tmp_rd_stats.sse;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010237 this_rd = tmp_alt_rd;
10238 tmp_ref_rd = tmp_alt_rd;
10239 backup_mbmi = *mbmi;
10240 backup_skip = x->skip;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010241 for (i = 0; i < MAX_MB_PLANE; ++i)
10242 memcpy(x->blk_skip_drl[i], x->blk_skip[i],
10243 sizeof(uint8_t) * ctx->num_4x4_blk);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010244 } else {
10245 *mbmi = backup_mbmi;
10246 x->skip = backup_skip;
10247 }
10248 }
10249
10250 frame_mv[NEARMV][ref_frame] = backup_mv;
10251 frame_mv[NEWMV][ref_frame] = backup_fmv[0];
10252 if (comp_pred) frame_mv[NEWMV][second_ref_frame] = backup_fmv[1];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010253 for (i = 0; i < MAX_MB_PLANE; ++i)
10254 memcpy(x->blk_skip[i], x->blk_skip_drl[i],
10255 sizeof(uint8_t) * ctx->num_4x4_blk);
Cheng Chenca6958c2017-10-10 14:00:50 -070010256#if CONFIG_JNT_COMP
10257 *mbmi = backup_mbmi;
10258#endif // CONFIG_JNT_COMP
Yaowu Xuc27fc142016-08-22 16:08:15 -070010259 }
10260 mbmi_ext->ref_mvs[ref_frame][0] = backup_ref_mv[0];
10261 if (comp_pred) mbmi_ext->ref_mvs[second_ref_frame][0] = backup_ref_mv[1];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010262
10263 if (this_rd == INT64_MAX) continue;
10264
Debargha Mukherjee0f248c42017-09-07 12:40:18 -070010265 if (is_comp_ref_allowed(mbmi->sb_type))
Jingning Hanc41a5492017-02-24 11:18:52 -080010266 compmode_cost = av1_cost_bit(comp_mode_p, comp_pred);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010267
10268 if (cm->reference_mode == REFERENCE_MODE_SELECT) rate2 += compmode_cost;
10269 }
10270
Yaowu Xuc27fc142016-08-22 16:08:15 -070010271 // Estimate the reference frame signaling cost and add it
10272 // to the rolling cost variable.
10273 if (comp_pred) {
Zoe Liuc082bbc2017-05-17 13:31:37 -070010274#if CONFIG_EXT_COMP_REFS
10275 rate2 += ref_costs_comp[ref_frame][second_ref_frame];
Sebastien Alaiwan365e6442017-10-16 11:35:00 +020010276#else // !CONFIG_EXT_COMP_REFS
Yaowu Xuc27fc142016-08-22 16:08:15 -070010277 rate2 += ref_costs_comp[ref_frame];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010278 rate2 += ref_costs_comp[second_ref_frame];
Zoe Liuc082bbc2017-05-17 13:31:37 -070010279#endif // CONFIG_EXT_COMP_REFS
Yaowu Xuc27fc142016-08-22 16:08:15 -070010280 } else {
10281 rate2 += ref_costs_single[ref_frame];
10282 }
10283
Sebastien Alaiwan1bc94fc2017-10-31 10:25:17 +010010284 if (ref_frame == INTRA_FRAME) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070010285 if (skippable) {
10286 // Back out the coefficient coding costs
10287 rate2 -= (rate_y + rate_uv);
10288 rate_y = 0;
10289 rate_uv = 0;
10290 // Cost the skip mb case
Zoe Liu1eed2df2017-10-16 17:13:15 -070010291 rate2 += x->skip_cost[av1_get_skip_context(xd)][1];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010292 } else if (ref_frame != INTRA_FRAME && !xd->lossless[mbmi->segment_id]) {
Urvang Joshi70006e42017-06-14 16:08:55 -070010293 if (RDCOST(x->rdmult, rate_y + rate_uv + rate_skip0, distortion2) <
10294 RDCOST(x->rdmult, rate_skip1, total_sse)) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070010295 // Add in the cost of the no skip flag.
Zoe Liu1eed2df2017-10-16 17:13:15 -070010296 rate2 += x->skip_cost[av1_get_skip_context(xd)][0];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010297 } else {
10298 // FIXME(rbultje) make this work for splitmv also
Zoe Liu1eed2df2017-10-16 17:13:15 -070010299 rate2 += x->skip_cost[av1_get_skip_context(xd)][1];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010300 distortion2 = total_sse;
10301 assert(total_sse >= 0);
10302 rate2 -= (rate_y + rate_uv);
10303 this_skip2 = 1;
10304 rate_y = 0;
10305 rate_uv = 0;
10306 }
10307 } else {
10308 // Add in the cost of the no skip flag.
Zoe Liu1eed2df2017-10-16 17:13:15 -070010309 rate2 += x->skip_cost[av1_get_skip_context(xd)][0];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010310 }
10311
10312 // Calculate the final RD estimate for this mode.
Urvang Joshi70006e42017-06-14 16:08:55 -070010313 this_rd = RDCOST(x->rdmult, rate2, distortion2);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010314 } else {
10315 this_skip2 = mbmi->skip;
Urvang Joshi70006e42017-06-14 16:08:55 -070010316 this_rd = RDCOST(x->rdmult, rate2, distortion2);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010317 if (this_skip2) {
10318 rate_y = 0;
10319 rate_uv = 0;
10320 }
Yaowu Xuc27fc142016-08-22 16:08:15 -070010321 }
10322
Yaowu Xuc27fc142016-08-22 16:08:15 -070010323 if (ref_frame == INTRA_FRAME) {
10324 // Keep record of best intra rd
10325 if (this_rd < best_intra_rd) {
10326 best_intra_rd = this_rd;
10327 best_intra_mode = mbmi->mode;
10328 }
Emil Keyder01770b32017-01-20 18:03:11 -050010329 } else if (second_ref_frame == NONE_FRAME) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070010330 if (this_rd < best_single_inter_rd) {
10331 best_single_inter_rd = this_rd;
10332 best_single_inter_ref = mbmi->ref_frame[0];
10333 }
Yaowu Xuc27fc142016-08-22 16:08:15 -070010334 }
10335
10336 if (!disable_skip && ref_frame == INTRA_FRAME) {
10337 for (i = 0; i < REFERENCE_MODES; ++i)
Yaowu Xuf883b422016-08-30 14:01:10 -070010338 best_pred_rd[i] = AOMMIN(best_pred_rd[i], this_rd);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010339 }
10340
10341 // Did this mode help.. i.e. is it the new best mode
10342 if (this_rd < best_rd || x->skip) {
10343 if (!mode_excluded) {
10344 // Note index of best mode so far
10345 best_mode_index = mode_index;
10346
10347 if (ref_frame == INTRA_FRAME) {
10348 /* required for left and above block mv */
10349 mbmi->mv[0].as_int = 0;
10350 } else {
10351 best_pred_sse = x->pred_sse[ref_frame];
10352 }
10353
10354 rd_cost->rate = rate2;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010355 rd_cost->dist = distortion2;
10356 rd_cost->rdcost = this_rd;
10357 best_rd = this_rd;
10358 best_mbmode = *mbmi;
10359 best_skip2 = this_skip2;
10360 best_mode_skippable = skippable;
Zoe Liu1eed2df2017-10-16 17:13:15 -070010361 best_rate_y =
10362 rate_y +
10363 x->skip_cost[av1_get_skip_context(xd)][this_skip2 || skippable];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010364 best_rate_uv = rate_uv;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010365 for (i = 0; i < MAX_MB_PLANE; ++i)
10366 memcpy(ctx->blk_skip[i], x->blk_skip[i],
10367 sizeof(uint8_t) * ctx->num_4x4_blk);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010368 }
10369 }
Yushin Cho8e75e8b2017-09-12 16:33:28 -070010370
Yaowu Xuc27fc142016-08-22 16:08:15 -070010371 /* keep record of best compound/single-only prediction */
10372 if (!disable_skip && ref_frame != INTRA_FRAME) {
10373 int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
10374
10375 if (cm->reference_mode == REFERENCE_MODE_SELECT) {
10376 single_rate = rate2 - compmode_cost;
10377 hybrid_rate = rate2;
10378 } else {
10379 single_rate = rate2;
10380 hybrid_rate = rate2 + compmode_cost;
10381 }
10382
Urvang Joshi70006e42017-06-14 16:08:55 -070010383 single_rd = RDCOST(x->rdmult, single_rate, distortion2);
10384 hybrid_rd = RDCOST(x->rdmult, hybrid_rate, distortion2);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010385
10386 if (!comp_pred) {
10387 if (single_rd < best_pred_rd[SINGLE_REFERENCE])
10388 best_pred_rd[SINGLE_REFERENCE] = single_rd;
10389 } else {
10390 if (single_rd < best_pred_rd[COMPOUND_REFERENCE])
10391 best_pred_rd[COMPOUND_REFERENCE] = single_rd;
10392 }
10393 if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
10394 best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
10395 }
10396
Yaowu Xuc27fc142016-08-22 16:08:15 -070010397 if (x->skip && !comp_pred) break;
10398 }
10399
10400 if (xd->lossless[mbmi->segment_id] == 0 && best_mode_index >= 0 &&
10401 ((sf->tx_type_search.fast_inter_tx_type_search == 1 &&
10402 is_inter_mode(best_mbmode.mode)) ||
10403 (sf->tx_type_search.fast_intra_tx_type_search == 1 &&
10404 !is_inter_mode(best_mbmode.mode)))) {
Angie Chiang0e9a2e92016-11-08 09:45:40 -080010405 int skip_blk = 0;
10406 RD_STATS rd_stats_y, rd_stats_uv;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010407
10408 x->use_default_inter_tx_type = 0;
10409 x->use_default_intra_tx_type = 0;
10410
10411 *mbmi = best_mbmode;
10412
10413 set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
10414
10415 // Select prediction reference frames.
10416 for (i = 0; i < MAX_MB_PLANE; i++) {
10417 xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
10418 if (has_second_ref(mbmi))
10419 xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
10420 }
10421
10422 if (is_inter_mode(mbmi->mode)) {
Jingning Hanc44009c2017-05-06 11:36:49 -070010423 av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
Sarah Parker19234cc2017-03-10 16:43:25 -080010424 if (mbmi->motion_mode == OBMC_CAUSAL) {
Fergus Simpson073c6f32017-02-17 12:13:48 -080010425 av1_build_obmc_inter_prediction(
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -080010426 cm, xd, mi_row, mi_col, args.above_pred_buf, args.above_pred_stride,
10427 args.left_pred_buf, args.left_pred_stride);
Sarah Parker19234cc2017-03-10 16:43:25 -080010428 }
Yaowu Xuf883b422016-08-30 14:01:10 -070010429 av1_subtract_plane(x, bsize, 0);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010430 if (cm->tx_mode == TX_MODE_SELECT || xd->lossless[mbmi->segment_id]) {
Debargha Mukherjee51666862017-10-24 14:29:13 -070010431 // av1_rd_pick_inter_mode_sb
Yue Chen25dc0702017-10-18 23:36:06 -070010432 select_tx_type_yrd(cpi, x, &rd_stats_y, bsize, mi_row, mi_col,
10433 INT64_MAX);
Hui Su1ddf2312017-08-19 15:21:34 -070010434 assert(rd_stats_y.rate != INT_MAX);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010435 } else {
10436 int idx, idy;
Angie Chiang0e9a2e92016-11-08 09:45:40 -080010437 super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010438 for (idy = 0; idy < xd->n8_h; ++idy)
10439 for (idx = 0; idx < xd->n8_w; ++idx)
10440 mbmi->inter_tx_size[idy][idx] = mbmi->tx_size;
Angie Chiang0e9a2e92016-11-08 09:45:40 -080010441 memset(x->blk_skip[0], rd_stats_y.skip,
Yaowu Xuc27fc142016-08-22 16:08:15 -070010442 sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
10443 }
10444
Debargha Mukherjee51666862017-10-24 14:29:13 -070010445 inter_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX, 0);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010446 } else {
Angie Chiang0e9a2e92016-11-08 09:45:40 -080010447 super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
Angie Chiang284d7772016-11-08 11:06:45 -080010448 super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010449 }
10450
Urvang Joshi70006e42017-06-14 16:08:55 -070010451 if (RDCOST(x->rdmult, rd_stats_y.rate + rd_stats_uv.rate,
Angie Chiang0e9a2e92016-11-08 09:45:40 -080010452 (rd_stats_y.dist + rd_stats_uv.dist)) >
Urvang Joshi70006e42017-06-14 16:08:55 -070010453 RDCOST(x->rdmult, 0, (rd_stats_y.sse + rd_stats_uv.sse))) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070010454 skip_blk = 1;
Zoe Liu1eed2df2017-10-16 17:13:15 -070010455 rd_stats_y.rate = x->skip_cost[av1_get_skip_context(xd)][1];
Angie Chiang0e9a2e92016-11-08 09:45:40 -080010456 rd_stats_uv.rate = 0;
10457 rd_stats_y.dist = rd_stats_y.sse;
10458 rd_stats_uv.dist = rd_stats_uv.sse;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010459 } else {
10460 skip_blk = 0;
Zoe Liu1eed2df2017-10-16 17:13:15 -070010461 rd_stats_y.rate += x->skip_cost[av1_get_skip_context(xd)][0];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010462 }
10463
Urvang Joshi70006e42017-06-14 16:08:55 -070010464 if (RDCOST(x->rdmult, best_rate_y + best_rate_uv, rd_cost->dist) >
10465 RDCOST(x->rdmult, rd_stats_y.rate + rd_stats_uv.rate,
Angie Chiang0e9a2e92016-11-08 09:45:40 -080010466 (rd_stats_y.dist + rd_stats_uv.dist))) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070010467 int idx, idy;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010468 best_mbmode.tx_type = mbmi->tx_type;
10469 best_mbmode.tx_size = mbmi->tx_size;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010470 for (idy = 0; idy < xd->n8_h; ++idy)
10471 for (idx = 0; idx < xd->n8_w; ++idx)
10472 best_mbmode.inter_tx_size[idy][idx] = mbmi->inter_tx_size[idy][idx];
10473
10474 for (i = 0; i < MAX_MB_PLANE; ++i)
10475 memcpy(ctx->blk_skip[i], x->blk_skip[i],
10476 sizeof(uint8_t) * ctx->num_4x4_blk);
Jingning Hane67b38a2016-11-04 10:30:00 -070010477
10478 best_mbmode.min_tx_size = mbmi->min_tx_size;
Angie Chiang0e9a2e92016-11-08 09:45:40 -080010479 rd_cost->rate +=
10480 (rd_stats_y.rate + rd_stats_uv.rate - best_rate_y - best_rate_uv);
10481 rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
Urvang Joshi70006e42017-06-14 16:08:55 -070010482 rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010483 best_skip2 = skip_blk;
10484 }
10485 }
10486
10487 // Only try palette mode when the best mode so far is an intra mode.
hui su9bc1d8d2017-03-24 12:36:03 -070010488 if (try_palette && !is_inter_mode(best_mbmode.mode)) {
Angie Chiang0e9a2e92016-11-08 09:45:40 -080010489 int rate2 = 0;
Urvang Joshi451e0f22017-01-31 11:18:31 -080010490 int64_t distortion2 = 0, best_rd_palette = best_rd, this_rd,
10491 best_model_rd_palette = INT64_MAX;
Urvang Joshi626591d2016-10-24 14:13:55 -070010492 int skippable = 0, rate_overhead_palette = 0;
Angie Chiang0e9a2e92016-11-08 09:45:40 -080010493 RD_STATS rd_stats_y;
hui sude0c70a2017-01-09 17:12:17 -080010494 TX_SIZE uv_tx;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010495 uint8_t *const best_palette_color_map =
10496 x->palette_buffer->best_palette_color_map;
10497 uint8_t *const color_map = xd->plane[0].color_index_map;
Hui Suefb755c2017-10-26 16:09:05 -070010498 MB_MODE_INFO best_mbmi_palette = *mbmi;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010499
10500 mbmi->mode = DC_PRED;
Luc Trudeaud6d9eee2017-07-12 12:36:50 -040010501 mbmi->uv_mode = UV_DC_PRED;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010502 mbmi->ref_frame[0] = INTRA_FRAME;
Emil Keyder01770b32017-01-20 18:03:11 -050010503 mbmi->ref_frame[1] = NONE_FRAME;
Urvang Joshi626591d2016-10-24 14:13:55 -070010504 rate_overhead_palette = rd_pick_palette_intra_sby(
Urvang Joshi451e0f22017-01-31 11:18:31 -080010505 cpi, x, bsize, palette_ctx, intra_mode_cost[DC_PRED],
10506 &best_mbmi_palette, best_palette_color_map, &best_rd_palette,
10507 &best_model_rd_palette, NULL, NULL, NULL, NULL);
hui sude0c70a2017-01-09 17:12:17 -080010508 if (pmi->palette_size[0] == 0) goto PALETTE_EXIT;
10509 memcpy(color_map, best_palette_color_map,
10510 rows * cols * sizeof(best_palette_color_map[0]));
Angie Chiang0e9a2e92016-11-08 09:45:40 -080010511 super_block_yrd(cpi, x, &rd_stats_y, bsize, best_rd);
10512 if (rd_stats_y.rate == INT_MAX) goto PALETTE_EXIT;
Debargha Mukherjee2f123402016-08-30 17:43:38 -070010513 uv_tx = uv_txsize_lookup[bsize][mbmi->tx_size][xd->plane[1].subsampling_x]
10514 [xd->plane[1].subsampling_y];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010515 if (rate_uv_intra[uv_tx] == INT_MAX) {
Luc Trudeau9d4cbb82017-07-27 17:01:32 -040010516 choose_intra_uv_mode(cpi, x, bsize, uv_tx, &rate_uv_intra[uv_tx],
Urvang Joshi368fbc92016-10-17 16:31:34 -070010517 &rate_uv_tokenonly[uv_tx], &dist_uvs[uv_tx],
10518 &skip_uvs[uv_tx], &mode_uv[uv_tx]);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010519 pmi_uv[uv_tx] = *pmi;
10520#if CONFIG_EXT_INTRA
Yaowu Xuc27fc142016-08-22 16:08:15 -070010521 uv_angle_delta[uv_tx] = mbmi->angle_delta[1];
10522#endif // CONFIG_EXT_INTRA
10523 }
10524 mbmi->uv_mode = mode_uv[uv_tx];
10525 pmi->palette_size[1] = pmi_uv[uv_tx].palette_size[1];
hui sude0c70a2017-01-09 17:12:17 -080010526 if (pmi->palette_size[1] > 0) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070010527 memcpy(pmi->palette_colors + PALETTE_MAX_SIZE,
10528 pmi_uv[uv_tx].palette_colors + PALETTE_MAX_SIZE,
10529 2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0]));
hui sude0c70a2017-01-09 17:12:17 -080010530 }
Yaowu Xuc27fc142016-08-22 16:08:15 -070010531#if CONFIG_EXT_INTRA
10532 mbmi->angle_delta[1] = uv_angle_delta[uv_tx];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010533#endif // CONFIG_EXT_INTRA
Angie Chiang0e9a2e92016-11-08 09:45:40 -080010534 skippable = rd_stats_y.skip && skip_uvs[uv_tx];
10535 distortion2 = rd_stats_y.dist + dist_uvs[uv_tx];
10536 rate2 = rd_stats_y.rate + rate_overhead_palette + rate_uv_intra[uv_tx];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010537 rate2 += ref_costs_single[INTRA_FRAME];
10538
10539 if (skippable) {
Angie Chiang0e9a2e92016-11-08 09:45:40 -080010540 rate2 -= (rd_stats_y.rate + rate_uv_tokenonly[uv_tx]);
Zoe Liu1eed2df2017-10-16 17:13:15 -070010541 rate2 += x->skip_cost[av1_get_skip_context(xd)][1];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010542 } else {
Zoe Liu1eed2df2017-10-16 17:13:15 -070010543 rate2 += x->skip_cost[av1_get_skip_context(xd)][0];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010544 }
Urvang Joshi70006e42017-06-14 16:08:55 -070010545 this_rd = RDCOST(x->rdmult, rate2, distortion2);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010546 if (this_rd < best_rd) {
10547 best_mode_index = 3;
10548 mbmi->mv[0].as_int = 0;
10549 rd_cost->rate = rate2;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010550 rd_cost->dist = distortion2;
10551 rd_cost->rdcost = this_rd;
10552 best_rd = this_rd;
10553 best_mbmode = *mbmi;
10554 best_skip2 = 0;
10555 best_mode_skippable = skippable;
10556 }
10557 }
10558PALETTE_EXIT:
Zoe Liuf40a9572017-10-13 12:37:19 -070010559
10560#if CONFIG_EXT_SKIP
Zoe Liu8a5d3432017-11-30 16:33:44 -080010561 best_mbmode.skip_mode = 0;
10562 if (cm->skip_mode_flag &&
Zoe Liuf40a9572017-10-13 12:37:19 -070010563 !segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
10564 is_comp_ref_allowed(bsize)) {
Zoe Liu8a5d3432017-11-30 16:33:44 -080010565 // Obtain the rdcost for skip_mode.
Zoe Liuf40a9572017-10-13 12:37:19 -070010566 estimate_skip_mode_rdcost(cpi, tile_data, x, bsize, mi_row, mi_col,
10567 frame_mv, yv12_mb);
Zoe Liuf40a9572017-10-13 12:37:19 -070010568
Zoe Liu8a5d3432017-11-30 16:33:44 -080010569 if (x->skip_mode_rdcost >= 0 && x->skip_mode_rdcost < INT64_MAX) {
10570 // Update skip mode rdcost.
10571 const int skip_mode_ctx = av1_get_skip_mode_context(xd);
10572 x->skip_mode_rate += x->skip_mode_cost[skip_mode_ctx][1];
10573 x->skip_mode_rdcost =
10574 RDCOST(x->rdmult, x->skip_mode_rate, x->skip_mode_dist);
Zoe Liuf40a9572017-10-13 12:37:19 -070010575
Zoe Liu8a5d3432017-11-30 16:33:44 -080010576 // Compare the use of skip_mode with the best intra/inter mode obtained.
10577 const int64_t best_intra_inter_mode_cost =
10578 RDCOST(x->rdmult, rd_cost->rate + x->skip_mode_cost[skip_mode_ctx][0],
10579 rd_cost->dist);
10580
Zoe Liu50c810d2017-12-05 16:09:37 -080010581 if (x->skip_mode_rdcost <= best_intra_inter_mode_cost)
Zoe Liu8a5d3432017-11-30 16:33:44 -080010582 best_mbmode.skip_mode = 1;
10583 }
10584
10585 if (best_mbmode.skip_mode) {
Zoe Liuf40a9572017-10-13 12:37:19 -070010586 best_mbmode = *mbmi;
10587
10588 best_mbmode.skip_mode = best_mbmode.skip = 1;
10589 best_mbmode.mode = NEAREST_NEARESTMV;
10590 best_mbmode.ref_frame[0] = x->skip_mode_ref_frame[0];
10591 best_mbmode.ref_frame[1] = x->skip_mode_ref_frame[1];
10592 best_mbmode.mv[0].as_int = x->skip_mode_mv[0].as_int;
10593 best_mbmode.mv[1].as_int = x->skip_mode_mv[1].as_int;
Zoe Liu8a5d3432017-11-30 16:33:44 -080010594 best_mbmode.ref_mv_idx = 0;
Zoe Liuf40a9572017-10-13 12:37:19 -070010595
10596 // Set up tx_size related variables for skip-specific loop filtering.
10597 best_mbmode.tx_size = block_signals_txsize(bsize)
10598 ? tx_size_from_tx_mode(bsize, cm->tx_mode, 1)
Debargha Mukherjeee4e18fc2017-12-06 23:43:24 -080010599 : max_txsize_rect_lookup[1][bsize];
Zoe Liuf40a9572017-10-13 12:37:19 -070010600 {
10601 const int width = block_size_wide[bsize] >> tx_size_wide_log2[0];
10602 const int height = block_size_high[bsize] >> tx_size_high_log2[0];
10603 for (int idy = 0; idy < height; ++idy)
10604 for (int idx = 0; idx < width; ++idx)
10605 best_mbmode.inter_tx_size[idy >> 1][idx >> 1] = best_mbmode.tx_size;
10606 }
10607 best_mbmode.min_tx_size = get_min_tx_size(best_mbmode.tx_size);
10608 set_txfm_ctxs(best_mbmode.tx_size, xd->n8_w, xd->n8_h, best_mbmode.skip,
10609 xd);
10610
10611 // Set up color-related variables for skip mode.
10612 best_mbmode.uv_mode = UV_DC_PRED;
10613 best_mbmode.palette_mode_info.palette_size[0] = 0;
10614 best_mbmode.palette_mode_info.palette_size[1] = 0;
Zoe Liuf40a9572017-10-13 12:37:19 -070010615 best_mbmode.interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
10616 best_mbmode.interinter_compound_type = COMPOUND_AVERAGE;
10617 best_mbmode.motion_mode = SIMPLE_TRANSLATION;
Zoe Liuf40a9572017-10-13 12:37:19 -070010618#if CONFIG_FILTER_INTRA
10619 best_mbmode.filter_intra_mode_info.use_filter_intra_mode[0] = 0;
10620 best_mbmode.filter_intra_mode_info.use_filter_intra_mode[1] = 0;
10621#endif // CONFIG_FILTER_INTRA
10622
Zoe Liuf40a9572017-10-13 12:37:19 -070010623 set_default_interp_filters(&best_mbmode, cm->interp_filter);
10624
Zoe Liu8a5d3432017-11-30 16:33:44 -080010625 best_mode_index = x->skip_mode_index;
10626
Zoe Liuf40a9572017-10-13 12:37:19 -070010627 // Update rd_cost
Zoe Liu8a5d3432017-11-30 16:33:44 -080010628 rd_cost->rate = x->skip_mode_rate;
Zoe Liuf40a9572017-10-13 12:37:19 -070010629 rd_cost->dist = rd_cost->sse = x->skip_mode_dist;
10630 rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
Zoe Liu8a5d3432017-11-30 16:33:44 -080010631
Zoe Liuf40a9572017-10-13 12:37:19 -070010632 best_rd = rd_cost->rdcost;
Zoe Liu8a5d3432017-11-30 16:33:44 -080010633 best_skip2 = 1;
10634 best_mode_skippable = (x->skip_mode_sse == 0);
Zoe Liuf40a9572017-10-13 12:37:19 -070010635
10636 x->skip = 1;
10637#if 0
Zoe Liu8a5d3432017-11-30 16:33:44 -080010638 // TODO(zoeliu): To investigate why following cause performance drop.
10639 for (i = 0; i < MAX_MB_PLANE; ++i) {
10640 memset(x->blk_skip[i], x->skip, sizeof(uint8_t) * ctx->num_4x4_blk);
10641 memcpy(ctx->blk_skip[i], x->blk_skip[i],
Zoe Liuf40a9572017-10-13 12:37:19 -070010642 sizeof(uint8_t) * ctx->num_4x4_blk);
Zoe Liu8a5d3432017-11-30 16:33:44 -080010643 }
Zoe Liuf40a9572017-10-13 12:37:19 -070010644#endif // 0
10645 }
10646 }
10647#endif // CONFIG_EXT_SKIP
10648
Sebastien Alaiwan34d55662017-11-15 09:36:03 +010010649 // The inter modes' rate costs are not calculated precisely in some cases.
10650 // Therefore, sometimes, NEWMV is chosen instead of NEARESTMV, NEARMV, and
10651 // GLOBALMV. Here, checks are added for those cases, and the mode decisions
10652 // are corrected.
Yunqing Wang876a8b02017-11-13 17:13:27 -080010653 if ((best_mbmode.mode == NEWMV || best_mbmode.mode == NEW_NEWMV)
10654#if CONFIG_EXT_WARPED_MOTION
10655 && best_mbmode.motion_mode != WARPED_CAUSAL
10656#endif // CONFIG_EXT_WARPED_MOTION
10657 ) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070010658 const MV_REFERENCE_FRAME refs[2] = { best_mbmode.ref_frame[0],
10659 best_mbmode.ref_frame[1] };
10660 int comp_pred_mode = refs[1] > INTRA_FRAME;
Sarah Parkere5299862016-08-16 14:57:37 -070010661 int_mv zeromv[2];
Yaowu Xuf883b422016-08-30 14:01:10 -070010662 const uint8_t rf_type = av1_ref_frame_type(best_mbmode.ref_frame);
Debargha Mukherjeefebb59c2017-03-02 12:23:45 -080010663 zeromv[0].as_int = gm_get_motion_vector(&cm->global_motion[refs[0]],
10664 cm->allow_high_precision_mv, bsize,
RogerZhou3b635242017-09-19 10:06:46 -070010665 mi_col, mi_row, 0
10666#if CONFIG_AMVR
10667 ,
RogerZhou10a03802017-10-26 11:49:48 -070010668 cm->cur_frame_force_integer_mv
RogerZhou3b635242017-09-19 10:06:46 -070010669#endif
10670 )
Debargha Mukherjeefebb59c2017-03-02 12:23:45 -080010671 .as_int;
RogerZhou10a03802017-10-26 11:49:48 -070010672 zeromv[1].as_int = comp_pred_mode
10673 ? gm_get_motion_vector(&cm->global_motion[refs[1]],
10674 cm->allow_high_precision_mv,
10675 bsize, mi_col, mi_row, 0
RogerZhou3b635242017-09-19 10:06:46 -070010676#if CONFIG_AMVR
RogerZhou10a03802017-10-26 11:49:48 -070010677 ,
10678 cm->cur_frame_force_integer_mv
RogerZhou3b635242017-09-19 10:06:46 -070010679#endif
RogerZhou10a03802017-10-26 11:49:48 -070010680 )
10681 .as_int
10682 : 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010683 if (!comp_pred_mode) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070010684 int ref_set = (mbmi_ext->ref_mv_count[rf_type] >= 2)
Yaowu Xuf883b422016-08-30 14:01:10 -070010685 ? AOMMIN(2, mbmi_ext->ref_mv_count[rf_type] - 2)
Yaowu Xuc27fc142016-08-22 16:08:15 -070010686 : INT_MAX;
10687
10688 for (i = 0; i <= ref_set && ref_set != INT_MAX; ++i) {
10689 int_mv cur_mv = mbmi_ext->ref_mv_stack[rf_type][i + 1].this_mv;
10690 if (cur_mv.as_int == best_mbmode.mv[0].as_int) {
10691 best_mbmode.mode = NEARMV;
10692 best_mbmode.ref_mv_idx = i;
10693 }
10694 }
10695
10696 if (frame_mv[NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int)
10697 best_mbmode.mode = NEARESTMV;
Sarah Parkere5299862016-08-16 14:57:37 -070010698 else if (best_mbmode.mv[0].as_int == zeromv[0].as_int)
Sarah Parker2b9ec2e2017-10-30 17:34:08 -070010699 best_mbmode.mode = GLOBALMV;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010700 } else {
10701 int_mv nearestmv[2];
10702 int_mv nearmv[2];
10703
Yaowu Xuc27fc142016-08-22 16:08:15 -070010704 if (mbmi_ext->ref_mv_count[rf_type] > 1) {
10705 nearmv[0] = mbmi_ext->ref_mv_stack[rf_type][1].this_mv;
10706 nearmv[1] = mbmi_ext->ref_mv_stack[rf_type][1].comp_mv;
10707 } else {
10708 nearmv[0] = frame_mv[NEARMV][refs[0]];
10709 nearmv[1] = frame_mv[NEARMV][refs[1]];
10710 }
Yaowu Xuc27fc142016-08-22 16:08:15 -070010711 if (mbmi_ext->ref_mv_count[rf_type] >= 1) {
10712 nearestmv[0] = mbmi_ext->ref_mv_stack[rf_type][0].this_mv;
10713 nearestmv[1] = mbmi_ext->ref_mv_stack[rf_type][0].comp_mv;
10714 } else {
10715 nearestmv[0] = frame_mv[NEARESTMV][refs[0]];
10716 nearestmv[1] = frame_mv[NEARESTMV][refs[1]];
10717 }
10718
10719 if (nearestmv[0].as_int == best_mbmode.mv[0].as_int &&
Sebastien Alaiwan0bdea0d2017-10-02 15:15:05 +020010720 nearestmv[1].as_int == best_mbmode.mv[1].as_int) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070010721 best_mbmode.mode = NEAREST_NEARESTMV;
David Barker404b2e82017-03-27 13:07:47 +010010722 } else {
10723 int ref_set = (mbmi_ext->ref_mv_count[rf_type] >= 2)
10724 ? AOMMIN(2, mbmi_ext->ref_mv_count[rf_type] - 2)
10725 : INT_MAX;
10726
10727 for (i = 0; i <= ref_set && ref_set != INT_MAX; ++i) {
10728 nearmv[0] = mbmi_ext->ref_mv_stack[rf_type][i + 1].this_mv;
10729 nearmv[1] = mbmi_ext->ref_mv_stack[rf_type][i + 1].comp_mv;
10730
Debargha Mukherjeebb6e1342017-04-17 16:05:04 -070010731 // Try switching to the NEAR_NEARMV mode
10732 if (nearmv[0].as_int == best_mbmode.mv[0].as_int &&
David Barker404b2e82017-03-27 13:07:47 +010010733 nearmv[1].as_int == best_mbmode.mv[1].as_int) {
10734 best_mbmode.mode = NEAR_NEARMV;
10735 best_mbmode.ref_mv_idx = i;
10736 }
10737 }
10738
David Barker3dfba992017-04-03 16:10:09 +010010739 if (best_mbmode.mode == NEW_NEWMV &&
David Barker404b2e82017-03-27 13:07:47 +010010740 best_mbmode.mv[0].as_int == zeromv[0].as_int &&
10741 best_mbmode.mv[1].as_int == zeromv[1].as_int)
Sarah Parker2b9ec2e2017-10-30 17:34:08 -070010742 best_mbmode.mode = GLOBAL_GLOBALMV;
David Barker404b2e82017-03-27 13:07:47 +010010743 }
Yaowu Xuc27fc142016-08-22 16:08:15 -070010744 }
Yaowu Xuc27fc142016-08-22 16:08:15 -070010745 }
10746
David Barker9620bcd2017-03-22 14:46:42 +000010747 // Make sure that the ref_mv_idx is only nonzero when we're
10748 // using a mode which can support ref_mv_idx
10749 if (best_mbmode.ref_mv_idx != 0 &&
David Barker3dfba992017-04-03 16:10:09 +010010750 !(best_mbmode.mode == NEWMV || best_mbmode.mode == NEW_NEWMV ||
Sebastien Alaiwan34d55662017-11-15 09:36:03 +010010751 have_nearmv_in_inter_mode(best_mbmode.mode))) {
David Barker9620bcd2017-03-22 14:46:42 +000010752 best_mbmode.ref_mv_idx = 0;
10753 }
10754
Rupert Swarbrick799ff702017-10-04 17:37:52 +010010755 if (best_mbmode.ref_frame[0] > INTRA_FRAME &&
Yunqing Wang876a8b02017-11-13 17:13:27 -080010756 best_mbmode.ref_frame[1] <= INTRA_FRAME
10757#if CONFIG_EXT_WARPED_MOTION
10758 && best_mbmode.motion_mode != WARPED_CAUSAL
10759#endif // CONFIG_EXT_WARPED_MOTION
Zoe Liuf40a9572017-10-13 12:37:19 -070010760#if CONFIG_EXT_SKIP
10761 && !best_mbmode.skip_mode
10762#endif // CONFIG_EXT_SKIP
Yunqing Wang876a8b02017-11-13 17:13:27 -080010763 ) {
Jingning Han731af492016-11-17 11:53:23 -080010764 int8_t ref_frame_type = av1_ref_frame_type(best_mbmode.ref_frame);
10765 int16_t mode_ctx = mbmi_ext->mode_context[ref_frame_type];
David Barker68e6e862016-11-24 15:10:15 +000010766 if (mode_ctx & (1 << ALL_ZERO_FLAG_OFFSET)) {
Rupert Swarbrick799ff702017-10-04 17:37:52 +010010767 int_mv zeromv;
Rupert Swarbrick799ff702017-10-04 17:37:52 +010010768 const MV_REFERENCE_FRAME ref = best_mbmode.ref_frame[0];
10769 zeromv.as_int = gm_get_motion_vector(&cm->global_motion[ref],
10770 cm->allow_high_precision_mv, bsize,
10771 mi_col, mi_row, 0
RogerZhou3b635242017-09-19 10:06:46 -070010772#if CONFIG_AMVR
Rupert Swarbrick799ff702017-10-04 17:37:52 +010010773 ,
RogerZhou10a03802017-10-26 11:49:48 -070010774 cm->cur_frame_force_integer_mv
RogerZhou3b635242017-09-19 10:06:46 -070010775#endif
Rupert Swarbrick799ff702017-10-04 17:37:52 +010010776 )
10777 .as_int;
Rupert Swarbrick799ff702017-10-04 17:37:52 +010010778 if (best_mbmode.mv[0].as_int == zeromv.as_int) {
Sarah Parker2b9ec2e2017-10-30 17:34:08 -070010779 best_mbmode.mode = GLOBALMV;
David Barkercdcac6d2016-12-01 17:04:16 +000010780 }
David Barker68e6e862016-11-24 15:10:15 +000010781 }
Yaowu Xuc27fc142016-08-22 16:08:15 -070010782 }
Yaowu Xuc27fc142016-08-22 16:08:15 -070010783
10784 if (best_mode_index < 0 || best_rd >= best_rd_so_far) {
10785 rd_cost->rate = INT_MAX;
10786 rd_cost->rdcost = INT64_MAX;
10787 return;
10788 }
10789
Rupert Swarbrick27e90292017-09-28 17:46:50 +010010790 assert((cm->interp_filter == SWITCHABLE) ||
10791 (cm->interp_filter ==
10792 av1_extract_interp_filter(best_mbmode.interp_filters, 0)) ||
10793 !is_inter_block(&best_mbmode));
Yaowu Xuc27fc142016-08-22 16:08:15 -070010794#if CONFIG_DUAL_FILTER
10795 assert((cm->interp_filter == SWITCHABLE) ||
Rupert Swarbrick27e90292017-09-28 17:46:50 +010010796 (cm->interp_filter ==
10797 av1_extract_interp_filter(best_mbmode.interp_filters, 1)) ||
Yaowu Xuc27fc142016-08-22 16:08:15 -070010798 !is_inter_block(&best_mbmode));
Fergus Simpson4063a682017-02-28 16:52:22 -080010799#endif // CONFIG_DUAL_FILTER
Yaowu Xuc27fc142016-08-22 16:08:15 -070010800
10801 if (!cpi->rc.is_src_frame_alt_ref)
Yaowu Xuf883b422016-08-30 14:01:10 -070010802 av1_update_rd_thresh_fact(cm, tile_data->thresh_freq_fact,
10803 sf->adaptive_rd_thresh, bsize, best_mode_index);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010804
10805 // macroblock modes
10806 *mbmi = best_mbmode;
10807 x->skip |= best_skip2;
10808
Sebastien Alaiwan48795802017-10-30 12:07:13 +010010809 // Note: this section is needed since the mode may have been forced to
Sarah Parker2b9ec2e2017-10-30 17:34:08 -070010810 // GLOBALMV by the all-zero mode handling of ref-mv.
10811 if (mbmi->mode == GLOBALMV || mbmi->mode == GLOBAL_GLOBALMV) {
10812 // Correct the motion mode for GLOBALMV
Sarah Parker0eea89f2017-07-11 11:56:36 -070010813 const MOTION_MODE last_motion_mode_allowed =
Sebastien Alaiwan1f56b8e2017-10-31 17:37:16 +010010814 motion_mode_allowed(0, xd->global_motion, xd, xd->mi[0]);
Sarah Parker19234cc2017-03-10 16:43:25 -080010815 if (mbmi->motion_mode > last_motion_mode_allowed)
10816 mbmi->motion_mode = last_motion_mode_allowed;
Sarah Parker19234cc2017-03-10 16:43:25 -080010817
Sarah Parker2b9ec2e2017-10-30 17:34:08 -070010818 // Correct the interpolation filter for GLOBALMV
Yue Chen19e7aa82016-11-30 14:05:39 -080010819 if (is_nontrans_global_motion(xd)) {
Rupert Swarbrick27e90292017-09-28 17:46:50 +010010820 mbmi->interp_filters = av1_broadcast_interp_filter(
10821 av1_unswitchable_filter(cm->interp_filter));
Yue Chen19e7aa82016-11-30 14:05:39 -080010822 }
10823 }
Yue Chen19e7aa82016-11-30 14:05:39 -080010824
Yaowu Xuc27fc142016-08-22 16:08:15 -070010825 for (i = 0; i < 1 + has_second_ref(mbmi); ++i) {
10826 if (mbmi->mode != NEWMV)
10827 mbmi->pred_mv[i].as_int = mbmi->mv[i].as_int;
10828 else
10829 mbmi->pred_mv[i].as_int = mbmi_ext->ref_mvs[mbmi->ref_frame[i]][0].as_int;
10830 }
Yaowu Xuc27fc142016-08-22 16:08:15 -070010831
10832 for (i = 0; i < REFERENCE_MODES; ++i) {
10833 if (best_pred_rd[i] == INT64_MAX)
10834 best_pred_diff[i] = INT_MIN;
10835 else
10836 best_pred_diff[i] = best_rd - best_pred_rd[i];
10837 }
10838
10839 x->skip |= best_mode_skippable;
10840
10841 assert(best_mode_index >= 0);
10842
10843 store_coding_context(x, ctx, best_mode_index, best_pred_diff,
10844 best_mode_skippable);
10845
Urvang Joshic9e71d42017-08-09 18:58:33 -070010846 if (pmi->palette_size[1] > 0) {
10847 assert(try_palette);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010848 restore_uv_color_map(cpi, x);
10849 }
10850}
10851
// RD mode selection for a superblock whose segment has SEG_LVL_SKIP active:
// no residual is coded, so the only decision left is the (GLOBALMV, LAST_FRAME)
// inter mode plus an interpolation filter.  Fills rd_cost with the rate/dist of
// that forced choice, or flags it as unusable (rate = INT_MAX) when its RD cost
// is not below best_rd_so_far.
void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
                                        TileDataEnc *tile_data, MACROBLOCK *x,
                                        int mi_row, int mi_col,
                                        RD_STATS *rd_cost, BLOCK_SIZE bsize,
                                        PICK_MODE_CONTEXT *ctx,
                                        int64_t best_rd_so_far) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  unsigned char segment_id = mbmi->segment_id;
  const int comp_pred = 0;  // Skip segments never use compound prediction.
  int i;
  int64_t best_pred_diff[REFERENCE_MODES];
  unsigned int ref_costs_single[TOTAL_REFS_PER_FRAME];
#if CONFIG_EXT_COMP_REFS
  unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME][TOTAL_REFS_PER_FRAME];
#else
  unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME];
#endif  // CONFIG_EXT_COMP_REFS
  aom_prob comp_mode_p;
  InterpFilter best_filter = SWITCHABLE;
  int64_t this_rd = INT64_MAX;
  int rate2 = 0;
  const int64_t distortion2 = 0;  // Skip mode codes no residual.
  (void)mi_row;
  (void)mi_col;

  estimate_ref_frame_costs(cm, xd, x, segment_id, ref_costs_single,
                           ref_costs_comp, &comp_mode_p);

  for (i = 0; i < TOTAL_REFS_PER_FRAME; ++i) x->pred_sse[i] = INT_MAX;
  for (i = LAST_FRAME; i < TOTAL_REFS_PER_FRAME; ++i)
    x->pred_mv_sad[i] = INT_MAX;

  rd_cost->rate = INT_MAX;

  // This path is only valid when the segment forces skip.
  assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));

  mbmi->palette_mode_info.palette_size[0] = 0;
  mbmi->palette_mode_info.palette_size[1] = 0;

#if CONFIG_FILTER_INTRA
  mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
  mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
#endif  // CONFIG_FILTER_INTRA
  // Force the global-motion mode; the reference frame comes from the segment
  // data when SEG_LVL_REF_FRAME is active, otherwise LAST_FRAME.
  mbmi->mode = GLOBALMV;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->uv_mode = UV_DC_PRED;
  if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME))
    mbmi->ref_frame[0] = get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
  else
    mbmi->ref_frame[0] = LAST_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
  // The motion vector is the one implied by the global motion model of the
  // chosen reference frame.
  mbmi->mv[0].as_int =
      gm_get_motion_vector(&cm->global_motion[mbmi->ref_frame[0]],
                           cm->allow_high_precision_mv, bsize, mi_col, mi_row, 0
#if CONFIG_AMVR
                           ,
                           cm->cur_frame_force_integer_mv
#endif
                           )
          .as_int;
  mbmi->tx_size = max_txsize_lookup[bsize];
  x->skip = 1;

  mbmi->ref_mv_idx = 0;
  mbmi->pred_mv[0].as_int = 0;

  mbmi->motion_mode = SIMPLE_TRANSLATION;
  av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col);
  // Gather warped-motion projection samples so that downstream coding of
  // motion-mode context stays consistent, even though the mode is forced.
  if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) {
    int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
#if CONFIG_EXT_WARPED_MOTION
    int pts_mv[SAMPLES_ARRAY_SIZE], pts_wm[SAMPLES_ARRAY_SIZE];
    mbmi->num_proj_ref[0] =
        findSamples(cm, xd, mi_row, mi_col, pts, pts_inref, pts_mv, pts_wm);
    // Rank the samples by motion vector difference
    if (mbmi->num_proj_ref[0] > 1)
      mbmi->num_proj_ref[0] = sortSamples(pts_mv, &mbmi->mv[0].as_mv, pts,
                                          pts_inref, mbmi->num_proj_ref[0]);
#else
    mbmi->num_proj_ref[0] = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref);
#endif  // CONFIG_EXT_WARPED_MOTION
  }

  set_default_interp_filters(mbmi, cm->interp_filter);

  if (cm->interp_filter != SWITCHABLE) {
    best_filter = cm->interp_filter;
  } else {
    best_filter = EIGHTTAP_REGULAR;
    // With a switchable filter, pick the filter that is cheapest to signal
    // (rate only; distortion is zero here so the filter cannot affect it).
    if (av1_is_interp_needed(xd) && av1_is_interp_search_needed(xd) &&
        x->source_variance >= cpi->sf.disable_filter_search_var_thresh) {
      int rs;
      int best_rs = INT_MAX;
      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
        mbmi->interp_filters = av1_broadcast_interp_filter(i);
        rs = av1_get_switchable_rate(cm, x, xd);
        if (rs < best_rs) {
          best_rs = rs;
          best_filter = av1_extract_interp_filter(mbmi->interp_filters, 0);
        }
      }
    }
  }
  // Set the appropriate filter
  mbmi->interp_filters = av1_broadcast_interp_filter(best_filter);
  rate2 += av1_get_switchable_rate(cm, x, xd);

  if (cm->reference_mode == REFERENCE_MODE_SELECT)
    rate2 += av1_cost_bit(comp_mode_p, comp_pred);

  // Estimate the reference frame signaling cost and add it
  // to the rolling cost variable.
  rate2 += ref_costs_single[LAST_FRAME];
  this_rd = RDCOST(x->rdmult, rate2, distortion2);

  rd_cost->rate = rate2;
  rd_cost->dist = distortion2;
  rd_cost->rdcost = this_rd;

  // Signal "no usable mode" if the forced skip mode cannot beat the caller's
  // current best.
  if (this_rd >= best_rd_so_far) {
    rd_cost->rate = INT_MAX;
    rd_cost->rdcost = INT64_MAX;
    return;
  }

  assert((cm->interp_filter == SWITCHABLE) ||
         (cm->interp_filter ==
          av1_extract_interp_filter(mbmi->interp_filters, 0)));

  av1_update_rd_thresh_fact(cm, tile_data->thresh_freq_fact,
                            cpi->sf.adaptive_rd_thresh, bsize, THR_GLOBALMV);

  av1_zero(best_pred_diff);

  store_coding_context(x, ctx, THR_GLOBALMV, best_pred_diff, 0);
}
10990
// Shared context passed (as void *) to the per-neighbour callbacks used by
// calc_target_weighted_pred() below.
struct calc_target_weighted_pred_ctxt {
  const MACROBLOCK *x;  // Holds the wsrc_buf / mask_buf being filled.
  const uint8_t *tmp;   // Neighbour predictor samples (above or left).
  int tmp_stride;       // Stride of 'tmp' in samples.
  int overlap;          // OBMC overlap extent in pixels (rows or cols).
};
10997
// Callback run (via foreach_overlappable_nb_above) for each overlappable
// neighbour in the row above.  For the first ctxt->overlap rows it seeds
// wsrc with the above predictor weighted by the complement of the 1-D OBMC
// mask, and records the mask value itself, over the columns this neighbour
// spans.
static INLINE void calc_target_weighted_pred_above(MACROBLOCKD *xd,
                                                   int rel_mi_col,
                                                   uint8_t nb_mi_width,
                                                   MODE_INFO *nb_mi,
                                                   void *fun_ctxt) {
  (void)nb_mi;

  struct calc_target_weighted_pred_ctxt *ctxt =
      (struct calc_target_weighted_pred_ctxt *)fun_ctxt;

#if CONFIG_HIGHBITDEPTH
  const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
#else
  const int is_hbd = 0;
#endif  // CONFIG_HIGHBITDEPTH

  const int bw = xd->n8_w << MI_SIZE_LOG2;  // Current block width in pixels.
  const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);

  // Advance the output/input pointers to the columns this neighbour covers.
  int32_t *wsrc = ctxt->x->wsrc_buf + (rel_mi_col * MI_SIZE);
  int32_t *mask = ctxt->x->mask_buf + (rel_mi_col * MI_SIZE);
  const uint8_t *tmp = ctxt->tmp + rel_mi_col * MI_SIZE;

  if (!is_hbd) {
    for (int row = 0; row < ctxt->overlap; ++row) {
      const uint8_t m0 = mask1d[row];
      const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
      for (int col = 0; col < nb_mi_width * MI_SIZE; ++col) {
        wsrc[col] = m1 * tmp[col];
        mask[col] = m0;
      }
      wsrc += bw;
      mask += bw;
      tmp += ctxt->tmp_stride;
    }
#if CONFIG_HIGHBITDEPTH
  } else {
    // High bitdepth path: same arithmetic on 16-bit predictor samples.
    const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);

    for (int row = 0; row < ctxt->overlap; ++row) {
      const uint8_t m0 = mask1d[row];
      const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
      for (int col = 0; col < nb_mi_width * MI_SIZE; ++col) {
        wsrc[col] = m1 * tmp16[col];
        mask[col] = m0;
      }
      wsrc += bw;
      mask += bw;
      tmp16 += ctxt->tmp_stride;
    }
#endif  // CONFIG_HIGHBITDEPTH
  }
}
11051
// Callback run (via foreach_overlappable_nb_left) for each overlappable
// neighbour in the column to the left.  Unlike the "above" pass (which seeds
// the buffers), this pass blends the left predictor INTO the already-seeded
// wsrc/mask over the first ctxt->overlap columns of the rows this neighbour
// spans, dropping AOM_BLEND_A64_ROUND_BITS of headroom to keep the scaled
// fixed-point values in range.
static INLINE void calc_target_weighted_pred_left(MACROBLOCKD *xd,
                                                  int rel_mi_row,
                                                  uint8_t nb_mi_height,
                                                  MODE_INFO *nb_mi,
                                                  void *fun_ctxt) {
  (void)nb_mi;

  struct calc_target_weighted_pred_ctxt *ctxt =
      (struct calc_target_weighted_pred_ctxt *)fun_ctxt;

#if CONFIG_HIGHBITDEPTH
  const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
#else
  const int is_hbd = 0;
#endif  // CONFIG_HIGHBITDEPTH

  const int bw = xd->n8_w << MI_SIZE_LOG2;  // Current block width in pixels.
  const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);

  // Advance the pointers to the rows this neighbour covers.
  int32_t *wsrc = ctxt->x->wsrc_buf + (rel_mi_row * MI_SIZE * bw);
  int32_t *mask = ctxt->x->mask_buf + (rel_mi_row * MI_SIZE * bw);
  const uint8_t *tmp = ctxt->tmp + (rel_mi_row * MI_SIZE * ctxt->tmp_stride);

  if (!is_hbd) {
    for (int row = 0; row < nb_mi_height * MI_SIZE; ++row) {
      for (int col = 0; col < ctxt->overlap; ++col) {
        const uint8_t m0 = mask1d[col];
        const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
        wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
                    (tmp[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
        mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
      }
      wsrc += bw;
      mask += bw;
      tmp += ctxt->tmp_stride;
    }
#if CONFIG_HIGHBITDEPTH
  } else {
    // High bitdepth path: same arithmetic on 16-bit predictor samples.
    const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);

    for (int row = 0; row < nb_mi_height * MI_SIZE; ++row) {
      for (int col = 0; col < ctxt->overlap; ++col) {
        const uint8_t m0 = mask1d[col];
        const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
        wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
                    (tmp16[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
        mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
      }
      wsrc += bw;
      mask += bw;
      tmp16 += ctxt->tmp_stride;
    }
#endif  // CONFIG_HIGHBITDEPTH
  }
}
11107
// This function has a structure similar to av1_build_obmc_inter_prediction
//
// The OBMC predictor is computed as:
//
//  PObmc(x,y) =
//    AOM_BLEND_A64(Mh(x),
//                  AOM_BLEND_A64(Mv(y), P(x,y), PAbove(x,y)),
//                  PLeft(x, y))
//
// Scaling up by AOM_BLEND_A64_MAX_ALPHA ** 2 and omitting the intermediate
// rounding, this can be written as:
//
//  AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * Pobmc(x,y) =
//    Mh(x) * Mv(y) * P(x,y) +
//    Mh(x) * Cv(y) * Pabove(x,y) +
//    AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
//
// Where :
//
//  Cv(y) = AOM_BLEND_A64_MAX_ALPHA - Mv(y)
//  Ch(y) = AOM_BLEND_A64_MAX_ALPHA - Mh(y)
//
// This function computes 'wsrc' and 'mask' as:
//
//  wsrc(x, y) =
//    AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * src(x, y) -
//    Mh(x) * Cv(y) * Pabove(x,y) +
//    AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
//
//  mask(x, y) = Mh(x) * Mv(y)
//
// These can then be used to efficiently approximate the error for any
// predictor P in the context of the provided neighbouring predictors by
// computing:
//
//  error(x, y) =
//    wsrc(x, y) - mask(x, y) * P(x, y) / (AOM_BLEND_A64_MAX_ALPHA ** 2)
//
static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x,
                                      const MACROBLOCKD *xd, int mi_row,
                                      int mi_col, const uint8_t *above,
                                      int above_stride, const uint8_t *left,
                                      int left_stride) {
  const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
  const int bw = xd->n8_w << MI_SIZE_LOG2;  // Block width in pixels.
  const int bh = xd->n8_h << MI_SIZE_LOG2;  // Block height in pixels.
  int32_t *mask_buf = x->mask_buf;
  int32_t *wsrc_buf = x->wsrc_buf;

  const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA;
#if CONFIG_HIGHBITDEPTH
  const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
#else
  const int is_hbd = 0;
#endif  // CONFIG_HIGHBITDEPTH

  // plane 0 should not be subsampled
  assert(xd->plane[0].subsampling_x == 0);
  assert(xd->plane[0].subsampling_y == 0);

  // Initialise wsrc to zero and mask to full alpha; positions not covered by
  // any neighbour keep these values.
  av1_zero_array(wsrc_buf, bw * bh);
  for (int i = 0; i < bw * bh; ++i) mask_buf[i] = AOM_BLEND_A64_MAX_ALPHA;

  // handle above row
  if (xd->up_available) {
    const int overlap =
        AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
    struct calc_target_weighted_pred_ctxt ctxt = { x, above, above_stride,
                                                   overlap };
    foreach_overlappable_nb_above(cm, (MACROBLOCKD *)xd, mi_col,
                                  max_neighbor_obmc[b_width_log2_lookup[bsize]],
                                  calc_target_weighted_pred_above, &ctxt);
  }

  // Scale up by MAX_ALPHA before folding in the left column, matching the
  // MAX_ALPHA**2 fixed-point scale described in the comment above.
  for (int i = 0; i < bw * bh; ++i) {
    wsrc_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
    mask_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
  }

  // handle left column
  if (xd->left_available) {
    const int overlap =
        AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
    struct calc_target_weighted_pred_ctxt ctxt = { x, left, left_stride,
                                                   overlap };
    foreach_overlappable_nb_left(cm, (MACROBLOCKD *)xd, mi_row,
                                 max_neighbor_obmc[b_height_log2_lookup[bsize]],
                                 calc_target_weighted_pred_left, &ctxt);
  }

  // Finally fold in the source: wsrc = src * MAX_ALPHA**2 - wsrc.
  if (!is_hbd) {
    const uint8_t *src = x->plane[0].src.buf;

    for (int row = 0; row < bh; ++row) {
      for (int col = 0; col < bw; ++col) {
        wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
      }
      wsrc_buf += bw;
      src += x->plane[0].src.stride;
    }
#if CONFIG_HIGHBITDEPTH
  } else {
    // High bitdepth path: same arithmetic on 16-bit source samples.
    const uint16_t *src = CONVERT_TO_SHORTPTR(x->plane[0].src.buf);

    for (int row = 0; row < bh; ++row) {
      for (int col = 0; col < bw; ++col) {
        wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
      }
      wsrc_buf += bw;
      src += x->plane[0].src.stride;
    }
#endif  // CONFIG_HIGHBITDEPTH
  }
}