blob: 20c354717f5ba4a523aa9967b7169b5f2374f718 [file] [log] [blame]
/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */
11
12#include <assert.h>
13#include <math.h>
14
Yaowu Xuf883b422016-08-30 14:01:10 -070015#include "./aom_dsp_rtcd.h"
Jingning Han1aab8182016-06-03 11:09:06 -070016#include "./av1_rtcd.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070017
Yaowu Xuf883b422016-08-30 14:01:10 -070018#include "aom_dsp/aom_dsp_common.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070019#include "aom_dsp/blend.h"
Yaowu Xuf883b422016-08-30 14:01:10 -070020#include "aom_mem/aom_mem.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070021#include "aom_ports/mem.h"
22#include "aom_ports/system_state.h"
23
David Michael Barr5b2021e2017-08-17 18:12:39 +090024#if CONFIG_CFL
25#include "av1/common/cfl.h"
26#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -070027#include "av1/common/common.h"
28#include "av1/common/common_data.h"
29#include "av1/common/entropy.h"
30#include "av1/common/entropymode.h"
31#include "av1/common/idct.h"
32#include "av1/common/mvref_common.h"
Rupert Swarbrickc0cea7f2017-08-22 14:06:56 +010033#include "av1/common/obmc.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070034#include "av1/common/pred_common.h"
35#include "av1/common/quant_common.h"
36#include "av1/common/reconinter.h"
37#include "av1/common/reconintra.h"
38#include "av1/common/scan.h"
39#include "av1/common/seg_common.h"
Angie Chiang47e4b362017-03-24 11:25:10 -070040#if CONFIG_LV_MAP
41#include "av1/common/txb_common.h"
42#endif
Yue Chen69f18e12016-09-08 14:48:15 -070043#include "av1/common/warped_motion.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070044
Jingning Han1aab8182016-06-03 11:09:06 -070045#include "av1/encoder/aq_variance.h"
Tom Finegan17ce8b12017-02-08 12:46:31 -080046#include "av1/encoder/av1_quantize.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070047#include "av1/encoder/cost.h"
48#include "av1/encoder/encodemb.h"
49#include "av1/encoder/encodemv.h"
50#include "av1/encoder/encoder.h"
Angie Chiang47e4b362017-03-24 11:25:10 -070051#if CONFIG_LV_MAP
52#include "av1/encoder/encodetxb.h"
53#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -070054#include "av1/encoder/hybrid_fwd_txfm.h"
55#include "av1/encoder/mcomp.h"
56#include "av1/encoder/palette.h"
Yaowu Xuc27fc142016-08-22 16:08:15 -070057#include "av1/encoder/ratectrl.h"
58#include "av1/encoder/rd.h"
59#include "av1/encoder/rdopt.h"
Debargha Mukherjeeceebb702016-10-11 05:26:50 -070060#include "av1/encoder/tokenize.h"
Alexander Bokov0c7eb102017-09-07 18:49:00 -070061#include "av1/encoder/tx_prune_model_weights.h"
Yushin Chod0b77ac2017-10-20 17:33:16 -070062
#if CONFIG_DUAL_FILTER
// Number of (vertical, horizontal) interpolation-filter pairs searched when
// dual filtering is enabled: the full cross product of switchable filters.
#define DUAL_FILTER_SET_SIZE (SWITCHABLE_FILTERS * SWITCHABLE_FILTERS)
#if USE_EXTRA_FILTER
// Enumeration of all { vertical filter, horizontal filter } index pairs
// (4x4 grid when the extra filter is compiled in).
static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = {
  { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 }, { 1, 0 }, { 1, 1 },
  { 1, 2 }, { 1, 3 }, { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 },
  { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 },
};
#else  // USE_EXTRA_FILTER
// Same cross product restricted to the three base filters (3x3 grid).
static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = {
  { 0, 0 }, { 0, 1 }, { 0, 2 }, { 1, 0 }, { 1, 1 },
  { 1, 2 }, { 2, 0 }, { 2, 1 }, { 2, 2 },
};
#endif  // USE_EXTRA_FILTER
#endif  // CONFIG_DUAL_FILTER
Yaowu Xuc27fc142016-08-22 16:08:15 -070078
// Per-reference-frame mode masks: each *_MODE_MASK sets one bit per
// reference frame (1 << frame), covering INTRA_FRAME and every inter
// reference EXCEPT the frame named in the macro. Used to skip mode-search
// entries that use the excluded reference.
#define LAST_FRAME_MODE_MASK                                          \
  ((1 << INTRA_FRAME) | (1 << LAST2_FRAME) | (1 << LAST3_FRAME) |     \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF2_FRAME) | \
   (1 << ALTREF_FRAME))
#define LAST2_FRAME_MODE_MASK                                         \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST3_FRAME) |      \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF2_FRAME) | \
   (1 << ALTREF_FRAME))
#define LAST3_FRAME_MODE_MASK                                         \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) |      \
   (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF2_FRAME) | \
   (1 << ALTREF_FRAME))
#define GOLDEN_FRAME_MODE_MASK                                       \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) |     \
   (1 << LAST3_FRAME) | (1 << BWDREF_FRAME) | (1 << ALTREF2_FRAME) | \
   (1 << ALTREF_FRAME))
#define BWDREF_FRAME_MODE_MASK                                       \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) |     \
   (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << ALTREF2_FRAME) | \
   (1 << ALTREF_FRAME))
#define ALTREF2_FRAME_MODE_MASK                                     \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) |    \
   (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | \
   (1 << ALTREF_FRAME))
#define ALTREF_FRAME_MODE_MASK                                      \
  ((1 << INTRA_FRAME) | (1 << LAST_FRAME) | (1 << LAST2_FRAME) |    \
   (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | (1 << BWDREF_FRAME) | \
   (1 << ALTREF2_FRAME))
107
// Bitmask of reference frames allowed as the SECOND reference of a compound
// pair. The trailing 0x01 sets bit 0 — presumably the "no second reference"
// slot; NOTE(review): confirm against the MV_REFERENCE_FRAME enum.
#if CONFIG_EXT_COMP_REFS
// Extended compound refs additionally permit GOLDEN and LAST2 as the
// second reference (enabling same-direction pairs).
#define SECOND_REF_FRAME_MASK                                         \
  ((1 << ALTREF_FRAME) | (1 << ALTREF2_FRAME) | (1 << BWDREF_FRAME) | \
   (1 << GOLDEN_FRAME) | (1 << LAST2_FRAME) | 0x01)
#else  // !CONFIG_EXT_COMP_REFS
#define SECOND_REF_FRAME_MASK \
  ((1 << ALTREF_FRAME) | (1 << ALTREF2_FRAME) | (1 << BWDREF_FRAME) | 0x01)
#endif  // CONFIG_EXT_COMP_REFS
Yaowu Xuc27fc142016-08-22 16:08:15 -0700116
// Mode-search tuning constants.
#define MIN_EARLY_TERM_INDEX 3    // earliest mode index eligible for early termination
#define NEW_MV_DISCOUNT_FACTOR 8  // rate-discount divisor applied to NEWMV modes

#if CONFIG_EXT_INTRA
#define ANGLE_SKIP_THRESH 10   // threshold for skipping directional-angle refinement
#define FILTER_FAST_SEARCH 1   // enable the fast intra-filter search path
#endif  // CONFIG_EXT_INTRA
124
// Linear-SVM coefficients used to decide ADST flipping; first four entries
// are the vertical model, last four the horizontal model.
// NOTE(review): exact feature ordering (weights vs. bias) is defined by the
// consumer of this table — confirm there.
static const double ADST_FLIP_SVM[8] = {
  /* vertical */
  -6.6623, -2.8062, -3.2531, 3.1671,
  /* horizontal */
  -7.7051, -3.2234, -3.6193, 3.4533
};
Yaowu Xuc27fc142016-08-22 16:08:15 -0700131
// One entry of the mode-search order table: a prediction mode plus its
// (up to two) reference frames.
typedef struct {
  PREDICTION_MODE mode;
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

// A reference-frame pair on its own (no mode attached).
typedef struct { MV_REFERENCE_FRAME ref_frame[2]; } REF_DEFINITION;

// Accumulator passed through the per-block transform RD-cost loop.
struct rdcost_block_args {
  const AV1_COMP *cpi;
  MACROBLOCK *x;
  ENTROPY_CONTEXT t_above[2 * MAX_MIB_SIZE];  // above entropy context, per 4x4 col
  ENTROPY_CONTEXT t_left[2 * MAX_MIB_SIZE];   // left entropy context, per 4x4 row
  RD_STATS rd_stats;  // accumulated rate/distortion for the block
  int64_t this_rd;    // running RD cost
  int64_t best_rd;    // best RD seen so far; used for early exit
  int exit_early;     // set when this_rd exceeds best_rd
  int use_fast_coef_costing;
};
150
// NOTE(review): name suggests the index of the last single-reference
// NEWMV-related entry below — confirm against the code that reads it.
#define LAST_NEW_MV_INDEX 6
// Master mode-search order: every (mode, reference pair) the encoder's RD
// loop may evaluate, roughly cheapest/most-likely first.
static const MODE_DEFINITION av1_mode_order[MAX_MODES] = {
  // Single-reference NEARESTMV.
  { NEARESTMV, { LAST_FRAME, NONE_FRAME } },
  { NEARESTMV, { LAST2_FRAME, NONE_FRAME } },
  { NEARESTMV, { LAST3_FRAME, NONE_FRAME } },
  { NEARESTMV, { BWDREF_FRAME, NONE_FRAME } },
  { NEARESTMV, { ALTREF2_FRAME, NONE_FRAME } },
  { NEARESTMV, { ALTREF_FRAME, NONE_FRAME } },
  { NEARESTMV, { GOLDEN_FRAME, NONE_FRAME } },

  { DC_PRED, { INTRA_FRAME, NONE_FRAME } },

  // Single-reference NEWMV.
  { NEWMV, { LAST_FRAME, NONE_FRAME } },
  { NEWMV, { LAST2_FRAME, NONE_FRAME } },
  { NEWMV, { LAST3_FRAME, NONE_FRAME } },
  { NEWMV, { BWDREF_FRAME, NONE_FRAME } },
  { NEWMV, { ALTREF2_FRAME, NONE_FRAME } },
  { NEWMV, { ALTREF_FRAME, NONE_FRAME } },
  { NEWMV, { GOLDEN_FRAME, NONE_FRAME } },

  // Single-reference NEARMV.
  { NEARMV, { LAST_FRAME, NONE_FRAME } },
  { NEARMV, { LAST2_FRAME, NONE_FRAME } },
  { NEARMV, { LAST3_FRAME, NONE_FRAME } },
  { NEARMV, { BWDREF_FRAME, NONE_FRAME } },
  { NEARMV, { ALTREF2_FRAME, NONE_FRAME } },
  { NEARMV, { ALTREF_FRAME, NONE_FRAME } },
  { NEARMV, { GOLDEN_FRAME, NONE_FRAME } },

  // Single-reference GLOBALMV.
  { GLOBALMV, { LAST_FRAME, NONE_FRAME } },
  { GLOBALMV, { LAST2_FRAME, NONE_FRAME } },
  { GLOBALMV, { LAST3_FRAME, NONE_FRAME } },
  { GLOBALMV, { BWDREF_FRAME, NONE_FRAME } },
  { GLOBALMV, { ALTREF2_FRAME, NONE_FRAME } },
  { GLOBALMV, { GOLDEN_FRAME, NONE_FRAME } },
  { GLOBALMV, { ALTREF_FRAME, NONE_FRAME } },

  // TODO(zoeliu): May need to reconsider the order on the modes to check

  // Compound NEAREST_NEARESTMV over all forward/backward pairs.
  { NEAREST_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAREST_NEARESTMV, { LAST_FRAME, ALTREF2_FRAME } },
  { NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF2_FRAME } },
  { NEAREST_NEARESTMV, { LAST3_FRAME, ALTREF2_FRAME } },
  { NEAREST_NEARESTMV, { GOLDEN_FRAME, ALTREF2_FRAME } },

#if CONFIG_EXT_COMP_REFS
  // Extended same-direction compound pairs.
  { NEAREST_NEARESTMV, { LAST_FRAME, LAST2_FRAME } },
  { NEAREST_NEARESTMV, { LAST_FRAME, LAST3_FRAME } },
  { NEAREST_NEARESTMV, { LAST_FRAME, GOLDEN_FRAME } },
  { NEAREST_NEARESTMV, { BWDREF_FRAME, ALTREF_FRAME } },
#endif  // CONFIG_EXT_COMP_REFS

  { PAETH_PRED, { INTRA_FRAME, NONE_FRAME } },

  { SMOOTH_PRED, { INTRA_FRAME, NONE_FRAME } },
  { SMOOTH_V_PRED, { INTRA_FRAME, NONE_FRAME } },
  { SMOOTH_H_PRED, { INTRA_FRAME, NONE_FRAME } },

  // Remaining compound modes, one 7-entry group per reference pair.
  { NEAR_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { GLOBAL_GLOBALMV, { LAST_FRAME, ALTREF_FRAME } },

  { NEAR_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST2_FRAME, ALTREF_FRAME } },
  { GLOBAL_GLOBALMV, { LAST2_FRAME, ALTREF_FRAME } },

  { NEAR_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { LAST3_FRAME, ALTREF_FRAME } },
  { GLOBAL_GLOBALMV, { LAST3_FRAME, ALTREF_FRAME } },

  { NEAR_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { GLOBAL_GLOBALMV, { GOLDEN_FRAME, ALTREF_FRAME } },

  { NEAR_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, BWDREF_FRAME } },
  { GLOBAL_GLOBALMV, { LAST_FRAME, BWDREF_FRAME } },

  { NEAR_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST2_FRAME, BWDREF_FRAME } },
  { GLOBAL_GLOBALMV, { LAST2_FRAME, BWDREF_FRAME } },

  { NEAR_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { LAST3_FRAME, BWDREF_FRAME } },
  { GLOBAL_GLOBALMV, { LAST3_FRAME, BWDREF_FRAME } },

  { NEAR_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAREST_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEAR_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { NEW_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } },
  { GLOBAL_GLOBALMV, { GOLDEN_FRAME, BWDREF_FRAME } },

  { NEAR_NEARMV, { LAST_FRAME, ALTREF2_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, ALTREF2_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, ALTREF2_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, ALTREF2_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, ALTREF2_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, ALTREF2_FRAME } },
  { GLOBAL_GLOBALMV, { LAST_FRAME, ALTREF2_FRAME } },

  { NEAR_NEARMV, { LAST2_FRAME, ALTREF2_FRAME } },
  { NEW_NEARESTMV, { LAST2_FRAME, ALTREF2_FRAME } },
  { NEAREST_NEWMV, { LAST2_FRAME, ALTREF2_FRAME } },
  { NEW_NEARMV, { LAST2_FRAME, ALTREF2_FRAME } },
  { NEAR_NEWMV, { LAST2_FRAME, ALTREF2_FRAME } },
  { NEW_NEWMV, { LAST2_FRAME, ALTREF2_FRAME } },
  { GLOBAL_GLOBALMV, { LAST2_FRAME, ALTREF2_FRAME } },

  { NEAR_NEARMV, { LAST3_FRAME, ALTREF2_FRAME } },
  { NEW_NEARESTMV, { LAST3_FRAME, ALTREF2_FRAME } },
  { NEAREST_NEWMV, { LAST3_FRAME, ALTREF2_FRAME } },
  { NEW_NEARMV, { LAST3_FRAME, ALTREF2_FRAME } },
  { NEAR_NEWMV, { LAST3_FRAME, ALTREF2_FRAME } },
  { NEW_NEWMV, { LAST3_FRAME, ALTREF2_FRAME } },
  { GLOBAL_GLOBALMV, { LAST3_FRAME, ALTREF2_FRAME } },

  { NEAR_NEARMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
  { NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
  { NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
  { NEW_NEARMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
  { NEAR_NEWMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
  { NEW_NEWMV, { GOLDEN_FRAME, ALTREF2_FRAME } },
  { GLOBAL_GLOBALMV, { GOLDEN_FRAME, ALTREF2_FRAME } },

  // Remaining directional intra modes.
  { H_PRED, { INTRA_FRAME, NONE_FRAME } },
  { V_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D135_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D207_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D153_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D63_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D117_PRED, { INTRA_FRAME, NONE_FRAME } },
  { D45_PRED, { INTRA_FRAME, NONE_FRAME } },

  // Inter-intra compound modes (second "reference" is INTRA_FRAME).
  { GLOBALMV, { LAST_FRAME, INTRA_FRAME } },
  { NEARESTMV, { LAST_FRAME, INTRA_FRAME } },
  { NEARMV, { LAST_FRAME, INTRA_FRAME } },
  { NEWMV, { LAST_FRAME, INTRA_FRAME } },

  { GLOBALMV, { LAST2_FRAME, INTRA_FRAME } },
  { NEARESTMV, { LAST2_FRAME, INTRA_FRAME } },
  { NEARMV, { LAST2_FRAME, INTRA_FRAME } },
  { NEWMV, { LAST2_FRAME, INTRA_FRAME } },

  { GLOBALMV, { LAST3_FRAME, INTRA_FRAME } },
  { NEARESTMV, { LAST3_FRAME, INTRA_FRAME } },
  { NEARMV, { LAST3_FRAME, INTRA_FRAME } },
  { NEWMV, { LAST3_FRAME, INTRA_FRAME } },

  { GLOBALMV, { GOLDEN_FRAME, INTRA_FRAME } },
  { NEARESTMV, { GOLDEN_FRAME, INTRA_FRAME } },
  { NEARMV, { GOLDEN_FRAME, INTRA_FRAME } },
  { NEWMV, { GOLDEN_FRAME, INTRA_FRAME } },

  { GLOBALMV, { BWDREF_FRAME, INTRA_FRAME } },
  { NEARESTMV, { BWDREF_FRAME, INTRA_FRAME } },
  { NEARMV, { BWDREF_FRAME, INTRA_FRAME } },
  { NEWMV, { BWDREF_FRAME, INTRA_FRAME } },

  { GLOBALMV, { ALTREF2_FRAME, INTRA_FRAME } },
  { NEARESTMV, { ALTREF2_FRAME, INTRA_FRAME } },
  { NEARMV, { ALTREF2_FRAME, INTRA_FRAME } },
  { NEWMV, { ALTREF2_FRAME, INTRA_FRAME } },

  { GLOBALMV, { ALTREF_FRAME, INTRA_FRAME } },
  { NEARESTMV, { ALTREF_FRAME, INTRA_FRAME } },
  { NEARMV, { ALTREF_FRAME, INTRA_FRAME } },
  { NEWMV, { ALTREF_FRAME, INTRA_FRAME } },

#if CONFIG_EXT_COMP_REFS
  // Compound-mode groups for the extended same-direction pairs.
  { NEAR_NEARMV, { LAST_FRAME, LAST2_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, LAST2_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, LAST2_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, LAST2_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, LAST2_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, LAST2_FRAME } },
  { GLOBAL_GLOBALMV, { LAST_FRAME, LAST2_FRAME } },

  { NEAR_NEARMV, { LAST_FRAME, LAST3_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, LAST3_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, LAST3_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, LAST3_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, LAST3_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, LAST3_FRAME } },
  { GLOBAL_GLOBALMV, { LAST_FRAME, LAST3_FRAME } },

  { NEAR_NEARMV, { LAST_FRAME, GOLDEN_FRAME } },
  { NEW_NEARESTMV, { LAST_FRAME, GOLDEN_FRAME } },
  { NEAREST_NEWMV, { LAST_FRAME, GOLDEN_FRAME } },
  { NEW_NEARMV, { LAST_FRAME, GOLDEN_FRAME } },
  { NEAR_NEWMV, { LAST_FRAME, GOLDEN_FRAME } },
  { NEW_NEWMV, { LAST_FRAME, GOLDEN_FRAME } },
  { GLOBAL_GLOBALMV, { LAST_FRAME, GOLDEN_FRAME } },

  { NEAR_NEARMV, { BWDREF_FRAME, ALTREF_FRAME } },
  { NEW_NEARESTMV, { BWDREF_FRAME, ALTREF_FRAME } },
  { NEAREST_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } },
  { NEW_NEARMV, { BWDREF_FRAME, ALTREF_FRAME } },
  { NEAR_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } },
  { NEW_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } },
  { GLOBAL_GLOBALMV, { BWDREF_FRAME, ALTREF_FRAME } },
#endif  // CONFIG_EXT_COMP_REFS
};
389
// Order in which luma intra modes are evaluated during RD search
// (cheap/smooth predictors first, directional modes last).
static const PREDICTION_MODE intra_rd_search_mode_order[INTRA_MODES] = {
  DC_PRED,       H_PRED,        V_PRED,    SMOOTH_PRED, PAETH_PRED,
  SMOOTH_V_PRED, SMOOTH_H_PRED, D135_PRED, D207_PRED,   D153_PRED,
  D63_PRED,      D117_PRED,     D45_PRED,
};
395
#if CONFIG_CFL
// Chroma intra search order; CFL is tried right after DC when enabled.
static const UV_PREDICTION_MODE uv_rd_search_mode_order[UV_INTRA_MODES] = {
  UV_DC_PRED,     UV_CFL_PRED,   UV_H_PRED,        UV_V_PRED,
  UV_SMOOTH_PRED, UV_PAETH_PRED, UV_SMOOTH_V_PRED, UV_SMOOTH_H_PRED,
  UV_D135_PRED,   UV_D207_PRED,  UV_D153_PRED,     UV_D63_PRED,
  UV_D117_PRED,   UV_D45_PRED,
};
#else
// Without CFL the chroma search order is identical to the luma order.
#define uv_rd_search_mode_order intra_rd_search_mode_order
#endif  // CONFIG_CFL
406
Yaowu Xuc27fc142016-08-22 16:08:15 -0700407static INLINE int write_uniform_cost(int n, int v) {
hui su37499292017-04-26 09:49:53 -0700408 const int l = get_unsigned_bits(n);
409 const int m = (1 << l) - n;
Yaowu Xuc27fc142016-08-22 16:08:15 -0700410 if (l == 0) return 0;
411 if (v < m)
Yaowu Xuf883b422016-08-30 14:01:10 -0700412 return (l - 1) * av1_cost_bit(128, 0);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700413 else
Yaowu Xuf883b422016-08-30 14:01:10 -0700414 return l * av1_cost_bit(128, 0);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700415}
416
// constants for prune 1 and prune 2 decision boundaries
#define FAST_EXT_TX_CORR_MID 0.0     // correlation midpoint
#define FAST_EXT_TX_EDST_MID 0.1     // energy-distance midpoint
#define FAST_EXT_TX_CORR_MARGIN 0.5  // correlation margin around the midpoint
#define FAST_EXT_TX_EDST_MARGIN 0.3  // energy-distance margin around the midpoint

// Forward declarations: luma / chroma inter-block RD costing (defined later
// in this file).
int inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_stats,
                    BLOCK_SIZE bsize, int64_t ref_best_rd, int fast);
int inter_block_uvrd(const AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_stats,
                     BLOCK_SIZE bsize, int64_t ref_best_rd, int fast);
427
// Sum of squared error between src and dst restricted to the visible part of
// a transform block (blocks may extend past the frame edge). When the whole
// block is visible, the optimized per-block-size variance-function table is
// used; otherwise an odd-size SSE helper covers just the visible region.
static unsigned pixel_dist_visible_only(
    const AV1_COMP *const cpi, const MACROBLOCK *x, const uint8_t *src,
    const int src_stride, const uint8_t *dst, const int dst_stride,
    const BLOCK_SIZE tx_bsize, int txb_rows, int txb_cols, int visible_rows,
    int visible_cols) {
  unsigned sse;

  // Fast path: full block visible, use the fn_ptr table for this tx size.
  if (txb_rows == visible_rows && txb_cols == visible_cols) {
    cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse);
    return sse;
  }
#if CONFIG_HIGHBITDEPTH
  const MACROBLOCKD *xd = &x->e_mbd;

  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    uint64_t sse64 = aom_highbd_sse_odd_size(src, src_stride, dst, dst_stride,
                                             visible_cols, visible_rows);
    // Normalize high-bit-depth SSE back to an 8-bit scale.
    return (unsigned int)ROUND_POWER_OF_TWO(sse64, (xd->bd - 8) * 2);
  }
#else
  (void)x;
#endif  // CONFIG_HIGHBITDEPTH
  sse = aom_sse_odd_size(src, src_stride, dst, dst_stride, visible_cols,
                         visible_rows);
  return sse;
}
454
Yushin Choe30a47c2017-08-15 13:08:30 -0700455#if CONFIG_DIST_8X8
Yushin Choc49177e2017-07-18 17:18:09 -0700456static uint64_t cdef_dist_8x8_16bit(uint16_t *dst, int dstride, uint16_t *src,
457 int sstride, int coeff_shift) {
458 uint64_t svar = 0;
459 uint64_t dvar = 0;
460 uint64_t sum_s = 0;
461 uint64_t sum_d = 0;
462 uint64_t sum_s2 = 0;
463 uint64_t sum_d2 = 0;
464 uint64_t sum_sd = 0;
465 uint64_t dist = 0;
466
467 int i, j;
468 for (i = 0; i < 8; i++) {
469 for (j = 0; j < 8; j++) {
470 sum_s += src[i * sstride + j];
471 sum_d += dst[i * dstride + j];
472 sum_s2 += src[i * sstride + j] * src[i * sstride + j];
473 sum_d2 += dst[i * dstride + j] * dst[i * dstride + j];
474 sum_sd += src[i * sstride + j] * dst[i * dstride + j];
475 }
476 }
477 /* Compute the variance -- the calculation cannot go negative. */
478 svar = sum_s2 - ((sum_s * sum_s + 32) >> 6);
479 dvar = sum_d2 - ((sum_d * sum_d + 32) >> 6);
480
481 // Tuning of jm's original dering distortion metric used in CDEF tool,
482 // suggested by jm
483 const uint64_t a = 4;
484 const uint64_t b = 2;
485 const uint64_t c1 = (400 * a << 2 * coeff_shift);
486 const uint64_t c2 = (b * 20000 * a * a << 4 * coeff_shift);
487
488 dist =
489 (uint64_t)floor(.5 +
490 (sum_d2 + sum_s2 - 2 * sum_sd) * .5 * (svar + dvar + c1) /
491 (sqrt(svar * (double)dvar + c2)));
492
493 // Calibrate dist to have similar rate for the same QP with MSE only
494 // distortion (as in master branch)
495 dist = (uint64_t)((float)dist * 0.75);
496
497 return dist;
498}
Yushin Choc49177e2017-07-18 17:18:09 -0700499
// Integer variance of a 4x4 tile of 16-bit samples:
// (sum of squares - mean correction) >> 4, i.e. E[t^2] - E[t]^2 rounded down.
static int od_compute_var_4x4(uint16_t *x, int stride) {
  int sum = 0;
  int sq = 0;
  for (int r = 0; r < 4; r++) {
    for (int c = 0; c < 4; c++) {
      const int t = x[r * stride + c];
      sum += t;
      sq += t * t;
    }
  }
  return (sq - (sum * sum >> 4)) >> 4;
}
519
/* OD_DIST_LP_MID controls the frequency weighting filter used for computing
   the distortion. For a value X, the filter is [1 X 1]/(X + 2) and
   is applied both horizontally and vertically. For X=5, the filter is
   a good approximation for the OD_QM8_Q4_HVS quantization matrix. */
#define OD_DIST_LP_MID (5)
// Normalization denominator of the filter: X + 2.
#define OD_DIST_LP_NORM (OD_DIST_LP_MID + 2)
526
Yushin Chob7b60c52017-07-14 16:18:52 -0700527static double od_compute_dist_8x8(int use_activity_masking, uint16_t *x,
528 uint16_t *y, od_coeff *e_lp, int stride) {
Yushin Cho7a428ba2017-01-12 16:28:49 -0800529 double sum;
530 int min_var;
531 double mean_var;
532 double var_stat;
533 double activity;
534 double calibration;
535 int i;
536 int j;
537 double vardist;
Yushin Cho7a428ba2017-01-12 16:28:49 -0800538
539 vardist = 0;
Yushin Chob7b60c52017-07-14 16:18:52 -0700540
Yushin Cho7a428ba2017-01-12 16:28:49 -0800541#if 1
542 min_var = INT_MAX;
543 mean_var = 0;
544 for (i = 0; i < 3; i++) {
545 for (j = 0; j < 3; j++) {
546 int varx;
547 int vary;
548 varx = od_compute_var_4x4(x + 2 * i * stride + 2 * j, stride);
549 vary = od_compute_var_4x4(y + 2 * i * stride + 2 * j, stride);
550 min_var = OD_MINI(min_var, varx);
551 mean_var += 1. / (1 + varx);
552 /* The cast to (double) is to avoid an overflow before the sqrt.*/
553 vardist += varx - 2 * sqrt(varx * (double)vary) + vary;
554 }
555 }
556 /* We use a different variance statistic depending on whether activity
James Zern89a015b2017-08-08 12:39:00 -0400557 masking is used, since the harmonic mean appeared slightly worse with
Yushin Cho7a428ba2017-01-12 16:28:49 -0800558 masking off. The calibration constant just ensures that we preserve the
559 rate compared to activity=1. */
560 if (use_activity_masking) {
561 calibration = 1.95;
562 var_stat = 9. / mean_var;
563 } else {
564 calibration = 1.62;
565 var_stat = min_var;
566 }
567 /* 1.62 is a calibration constant, 0.25 is a noise floor and 1/6 is the
568 activity masking constant. */
569 activity = calibration * pow(.25 + var_stat, -1. / 6);
570#else
571 activity = 1;
Fergus Simpson4063a682017-02-28 16:52:22 -0800572#endif // 1
Yushin Cho7a428ba2017-01-12 16:28:49 -0800573 sum = 0;
574 for (i = 0; i < 8; i++) {
575 for (j = 0; j < 8; j++)
Jean-Marc Valin79c0f322017-01-18 01:58:33 -0500576 sum += e_lp[i * stride + j] * (double)e_lp[i * stride + j];
Yushin Cho7a428ba2017-01-12 16:28:49 -0800577 }
Jean-Marc Valin79c0f322017-01-18 01:58:33 -0500578 /* Normalize the filter to unit DC response. */
579 sum *= 1. / (OD_DIST_LP_NORM * OD_DIST_LP_NORM * OD_DIST_LP_NORM *
580 OD_DIST_LP_NORM);
Yushin Cho7a428ba2017-01-12 16:28:49 -0800581 return activity * activity * (sum + vardist);
582}
583
// Note : Inputs x and y are in a pixel domain
// Shared tail of od_compute_dist / od_compute_dist_diff: applies the
// VERTICAL pass of the [1 mid 1] low-pass filter to the horizontally
// pre-filtered error in `tmp` (writing `e_lp`), sums the 8x8 perceptual
// distortion over the block, then rescales by a qindex-dependent linear
// regression against SSE. Block dimensions must be multiples of 8.
static double od_compute_dist_common(int activity_masking, uint16_t *x,
                                     uint16_t *y, int bsize_w, int bsize_h,
                                     int qindex, od_coeff *tmp,
                                     od_coeff *e_lp) {
  int i, j;
  double sum = 0;
  const int mid = OD_DIST_LP_MID;

  // Vertical filter: top and bottom rows mirror the edge sample.
  for (j = 0; j < bsize_w; j++) {
    e_lp[j] = mid * tmp[j] + 2 * tmp[bsize_w + j];
    e_lp[(bsize_h - 1) * bsize_w + j] = mid * tmp[(bsize_h - 1) * bsize_w + j] +
                                        2 * tmp[(bsize_h - 2) * bsize_w + j];
  }
  for (i = 1; i < bsize_h - 1; i++) {
    for (j = 0; j < bsize_w; j++) {
      e_lp[i * bsize_w + j] = mid * tmp[i * bsize_w + j] +
                              tmp[(i - 1) * bsize_w + j] +
                              tmp[(i + 1) * bsize_w + j];
    }
  }
  // Accumulate the perceptual metric tile by tile (8x8 steps).
  for (i = 0; i < bsize_h; i += 8) {
    for (j = 0; j < bsize_w; j += 8) {
      sum += od_compute_dist_8x8(activity_masking, &x[i * bsize_w + j],
                                 &y[i * bsize_w + j], &e_lp[i * bsize_w + j],
                                 bsize_w);
    }
  }
  /* Scale according to linear regression against SSE, for 8x8 blocks. */
  if (activity_masking) {
    sum *= 2.2 + (1.7 - 2.2) * (qindex - 99) / (210 - 99) +
           (qindex < 99 ? 2.5 * (qindex - 99) / 99 * (qindex - 99) / 99 : 0);
  } else {
    sum *= qindex >= 128
               ? 1.4 + (0.9 - 1.4) * (qindex - 128) / (209 - 128)
               : qindex <= 43 ? 1.5 + (2.0 - 1.5) * (qindex - 43) / (16 - 43)
                              : 1.5 + (1.4 - 1.5) * (qindex - 43) / (128 - 43);
  }

  return sum;
}
625
Yushin Chob7b60c52017-07-14 16:18:52 -0700626static double od_compute_dist(uint16_t *x, uint16_t *y, int bsize_w,
627 int bsize_h, int qindex) {
Yushin Cho7a428ba2017-01-12 16:28:49 -0800628 assert(bsize_w >= 8 && bsize_h >= 8);
Yushin Chod0b77ac2017-10-20 17:33:16 -0700629
Yushin Chob7b60c52017-07-14 16:18:52 -0700630 int activity_masking = 0;
Yushin Chod0b77ac2017-10-20 17:33:16 -0700631
Luc Trudeau1f7c4112017-09-13 15:10:08 -0400632 int i, j;
633 DECLARE_ALIGNED(16, od_coeff, e[MAX_TX_SQUARE]);
634 DECLARE_ALIGNED(16, od_coeff, tmp[MAX_TX_SQUARE]);
635 DECLARE_ALIGNED(16, od_coeff, e_lp[MAX_TX_SQUARE]);
636 for (i = 0; i < bsize_h; i++) {
637 for (j = 0; j < bsize_w; j++) {
638 e[i * bsize_w + j] = x[i * bsize_w + j] - y[i * bsize_w + j];
Jean-Marc Valin79c0f322017-01-18 01:58:33 -0500639 }
Yushin Cho75b01002017-06-21 13:43:57 -0700640 }
Luc Trudeau1f7c4112017-09-13 15:10:08 -0400641 int mid = OD_DIST_LP_MID;
642 for (i = 0; i < bsize_h; i++) {
643 tmp[i * bsize_w] = mid * e[i * bsize_w] + 2 * e[i * bsize_w + 1];
644 tmp[i * bsize_w + bsize_w - 1] =
645 mid * e[i * bsize_w + bsize_w - 1] + 2 * e[i * bsize_w + bsize_w - 2];
646 for (j = 1; j < bsize_w - 1; j++) {
647 tmp[i * bsize_w + j] = mid * e[i * bsize_w + j] + e[i * bsize_w + j - 1] +
648 e[i * bsize_w + j + 1];
649 }
650 }
651 return od_compute_dist_common(activity_masking, x, y, bsize_w, bsize_h,
652 qindex, tmp, e_lp);
Yushin Cho75b01002017-06-21 13:43:57 -0700653}
654
Yushin Chob7b60c52017-07-14 16:18:52 -0700655static double od_compute_dist_diff(uint16_t *x, int16_t *e, int bsize_w,
656 int bsize_h, int qindex) {
Yushin Cho75b01002017-06-21 13:43:57 -0700657 assert(bsize_w >= 8 && bsize_h >= 8);
Yushin Chod0b77ac2017-10-20 17:33:16 -0700658
Yushin Chob7b60c52017-07-14 16:18:52 -0700659 int activity_masking = 0;
Yushin Chod0b77ac2017-10-20 17:33:16 -0700660
Luc Trudeau1f7c4112017-09-13 15:10:08 -0400661 DECLARE_ALIGNED(16, uint16_t, y[MAX_TX_SQUARE]);
662 DECLARE_ALIGNED(16, od_coeff, tmp[MAX_TX_SQUARE]);
663 DECLARE_ALIGNED(16, od_coeff, e_lp[MAX_TX_SQUARE]);
664 int i, j;
665 for (i = 0; i < bsize_h; i++) {
666 for (j = 0; j < bsize_w; j++) {
667 y[i * bsize_w + j] = x[i * bsize_w + j] - e[i * bsize_w + j];
Jean-Marc Valin79c0f322017-01-18 01:58:33 -0500668 }
Yushin Cho7a428ba2017-01-12 16:28:49 -0800669 }
Luc Trudeau1f7c4112017-09-13 15:10:08 -0400670 int mid = OD_DIST_LP_MID;
671 for (i = 0; i < bsize_h; i++) {
672 tmp[i * bsize_w] = mid * e[i * bsize_w] + 2 * e[i * bsize_w + 1];
673 tmp[i * bsize_w + bsize_w - 1] =
674 mid * e[i * bsize_w + bsize_w - 1] + 2 * e[i * bsize_w + bsize_w - 2];
675 for (j = 1; j < bsize_w - 1; j++) {
676 tmp[i * bsize_w + j] = mid * e[i * bsize_w + j] + e[i * bsize_w + j - 1] +
677 e[i * bsize_w + j + 1];
678 }
679 }
680 return od_compute_dist_common(activity_masking, x, y, bsize_w, bsize_h,
681 qindex, tmp, e_lp);
Yushin Cho7a428ba2017-01-12 16:28:49 -0800682}
683
// Block distortion for RD search, dispatching on x->tune_metric:
//  - AOM_TUNE_DAALA_DIST: Daala perceptual distortion (od_compute_dist).
//  - AOM_TUNE_CDEF_DIST:  cdef_dist_8x8_16bit summed over 8x8 sub-blocks.
//  - otherwise:           MSE over the visible region only.
// (bsw, bsh) is the padded transform block size, a multiple of 8 in each
// dimension; (visible_w, visible_h) is the part backed by real frame pixels.
// For the perceptual metrics the invisible padding of the reconstruction is
// overwritten with source pixels so the padding contributes zero error.
int64_t av1_dist_8x8(const AV1_COMP *const cpi, const MACROBLOCK *x,
                     const uint8_t *src, int src_stride, const uint8_t *dst,
                     int dst_stride, const BLOCK_SIZE tx_bsize, int bsw,
                     int bsh, int visible_w, int visible_h, int qindex) {
  int64_t d = 0;
  int i, j;
  const MACROBLOCKD *xd = &x->e_mbd;

  // 16-bit scratch copies of source and reconstruction, used only by the
  // perceptual metrics below.
  DECLARE_ALIGNED(16, uint16_t, orig[MAX_TX_SQUARE]);
  DECLARE_ALIGNED(16, uint16_t, rec[MAX_TX_SQUARE]);

  assert(bsw >= 8);
  assert(bsh >= 8);
  assert((bsw & 0x07) == 0);
  assert((bsh & 0x07) == 0);

  if (x->tune_metric == AOM_TUNE_CDEF_DIST ||
      x->tune_metric == AOM_TUNE_DAALA_DIST) {
#if CONFIG_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      // High bit depth: buffers hold uint16 samples behind CONVERT_TO_SHORTPTR.
      for (j = 0; j < bsh; j++)
        for (i = 0; i < bsw; i++)
          orig[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];

      if ((bsw == visible_w) && (bsh == visible_h)) {
        for (j = 0; j < bsh; j++)
          for (i = 0; i < bsw; i++)
            rec[j * bsw + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i];
      } else {
        // Copy the visible part of the reconstruction, then fill the
        // right/bottom padding from the source so it adds no distortion.
        for (j = 0; j < visible_h; j++)
          for (i = 0; i < visible_w; i++)
            rec[j * bsw + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i];

        if (visible_w < bsw) {
          for (j = 0; j < bsh; j++)
            for (i = visible_w; i < bsw; i++)
              rec[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
        }

        if (visible_h < bsh) {
          for (j = visible_h; j < bsh; j++)
            for (i = 0; i < bsw; i++)
              rec[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
        }
      }
    } else {
#endif
      // 8-bit path: same copy/pad scheme as above.
      for (j = 0; j < bsh; j++)
        for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i];

      if ((bsw == visible_w) && (bsh == visible_h)) {
        for (j = 0; j < bsh; j++)
          for (i = 0; i < bsw; i++) rec[j * bsw + i] = dst[j * dst_stride + i];
      } else {
        for (j = 0; j < visible_h; j++)
          for (i = 0; i < visible_w; i++)
            rec[j * bsw + i] = dst[j * dst_stride + i];

        if (visible_w < bsw) {
          for (j = 0; j < bsh; j++)
            for (i = visible_w; i < bsw; i++)
              rec[j * bsw + i] = src[j * src_stride + i];
        }

        if (visible_h < bsh) {
          for (j = visible_h; j < bsh; j++)
            for (i = 0; i < bsw; i++)
              rec[j * bsw + i] = src[j * src_stride + i];
        }
      }
#if CONFIG_HIGHBITDEPTH
    }
#endif  // CONFIG_HIGHBITDEPTH
  }

  if (x->tune_metric == AOM_TUNE_DAALA_DIST) {
    d = (int64_t)od_compute_dist(orig, rec, bsw, bsh, qindex);
  } else if (x->tune_metric == AOM_TUNE_CDEF_DIST) {
    int coeff_shift = AOMMAX(xd->bd - 8, 0);

    // CDEF distortion is defined on 8x8 tiles; accumulate over the block.
    for (i = 0; i < bsh; i += 8) {
      for (j = 0; j < bsw; j += 8) {
        d += cdef_dist_8x8_16bit(&rec[i * bsw + j], bsw, &orig[i * bsw + j],
                                 bsw, coeff_shift);
      }
    }
#if CONFIG_HIGHBITDEPTH
    // Undo the bit-depth scaling applied inside the 16-bit CDEF kernel.
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
      d = ((uint64_t)d) >> 2 * coeff_shift;
#endif
  } else {
    // Otherwise, MSE by default
    d = pixel_dist_visible_only(cpi, x, src, src_stride, dst, dst_stride,
                                tx_bsize, bsh, bsw, visible_h, visible_w);
  }

  return d;
}
Yushin Cho75b01002017-06-21 13:43:57 -0700782
// Residual-input counterpart of av1_dist_8x8(): receives the source and the
// residual (diff = source - reconstruction) instead of the reconstruction
// itself. Same metric dispatch on x->tune_metric; the MSE fallback is simply
// the sum of squared residuals over the visible region.
static int64_t av1_dist_8x8_diff(const MACROBLOCK *x, const uint8_t *src,
                                 int src_stride, const int16_t *diff,
                                 int diff_stride, int bsw, int bsh,
                                 int visible_w, int visible_h, int qindex) {
  int64_t d = 0;
  int i, j;
  const MACROBLOCKD *xd = &x->e_mbd;

  // 16-bit scratch copies used only by the perceptual metrics.
  DECLARE_ALIGNED(16, uint16_t, orig[MAX_TX_SQUARE]);
  DECLARE_ALIGNED(16, int16_t, diff16[MAX_TX_SQUARE]);

  assert(bsw >= 8);
  assert(bsh >= 8);
  assert((bsw & 0x07) == 0);
  assert((bsh & 0x07) == 0);

  if (x->tune_metric == AOM_TUNE_CDEF_DIST ||
      x->tune_metric == AOM_TUNE_DAALA_DIST) {
#if CONFIG_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      // High bit depth source lives behind CONVERT_TO_SHORTPTR.
      for (j = 0; j < bsh; j++)
        for (i = 0; i < bsw; i++)
          orig[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i];
    } else {
#endif
      for (j = 0; j < bsh; j++)
        for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i];
#if CONFIG_HIGHBITDEPTH
    }
#endif  // CONFIG_HIGHBITDEPTH

    if ((bsw == visible_w) && (bsh == visible_h)) {
      for (j = 0; j < bsh; j++)
        for (i = 0; i < bsw; i++)
          diff16[j * bsw + i] = diff[j * diff_stride + i];
    } else {
      // Copy the visible residual and zero the right/bottom padding so the
      // padding contributes no error.
      for (j = 0; j < visible_h; j++)
        for (i = 0; i < visible_w; i++)
          diff16[j * bsw + i] = diff[j * diff_stride + i];

      if (visible_w < bsw) {
        for (j = 0; j < bsh; j++)
          for (i = visible_w; i < bsw; i++) diff16[j * bsw + i] = 0;
      }

      if (visible_h < bsh) {
        for (j = visible_h; j < bsh; j++)
          for (i = 0; i < bsw; i++) diff16[j * bsw + i] = 0;
      }
    }
  }

  if (x->tune_metric == AOM_TUNE_DAALA_DIST) {
    d = (int64_t)od_compute_dist_diff(orig, diff16, bsw, bsh, qindex);
  } else if (x->tune_metric == AOM_TUNE_CDEF_DIST) {
    int coeff_shift = AOMMAX(xd->bd - 8, 0);
    DECLARE_ALIGNED(16, uint16_t, dst16[MAX_TX_SQUARE]);

    // Rebuild the reconstruction (source - residual) for the CDEF metric.
    for (i = 0; i < bsh; i++) {
      for (j = 0; j < bsw; j++) {
        dst16[i * bsw + j] = orig[i * bsw + j] - diff16[i * bsw + j];
      }
    }

    // CDEF distortion accumulated over 8x8 tiles.
    for (i = 0; i < bsh; i += 8) {
      for (j = 0; j < bsw; j += 8) {
        d += cdef_dist_8x8_16bit(&dst16[i * bsw + j], bsw, &orig[i * bsw + j],
                                 bsw, coeff_shift);
      }
    }
    // Don't scale 'd' for HBD since it will be done by caller side for diff
    // input
  } else {
    // Otherwise, MSE by default
    d = aom_sum_squares_2d_i16(diff, diff_stride, visible_w, visible_h);
  }

  return d;
}
Yushin Chob7b60c52017-07-14 16:18:52 -0700862#endif // CONFIG_DIST_8X8
Yushin Cho7a428ba2017-01-12 16:28:49 -0800863
Yaowu Xuf883b422016-08-30 14:01:10 -0700864static void get_energy_distribution_fine(const AV1_COMP *cpi, BLOCK_SIZE bsize,
Alex Converse9f217762017-04-20 15:34:54 -0700865 const uint8_t *src, int src_stride,
866 const uint8_t *dst, int dst_stride,
Yaowu Xuc27fc142016-08-22 16:08:15 -0700867 double *hordist, double *verdist) {
Alex Converse9f217762017-04-20 15:34:54 -0700868 const int bw = block_size_wide[bsize];
869 const int bh = block_size_high[bsize];
Yaowu Xuc27fc142016-08-22 16:08:15 -0700870 unsigned int esq[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
Yaowu Xuc27fc142016-08-22 16:08:15 -0700871
872 const int f_index = bsize - BLOCK_16X16;
873 if (f_index < 0) {
Alex Converse9f217762017-04-20 15:34:54 -0700874 const int w_shift = bw == 8 ? 1 : 2;
875 const int h_shift = bh == 8 ? 1 : 2;
Sebastien Alaiwan71e87842017-04-12 16:03:28 +0200876#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -0700877 if (cpi->common.use_highbitdepth) {
Alex Converse9f217762017-04-20 15:34:54 -0700878 const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
879 const uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
880 for (int i = 0; i < bh; ++i)
881 for (int j = 0; j < bw; ++j) {
882 const int index = (j >> w_shift) + ((i >> h_shift) << 2);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700883 esq[index] +=
884 (src16[j + i * src_stride] - dst16[j + i * dst_stride]) *
885 (src16[j + i * src_stride] - dst16[j + i * dst_stride]);
886 }
887 } else {
Sebastien Alaiwan71e87842017-04-12 16:03:28 +0200888#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -0700889
Alex Converse9f217762017-04-20 15:34:54 -0700890 for (int i = 0; i < bh; ++i)
891 for (int j = 0; j < bw; ++j) {
892 const int index = (j >> w_shift) + ((i >> h_shift) << 2);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700893 esq[index] += (src[j + i * src_stride] - dst[j + i * dst_stride]) *
894 (src[j + i * src_stride] - dst[j + i * dst_stride]);
895 }
Sebastien Alaiwan71e87842017-04-12 16:03:28 +0200896#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -0700897 }
Sebastien Alaiwan71e87842017-04-12 16:03:28 +0200898#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -0700899 } else {
Alex Converse9f217762017-04-20 15:34:54 -0700900 cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[0]);
901 cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
902 &esq[1]);
903 cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
904 &esq[2]);
905 cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
906 dst_stride, &esq[3]);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700907 src += bh / 4 * src_stride;
908 dst += bh / 4 * dst_stride;
909
Alex Converse9f217762017-04-20 15:34:54 -0700910 cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[4]);
911 cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
912 &esq[5]);
913 cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
914 &esq[6]);
915 cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
916 dst_stride, &esq[7]);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700917 src += bh / 4 * src_stride;
918 dst += bh / 4 * dst_stride;
919
Alex Converse9f217762017-04-20 15:34:54 -0700920 cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[8]);
921 cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
922 &esq[9]);
923 cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
924 &esq[10]);
925 cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
926 dst_stride, &esq[11]);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700927 src += bh / 4 * src_stride;
928 dst += bh / 4 * dst_stride;
929
Alex Converse9f217762017-04-20 15:34:54 -0700930 cpi->fn_ptr[f_index].vf(src, src_stride, dst, dst_stride, &esq[12]);
931 cpi->fn_ptr[f_index].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
932 &esq[13]);
933 cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
934 &esq[14]);
935 cpi->fn_ptr[f_index].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
936 dst_stride, &esq[15]);
Yaowu Xuc27fc142016-08-22 16:08:15 -0700937 }
938
Alex Converse9f217762017-04-20 15:34:54 -0700939 double total = (double)esq[0] + esq[1] + esq[2] + esq[3] + esq[4] + esq[5] +
940 esq[6] + esq[7] + esq[8] + esq[9] + esq[10] + esq[11] +
941 esq[12] + esq[13] + esq[14] + esq[15];
Yaowu Xuc27fc142016-08-22 16:08:15 -0700942 if (total > 0) {
943 const double e_recip = 1.0 / total;
Alex Converse9f217762017-04-20 15:34:54 -0700944 hordist[0] = ((double)esq[0] + esq[4] + esq[8] + esq[12]) * e_recip;
945 hordist[1] = ((double)esq[1] + esq[5] + esq[9] + esq[13]) * e_recip;
946 hordist[2] = ((double)esq[2] + esq[6] + esq[10] + esq[14]) * e_recip;
947 verdist[0] = ((double)esq[0] + esq[1] + esq[2] + esq[3]) * e_recip;
948 verdist[1] = ((double)esq[4] + esq[5] + esq[6] + esq[7]) * e_recip;
949 verdist[2] = ((double)esq[8] + esq[9] + esq[10] + esq[11]) * e_recip;
Yaowu Xuc27fc142016-08-22 16:08:15 -0700950 } else {
951 hordist[0] = verdist[0] = 0.25;
952 hordist[1] = verdist[1] = 0.25;
953 hordist[2] = verdist[2] = 0.25;
954 }
Yaowu Xuc27fc142016-08-22 16:08:15 -0700955}
956
Urvang Joshi0d1e4ff2017-04-27 16:17:25 -0700957static int adst_vs_flipadst(const AV1_COMP *cpi, BLOCK_SIZE bsize,
958 const uint8_t *src, int src_stride,
959 const uint8_t *dst, int dst_stride) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700960 int prune_bitmask = 0;
961 double svm_proj_h = 0, svm_proj_v = 0;
Alex Converse89912f92017-04-21 13:28:50 -0700962 double hdist[3] = { 0, 0, 0 }, vdist[3] = { 0, 0, 0 };
Yaowu Xuc27fc142016-08-22 16:08:15 -0700963 get_energy_distribution_fine(cpi, bsize, src, src_stride, dst, dst_stride,
964 hdist, vdist);
965
966 svm_proj_v = vdist[0] * ADST_FLIP_SVM[0] + vdist[1] * ADST_FLIP_SVM[1] +
967 vdist[2] * ADST_FLIP_SVM[2] + ADST_FLIP_SVM[3];
968 svm_proj_h = hdist[0] * ADST_FLIP_SVM[4] + hdist[1] * ADST_FLIP_SVM[5] +
969 hdist[2] * ADST_FLIP_SVM[6] + ADST_FLIP_SVM[7];
970 if (svm_proj_v > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
971 prune_bitmask |= 1 << FLIPADST_1D;
972 else if (svm_proj_v < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
973 prune_bitmask |= 1 << ADST_1D;
974
975 if (svm_proj_h > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN)
976 prune_bitmask |= 1 << (FLIPADST_1D + 8);
977 else if (svm_proj_h < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN)
978 prune_bitmask |= 1 << (ADST_1D + 8);
979
980 return prune_bitmask;
981}
982
Alex Converse89912f92017-04-21 13:28:50 -0700983static void get_horver_correlation(const int16_t *diff, int stride, int w,
984 int h, double *hcorr, double *vcorr) {
Yaowu Xuc27fc142016-08-22 16:08:15 -0700985 // Returns hor/ver correlation coefficient
986 const int num = (h - 1) * (w - 1);
987 double num_r;
988 int i, j;
989 int64_t xy_sum = 0, xz_sum = 0;
990 int64_t x_sum = 0, y_sum = 0, z_sum = 0;
991 int64_t x2_sum = 0, y2_sum = 0, z2_sum = 0;
992 double x_var_n, y_var_n, z_var_n, xy_var_n, xz_var_n;
993 *hcorr = *vcorr = 1;
994
995 assert(num > 0);
996 num_r = 1.0 / num;
997 for (i = 1; i < h; ++i) {
998 for (j = 1; j < w; ++j) {
999 const int16_t x = diff[i * stride + j];
1000 const int16_t y = diff[i * stride + j - 1];
1001 const int16_t z = diff[(i - 1) * stride + j];
1002 xy_sum += x * y;
1003 xz_sum += x * z;
1004 x_sum += x;
1005 y_sum += y;
1006 z_sum += z;
1007 x2_sum += x * x;
1008 y2_sum += y * y;
1009 z2_sum += z * z;
1010 }
1011 }
1012 x_var_n = x2_sum - (x_sum * x_sum) * num_r;
1013 y_var_n = y2_sum - (y_sum * y_sum) * num_r;
1014 z_var_n = z2_sum - (z_sum * z_sum) * num_r;
1015 xy_var_n = xy_sum - (x_sum * y_sum) * num_r;
1016 xz_var_n = xz_sum - (x_sum * z_sum) * num_r;
1017 if (x_var_n > 0 && y_var_n > 0) {
1018 *hcorr = xy_var_n / sqrt(x_var_n * y_var_n);
1019 *hcorr = *hcorr < 0 ? 0 : *hcorr;
1020 }
1021 if (x_var_n > 0 && z_var_n > 0) {
1022 *vcorr = xz_var_n / sqrt(x_var_n * z_var_n);
1023 *vcorr = *vcorr < 0 ? 0 : *vcorr;
1024 }
1025}
1026
Alex Converse89912f92017-04-21 13:28:50 -07001027int dct_vs_idtx(const int16_t *diff, int stride, int w, int h) {
1028 double hcorr, vcorr;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001029 int prune_bitmask = 0;
Alex Converse89912f92017-04-21 13:28:50 -07001030 get_horver_correlation(diff, stride, w, h, &hcorr, &vcorr);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001031
Alex Converse89912f92017-04-21 13:28:50 -07001032 if (vcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
Yaowu Xuc27fc142016-08-22 16:08:15 -07001033 prune_bitmask |= 1 << IDTX_1D;
Alex Converse89912f92017-04-21 13:28:50 -07001034 else if (vcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
Yaowu Xuc27fc142016-08-22 16:08:15 -07001035 prune_bitmask |= 1 << DCT_1D;
1036
Alex Converse89912f92017-04-21 13:28:50 -07001037 if (hcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN)
Yaowu Xuc27fc142016-08-22 16:08:15 -07001038 prune_bitmask |= 1 << (IDTX_1D + 8);
Alex Converse89912f92017-04-21 13:28:50 -07001039 else if (hcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN)
Yaowu Xuc27fc142016-08-22 16:08:15 -07001040 prune_bitmask |= 1 << (DCT_1D + 8);
1041 return prune_bitmask;
1042}
1043
1044// Performance drop: 0.5%, Speed improvement: 24%
Yaowu Xuf883b422016-08-30 14:01:10 -07001045static int prune_two_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
Alex Converse89912f92017-04-21 13:28:50 -07001046 MACROBLOCK *x, const MACROBLOCKD *xd,
1047 int adst_flipadst, int dct_idtx) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001048 int prune = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001049
Alex Converse89912f92017-04-21 13:28:50 -07001050 if (adst_flipadst) {
1051 const struct macroblock_plane *const p = &x->plane[0];
1052 const struct macroblockd_plane *const pd = &xd->plane[0];
Yaowu Xuc27fc142016-08-22 16:08:15 -07001053 prune |= adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride,
Alex Converse89912f92017-04-21 13:28:50 -07001054 pd->dst.buf, pd->dst.stride);
1055 }
1056 if (dct_idtx) {
1057 av1_subtract_plane(x, bsize, 0);
1058 const struct macroblock_plane *const p = &x->plane[0];
1059 const int bw = 4 << (b_width_log2_lookup[bsize]);
1060 const int bh = 4 << (b_height_log2_lookup[bsize]);
1061 prune |= dct_vs_idtx(p->src_diff, bw, bw, bh);
1062 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07001063
1064 return prune;
1065}
Yaowu Xuc27fc142016-08-22 16:08:15 -07001066
1067// Performance drop: 0.3%, Speed improvement: 5%
Yaowu Xuf883b422016-08-30 14:01:10 -07001068static int prune_one_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize,
Alex Converse89912f92017-04-21 13:28:50 -07001069 const MACROBLOCK *x, const MACROBLOCKD *xd) {
1070 const struct macroblock_plane *const p = &x->plane[0];
1071 const struct macroblockd_plane *const pd = &xd->plane[0];
Yaowu Xuc27fc142016-08-22 16:08:15 -07001072 return adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride, pd->dst.buf,
Alex Converse89912f92017-04-21 13:28:50 -07001073 pd->dst.stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001074}
1075
// 1D Transforms used in inter set, this needs to be changed if
// ext_tx_used_inter is changed
// Rows index the inter EXT_TX sets; each column flags whether a 1D transform
// family is a member. Column order presumably follows the TX_TYPES_1D enum
// (DCT/ADST/FLIPADST/IDTX) — verify against the enum definition.
static const int ext_tx_used_inter_1D[EXT_TX_SETS_INTER][TX_TYPES_1D] = {
  { 1, 0, 0, 0 }, { 1, 1, 1, 1 }, { 1, 1, 1, 1 }, { 1, 0, 0, 1 },
};
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001081
// Builds a downscaled energy map of the residual (cells are 1x1 for sides
// <= 8, 2x2 otherwise) and writes its normalized 1D projections.
// hordist gets esq_w-1 entries and verdist esq_h-1 entries; the last entry
// of each projection is implied since the fractions sum to 1. An all-zero
// residual yields uniform distributions.
static void get_energy_distribution_finer(const int16_t *diff, int stride,
                                          int bw, int bh, float *hordist,
                                          float *verdist) {
  unsigned int esq[256];
  const int w_shift = bw <= 8 ? 0 : 1;
  const int h_shift = bh <= 8 ? 0 : 1;
  const int esq_w = bw >> w_shift;
  const int esq_h = bh >> h_shift;
  const int esq_sz = esq_w * esq_h;
  memset(esq, 0, esq_sz * sizeof(esq[0]));

  // Accumulate squared residuals into the downscaled grid.
  for (int r = 0; r < bh; r++) {
    unsigned int *const esq_row = esq + (r >> h_shift) * esq_w;
    const int16_t *const diff_row = diff + r * stride;
    for (int c = 0; c < bw; c++) {
      esq_row[c >> w_shift] += diff_row[c] * diff_row[c];
    }
  }

  uint64_t total = 0;
  for (int k = 0; k < esq_sz; k++) total += esq[k];

  if (total == 0) {
    // Flat residual: fall back to uniform projections.
    const float hor_val = 1.0f / esq_w;
    for (int c = 0; c < esq_w - 1; c++) hordist[c] = hor_val;
    const float ver_val = 1.0f / esq_h;
    for (int r = 0; r < esq_h - 1; r++) verdist[r] = ver_val;
    return;
  }

  const float e_recip = 1.0f / (float)total;
  memset(hordist, 0, (esq_w - 1) * sizeof(hordist[0]));
  memset(verdist, 0, (esq_h - 1) * sizeof(verdist[0]));
  // Project the grid onto columns (hordist) and rows (verdist); the final
  // column/row feeds only the opposite projection.
  for (int r = 0; r < esq_h; r++) {
    const unsigned int *const esq_row = esq + r * esq_w;
    for (int c = 0; c < esq_w - 1; c++) {
      hordist[c] += (float)esq_row[c];
      if (r < esq_h - 1) verdist[r] += (float)esq_row[c];
    }
    if (r < esq_h - 1) verdist[r] += (float)esq_row[esq_w - 1];
  }

  for (int c = 0; c < esq_w - 1; c++) hordist[c] *= e_recip;
  for (int r = 0; r < esq_h - 1; r++) verdist[r] *= e_recip;
}
1133
// Instead of 1D projections of the block energy distribution computed by
// get_energy_distribution_finer() this function computes a full
// two-dimensional energy distribution of the input block.
// edist receives (bw/4)*(bh/4) - 1 normalized 4x4-cell energy fractions;
// the final cell is implied since the fractions sum to 1.
static void get_2D_energy_distribution(const int16_t *diff, int stride, int bw,
                                       int bh, float *edist) {
  unsigned int esq[256] = { 0 };
  const int esq_w = bw >> 2;
  const int esq_h = bh >> 2;
  const int esq_sz = esq_w * esq_h;
  uint64_t total = 0;
  for (int i = 0; i < bh; i += 4) {
    for (int j = 0; j < bw; j += 4) {
      // Sum of squared residuals over one 4x4 cell.
      unsigned int cur_sum_energy = 0;
      for (int k = 0; k < 4; k++) {
        const int16_t *cur_diff = diff + (i + k) * stride + j;
        cur_sum_energy += cur_diff[0] * cur_diff[0] +
                          cur_diff[1] * cur_diff[1] +
                          cur_diff[2] * cur_diff[2] + cur_diff[3] * cur_diff[3];
      }
      esq[(i >> 2) * esq_w + (j >> 2)] = cur_sum_energy;
      total += cur_sum_energy;
    }
  }

  if (total == 0) {
    // All-zero residual: 1.0f / total below would be inf and poison the
    // features with NaNs. Fall back to a uniform distribution, matching
    // get_energy_distribution_finer()'s zero-energy behavior.
    const float uniform = 1.0f / (float)esq_sz;
    for (int i = 0; i < esq_sz - 1; i++) edist[i] = uniform;
    return;
  }

  const float e_recip = 1.0f / (float)total;
  for (int i = 0; i < esq_sz - 1; i++) edist[i] = esq[i] * e_recip;
}
1161
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001162// Similar to get_horver_correlation, but also takes into account first
1163// row/column, when computing horizontal/vertical correlation.
1164static void get_horver_correlation_full(const int16_t *diff, int stride, int w,
1165 int h, float *hcorr, float *vcorr) {
Yaowu Xu29373ee2017-10-19 15:50:34 -07001166 const float num_hor = (float)(h * (w - 1));
1167 const float num_ver = (float)((h - 1) * w);
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001168 int i, j;
1169
1170 // The following notation is used:
1171 // x - current pixel
1172 // y - left neighbor pixel
1173 // z - top neighbor pixel
1174 int64_t xy_sum = 0, xz_sum = 0;
1175 int64_t xhor_sum = 0, xver_sum = 0, y_sum = 0, z_sum = 0;
1176 int64_t x2hor_sum = 0, x2ver_sum = 0, y2_sum = 0, z2_sum = 0;
1177
1178 int16_t x, y, z;
1179 for (j = 1; j < w; ++j) {
1180 x = diff[j];
1181 y = diff[j - 1];
1182 xy_sum += x * y;
1183 xhor_sum += x;
1184 y_sum += y;
1185 x2hor_sum += x * x;
1186 y2_sum += y * y;
1187 }
1188 for (i = 1; i < h; ++i) {
1189 x = diff[i * stride];
1190 z = diff[(i - 1) * stride];
1191 xz_sum += x * z;
1192 xver_sum += x;
1193 z_sum += z;
1194 x2ver_sum += x * x;
1195 z2_sum += z * z;
1196 for (j = 1; j < w; ++j) {
1197 x = diff[i * stride + j];
1198 y = diff[i * stride + j - 1];
1199 z = diff[(i - 1) * stride + j];
1200 xy_sum += x * y;
1201 xz_sum += x * z;
1202 xhor_sum += x;
1203 xver_sum += x;
1204 y_sum += y;
1205 z_sum += z;
1206 x2hor_sum += x * x;
1207 x2ver_sum += x * x;
1208 y2_sum += y * y;
1209 z2_sum += z * z;
1210 }
1211 }
1212 const float xhor_var_n = x2hor_sum - (xhor_sum * xhor_sum) / num_hor;
1213 const float y_var_n = y2_sum - (y_sum * y_sum) / num_hor;
1214 const float xy_var_n = xy_sum - (xhor_sum * y_sum) / num_hor;
1215 const float xver_var_n = x2ver_sum - (xver_sum * xver_sum) / num_ver;
1216 const float z_var_n = z2_sum - (z_sum * z_sum) / num_ver;
1217 const float xz_var_n = xz_sum - (xver_sum * z_sum) / num_ver;
1218
1219 *hcorr = *vcorr = 1;
1220 if (xhor_var_n > 0 && y_var_n > 0) {
1221 *hcorr = xy_var_n / sqrtf(xhor_var_n * y_var_n);
1222 *hcorr = *hcorr < 0 ? 0 : *hcorr;
1223 }
1224 if (xver_var_n > 0 && z_var_n > 0) {
1225 *vcorr = xz_var_n / sqrtf(xver_var_n * z_var_n);
1226 *vcorr = *vcorr < 0 ? 0 : *vcorr;
1227 }
1228}
1229
// Performs a forward pass through a neural network with 2 fully-connected
// layers, assuming ReLU as activation function. Number of output neurons
// is always equal to 4.
// fc1, fc2 - weight matrices of the respective layers (row-major, one row
//            per destination unit).
// b1, b2 - bias vectors of the respective layers.
static void compute_1D_scores(float *features, int num_features,
                              const float *fc1, const float *b1,
                              const float *fc2, const float *b2,
                              int num_hidden_units, float *dst_scores) {
  assert(num_hidden_units <= 32);
  float hidden_layer[32];
  for (int unit = 0; unit < num_hidden_units; unit++) {
    const float *const weights = fc1 + unit * num_features;
    float acc = 0.0f;
    for (int f = 0; f < num_features; f++) acc += weights[f] * features[f];
    acc += b1[unit];
    hidden_layer[unit] = acc > 0.0f ? acc : 0.0f;  // ReLU
  }
  // Linear output layer, fixed at 4 neurons.
  for (int out = 0; out < 4; out++) {
    const float *const weights = fc2 + out * num_hidden_units;
    float acc = 0.0f;
    for (int u = 0; u < num_hidden_units; u++)
      acc += weights[u] * hidden_layer[u];
    dst_scores[out] = acc + b2[out];
  }
}
1256
// Transforms raw scores into a probability distribution across 16 TX types:
// each score is shifted, clamped at zero, raised to the 8th power (by
// repeated squaring) and normalized so the 16 entries sum to 1.
static void score_2D_transform_pow8(float *scores_2D, float shift) {
  float total = 0.0f;

  for (int i = 0; i < 16; i++) {
    const float shifted = scores_2D[i] + shift;
    const float clamped = shifted > 0.0f ? shifted : 0.0f;
    const float sq = clamped * clamped;
    const float quad = sq * sq;
    scores_2D[i] = quad * quad;  // clamped^8
    total += scores_2D[i];
  }
  for (int i = 0; i < 16; i++) scores_2D[i] /= total;
}
1272
// Similarly to compute_1D_scores() performs a forward pass through a
// neural network with two fully-connected layers. The only difference
// is that it assumes 1 output neuron, as required by the classifier used
// for TX size pruning.
static float compute_tx_split_prune_score(float *features, int num_features,
                                          const float *fc1, const float *b1,
                                          const float *fc2, float b2,
                                          int num_hidden_units) {
  assert(num_hidden_units <= 64);
  float hidden[64];
  for (int unit = 0; unit < num_hidden_units; unit++) {
    const float *const weights = fc1 + unit * num_features;
    float acc = 0.0f;
    for (int f = 0; f < num_features; f++) acc += weights[f] * features[f];
    acc += b1[unit];
    hidden[unit] = acc > 0.0f ? acc : 0.0f;  // ReLU
  }
  // Single linear output neuron.
  float score = 0.0f;
  for (int u = 0; u < num_hidden_units; u++) score += fc2[u] * hidden[u];
  return score + b2;
}
1296
1297static int prune_tx_split(BLOCK_SIZE bsize, const int16_t *diff, float hcorr,
1298 float vcorr) {
1299 if (bsize <= BLOCK_4X4 || bsize > BLOCK_16X16) return 0;
1300
1301 float features[17];
1302 const int bw = block_size_wide[bsize], bh = block_size_high[bsize];
1303 const int feature_num = (bw / 4) * (bh / 4) + 1;
1304 assert(feature_num <= 17);
1305
1306 get_2D_energy_distribution(diff, bw, bw, bh, features);
1307 features[feature_num - 2] = hcorr;
1308 features[feature_num - 1] = vcorr;
1309
1310 const int bidx = bsize - BLOCK_4X4 - 1;
1311 const float *fc1 = av1_prune_tx_split_learned_weights[bidx];
1312 const float *b1 =
1313 fc1 + av1_prune_tx_split_num_hidden_units[bidx] * feature_num;
1314 const float *fc2 = b1 + av1_prune_tx_split_num_hidden_units[bidx];
1315 float b2 = *(fc2 + av1_prune_tx_split_num_hidden_units[bidx]);
1316 float score =
1317 compute_tx_split_prune_score(features, feature_num, fc1, b1, fc2, b2,
1318 av1_prune_tx_split_num_hidden_units[bidx]);
1319
1320 return (score > av1_prune_tx_split_thresholds[bidx]);
1321}
1322
// Neural-network based pruning of 2D TX types. Scores each of the 16
// (vertical 1D, horizontal 1D) transform combinations using two small
// per-direction networks, then returns a bitmask of TX types whose score
// falls below an aggressiveness-dependent threshold. The highest-scoring
// TX type available in the set is never pruned. When use_tx_split_prune is
// set, bit TX_TYPES of the result additionally carries the
// prune_tx_split() decision.
static int prune_tx_2D(BLOCK_SIZE bsize, const MACROBLOCK *x, int tx_set_type,
                       int tx_type_pruning_aggressiveness,
                       int use_tx_split_prune) {
  // Models only cover block sizes below 32x32.
  if (bsize >= BLOCK_32X32) return 0;
  // Reset FPU/SIMD state before the floating-point code below.
  aom_clear_system_state();
  const struct macroblock_plane *const p = &x->plane[0];
  const int bidx = AOMMAX(bsize - BLOCK_4X4, 0);
  const float score_thresh =
      av1_prune_2D_adaptive_thresholds[bidx]
                                      [tx_type_pruning_aggressiveness - 1];
  float hfeatures[16], vfeatures[16];
  float hscores[4], vscores[4];
  float scores_2D[16];
  // Maps (vertical index * 4 + horizontal index) to the 2D TX type.
  int tx_type_table_2D[16] = {
    DCT_DCT,      DCT_ADST,      DCT_FLIPADST,      V_DCT,
    ADST_DCT,     ADST_ADST,     ADST_FLIPADST,     V_ADST,
    FLIPADST_DCT, FLIPADST_ADST, FLIPADST_FLIPADST, V_FLIPADST,
    H_DCT,        H_ADST,        H_FLIPADST,        IDTX
  };
  const int bw = block_size_wide[bsize], bh = block_size_high[bsize];
  const int hfeatures_num = bw <= 8 ? bw : bw / 2;
  const int vfeatures_num = bh <= 8 ? bh : bh / 2;
  assert(hfeatures_num <= 16);
  assert(vfeatures_num <= 16);

  // Feature vectors: per-direction energy distribution of the residual,
  // with a correlation feature written into the last slot of each vector.
  get_energy_distribution_finer(p->src_diff, bw, bw, bh, hfeatures, vfeatures);
  get_horver_correlation_full(p->src_diff, bw, bw, bh,
                              &hfeatures[hfeatures_num - 1],
                              &vfeatures[vfeatures_num - 1]);

  // Horizontal network. Weights are stored flat:
  // [fc1 weights | fc1 biases | fc2 weights | fc2 biases (4 outputs)].
  const float *fc1_hor = av1_prune_2D_learned_weights_hor[bidx];
  const float *b1_hor =
      fc1_hor + av1_prune_2D_num_hidden_units_hor[bidx] * hfeatures_num;
  const float *fc2_hor = b1_hor + av1_prune_2D_num_hidden_units_hor[bidx];
  const float *b2_hor = fc2_hor + av1_prune_2D_num_hidden_units_hor[bidx] * 4;
  compute_1D_scores(hfeatures, hfeatures_num, fc1_hor, b1_hor, fc2_hor, b2_hor,
                    av1_prune_2D_num_hidden_units_hor[bidx], hscores);

  // Vertical network, same layout.
  const float *fc1_ver = av1_prune_2D_learned_weights_ver[bidx];
  const float *b1_ver =
      fc1_ver + av1_prune_2D_num_hidden_units_ver[bidx] * vfeatures_num;
  const float *fc2_ver = b1_ver + av1_prune_2D_num_hidden_units_ver[bidx];
  const float *b2_ver = fc2_ver + av1_prune_2D_num_hidden_units_ver[bidx] * 4;
  compute_1D_scores(vfeatures, vfeatures_num, fc1_ver, b1_ver, fc2_ver, b2_ver,
                    av1_prune_2D_num_hidden_units_ver[bidx], vscores);

  // 2D score of each TX type = product of its two 1D scores.
  float score_2D_average = 0.0f;
  for (int i = 0; i < 4; i++) {
    float *cur_scores_2D = scores_2D + i * 4;
    cur_scores_2D[0] = vscores[i] * hscores[0];
    cur_scores_2D[1] = vscores[i] * hscores[1];
    cur_scores_2D[2] = vscores[i] * hscores[2];
    cur_scores_2D[3] = vscores[i] * hscores[3];
    score_2D_average += cur_scores_2D[0] + cur_scores_2D[1] + cur_scores_2D[2] +
                        cur_scores_2D[3];
  }
  score_2D_average /= 16;
  score_2D_transform_pow8(scores_2D, (20 - score_2D_average));

  // Always keep the TX type with the highest score, prune all others with
  // score below score_thresh.
  int max_score_i = 0;
  float max_score = 0.0f;
  for (int i = 0; i < 16; i++) {
    if (scores_2D[i] > max_score &&
        av1_ext_tx_used[tx_set_type][tx_type_table_2D[i]]) {
      max_score = scores_2D[i];
      max_score_i = i;
    }
  }

  int prune_bitmask = 0;
  for (int i = 0; i < 16; i++) {
    if (scores_2D[i] < score_thresh && i != max_score_i)
      prune_bitmask |= (1 << tx_type_table_2D[i]);
  }

  // Also apply TX size pruning if it's turned on. The value
  // of prune_tx_split_flag indicates whether we should do
  // full TX size search (flag=0) or use the largest available
  // TX size without performing any further search (flag=1).
  int prune_tx_split_flag = 0;
  if (use_tx_split_prune) {
    prune_tx_split_flag =
        prune_tx_split(bsize, p->src_diff, hfeatures[hfeatures_num - 1],
                       vfeatures[vfeatures_num - 1]);
  }
  prune_bitmask |= (prune_tx_split_flag << TX_TYPES);
  return prune_bitmask;
}
Hui Su032ab8b2017-09-19 14:53:40 -07001413
Alexander Bokov79a37242017-09-29 11:25:55 -07001414static int prune_tx(const AV1_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x,
1415 const MACROBLOCKD *const xd, int tx_set_type,
1416 int use_tx_split_prune) {
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001417 int tx_set = ext_tx_set_index[1][tx_set_type];
1418 assert(tx_set >= 0);
1419 const int *tx_set_1D = ext_tx_used_inter_1D[tx_set];
Yaowu Xuc27fc142016-08-22 16:08:15 -07001420
1421 switch (cpi->sf.tx_type_search.prune_mode) {
1422 case NO_PRUNE: return 0; break;
1423 case PRUNE_ONE:
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001424 if (!(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D])) return 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001425 return prune_one_for_sby(cpi, bsize, x, xd);
1426 break;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001427 case PRUNE_TWO:
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001428 if (!(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D])) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001429 if (!(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D])) return 0;
1430 return prune_two_for_sby(cpi, bsize, x, xd, 0, 1);
1431 }
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001432 if (!(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D]))
Yaowu Xuc27fc142016-08-22 16:08:15 -07001433 return prune_two_for_sby(cpi, bsize, x, xd, 1, 0);
1434 return prune_two_for_sby(cpi, bsize, x, xd, 1, 1);
1435 break;
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001436 case PRUNE_2D_ACCURATE:
1437 if (tx_set_type == EXT_TX_SET_ALL16)
Alexander Bokov79a37242017-09-29 11:25:55 -07001438 return prune_tx_2D(bsize, x, tx_set_type, 6, use_tx_split_prune);
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001439 else if (tx_set_type == EXT_TX_SET_DTT9_IDTX_1DDCT)
Alexander Bokov79a37242017-09-29 11:25:55 -07001440 return prune_tx_2D(bsize, x, tx_set_type, 4, use_tx_split_prune);
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001441 else
1442 return 0;
1443 break;
1444 case PRUNE_2D_FAST:
1445 if (tx_set_type == EXT_TX_SET_ALL16)
Alexander Bokov79a37242017-09-29 11:25:55 -07001446 return prune_tx_2D(bsize, x, tx_set_type, 10, use_tx_split_prune);
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001447 else if (tx_set_type == EXT_TX_SET_DTT9_IDTX_1DDCT)
Alexander Bokov79a37242017-09-29 11:25:55 -07001448 return prune_tx_2D(bsize, x, tx_set_type, 7, use_tx_split_prune);
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001449 else
1450 return 0;
1451 break;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001452 }
1453 assert(0);
1454 return 0;
1455}
1456
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001457static int do_tx_type_search(TX_TYPE tx_type, int prune,
1458 TX_TYPE_PRUNE_MODE mode) {
Sebastien Alaiwan3bac9922017-11-02 12:34:41 +01001459 // TODO(sarahparker) implement for non ext tx
Alexander Bokov0c7eb102017-09-07 18:49:00 -07001460 if (mode >= PRUNE_2D_ACCURATE) {
1461 return !((prune >> tx_type) & 1);
1462 } else {
1463 return !(((prune >> vtx_tab[tx_type]) & 1) |
1464 ((prune >> (htx_tab[tx_type] + 8)) & 1));
1465 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07001466}
1467
// Estimates the rate and distortion of coding one plane of a block directly
// from its prediction SSE, without running a transform/quantization. Uses
// either a cheap linear model (simple_model_rd_from_var speed feature) or
// the Laplacian-PDF based model av1_model_rd_from_var_lapndz.
static void model_rd_from_sse(const AV1_COMP *const cpi,
                              const MACROBLOCKD *const xd, BLOCK_SIZE bsize,
                              int plane, int64_t sse, int *rate,
                              int64_t *dist) {
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  // Scale the dequant step down to an effective quantizer; high bit depth
  // streams carry extra precision (bd - 5 instead of 3).
  const int dequant_shift =
#if CONFIG_HIGHBITDEPTH
      (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 :
#endif  // CONFIG_HIGHBITDEPTH
                                                    3;

  // Fast approximate the modelling function.
  if (cpi->sf.simple_model_rd_from_var) {
    const int64_t square_error = sse;
    // AC dequant value scaled to an effective quantizer step.
    int quantizer = (pd->dequant_Q3[1] >> dequant_shift);

    if (quantizer < 120)
      *rate = (int)((square_error * (280 - quantizer)) >>
                    (16 - AV1_PROB_COST_SHIFT));
    else
      *rate = 0;
    *dist = (square_error * quantizer) >> 8;
  } else {
    av1_model_rd_from_var_lapndz(sse, num_pels_log2_lookup[bsize],
                                 pd->dequant_Q3[1] >> dequant_shift, rate,
                                 dist);
  }

  // Distortion is reported in the 16x scale used by the RD code.
  *dist <<= 4;
}
1498
// Model-based rate/distortion estimate for a whole superblock: accumulates
// per-plane prediction SSE (via the variance function pointers) over planes
// [plane_from, plane_to] and converts it to rate/distortion with
// model_rd_from_sse. Also reports whether the transform could be skipped
// entirely (*skip_txfm_sb) and the SSE that skipping would incur
// (*skip_sse_sb).
static void model_rd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
                            MACROBLOCK *x, MACROBLOCKD *xd, int plane_from,
                            int plane_to, int *out_rate_sum,
                            int64_t *out_dist_sum, int *skip_txfm_sb,
                            int64_t *skip_sse_sb) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  int plane;
  const int ref = xd->mi[0]->mbmi.ref_frame[0];

  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
  int64_t total_sse = 0;

  x->pred_sse[ref] = 0;

  for (plane = plane_from; plane <= plane_to; ++plane) {
    struct macroblock_plane *const p = &x->plane[plane];
    struct macroblockd_plane *const pd = &xd->plane[plane];
    const BLOCK_SIZE bs = AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd));
    unsigned int sse;
    int rate;
    int64_t dist;

    // Chroma planes may be skipped entirely for sub-8x8 blocks.
    if (x->skip_chroma_rd && plane) continue;

    // TODO(geza): Write direct sse functions that do not compute
    // variance as well.
    cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
                       &sse);

    // Cache the luma prediction SSE for later mode decisions.
    if (plane == 0) x->pred_sse[ref] = sse;

    total_sse += sse;

    model_rd_from_sse(cpi, xd, bs, plane, sse, &rate, &dist);

    rate_sum += rate;
    dist_sum += dist;
  }

  *skip_txfm_sb = total_sse == 0;
  // Report skip SSE in the 16x distortion scale used by the RD code.
  *skip_sse_sb = total_sse << 4;
  *out_rate_sum = (int)rate_sum;
  *out_dist_sum = dist_sum;
}
1546
Yaowu Xuf883b422016-08-30 14:01:10 -07001547int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
1548 intptr_t block_size, int64_t *ssz) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001549 int i;
1550 int64_t error = 0, sqcoeff = 0;
1551
1552 for (i = 0; i < block_size; i++) {
1553 const int diff = coeff[i] - dqcoeff[i];
1554 error += diff * diff;
1555 sqcoeff += coeff[i] * coeff[i];
1556 }
1557
1558 *ssz = sqcoeff;
1559 return error;
1560}
1561
// C reference implementation of the low-precision block error: returns the
// sum of squared differences between coeff and dqcoeff over block_size
// 16-bit entries.
int64_t av1_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff,
                             int block_size) {
  int64_t total = 0;

  for (int k = 0; k < block_size; ++k) {
    const int delta = coeff[k] - dqcoeff[k];
    total += delta * delta;
  }

  return total;
}
1574
#if CONFIG_HIGHBITDEPTH
// High bit depth block error: sum of squared coefficient differences and
// sum of squared coefficients (*ssz), both computed in 64-bit and scaled
// down (with rounding) so results are comparable to the 8-bit path
// regardless of bit depth.
int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff, intptr_t block_size,
                                 int64_t *ssz, int bd) {
  int i;
  int64_t error = 0, sqcoeff = 0;
#if CONFIG_DAALA_TX
  // Daala TX uses a fixed coefficient depth, independent of bd.
  (void)bd;
  int shift = 2 * (TX_COEFF_DEPTH - 11);
#else
  int shift = 2 * (bd - 8);
#endif
  int rounding = shift > 0 ? 1 << (shift - 1) : 0;

  for (i = 0; i < block_size; i++) {
    // 64-bit arithmetic: high bit depth coefficients can overflow int here.
    const int64_t diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
  }
  assert(error >= 0 && sqcoeff >= 0);
  // Round-to-nearest downscale to the common 8-bit-equivalent range.
  error = (error + rounding) >> shift;
  sqcoeff = (sqcoeff + rounding) >> shift;

  *ssz = sqcoeff;
  return error;
}
#endif  // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07001602
#if !CONFIG_LV_MAP
// Computes the token cost (rate) of the quantized coefficients of one
// transform block. Two paths: a fast approximation that derives the token
// context only from the previous token (use_fast_coef_costing), and the
// full path that recomputes the coefficient context from the token cache
// for every position. Both walk the scan order, charging head/tail token
// costs plus extra bits for large-magnitude (category-6) tokens.
static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane,
                       int block, TX_SIZE tx_size, const SCAN_ORDER *scan_order,
                       const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l,
                       int use_fast_coef_costing) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const struct macroblock_plane *p = &x->plane[plane];
  const struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
  // Band table is offset by one: band 0 is the DC position handled below.
  const uint16_t *band_count = &band_count_table[tx_size][1];
  const int eob = p->eobs[block];
  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  const TX_SIZE tx_size_ctx = get_txsize_entropy_ctx(tx_size);
  uint8_t token_cache[MAX_TX_SQUARE];
  // Initial context from the above/left entropy contexts.
  int pt = combine_entropy_contexts(*a, *l);
  int c, cost;
  const int16_t *scan = scan_order->scan;
  const int16_t *nb = scan_order->neighbors;
  const int ref = is_inter_block(mbmi);
  int(*head_token_costs)[COEFF_CONTEXTS][TAIL_TOKENS] =
      x->token_head_costs[tx_size_ctx][type][ref];
  int(*tail_token_costs)[COEFF_CONTEXTS][TAIL_TOKENS] =
      x->token_tail_costs[tx_size_ctx][type][ref];
  const int seg_eob = av1_get_tx_eob(&cm->seg, mbmi->segment_id, tx_size);
  int8_t eob_val;

#if CONFIG_HIGHBITDEPTH
  const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, xd->bd);
#else
  const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, 8);
#endif  // CONFIG_HIGHBITDEPTH

  (void)cm;

  if (eob == 0) {
    // block zero
    cost = (*head_token_costs)[pt][0];
  } else {
    if (use_fast_coef_costing) {
      int band_left = *band_count++;

      // dc token
      int v = qcoeff[0];
      int16_t prev_t;
      cost = av1_get_token_cost(v, &prev_t, cat6_bits);
      eob_val = (eob == 1) ? EARLY_EOB : NO_EOB;
      cost += av1_get_coeff_token_cost(
          prev_t, eob_val, 1, (*head_token_costs)[pt], (*tail_token_costs)[pt]);

      token_cache[0] = av1_pt_energy_class[prev_t];
      ++head_token_costs;
      ++tail_token_costs;

      // ac tokens; fast path approximates the context by whether the
      // previous token was non-zero (!prev_t).
      for (c = 1; c < eob; c++) {
        const int rc = scan[c];
        int16_t t;

        v = qcoeff[rc];
        cost += av1_get_token_cost(v, &t, cat6_bits);
        eob_val =
            (c + 1 == eob) ? (c + 1 == seg_eob ? LAST_EOB : EARLY_EOB) : NO_EOB;
        cost += av1_get_coeff_token_cost(t, eob_val, 0,
                                         (*head_token_costs)[!prev_t],
                                         (*tail_token_costs)[!prev_t]);
        prev_t = t;
        if (!--band_left) {
          // Advance to the next coefficient band's cost tables.
          band_left = *band_count++;
          ++head_token_costs;
          ++tail_token_costs;
        }
      }
    } else {  // !use_fast_coef_costing
      int band_left = *band_count++;

      // dc token
      int v = qcoeff[0];
      int16_t tok;
      cost = av1_get_token_cost(v, &tok, cat6_bits);
      eob_val = (eob == 1) ? EARLY_EOB : NO_EOB;
      cost += av1_get_coeff_token_cost(tok, eob_val, 1, (*head_token_costs)[pt],
                                       (*tail_token_costs)[pt]);

      token_cache[0] = av1_pt_energy_class[tok];
      ++head_token_costs;
      ++tail_token_costs;

      // ac tokens; full path derives the context from neighbouring tokens
      // via the token cache, matching the entropy coder exactly.
      for (c = 1; c < eob; c++) {
        const int rc = scan[c];

        v = qcoeff[rc];
        cost += av1_get_token_cost(v, &tok, cat6_bits);
        pt = get_coef_context(nb, token_cache, c);
        eob_val =
            (c + 1 == eob) ? (c + 1 == seg_eob ? LAST_EOB : EARLY_EOB) : NO_EOB;
        cost += av1_get_coeff_token_cost(
            tok, eob_val, 0, (*head_token_costs)[pt], (*tail_token_costs)[pt]);
        token_cache[rc] = av1_pt_energy_class[tok];
        if (!--band_left) {
          // Advance to the next coefficient band's cost tables.
          band_left = *band_count++;
          ++head_token_costs;
          ++tail_token_costs;
        }
      }
    }
  }

  return cost;
}
#endif  // !CONFIG_LV_MAP
1715
// Public entry point for coefficient rate estimation. Dispatches to the
// legacy token-based cost_coeffs() or, when the level-map coefficient coder
// is enabled (CONFIG_LV_MAP), to av1_cost_coeffs_txb with a freshly derived
// TX block context.
int av1_cost_coeffs(const AV1_COMP *const cpi, MACROBLOCK *x, int plane,
                    int blk_row, int blk_col, int block, TX_SIZE tx_size,
                    const SCAN_ORDER *scan_order, const ENTROPY_CONTEXT *a,
                    const ENTROPY_CONTEXT *l, int use_fast_coef_costing) {
  const AV1_COMMON *const cm = &cpi->common;
#if !CONFIG_LV_MAP
  // The token-based coder does not need the block position.
  (void)blk_row;
  (void)blk_col;
  return cost_coeffs(cm, x, plane, block, tx_size, scan_order, a, l,
                     use_fast_coef_costing);
#else   // !CONFIG_LV_MAP
  // The level-map coder derives its own scan and contexts.
  (void)scan_order;
  (void)use_fast_coef_costing;
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const struct macroblockd_plane *pd = &xd->plane[plane];
  const BLOCK_SIZE bsize = mbmi->sb_type;
  const BLOCK_SIZE plane_bsize =
      AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd));
  TXB_CTX txb_ctx;
  get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx);
  return av1_cost_coeffs_txb(cm, x, plane, blk_row, blk_col, block, tx_size,
                             &txb_ctx);
#endif  // !CONFIG_LV_MAP
}
Yaowu Xuc27fc142016-08-22 16:08:15 -07001741
Alex Converse61f37b82017-03-29 15:26:03 -07001742// Get transform block visible dimensions cropped to the MI units.
1743static void get_txb_dimensions(const MACROBLOCKD *xd, int plane,
1744 BLOCK_SIZE plane_bsize, int blk_row, int blk_col,
1745 BLOCK_SIZE tx_bsize, int *width, int *height,
1746 int *visible_width, int *visible_height) {
1747 assert(tx_bsize <= plane_bsize);
1748 int txb_height = block_size_high[tx_bsize];
1749 int txb_width = block_size_wide[tx_bsize];
1750 const int block_height = block_size_high[plane_bsize];
1751 const int block_width = block_size_wide[plane_bsize];
1752 const struct macroblockd_plane *const pd = &xd->plane[plane];
1753 // TODO(aconverse@google.com): Investigate using crop_width/height here rather
1754 // than the MI size
1755 const int block_rows =
1756 (xd->mb_to_bottom_edge >= 0)
1757 ? block_height
1758 : (xd->mb_to_bottom_edge >> (3 + pd->subsampling_y)) + block_height;
1759 const int block_cols =
1760 (xd->mb_to_right_edge >= 0)
1761 ? block_width
1762 : (xd->mb_to_right_edge >> (3 + pd->subsampling_x)) + block_width;
1763 const int tx_unit_size = tx_size_wide_log2[0];
1764 if (width) *width = txb_width;
1765 if (height) *height = txb_height;
1766 *visible_width = clamp(block_cols - (blk_col << tx_unit_size), 0, txb_width);
1767 *visible_height =
1768 clamp(block_rows - (blk_row << tx_unit_size), 0, txb_height);
1769}
1770
// Computes the pixel-domain distortion between src and dst over all visible
// 4x4 sub-blocks of the transform block.
static unsigned pixel_dist(const AV1_COMP *const cpi, const MACROBLOCK *x,
                           int plane, const uint8_t *src, const int src_stride,
                           const uint8_t *dst, const int dst_stride,
                           int blk_row, int blk_col,
                           const BLOCK_SIZE plane_bsize,
                           const BLOCK_SIZE tx_bsize) {
  int txb_rows, txb_cols, visible_rows, visible_cols;
  const MACROBLOCKD *xd = &x->e_mbd;

  get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize,
                     &txb_cols, &txb_rows, &visible_cols, &visible_rows);
  assert(visible_rows > 0);
  assert(visible_cols > 0);

#if CONFIG_DIST_8X8
  // Use the perceptual 8x8 distortion metric for luma when enabled and the
  // transform block is at least 8x8.
  if (x->using_dist_8x8 && plane == 0 && txb_cols >= 8 && txb_rows >= 8)
    return (unsigned)av1_dist_8x8(cpi, x, src, src_stride, dst, dst_stride,
                                  tx_bsize, txb_cols, txb_rows, visible_cols,
                                  visible_rows, x->qindex);
#endif  // CONFIG_DIST_8X8

  // Plain SSE over only the visible region of the transform block.
  unsigned sse = pixel_dist_visible_only(cpi, x, src, src_stride, dst,
                                         dst_stride, tx_bsize, txb_rows,
                                         txb_cols, visible_rows, visible_cols);

  return sse;
}
1801
// Computes the pixel-domain distortion directly from the residual (diff)
// buffer over all visible 4x4 sub-blocks of the transform block.
static int64_t pixel_diff_dist(const MACROBLOCK *x, int plane,
                               const int16_t *diff, const int diff_stride,
                               int blk_row, int blk_col,
                               const BLOCK_SIZE plane_bsize,
                               const BLOCK_SIZE tx_bsize) {
  int visible_rows, visible_cols;
  const MACROBLOCKD *xd = &x->e_mbd;
#if CONFIG_DIST_8X8
  // The 8x8 perceptual metric also needs the source pixels, not just the
  // residual.
  int txb_height = block_size_high[tx_bsize];
  int txb_width = block_size_wide[tx_bsize];
  const int src_stride = x->plane[plane].src.stride;
  const int src_idx = (blk_row * src_stride + blk_col) << tx_size_wide_log2[0];
  const uint8_t *src = &x->plane[plane].src.buf[src_idx];
#endif

  get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize, NULL,
                     NULL, &visible_cols, &visible_rows);

#if CONFIG_DIST_8X8
  // Perceptual 8x8 distortion for luma when enabled and block is >= 8x8.
  if (x->using_dist_8x8 && plane == 0 && txb_width >= 8 && txb_height >= 8)
    return av1_dist_8x8_diff(x, src, src_stride, diff, diff_stride, txb_width,
                             txb_height, visible_cols, visible_rows, x->qindex);
  else
#endif
    // Otherwise: plain sum of squared residuals over the visible region.
    return aom_sum_squares_2d_i16(diff, diff_stride, visible_cols,
                                  visible_rows);
}
1831
// Builds a histogram of the 8-bit sample values in the rows x cols region of
// src (with the given row stride) into val_count (256 entries), and returns
// the number of distinct sample values present.
int av1_count_colors(const uint8_t *src, int stride, int rows, int cols,
                     int *val_count) {
  const int num_pix_vals = 1 << 8;
  memset(val_count, 0, num_pix_vals * sizeof(*val_count));
  for (int y = 0; y < rows; ++y) {
    const uint8_t *row = src + y * stride;
    for (int x = 0; x < cols; ++x) {
      const int pix = row[x];
      assert(pix < num_pix_vals);
      ++val_count[pix];
    }
  }
  int num_colors = 0;
  for (int v = 0; v < num_pix_vals; ++v) {
    num_colors += (val_count[v] != 0);
  }
  return num_colors;
}
1849
#if CONFIG_HIGHBITDEPTH
// High bit depth counterpart of av1_count_colors: histograms the rows x cols
// region of the 16-bit source (src8 converted via CONVERT_TO_SHORTPTR) into
// val_count, which must hold 1 << bit_depth entries, and returns the number
// of distinct sample values present.
int av1_count_colors_highbd(const uint8_t *src8, int stride, int rows, int cols,
                            int bit_depth, int *val_count) {
  assert(bit_depth <= 12);
  const int num_pix_vals = 1 << bit_depth;
  const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  memset(val_count, 0, num_pix_vals * sizeof(*val_count));
  for (int y = 0; y < rows; ++y) {
    const uint16_t *row = src + y * stride;
    for (int x = 0; x < cols; ++x) {
      const int pix = row[x];
      assert(pix < num_pix_vals);
      ++val_count[pix];
    }
  }
  int num_colors = 0;
  for (int v = 0; v < num_pix_vals; ++v) {
    num_colors += (val_count[v] != 0);
  }
  return num_colors;
}
#endif  // CONFIG_HIGHBITDEPTH
hui sud9a812b2017-07-06 14:34:37 -07001871
Sebastien Alaiwan95137bd2017-11-13 14:47:37 +01001872void av1_inverse_transform_block_facade(MACROBLOCKD *xd, int plane, int block,
Frederic Barbier33b39f02017-11-21 11:11:24 +01001873 int blk_row, int blk_col, int eob,
1874 int reduced_tx_set) {
Sebastien Alaiwan95137bd2017-11-13 14:47:37 +01001875 struct macroblockd_plane *const pd = &xd->plane[plane];
1876 tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
Sebastien Alaiwan95137bd2017-11-13 14:47:37 +01001877 const PLANE_TYPE plane_type = get_plane_type(plane);
1878 const TX_SIZE tx_size = av1_get_tx_size(plane, xd);
1879 const TX_TYPE tx_type =
1880 av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size);
1881 const int dst_stride = pd->dst.stride;
1882 uint8_t *dst =
1883 &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
Sebastien Alaiwan9f001f32017-11-28 16:32:33 +01001884 av1_inverse_transform_block(xd, dqcoeff, plane, tx_type, tx_size, dst,
1885 dst_stride, eob, reduced_tx_set);
Sebastien Alaiwan95137bd2017-11-13 14:47:37 +01001886}
1887
Angie Chiang808d8592017-04-06 18:36:55 -07001888void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
1889 BLOCK_SIZE plane_bsize, int block, int blk_row, int blk_col,
1890 TX_SIZE tx_size, int64_t *out_dist, int64_t *out_sse,
1891 OUTPUT_STATUS output_status) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07001892 MACROBLOCKD *const xd = &x->e_mbd;
1893 const struct macroblock_plane *const p = &x->plane[plane];
Yushin Chob7b60c52017-07-14 16:18:52 -07001894#if CONFIG_DIST_8X8
Yushin Cho4483e3d2017-04-18 19:41:20 -07001895 struct macroblockd_plane *const pd = &xd->plane[plane];
Yushin Chob7b60c52017-07-14 16:18:52 -07001896#else // CONFIG_DIST_8X8
Yushin Cho4483e3d2017-04-18 19:41:20 -07001897 const struct macroblockd_plane *const pd = &xd->plane[plane];
Yushin Chob7b60c52017-07-14 16:18:52 -07001898#endif // CONFIG_DIST_8X8
Yushin Cho7a428ba2017-01-12 16:28:49 -08001899
Yushin Cho55104332017-08-14 16:15:43 -07001900 if (cpi->sf.use_transform_domain_distortion
1901#if CONFIG_DIST_8X8
1902 && !x->using_dist_8x8
1903#endif
1904 ) {
hui sud2f12ba2017-04-12 10:08:43 -07001905 // Transform domain distortion computation is more efficient as it does
Yaowu Xuc27fc142016-08-22 16:08:15 -07001906 // not involve an inverse transform, but it is less accurate.
Urvang Joshi80893152017-10-27 11:51:14 -07001907 const int buffer_length = av1_get_max_eob(tx_size);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001908 int64_t this_sse;
Monty Montgomery4a05a582017-11-01 21:21:07 -04001909// TX-domain results need to shift down to Q2/D10 to match pixel
1910// domain distortion values which are in Q2^2
1911#if CONFIG_DAALA_TX
1912 int shift = (TX_COEFF_DEPTH - 10) * 2;
1913#else
Jingning Hanff705452017-04-27 11:32:15 -07001914 int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2;
Monty Montgomery4a05a582017-11-01 21:21:07 -04001915#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -07001916 tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
1917 tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
Thomas Daede6ff6af62017-02-03 16:29:24 -08001918
Monty Montgomerya26262c2017-10-31 07:32:13 -04001919#if CONFIG_DAALA_TX
1920 *out_dist = av1_highbd_block_error(coeff, dqcoeff, buffer_length, &this_sse,
1921 xd->bd);
1922#else
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001923#if CONFIG_HIGHBITDEPTH
Yi Luod61e6082017-05-26 16:14:39 -07001924 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
1925 *out_dist = av1_highbd_block_error(coeff, dqcoeff, buffer_length,
Debargha Mukherjeeb02d2f32017-10-03 11:06:40 -07001926 &this_sse, xd->bd);
Yi Luod61e6082017-05-26 16:14:39 -07001927 else
1928#endif
Debargha Mukherjeeb02d2f32017-10-03 11:06:40 -07001929 *out_dist = av1_block_error(coeff, dqcoeff, buffer_length, &this_sse);
Monty Montgomerya26262c2017-10-31 07:32:13 -04001930#endif
Yushin Chod0b77ac2017-10-20 17:33:16 -07001931
Debargha Mukherjeeb02d2f32017-10-03 11:06:40 -07001932 *out_dist = RIGHT_SIGNED_SHIFT(*out_dist, shift);
1933 *out_sse = RIGHT_SIGNED_SHIFT(this_sse, shift);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001934 } else {
1935 const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
Jingning Hanb9c57272016-10-25 10:15:39 -07001936 const int bsw = block_size_wide[tx_bsize];
1937 const int bsh = block_size_high[tx_bsize];
Yaowu Xuc27fc142016-08-22 16:08:15 -07001938 const int src_stride = x->plane[plane].src.stride;
1939 const int dst_stride = xd->plane[plane].dst.stride;
Jingning Hanb9c57272016-10-25 10:15:39 -07001940 // Scale the transform block index to pixel unit.
1941 const int src_idx = (blk_row * src_stride + blk_col)
1942 << tx_size_wide_log2[0];
1943 const int dst_idx = (blk_row * dst_stride + blk_col)
1944 << tx_size_wide_log2[0];
Yaowu Xuc27fc142016-08-22 16:08:15 -07001945 const uint8_t *src = &x->plane[plane].src.buf[src_idx];
1946 const uint8_t *dst = &xd->plane[plane].dst.buf[dst_idx];
1947 const tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
1948 const uint16_t eob = p->eobs[block];
1949
Yaowu Xuc27fc142016-08-22 16:08:15 -07001950 assert(cpi != NULL);
Jingning Hanb9c57272016-10-25 10:15:39 -07001951 assert(tx_size_wide_log2[0] == tx_size_high_log2[0]);
Yaowu Xuc27fc142016-08-22 16:08:15 -07001952
Angie Chiangc0cf6c02017-04-04 13:47:27 -07001953 {
1954 const int diff_stride = block_size_wide[plane_bsize];
1955 const int diff_idx = (blk_row * diff_stride + blk_col)
1956 << tx_size_wide_log2[0];
1957 const int16_t *diff = &p->src_diff[diff_idx];
Yushin Cho75b01002017-06-21 13:43:57 -07001958 *out_sse = pixel_diff_dist(x, plane, diff, diff_stride, blk_row, blk_col,
1959 plane_bsize, tx_bsize);
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001960#if CONFIG_HIGHBITDEPTH
hui sub1cc1f92017-04-11 17:41:29 -07001961 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
Alex Conversef323f012017-04-24 09:26:33 -07001962 *out_sse = ROUND_POWER_OF_TWO(*out_sse, (xd->bd - 8) * 2);
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001963#endif // CONFIG_HIGHBITDEPTH
Angie Chiangc0cf6c02017-04-04 13:47:27 -07001964 }
Alex Conversef323f012017-04-24 09:26:33 -07001965 *out_sse *= 16;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001966
1967 if (eob) {
Angie Chiang228cc182017-04-07 15:22:16 -07001968 if (output_status == OUTPUT_HAS_DECODED_PIXELS) {
Yushin Cho75b01002017-06-21 13:43:57 -07001969 *out_dist = pixel_dist(cpi, x, plane, src, src_stride, dst, dst_stride,
1970 blk_row, blk_col, plane_bsize, tx_bsize);
Angie Chiang8f6ddec2017-04-04 17:07:00 -07001971 } else {
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001972#if CONFIG_HIGHBITDEPTH
Jingning Han6a9dbef2017-04-10 10:25:14 -07001973 uint8_t *recon;
Angie Chiang8f6ddec2017-04-04 17:07:00 -07001974 DECLARE_ALIGNED(16, uint16_t, recon16[MAX_TX_SQUARE]);
Jingning Han6a9dbef2017-04-10 10:25:14 -07001975
1976 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
1977 recon = CONVERT_TO_BYTEPTR(recon16);
1978 else
1979 recon = (uint8_t *)recon16;
Yaowu Xuc27fc142016-08-22 16:08:15 -07001980#else
Angie Chiang8f6ddec2017-04-04 17:07:00 -07001981 DECLARE_ALIGNED(16, uint8_t, recon[MAX_TX_SQUARE]);
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001982#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07001983
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001984#if CONFIG_HIGHBITDEPTH
Angie Chiang8f6ddec2017-04-04 17:07:00 -07001985 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
1986 aom_highbd_convolve_copy(dst, dst_stride, recon, MAX_TX_SIZE, NULL, 0,
1987 NULL, 0, bsw, bsh, xd->bd);
1988 } else {
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001989#endif // CONFIG_HIGHBITDEPTH
Angie Chiang8f6ddec2017-04-04 17:07:00 -07001990 aom_convolve_copy(dst, dst_stride, recon, MAX_TX_SIZE, NULL, 0, NULL,
1991 0, bsw, bsh);
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001992#if CONFIG_HIGHBITDEPTH
Angie Chiang8f6ddec2017-04-04 17:07:00 -07001993 }
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02001994#endif // CONFIG_HIGHBITDEPTH
Angie Chiang41fffae2017-04-03 10:33:18 -07001995
Angie Chiang8f6ddec2017-04-04 17:07:00 -07001996 const PLANE_TYPE plane_type = get_plane_type(plane);
Jingning Han19b5c8f2017-07-06 15:10:12 -07001997 TX_TYPE tx_type =
1998 av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size);
Sebastien Alaiwan9f001f32017-11-28 16:32:33 +01001999 av1_inverse_transform_block(xd, dqcoeff, plane, tx_type, tx_size, recon,
2000 MAX_TX_SIZE, eob,
2001 cpi->common.reduced_tx_set_used);
Angie Chiang41fffae2017-04-03 10:33:18 -07002002
Yushin Chob7b60c52017-07-14 16:18:52 -07002003#if CONFIG_DIST_8X8
Yushin Cho55104332017-08-14 16:15:43 -07002004 if (x->using_dist_8x8 && plane == 0 && (bsw < 8 || bsh < 8)) {
Yushin Cho75b01002017-06-21 13:43:57 -07002005 // Save decoded pixels for inter block in pd->pred to avoid
2006 // block_8x8_rd_txfm_daala_dist() need to produce them
2007 // by calling av1_inverse_transform_block() again.
2008 const int pred_stride = block_size_wide[plane_bsize];
2009 const int pred_idx = (blk_row * pred_stride + blk_col)
2010 << tx_size_wide_log2[0];
2011 int16_t *pred = &pd->pred[pred_idx];
2012 int i, j;
Yushin Cho4483e3d2017-04-18 19:41:20 -07002013
Yushin Cho8ab875d2017-06-23 14:47:21 -07002014#if CONFIG_HIGHBITDEPTH
2015 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
2016 for (j = 0; j < bsh; j++)
2017 for (i = 0; i < bsw; i++)
2018 pred[j * pred_stride + i] =
2019 CONVERT_TO_SHORTPTR(recon)[j * MAX_TX_SIZE + i];
2020 } else {
2021#endif
2022 for (j = 0; j < bsh; j++)
2023 for (i = 0; i < bsw; i++)
2024 pred[j * pred_stride + i] = recon[j * MAX_TX_SIZE + i];
2025#if CONFIG_HIGHBITDEPTH
2026 }
2027#endif // CONFIG_HIGHBITDEPTH
Angie Chiang8f6ddec2017-04-04 17:07:00 -07002028 }
Yushin Chob7b60c52017-07-14 16:18:52 -07002029#endif // CONFIG_DIST_8X8
Yushin Cho75b01002017-06-21 13:43:57 -07002030 *out_dist =
2031 pixel_dist(cpi, x, plane, src, src_stride, recon, MAX_TX_SIZE,
2032 blk_row, blk_col, plane_bsize, tx_bsize);
Angie Chiang8f6ddec2017-04-04 17:07:00 -07002033 }
Alex Conversef323f012017-04-24 09:26:33 -07002034 *out_dist *= 16;
2035 } else {
2036 *out_dist = *out_sse;
Yaowu Xuc27fc142016-08-22 16:08:15 -07002037 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07002038 }
2039}
2040
Yaowu Xuc27fc142016-08-22 16:08:15 -07002041static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
2042 BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
2043 struct rdcost_block_args *args = arg;
2044 MACROBLOCK *const x = args->x;
2045 MACROBLOCKD *const xd = &x->e_mbd;
Urvang Joshi0d1e4ff2017-04-27 16:17:25 -07002046 const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
Angie Chiange50f3ec2017-04-10 15:50:33 -07002047 const AV1_COMP *cpi = args->cpi;
Angie Chiang65a39bb2017-04-11 16:50:04 -07002048 ENTROPY_CONTEXT *a = args->t_above + blk_col;
2049 ENTROPY_CONTEXT *l = args->t_left + blk_row;
Angie Chiang18ad8942017-04-11 12:37:07 -07002050 const AV1_COMMON *cm = &cpi->common;
Yaowu Xuc27fc142016-08-22 16:08:15 -07002051 int64_t rd1, rd2, rd;
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002052 RD_STATS this_rd_stats;
Yushin Choc00769a2017-09-14 14:44:30 -07002053
2054#if CONFIG_DIST_8X8
2055 // If sub8x8 tx, 8x8 or larger partition, and luma channel,
2056 // dist-8x8 disables early skip, because the distortion metrics for
2057 // sub8x8 tx (MSE) and reference distortion from 8x8 or larger partition
2058 // (new distortion metric) are different.
2059 // Exception is: dist-8x8 is enabled but still MSE is used,
2060 // i.e. "--tune=" encoder option is not used.
Yushin Chof9970a52017-10-13 12:57:13 -07002061 int bw = block_size_wide[plane_bsize];
2062 int bh = block_size_high[plane_bsize];
Yushin Choc00769a2017-09-14 14:44:30 -07002063 int disable_early_skip =
Yushin Chof9970a52017-10-13 12:57:13 -07002064 x->using_dist_8x8 && plane == 0 && bw >= 8 && bh >= 8 &&
Yushin Choc00769a2017-09-14 14:44:30 -07002065 (tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4) &&
2066 x->tune_metric != AOM_TUNE_PSNR;
Yushin Choa4817a62017-07-27 13:09:43 -07002067#endif // CONFIG_DIST_8X8
Yushin Cho6341f5c2017-03-24 14:36:28 -07002068
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002069 av1_init_rd_stats(&this_rd_stats);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002070
2071 if (args->exit_early) return;
2072
2073 if (!is_inter_block(mbmi)) {
David Barker761b1ac2017-09-25 11:23:03 +01002074 av1_predict_intra_block_facade(cm, xd, plane, block, blk_col, blk_row,
2075 tx_size);
Angie Chiang62e54cd2017-04-06 10:45:56 -07002076 av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size);
2077 }
2078
Angie Chiangcd9b03f2017-04-16 13:37:13 -07002079#if !CONFIG_TXK_SEL
Sarah Parkere8d0d4c2017-12-06 15:11:37 -08002080// full forward transform and quantization
Sarah Parker345366a2017-06-15 12:13:01 -07002081#if DISABLE_TRELLISQ_SEARCH
2082 av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
Sarah Parkere8d0d4c2017-12-06 15:11:37 -08002083 AV1_XFORM_QUANT_B);
Sarah Parker345366a2017-06-15 12:13:01 -07002084#else
Angie Chiang62e54cd2017-04-06 10:45:56 -07002085 av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
Sarah Parkere8d0d4c2017-12-06 15:11:37 -08002086 AV1_XFORM_QUANT_FP);
Jingning Han3bce7542017-07-25 10:53:57 -07002087
Monty Montgomery4a05a582017-11-01 21:21:07 -04002088// TX-domain results need to shift down to Q2/D10 to match pixel
2089// domain distortion values which are in Q2^2
2090#if CONFIG_DAALA_TX
2091 const int shift = (TX_COEFF_DEPTH - 10) * 2;
2092#else
Jingning Han3bce7542017-07-25 10:53:57 -07002093 const int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2;
Monty Montgomery4a05a582017-11-01 21:21:07 -04002094#endif
Jingning Han3bce7542017-07-25 10:53:57 -07002095 tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block);
2096 tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
Urvang Joshi80893152017-10-27 11:51:14 -07002097 const int buffer_length = av1_get_max_eob(tx_size);
Jingning Han3bce7542017-07-25 10:53:57 -07002098 int64_t tmp_dist;
2099 int64_t tmp;
Monty Montgomerya26262c2017-10-31 07:32:13 -04002100#if CONFIG_DAALA_TX
2101 tmp_dist =
2102 av1_highbd_block_error(coeff, dqcoeff, buffer_length, &tmp, xd->bd);
2103#else
Jingning Han3bce7542017-07-25 10:53:57 -07002104#if CONFIG_HIGHBITDEPTH
2105 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
2106 tmp_dist =
Debargha Mukherjeeb02d2f32017-10-03 11:06:40 -07002107 av1_highbd_block_error(coeff, dqcoeff, buffer_length, &tmp, xd->bd);
Jingning Han3bce7542017-07-25 10:53:57 -07002108 else
2109#endif
Debargha Mukherjeeb02d2f32017-10-03 11:06:40 -07002110 tmp_dist = av1_block_error(coeff, dqcoeff, buffer_length, &tmp);
Monty Montgomerya26262c2017-10-31 07:32:13 -04002111#endif
Debargha Mukherjeeb02d2f32017-10-03 11:06:40 -07002112 tmp_dist = RIGHT_SIGNED_SHIFT(tmp_dist, shift);
Jingning Han3bce7542017-07-25 10:53:57 -07002113
Yushin Choa4817a62017-07-27 13:09:43 -07002114 if (
2115#if CONFIG_DIST_8X8
Yushin Choc00769a2017-09-14 14:44:30 -07002116 disable_early_skip ||
Yushin Choa4817a62017-07-27 13:09:43 -07002117#endif
2118 RDCOST(x->rdmult, 0, tmp_dist) + args->this_rd < args->best_rd) {
Jingning Han3bce7542017-07-25 10:53:57 -07002119 av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize, tx_size,
Debargha Mukherjee51666862017-10-24 14:29:13 -07002120 a, l, CONFIG_LV_MAP);
Jingning Han3bce7542017-07-25 10:53:57 -07002121 } else {
2122 args->exit_early = 1;
2123 return;
2124 }
Sarah Parker345366a2017-06-15 12:13:01 -07002125#endif // DISABLE_TRELLISQ_SEARCH
Angie Chiang62e54cd2017-04-06 10:45:56 -07002126
2127 if (!is_inter_block(mbmi)) {
2128 struct macroblock_plane *const p = &x->plane[plane];
Angie Chiangbc2288c2017-04-09 15:41:17 -07002129 av1_inverse_transform_block_facade(xd, plane, block, blk_row, blk_col,
Frederic Barbier33b39f02017-11-21 11:11:24 +01002130 p->eobs[block], cm->reduced_tx_set_used);
Angie Chiang808d8592017-04-06 18:36:55 -07002131 av1_dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
2132 tx_size, &this_rd_stats.dist, &this_rd_stats.sse,
2133 OUTPUT_HAS_DECODED_PIXELS);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002134 } else {
Angie Chiang808d8592017-04-06 18:36:55 -07002135 av1_dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
2136 tx_size, &this_rd_stats.dist, &this_rd_stats.sse,
2137 OUTPUT_HAS_PREDICTED_PIXELS);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002138 }
Luc Trudeauc7af36d2017-10-11 21:01:00 -04002139 rd = RDCOST(x->rdmult, 0, this_rd_stats.dist);
2140 if (args->this_rd + rd > args->best_rd) {
2141 args->exit_early = 1;
2142 return;
2143 }
Luc Trudeauaa94baa2017-04-27 10:52:21 -04002144#if CONFIG_CFL
Luc Trudeaua8474b62017-12-07 12:13:45 -05002145 if (plane == AOM_PLANE_Y && xd->cfl.store_y && is_cfl_allowed(mbmi)) {
Luc Trudeaub05eeae2017-08-18 15:14:30 -04002146 assert(!is_inter_block(mbmi) || plane_bsize < BLOCK_8X8);
Luc Trudeaub05eeae2017-08-18 15:14:30 -04002147 cfl_store_tx(xd, blk_row, blk_col, tx_size, plane_bsize);
Luc Trudeauaa94baa2017-04-27 10:52:21 -04002148 }
Luc Trudeaub05eeae2017-08-18 15:14:30 -04002149#endif // CONFIG_CFL
Urvang Joshi0d1e4ff2017-04-27 16:17:25 -07002150 const PLANE_TYPE plane_type = get_plane_type(plane);
Jingning Han19b5c8f2017-07-06 15:10:12 -07002151 const TX_TYPE tx_type =
2152 av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size);
Debargha Mukherjeed2cfbef2017-12-03 16:15:27 -08002153
Angie Chiangbd99b382017-06-20 15:11:16 -07002154 const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, mbmi);
Angie Chiang05917872017-04-15 12:28:56 -07002155 this_rd_stats.rate =
Jingning Han7eab9ff2017-07-06 10:12:54 -07002156 av1_cost_coeffs(cpi, x, plane, blk_row, blk_col, block, tx_size,
2157 scan_order, a, l, args->use_fast_coef_costing);
Angie Chiangcd9b03f2017-04-16 13:37:13 -07002158#else // !CONFIG_TXK_SEL
Angie Chiang65a39bb2017-04-11 16:50:04 -07002159 av1_search_txk_type(cpi, x, plane, block, blk_row, blk_col, plane_bsize,
2160 tx_size, a, l, args->use_fast_coef_costing,
2161 &this_rd_stats);
Angie Chiangcd9b03f2017-04-16 13:37:13 -07002162#endif // !CONFIG_TXK_SEL
Angie Chiang65a39bb2017-04-11 16:50:04 -07002163
Angie Chiang3963d632016-11-10 18:41:40 -08002164#if CONFIG_RD_DEBUG
Angie Chiange94556b2016-11-09 10:59:30 -08002165 av1_update_txb_coeff_cost(&this_rd_stats, plane, tx_size, blk_row, blk_col,
2166 this_rd_stats.rate);
Fergus Simpson4063a682017-02-28 16:52:22 -08002167#endif // CONFIG_RD_DEBUG
Yushin Cho6341f5c2017-03-24 14:36:28 -07002168 av1_set_txb_context(x, plane, block, tx_size, a, l);
Angie Chiangb3a12b52017-03-23 14:53:10 -07002169
Urvang Joshi70006e42017-06-14 16:08:55 -07002170 rd1 = RDCOST(x->rdmult, this_rd_stats.rate, this_rd_stats.dist);
2171 rd2 = RDCOST(x->rdmult, 0, this_rd_stats.sse);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002172
2173 // TODO(jingning): temporarily enabled only for luma component
Yaowu Xuf883b422016-08-30 14:01:10 -07002174 rd = AOMMIN(rd1, rd2);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002175
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002176 this_rd_stats.skip &= !x->plane[plane].eobs[block];
Yushin Chod0b77ac2017-10-20 17:33:16 -07002177
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002178 av1_merge_rd_stats(&args->rd_stats, &this_rd_stats);
Yushin Cho7a428ba2017-01-12 16:28:49 -08002179
Yaowu Xuc27fc142016-08-22 16:08:15 -07002180 args->this_rd += rd;
2181
Yushin Chob7b60c52017-07-14 16:18:52 -07002182#if CONFIG_DIST_8X8
Yushin Choc00769a2017-09-14 14:44:30 -07002183 if (!disable_early_skip)
Yushin Cho30a2c5f2017-06-07 14:18:54 -07002184#endif
2185 if (args->this_rd > args->best_rd) {
2186 args->exit_early = 1;
2187 return;
2188 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07002189}
2190
Yushin Chob7b60c52017-07-14 16:18:52 -07002191#if CONFIG_DIST_8X8
2192static void dist_8x8_sub8x8_txfm_rd(const AV1_COMP *const cpi, MACROBLOCK *x,
2193 BLOCK_SIZE bsize,
2194 struct rdcost_block_args *args) {
Yushin Cho7a428ba2017-01-12 16:28:49 -08002195 MACROBLOCKD *const xd = &x->e_mbd;
Yushin Cho30a2c5f2017-06-07 14:18:54 -07002196 const struct macroblockd_plane *const pd = &xd->plane[0];
2197 const struct macroblock_plane *const p = &x->plane[0];
Yushin Cho4483e3d2017-04-18 19:41:20 -07002198 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
Yushin Cho30a2c5f2017-06-07 14:18:54 -07002199 const int src_stride = p->src.stride;
2200 const int dst_stride = pd->dst.stride;
Yushin Cho30a2c5f2017-06-07 14:18:54 -07002201 const uint8_t *src = &p->src.buf[0];
2202 const uint8_t *dst = &pd->dst.buf[0];
2203 const int16_t *pred = &pd->pred[0];
Yushin Cho2f025aa2017-09-28 17:39:21 -07002204 int bw = block_size_wide[bsize];
2205 int bh = block_size_high[bsize];
2206 int visible_w = bw;
2207 int visible_h = bh;
Yushin Cho7a428ba2017-01-12 16:28:49 -08002208
Yushin Cho30a2c5f2017-06-07 14:18:54 -07002209 int i, j;
2210 int64_t rd, rd1, rd2;
Yushin Chof0049ba2017-11-20 15:22:43 -08002211 int64_t sse = INT64_MAX, dist = INT64_MAX;
Yushin Cho30a2c5f2017-06-07 14:18:54 -07002212 int qindex = x->qindex;
Yushin Cho7a428ba2017-01-12 16:28:49 -08002213
Yushin Cho30a2c5f2017-06-07 14:18:54 -07002214 assert((bw & 0x07) == 0);
2215 assert((bh & 0x07) == 0);
Yushin Cho7a428ba2017-01-12 16:28:49 -08002216
Yushin Cho2f025aa2017-09-28 17:39:21 -07002217 get_txb_dimensions(xd, 0, bsize, 0, 0, bsize, &bw, &bh, &visible_w,
2218 &visible_h);
2219
Yushin Chof0049ba2017-11-20 15:22:43 -08002220 const int diff_stride = block_size_wide[bsize];
2221 const int16_t *diff = p->src_diff;
2222 sse = av1_dist_8x8_diff(x, src, src_stride, diff, diff_stride, bw, bh,
2223 visible_w, visible_h, qindex);
2224 sse = ROUND_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
2225 sse *= 16;
Yushin Cho4483e3d2017-04-18 19:41:20 -07002226
Yushin Cho30a2c5f2017-06-07 14:18:54 -07002227 if (!is_inter_block(mbmi)) {
Yushin Chof0049ba2017-11-20 15:22:43 -08002228 dist = av1_dist_8x8(cpi, x, src, src_stride, dst, dst_stride, bsize, bw, bh,
2229 visible_w, visible_h, qindex);
2230 dist *= 16;
Yushin Cho30a2c5f2017-06-07 14:18:54 -07002231 } else {
Yushin Chof0049ba2017-11-20 15:22:43 -08002232// For inter mode, the decoded pixels are provided in pd->pred,
2233// while the predicted pixels are in dst.
2234#if CONFIG_HIGHBITDEPTH
2235 uint8_t *pred8;
2236 DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]);
2237
2238 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
2239 pred8 = CONVERT_TO_BYTEPTR(pred16);
2240 else
2241 pred8 = (uint8_t *)pred16;
2242#else
2243 DECLARE_ALIGNED(16, uint8_t, pred8[MAX_SB_SQUARE]);
2244#endif // CONFIG_HIGHBITDEPTH
2245
2246#if CONFIG_HIGHBITDEPTH
2247 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
2248 for (j = 0; j < bh; j++)
2249 for (i = 0; i < bw; i++)
2250 CONVERT_TO_SHORTPTR(pred8)[j * bw + i] = pred[j * bw + i];
2251 } else {
2252#endif
2253 for (j = 0; j < bh; j++)
2254 for (i = 0; i < bw; i++) pred8[j * bw + i] = (uint8_t)pred[j * bw + i];
2255#if CONFIG_HIGHBITDEPTH
Yushin Cho2f025aa2017-09-28 17:39:21 -07002256 }
Yushin Chof0049ba2017-11-20 15:22:43 -08002257#endif // CONFIG_HIGHBITDEPTH
2258
2259 dist = av1_dist_8x8(cpi, x, src, src_stride, pred8, bw, bsize, bw, bh,
2260 visible_w, visible_h, qindex);
2261 dist *= 16;
Yushin Cho7a428ba2017-01-12 16:28:49 -08002262 }
2263
Yushin Chof0049ba2017-11-20 15:22:43 -08002264 if (x->tune_metric == AOM_TUNE_PSNR && xd->bd == 8) {
2265 assert(args->rd_stats.sse == sse);
2266 assert(args->rd_stats.dist == dist);
2267 }
2268 args->rd_stats.sse = sse;
2269 args->rd_stats.dist = dist;
2270
Urvang Joshi70006e42017-06-14 16:08:55 -07002271 rd1 = RDCOST(x->rdmult, args->rd_stats.rate, args->rd_stats.dist);
2272 rd2 = RDCOST(x->rdmult, 0, args->rd_stats.sse);
Yushin Cho7a428ba2017-01-12 16:28:49 -08002273 rd = AOMMIN(rd1, rd2);
2274
Yushin Cho30a2c5f2017-06-07 14:18:54 -07002275 args->rd_stats.rdcost = rd;
2276 args->this_rd = rd;
Yushin Cho04eb9592017-06-21 17:35:06 -07002277
2278 if (args->this_rd > args->best_rd) args->exit_early = 1;
Yushin Cho7a428ba2017-01-12 16:28:49 -08002279}
Yushin Chob7b60c52017-07-14 16:18:52 -07002280#endif // CONFIG_DIST_8X8
Yushin Cho7a428ba2017-01-12 16:28:49 -08002281
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002282static void txfm_rd_in_plane(MACROBLOCK *x, const AV1_COMP *cpi,
2283 RD_STATS *rd_stats, int64_t ref_best_rd, int plane,
2284 BLOCK_SIZE bsize, TX_SIZE tx_size,
2285 int use_fast_coef_casting) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07002286 MACROBLOCKD *const xd = &x->e_mbd;
2287 const struct macroblockd_plane *const pd = &xd->plane[plane];
Yaowu Xuc27fc142016-08-22 16:08:15 -07002288 struct rdcost_block_args args;
Yaowu Xuf883b422016-08-30 14:01:10 -07002289 av1_zero(args);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002290 args.x = x;
2291 args.cpi = cpi;
2292 args.best_rd = ref_best_rd;
2293 args.use_fast_coef_costing = use_fast_coef_casting;
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002294 av1_init_rd_stats(&args.rd_stats);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002295
2296 if (plane == 0) xd->mi[0]->mbmi.tx_size = tx_size;
2297
Yaowu Xuf883b422016-08-30 14:01:10 -07002298 av1_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002299
Yushin Cho30a2c5f2017-06-07 14:18:54 -07002300 av1_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm,
2301 &args);
Yushin Chob7b60c52017-07-14 16:18:52 -07002302#if CONFIG_DIST_8X8
Yushin Chof9970a52017-10-13 12:57:13 -07002303 int bw = block_size_wide[bsize];
2304 int bh = block_size_high[bsize];
2305
2306 if (x->using_dist_8x8 && !args.exit_early && plane == 0 && bw >= 8 &&
2307 bh >= 8 && (tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4))
Yushin Chob7b60c52017-07-14 16:18:52 -07002308 dist_8x8_sub8x8_txfm_rd(cpi, x, bsize, &args);
Yushin Cho30a2c5f2017-06-07 14:18:54 -07002309#endif
Yushin Cho7a428ba2017-01-12 16:28:49 -08002310
Yaowu Xuc27fc142016-08-22 16:08:15 -07002311 if (args.exit_early) {
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002312 av1_invalid_rd_stats(rd_stats);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002313 } else {
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002314 *rd_stats = args.rd_stats;
Yaowu Xuc27fc142016-08-22 16:08:15 -07002315 }
2316}
2317
Yue Chen3dd03e32017-10-17 15:39:52 -07002318static int tx_size_cost(const AV1_COMMON *const cm, const MACROBLOCK *const x,
Urvang Joshiab8840e2017-10-06 16:38:24 -07002319 BLOCK_SIZE bsize, TX_SIZE tx_size) {
Urvang Joshi0d1e4ff2017-04-27 16:17:25 -07002320 const MACROBLOCKD *const xd = &x->e_mbd;
2321 const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
Urvang Joshifeb925f2016-12-05 10:37:29 -08002322
Rupert Swarbrickfcff0b22017-10-05 09:26:04 +01002323 if (cm->tx_mode == TX_MODE_SELECT && block_signals_txsize(mbmi->sb_type)) {
Urvang Joshifeb925f2016-12-05 10:37:29 -08002324 const int is_inter = is_inter_block(mbmi);
Debargha Mukherjeee4e18fc2017-12-06 23:43:24 -08002325 const int32_t tx_size_cat = bsize_to_tx_size_cat(bsize, is_inter);
Debargha Mukherjee0fa057f2017-12-06 17:06:29 -08002326 const int depth = tx_size_to_depth(tx_size, bsize, is_inter);
Urvang Joshiab8840e2017-10-06 16:38:24 -07002327 const int tx_size_ctx = get_tx_size_context(xd);
Yue Chenb23d00a2017-07-28 17:01:21 -07002328 int r_tx_size = x->tx_size_cost[tx_size_cat][tx_size_ctx][depth];
Urvang Joshifeb925f2016-12-05 10:37:29 -08002329 return r_tx_size;
2330 } else {
2331 return 0;
2332 }
2333}
2334
Hui Suddbcde22017-09-18 17:22:02 -07002335// TODO(angiebird): use this function whenever it's possible
Yue Chenb23d00a2017-07-28 17:01:21 -07002336int av1_tx_type_cost(const AV1_COMMON *cm, const MACROBLOCK *x,
2337 const MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
2338 TX_SIZE tx_size, TX_TYPE tx_type) {
Angie Chiang05917872017-04-15 12:28:56 -07002339 if (plane > 0) return 0;
2340
Jingning Han243b66b2017-06-23 12:11:47 -07002341 tx_size = get_min_tx_size(tx_size);
Jingning Han243b66b2017-06-23 12:11:47 -07002342
Angie Chiang65201562017-04-10 15:23:28 -07002343 const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
2344 const int is_inter = is_inter_block(mbmi);
Angie Chiang65201562017-04-10 15:23:28 -07002345 if (get_ext_tx_types(tx_size, bsize, is_inter, cm->reduced_tx_set_used) > 1 &&
2346 !xd->lossless[xd->mi[0]->mbmi.segment_id]) {
2347 const int ext_tx_set =
2348 get_ext_tx_set(tx_size, bsize, is_inter, cm->reduced_tx_set_used);
2349 if (is_inter) {
2350 if (ext_tx_set > 0)
Yue Chenb23d00a2017-07-28 17:01:21 -07002351 return x
Angie Chiang65201562017-04-10 15:23:28 -07002352 ->inter_tx_type_costs[ext_tx_set][txsize_sqr_map[tx_size]][tx_type];
2353 } else {
Yue Chen57b8ff62017-10-10 23:37:31 -07002354 if (ext_tx_set > 0 && ALLOW_INTRA_EXT_TX) {
2355#if CONFIG_FILTER_INTRA
2356 PREDICTION_MODE intra_dir;
2357 if (mbmi->filter_intra_mode_info.use_filter_intra_mode[0])
2358 intra_dir = fimode_to_intradir[mbmi->filter_intra_mode_info
2359 .filter_intra_mode[0]];
2360 else
2361 intra_dir = mbmi->mode;
2362 return x->intra_tx_type_costs[ext_tx_set][txsize_sqr_map[tx_size]]
2363 [intra_dir][tx_type];
2364#else
Yue Chenb23d00a2017-07-28 17:01:21 -07002365 return x->intra_tx_type_costs[ext_tx_set][txsize_sqr_map[tx_size]]
2366 [mbmi->mode][tx_type];
Yue Chen57b8ff62017-10-10 23:37:31 -07002367#endif
2368 }
Angie Chiang65201562017-04-10 15:23:28 -07002369 }
2370 }
Angie Chiang65201562017-04-10 15:23:28 -07002371 return 0;
2372}
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002373static int64_t txfm_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
2374 RD_STATS *rd_stats, int64_t ref_best_rd, BLOCK_SIZE bs,
Urvang Joshi9752a2e2017-10-02 17:32:27 -07002375 TX_TYPE tx_type, TX_SIZE tx_size) {
Urvang Joshi52648442016-10-13 17:27:51 -07002376 const AV1_COMMON *const cm = &cpi->common;
Yaowu Xuc27fc142016-08-22 16:08:15 -07002377 MACROBLOCKD *const xd = &x->e_mbd;
2378 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
2379 int64_t rd = INT64_MAX;
Zoe Liu1eed2df2017-10-16 17:13:15 -07002380 const int skip_ctx = av1_get_skip_context(xd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002381 int s0, s1;
2382 const int is_inter = is_inter_block(mbmi);
Jingning Hanbf9c6b72016-12-14 14:50:45 -08002383 const int tx_select =
Rupert Swarbrick8e5b39a2017-12-11 15:54:30 +00002384 cm->tx_mode == TX_MODE_SELECT && block_signals_txsize(mbmi->sb_type);
Urvang Joshifeb925f2016-12-05 10:37:29 -08002385
Yue Chen3dd03e32017-10-17 15:39:52 -07002386 const int r_tx_size = tx_size_cost(cm, x, bs, tx_size);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002387
Yaowu Xuc27fc142016-08-22 16:08:15 -07002388 assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed_bsize(bs)));
Yaowu Xuc27fc142016-08-22 16:08:15 -07002389
Zoe Liu1eed2df2017-10-16 17:13:15 -07002390 s0 = x->skip_cost[skip_ctx][0];
2391 s1 = x->skip_cost[skip_ctx][1];
Yaowu Xuc27fc142016-08-22 16:08:15 -07002392
2393 mbmi->tx_type = tx_type;
2394 mbmi->tx_size = tx_size;
Yue Chen95e13e22017-11-01 23:56:35 -07002395#if CONFIG_FILTER_INTRA
2396 if (!is_inter_block(mbmi) &&
2397 mbmi->filter_intra_mode_info.use_filter_intra_mode[0] &&
Yue Chen18f6c152017-11-06 11:23:47 -08002398 !av1_filter_intra_allowed_txsize(tx_size)) {
2399 rd_stats->rate = INT_MAX;
Urvang Joshife8323c2017-11-17 15:23:51 -08002400 // Note: Initialize following to avoid uninitialied warnings.
2401 rd_stats->dist = INT64_MAX;
2402 rd_stats->skip = 0;
2403 rd_stats->sse = INT64_MAX;
Yue Chen95e13e22017-11-01 23:56:35 -07002404 return INT64_MAX;
Yue Chen18f6c152017-11-06 11:23:47 -08002405 }
Yue Chen95e13e22017-11-01 23:56:35 -07002406#endif
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002407 txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, 0, bs, tx_size,
Yaowu Xuc27fc142016-08-22 16:08:15 -07002408 cpi->sf.use_fast_coef_costing);
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002409 if (rd_stats->rate == INT_MAX) return INT64_MAX;
Angie Chiangcd9b03f2017-04-16 13:37:13 -07002410#if !CONFIG_TXK_SEL
Angie Chiang05917872017-04-15 12:28:56 -07002411 int plane = 0;
Yue Chenb23d00a2017-07-28 17:01:21 -07002412 rd_stats->rate += av1_tx_type_cost(cm, x, xd, bs, plane, tx_size, tx_type);
Angie Chiang05917872017-04-15 12:28:56 -07002413#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -07002414
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002415 if (rd_stats->skip) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07002416 if (is_inter) {
Urvang Joshi70006e42017-06-14 16:08:55 -07002417 rd = RDCOST(x->rdmult, s1, rd_stats->sse);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002418 } else {
Urvang Joshi70006e42017-06-14 16:08:55 -07002419 rd = RDCOST(x->rdmult, s1 + r_tx_size * tx_select, rd_stats->sse);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002420 }
2421 } else {
Urvang Joshi70006e42017-06-14 16:08:55 -07002422 rd = RDCOST(x->rdmult, rd_stats->rate + s0 + r_tx_size * tx_select,
2423 rd_stats->dist);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002424 }
2425
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002426 if (tx_select) rd_stats->rate += r_tx_size;
Yaowu Xuc27fc142016-08-22 16:08:15 -07002427
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002428 if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
2429 !(rd_stats->skip))
Urvang Joshi70006e42017-06-14 16:08:55 -07002430 rd = AOMMIN(rd, RDCOST(x->rdmult, s1, rd_stats->sse));
Yaowu Xuc27fc142016-08-22 16:08:15 -07002431
2432 return rd;
2433}
2434
Angie Chiang2d147c12017-04-05 11:23:59 -07002435static int skip_txfm_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs,
Alexander Bokov0c7eb102017-09-07 18:49:00 -07002436 TX_TYPE tx_type, TX_SIZE tx_size, int prune) {
Urvang Joshi0d1e4ff2017-04-27 16:17:25 -07002437 const MACROBLOCKD *const xd = &x->e_mbd;
2438 const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
Angie Chiang2d147c12017-04-05 11:23:59 -07002439 const int is_inter = is_inter_block(mbmi);
Angie Chiang2d147c12017-04-05 11:23:59 -07002440
Angie Chianga4fa1902017-04-05 15:26:09 -07002441 if (mbmi->ref_mv_idx > 0 && tx_type != DCT_DCT) return 1;
Angie Chiang2d147c12017-04-05 11:23:59 -07002442 if (FIXED_TX_TYPE && tx_type != get_default_tx_type(0, xd, 0, tx_size))
2443 return 1;
2444 if (!is_inter && x->use_default_intra_tx_type &&
2445 tx_type != get_default_tx_type(0, xd, 0, tx_size))
2446 return 1;
2447 if (is_inter && x->use_default_inter_tx_type &&
2448 tx_type != get_default_tx_type(0, xd, 0, tx_size))
2449 return 1;
Angie Chiang2d147c12017-04-05 11:23:59 -07002450 const AV1_COMMON *const cm = &cpi->common;
Hui Suddbcde22017-09-18 17:22:02 -07002451 const TxSetType tx_set_type =
2452 get_ext_tx_set_type(tx_size, bs, is_inter, cm->reduced_tx_set_used);
2453 if (!av1_ext_tx_used[tx_set_type][tx_type]) return 1;
Angie Chiang2d147c12017-04-05 11:23:59 -07002454 if (is_inter) {
Angie Chiang2d147c12017-04-05 11:23:59 -07002455 if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE) {
Alexander Bokov0c7eb102017-09-07 18:49:00 -07002456 if (!do_tx_type_search(tx_type, prune, cpi->sf.tx_type_search.prune_mode))
2457 return 1;
Angie Chiang2d147c12017-04-05 11:23:59 -07002458 }
2459 } else {
2460 if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) {
2461 if (tx_type != intra_mode_to_tx_type_context[mbmi->mode]) return 1;
2462 }
Angie Chiang2d147c12017-04-05 11:23:59 -07002463 }
Angie Chiang2d147c12017-04-05 11:23:59 -07002464 return 0;
2465}
2466
Urvang Joshi52648442016-10-13 17:27:51 -07002467static int64_t estimate_yrd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bs,
2468 MACROBLOCK *x, int *r, int64_t *d, int *s,
2469 int64_t *sse, int64_t ref_best_rd) {
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002470 RD_STATS rd_stats;
2471 int64_t rd = txfm_yrd(cpi, x, &rd_stats, ref_best_rd, bs, DCT_DCT,
2472 max_txsize_lookup[bs]);
2473 *r = rd_stats.rate;
2474 *d = rd_stats.dist;
2475 *s = rd_stats.skip;
2476 *sse = rd_stats.sse;
2477 return rd;
Yaowu Xuc27fc142016-08-22 16:08:15 -07002478}
Yaowu Xuc27fc142016-08-22 16:08:15 -07002479
Urvang Joshi52648442016-10-13 17:27:51 -07002480static void choose_largest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002481 RD_STATS *rd_stats, int64_t ref_best_rd,
Urvang Joshi52648442016-10-13 17:27:51 -07002482 BLOCK_SIZE bs) {
2483 const AV1_COMMON *const cm = &cpi->common;
Yaowu Xuc27fc142016-08-22 16:08:15 -07002484 MACROBLOCKD *const xd = &x->e_mbd;
2485 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
2486 TX_TYPE tx_type, best_tx_type = DCT_DCT;
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002487 int64_t this_rd, best_rd = INT64_MAX;
Zoe Liu1eed2df2017-10-16 17:13:15 -07002488 const int skip_ctx = av1_get_skip_context(xd);
2489 int s0 = x->skip_cost[skip_ctx][0];
2490 int s1 = x->skip_cost[skip_ctx][1];
Yaowu Xuc27fc142016-08-22 16:08:15 -07002491 const int is_inter = is_inter_block(mbmi);
2492 int prune = 0;
Angie Chiangaa0c34b2017-04-25 12:25:38 -07002493 const int plane = 0;
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002494 av1_invalid_rd_stats(rd_stats);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002495
2496 mbmi->tx_size = tx_size_from_tx_mode(bs, cm->tx_mode, is_inter);
Jingning Hane67b38a2016-11-04 10:30:00 -07002497 mbmi->min_tx_size = get_min_tx_size(mbmi->tx_size);
Hui Suddbcde22017-09-18 17:22:02 -07002498 const TxSetType tx_set_type =
2499 get_ext_tx_set_type(mbmi->tx_size, bs, is_inter, cm->reduced_tx_set_used);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002500
Alexander Bokov0c7eb102017-09-07 18:49:00 -07002501 if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE &&
2502 !x->use_default_inter_tx_type) {
Alexander Bokov79a37242017-09-29 11:25:55 -07002503 prune = prune_tx(cpi, bs, x, xd, tx_set_type, 0);
Alexander Bokov0c7eb102017-09-07 18:49:00 -07002504 }
Sarah Parkere68a3e42017-02-16 14:03:24 -08002505 if (get_ext_tx_types(mbmi->tx_size, bs, is_inter, cm->reduced_tx_set_used) >
2506 1 &&
Yaowu Xuc27fc142016-08-22 16:08:15 -07002507 !xd->lossless[mbmi->segment_id]) {
Yushin Cho77bba8d2016-11-04 16:36:56 -07002508 for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
Hui Suddbcde22017-09-18 17:22:02 -07002509 if (!av1_ext_tx_used[tx_set_type][tx_type]) continue;
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002510 RD_STATS this_rd_stats;
Yaowu Xuc27fc142016-08-22 16:08:15 -07002511 if (is_inter) {
2512 if (x->use_default_inter_tx_type &&
2513 tx_type != get_default_tx_type(0, xd, 0, mbmi->tx_size))
2514 continue;
Yaowu Xuc27fc142016-08-22 16:08:15 -07002515 if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE) {
Alexander Bokov0c7eb102017-09-07 18:49:00 -07002516 if (!do_tx_type_search(tx_type, prune,
2517 cpi->sf.tx_type_search.prune_mode))
2518 continue;
Yaowu Xuc27fc142016-08-22 16:08:15 -07002519 }
2520 } else {
2521 if (x->use_default_intra_tx_type &&
2522 tx_type != get_default_tx_type(0, xd, 0, mbmi->tx_size))
2523 continue;
2524 if (!ALLOW_INTRA_EXT_TX && bs >= BLOCK_8X8) {
2525 if (tx_type != intra_mode_to_tx_type_context[mbmi->mode]) continue;
2526 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07002527 }
2528
2529 mbmi->tx_type = tx_type;
2530
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002531 txfm_rd_in_plane(x, cpi, &this_rd_stats, ref_best_rd, 0, bs,
Yaowu Xuc27fc142016-08-22 16:08:15 -07002532 mbmi->tx_size, cpi->sf.use_fast_coef_costing);
Yushin Chod0b77ac2017-10-20 17:33:16 -07002533
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002534 if (this_rd_stats.rate == INT_MAX) continue;
Yue Chenb23d00a2017-07-28 17:01:21 -07002535 av1_tx_type_cost(cm, x, xd, bs, plane, mbmi->tx_size, tx_type);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002536
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002537 if (this_rd_stats.skip)
Urvang Joshi70006e42017-06-14 16:08:55 -07002538 this_rd = RDCOST(x->rdmult, s1, this_rd_stats.sse);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002539 else
Urvang Joshi70006e42017-06-14 16:08:55 -07002540 this_rd =
2541 RDCOST(x->rdmult, this_rd_stats.rate + s0, this_rd_stats.dist);
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002542 if (is_inter_block(mbmi) && !xd->lossless[mbmi->segment_id] &&
2543 !this_rd_stats.skip)
Urvang Joshi70006e42017-06-14 16:08:55 -07002544 this_rd = AOMMIN(this_rd, RDCOST(x->rdmult, s1, this_rd_stats.sse));
Yaowu Xuc27fc142016-08-22 16:08:15 -07002545
2546 if (this_rd < best_rd) {
2547 best_rd = this_rd;
2548 best_tx_type = mbmi->tx_type;
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002549 *rd_stats = this_rd_stats;
Yaowu Xuc27fc142016-08-22 16:08:15 -07002550 }
2551 }
Yushin Chod0b77ac2017-10-20 17:33:16 -07002552
Guillaume Martres4e4d3a02016-08-21 19:02:33 -07002553 } else {
2554 mbmi->tx_type = DCT_DCT;
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002555 txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, 0, bs, mbmi->tx_size,
2556 cpi->sf.use_fast_coef_costing);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002557 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07002558 mbmi->tx_type = best_tx_type;
Yaowu Xuc27fc142016-08-22 16:08:15 -07002559}
2560
Urvang Joshi52648442016-10-13 17:27:51 -07002561static void choose_smallest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x,
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002562 RD_STATS *rd_stats, int64_t ref_best_rd,
Yaowu Xuc27fc142016-08-22 16:08:15 -07002563 BLOCK_SIZE bs) {
2564 MACROBLOCKD *const xd = &x->e_mbd;
2565 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
2566
2567 mbmi->tx_size = TX_4X4;
2568 mbmi->tx_type = DCT_DCT;
Jingning Hane67b38a2016-11-04 10:30:00 -07002569 mbmi->min_tx_size = get_min_tx_size(TX_4X4);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002570
Angie Chiang7c2b7f22016-11-07 16:00:00 -08002571 txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, 0, bs, mbmi->tx_size,
2572 cpi->sf.use_fast_coef_costing);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002573}
2574
Angie Chiangf1cb0752017-04-10 16:01:20 -07002575static INLINE int bsize_to_num_blk(BLOCK_SIZE bsize) {
2576 int num_blk = 1 << (num_pels_log2_lookup[bsize] - 2 * tx_size_wide_log2[0]);
2577 return num_blk;
2578}
Angie Chiangf1cb0752017-04-10 16:01:20 -07002579
// Jointly searches transform sizes (starting from the largest rectangular
// size for 'bs' and stepping down through sub_tx_size_map for up to
// MAX_TX_DEPTH levels) and transform types, keeping the pair with the lowest
// RD cost. Writes the winners to mbmi->tx_size / mbmi->tx_type (and
// mbmi->txk_type under CONFIG_TXK_SEL) and the winning stats to *rd_stats.
static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi,
                                        MACROBLOCK *x, RD_STATS *rd_stats,
                                        int64_t ref_best_rd, BLOCK_SIZE bs) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int64_t rd = INT64_MAX;
  int n;
  int start_tx;
  int depth;
  int64_t best_rd = INT64_MAX, last_rd = INT64_MAX;
  const int is_inter = is_inter_block(mbmi);
  const TX_SIZE max_rect_tx_size = get_max_rect_tx_size(bs, is_inter);
  TX_SIZE best_tx_size = max_rect_tx_size;
  TX_TYPE best_tx_type = DCT_DCT;
#if CONFIG_TXK_SEL
  TX_TYPE best_txk_type[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
#endif  // CONFIG_TXK_SEL
  const int tx_select = cm->tx_mode == TX_MODE_SELECT;

  av1_invalid_rd_stats(rd_stats);

  if (tx_select) {
    // Full search: begin at the largest rectangular size, depth 0.
    start_tx = max_rect_tx_size;
    depth = 0;
  } else {
    // Fixed tx mode: evaluate only the single size implied by cm->tx_mode.
    const TX_SIZE chosen_tx_size =
        tx_size_from_tx_mode(bs, cm->tx_mode, is_inter);
    start_tx = chosen_tx_size;
    depth = MAX_TX_DEPTH;
  }

  int prune = 0;
  if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE &&
      !x->use_default_inter_tx_type) {
    prune = prune_tx(cpi, bs, x, xd, EXT_TX_SET_ALL16, 0);
  }

  last_rd = INT64_MAX;
  for (n = start_tx; depth <= MAX_TX_DEPTH;
       depth++, n = sub_tx_size_map[0][n]) {
    TX_TYPE tx_start = DCT_DCT;
    TX_TYPE tx_end = TX_TYPES;
#if CONFIG_TXK_SEL
    // The tx_type becomes dummy when lv_map is on. The tx_type search will be
    // performed in av1_search_txk_type()
    tx_end = DCT_DCT + 1;
#endif
    TX_TYPE tx_type;
    for (tx_type = tx_start; tx_type < tx_end; ++tx_type) {
      RD_STATS this_rd_stats;
      if (skip_txfm_search(cpi, x, bs, tx_type, n, prune)) continue;
      rd = txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs, tx_type, n);

      // Early termination in transform size search.
      if (cpi->sf.tx_size_search_breakout &&
          (rd == INT64_MAX ||
           (this_rd_stats.skip == 1 && tx_type != DCT_DCT && n != start_tx) ||
           (n != (int)start_tx && rd > last_rd))) {
        break;
      }

      last_rd = rd;
      // Tighten the bound so subsequent txfm_yrd calls can bail out earlier.
      ref_best_rd = AOMMIN(rd, ref_best_rd);
      if (rd < best_rd) {
#if CONFIG_TXK_SEL
        memcpy(best_txk_type, mbmi->txk_type,
               sizeof(best_txk_type[0]) * MAX_SB_SQUARE /
                   (TX_SIZE_W_MIN * TX_SIZE_H_MIN));
#endif
        best_tx_type = tx_type;
        best_tx_size = n;
        best_rd = rd;
        *rd_stats = this_rd_stats;
      }
#if !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
      // NOTE(review): this local 'is_inter' shadows the function-scope one
      // declared above (same value); the redeclaration could be removed.
      const int is_inter = is_inter_block(mbmi);
      if (mbmi->sb_type < BLOCK_8X8 && is_inter) break;
#endif  // !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4
    }
  }
  // Commit the best (size, type) pair found.
  mbmi->tx_size = best_tx_size;
  mbmi->tx_type = best_tx_type;
#if CONFIG_TXK_SEL
  memcpy(mbmi->txk_type, best_txk_type,
         sizeof(best_txk_type[0]) * MAX_SB_SQUARE /
             (TX_SIZE_W_MIN * TX_SIZE_H_MIN));
#endif

  mbmi->min_tx_size = get_min_tx_size(mbmi->tx_size);
}
2671
Angie Chiang0e9a2e92016-11-08 09:45:40 -08002672static void super_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
2673 RD_STATS *rd_stats, BLOCK_SIZE bs,
2674 int64_t ref_best_rd) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07002675 MACROBLOCKD *xd = &x->e_mbd;
Angie Chiang0e9a2e92016-11-08 09:45:40 -08002676 av1_init_rd_stats(rd_stats);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002677
2678 assert(bs == xd->mi[0]->mbmi.sb_type);
2679
Yaowu Xu1e2aae12017-02-27 16:33:14 -08002680 if (xd->lossless[xd->mi[0]->mbmi.segment_id]) {
Angie Chiang0e9a2e92016-11-08 09:45:40 -08002681 choose_smallest_tx_size(cpi, x, rd_stats, ref_best_rd, bs);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002682 } else if (cpi->sf.tx_size_search_method == USE_LARGESTALL) {
Angie Chiang0e9a2e92016-11-08 09:45:40 -08002683 choose_largest_tx_size(cpi, x, rd_stats, ref_best_rd, bs);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002684 } else {
Angie Chiang0e9a2e92016-11-08 09:45:40 -08002685 choose_tx_size_type_from_rd(cpi, x, rd_stats, ref_best_rd, bs);
Yaowu Xuc27fc142016-08-22 16:08:15 -07002686 }
2687}
2688
2689static int conditional_skipintra(PREDICTION_MODE mode,
2690 PREDICTION_MODE best_intra_mode) {
2691 if (mode == D117_PRED && best_intra_mode != V_PRED &&
2692 best_intra_mode != D135_PRED)
2693 return 1;
2694 if (mode == D63_PRED && best_intra_mode != V_PRED &&
2695 best_intra_mode != D45_PRED)
2696 return 1;
2697 if (mode == D207_PRED && best_intra_mode != H_PRED &&
2698 best_intra_mode != D45_PRED)
2699 return 1;
2700 if (mode == D153_PRED && best_intra_mode != H_PRED &&
2701 best_intra_mode != D135_PRED)
2702 return 1;
2703 return 0;
2704}
2705
// Model based RD estimation for luma intra blocks.
// Runs intra prediction for every transform block of the prediction unit,
// estimates rate/distortion with model_rd_for_sb (no actual transform
// coding), folds in the mode-dependent side costs, and returns the RD cost.
// Side effect: sets mbmi->tx_size to the size implied by cpi->common.tx_mode.
static int64_t intra_model_yrd(const AV1_COMP *const cpi, MACROBLOCK *const x,
                               BLOCK_SIZE bsize, int mode_cost) {
  const AV1_COMMON *cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  assert(!is_inter_block(mbmi));
  RD_STATS this_rd_stats;
  int row, col;
  int64_t temp_sse, this_rd;
  const TX_SIZE tx_size = tx_size_from_tx_mode(bsize, cpi->common.tx_mode, 0);
  const int stepr = tx_size_high_unit[tx_size];
  const int stepc = tx_size_wide_unit[tx_size];
  const int max_blocks_wide = max_block_wide(xd, bsize, 0);
  const int max_blocks_high = max_block_high(xd, bsize, 0);
  mbmi->tx_size = tx_size;
  // Prediction.
  const int step = stepr * stepc;
  int block = 0;
  for (row = 0; row < max_blocks_high; row += stepr) {
    for (col = 0; col < max_blocks_wide; col += stepc) {
      av1_predict_intra_block_facade(cm, xd, 0, block, col, row, tx_size);
      block += step;
    }
  }
  // RD estimation.
  model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &this_rd_stats.rate,
                  &this_rd_stats.dist, &this_rd_stats.skip, &temp_sse);
#if CONFIG_EXT_INTRA
  // Directional modes additionally signal an angle delta.
  if (av1_is_directional_mode(mbmi->mode, bsize) &&
      av1_use_angle_delta(bsize)) {
#if CONFIG_EXT_INTRA_MOD
    mode_cost += x->angle_delta_cost[mbmi->mode - V_PRED]
                                    [MAX_ANGLE_DELTA + mbmi->angle_delta[0]];
#else
    mode_cost += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
                                    MAX_ANGLE_DELTA + mbmi->angle_delta[0]);
#endif  // CONFIG_EXT_INTRA_MOD
  }
#endif  // CONFIG_EXT_INTRA
#if CONFIG_FILTER_INTRA
  // DC_PRED may carry a filter-intra flag (and mode when the flag is set).
  if (mbmi->mode == DC_PRED && av1_filter_intra_allowed_txsize(mbmi->tx_size)) {
    if (mbmi->filter_intra_mode_info.use_filter_intra_mode[0]) {
      const int mode = mbmi->filter_intra_mode_info.filter_intra_mode[0];
      mode_cost += x->filter_intra_cost[mbmi->tx_size][1] +
                   x->filter_intra_mode_cost[0][mode];
    } else {
      mode_cost += x->filter_intra_cost[mbmi->tx_size][0];
    }
  }
#endif  // CONFIG_FILTER_INTRA
  this_rd =
      RDCOST(x->rdmult, this_rd_stats.rate + mode_cost, this_rd_stats.dist);
  return this_rd;
}
2761
// Extends 'color_map' from 'orig_width x orig_height' to 'new_width x
// new_height' in place. New columns replicate each row's last valid entry;
// new rows replicate the last valid row.
static void extend_palette_color_map(uint8_t *const color_map, int orig_width,
                                     int orig_height, int new_width,
                                     int new_height) {
  assert(new_width >= orig_width);
  assert(new_height >= orig_height);
  if (new_width == orig_width && new_height == orig_height) return;

  // Re-stride bottom-up so rows not yet moved are never overwritten.
  for (int r = orig_height - 1; r >= 0; --r) {
    uint8_t *const dst_row = color_map + r * new_width;
    memmove(dst_row, color_map + r * orig_width, orig_width);
    // Fill the added columns with the row's last valid value.
    memset(dst_row + orig_width, dst_row[orig_width - 1],
           new_width - orig_width);
  }
  // Fill the added rows with a copy of the last valid (already extended) row.
  const uint8_t *const last_row = color_map + (orig_height - 1) * new_width;
  for (int r = orig_height; r < new_height; ++r) {
    memcpy(color_map + r * new_width, last_row, new_width);
  }
}
2785
hui su33567b22017-04-30 16:40:19 -07002786#if CONFIG_PALETTE_DELTA_ENCODING
// Bias toward using colors in the cache: each centroid (stored every
// 'stride' floats) is snapped to its nearest cached color when the distance
// is below 1.5 pixel-value units.
// TODO(huisu): Try other schemes to improve compression.
static void optimize_palette_colors(uint16_t *color_cache, int n_cache,
                                    int n_colors, int stride,
                                    float *centroids) {
  if (n_cache <= 0) return;
  for (int i = 0; i < n_colors * stride; i += stride) {
    // Find the cached color closest to this centroid.
    int best_idx = 0;
    float best_diff = fabsf(centroids[i] - color_cache[0]);
    for (int j = 1; j < n_cache; ++j) {
      const float diff = fabsf(centroids[i] - color_cache[j]);
      if (diff < best_diff) {
        best_diff = diff;
        best_idx = j;
      }
    }
    if (best_diff < 1.5) centroids[i] = color_cache[best_idx];
  }
}
2806#endif // CONFIG_PALETTE_DELTA_ENCODING
2807
// Given the base colors as specified in centroids[], calculate the RD cost
// of palette mode.
// Builds the palette (deduplicated, clipped to bit depth), computes the
// per-pixel color index map, adds up the palette signaling cost, and — if
// both the model RD and the full RD beat the current bests — records the
// winner into *best_mbmi / *best_palette_color_map and the output stats.
// Early-returns without touching the outputs when fewer than
// PALETTE_MIN_SIZE unique colors remain or the model RD is clearly worse.
static void palette_rd_y(const AV1_COMP *const cpi, MACROBLOCK *x,
                         MB_MODE_INFO *mbmi, BLOCK_SIZE bsize, int palette_ctx,
                         int dc_mode_cost, const float *data, float *centroids,
                         int n,
#if CONFIG_PALETTE_DELTA_ENCODING
                         uint16_t *color_cache, int n_cache,
#endif
                         MB_MODE_INFO *best_mbmi,
                         uint8_t *best_palette_color_map, int64_t *best_rd,
                         int64_t *best_model_rd, int *rate, int *rate_tokenonly,
                         int *rate_overhead, int64_t *distortion,
                         int *skippable) {
#if CONFIG_PALETTE_DELTA_ENCODING
  // Snap centroids to cached colors first (cheaper to signal).
  optimize_palette_colors(color_cache, n_cache, n, 1, centroids);
#endif  // CONFIG_PALETTE_DELTA_ENCODING
  int k = av1_remove_duplicates(centroids, n);
  if (k < PALETTE_MIN_SIZE) {
    // Too few unique colors to create a palette. And DC_PRED will work
    // well for that case anyway. So skip.
    return;
  }
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
#if CONFIG_HIGHBITDEPTH
  if (cpi->common.use_highbitdepth)
    for (int i = 0; i < k; ++i)
      pmi->palette_colors[i] =
          clip_pixel_highbd((int)centroids[i], cpi->common.bit_depth);
  else
#endif  // CONFIG_HIGHBITDEPTH
    for (int i = 0; i < k; ++i)
      pmi->palette_colors[i] = clip_pixel((int)centroids[i]);
  pmi->palette_size[0] = k;
  MACROBLOCKD *const xd = &x->e_mbd;
  uint8_t *const color_map = xd->plane[0].color_index_map;
  int block_width, block_height, rows, cols;
  av1_get_block_dimensions(bsize, 0, xd, &block_width, &block_height, &rows,
                           &cols);
  // Map each source pixel to its nearest palette entry, then pad the map
  // out to the full block dimensions.
  av1_calc_indices(data, centroids, color_map, rows * cols, k, 1);
  extend_palette_color_map(color_map, cols, rows, block_width, block_height);
  // Side-information cost: DC mode, palette size, first index, palette flag.
  int palette_mode_cost =
      dc_mode_cost +
      x->palette_y_size_cost[bsize - BLOCK_8X8][k - PALETTE_MIN_SIZE] +
      write_uniform_cost(k, color_map[0]) +
      x->palette_y_mode_cost[bsize - BLOCK_8X8][palette_ctx][1];
  palette_mode_cost += av1_palette_color_cost_y(pmi,
#if CONFIG_PALETTE_DELTA_ENCODING
                                                color_cache, n_cache,
#endif  // CONFIG_PALETTE_DELTA_ENCODING
                                                cpi->common.bit_depth);
  palette_mode_cost +=
      av1_cost_color_map(x, 0, 0, bsize, mbmi->tx_size, PALETTE_MAP);
  // Cheap model-based screen: bail if clearly (>1.5x) worse than best model.
  int64_t this_model_rd = intra_model_yrd(cpi, x, bsize, palette_mode_cost);
  if (*best_model_rd != INT64_MAX &&
      this_model_rd > *best_model_rd + (*best_model_rd >> 1))
    return;
  if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd;
  RD_STATS tokenonly_rd_stats;
  super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
  if (tokenonly_rd_stats.rate == INT_MAX) return;
  int this_rate = tokenonly_rd_stats.rate + palette_mode_cost;
  int64_t this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
  if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(mbmi->sb_type)) {
    // Exclude the tx-size signaling bits from the token-only rate.
    tokenonly_rd_stats.rate -=
        tx_size_cost(&cpi->common, x, bsize, mbmi->tx_size);
  }
  if (this_rd < *best_rd) {
    *best_rd = this_rd;
    memcpy(best_palette_color_map, color_map,
           block_width * block_height * sizeof(color_map[0]));
    *best_mbmi = *mbmi;
    *rate_overhead = this_rate - tokenonly_rd_stats.rate;
    if (rate) *rate = this_rate;
    if (rate_tokenonly) *rate_tokenonly = tokenonly_rd_stats.rate;
    if (distortion) *distortion = tokenonly_rd_stats.dist;
    if (skippable) *skippable = tokenonly_rd_stats.skip;
  }
}
2887
// Searches palette mode for the luma plane of an intra block.
// Candidate palettes come from two sources: the most frequent source colors
// directly ("dominant colors"), and k-means clustering seeded with evenly
// spaced centroids. Each candidate size n in [2, min(colors, PALETTE_MAX_SIZE)]
// is evaluated through palette_rd_y(), which updates *best_mbmi and the
// other outputs when it wins. On return, *mbmi is overwritten with
// *best_mbmi (which may or may not be a palette mode).
// Returns the rate overhead (palette signaling bits) of the last winning
// candidate, or 0 if no candidate was accepted.
static int rd_pick_palette_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
                                     BLOCK_SIZE bsize, int palette_ctx,
                                     int dc_mode_cost, MB_MODE_INFO *best_mbmi,
                                     uint8_t *best_palette_color_map,
                                     int64_t *best_rd, int64_t *best_model_rd,
                                     int *rate, int *rate_tokenonly,
                                     int64_t *distortion, int *skippable) {
  int rate_overhead = 0;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  MB_MODE_INFO *const mbmi = &mic->mbmi;
  assert(!is_inter_block(mbmi));
  assert(bsize >= BLOCK_8X8);
  int colors, n;
  const int src_stride = x->plane[0].src.stride;
  const uint8_t *const src = x->plane[0].src.buf;
  uint8_t *const color_map = xd->plane[0].color_index_map;
  int block_width, block_height, rows, cols;
  av1_get_block_dimensions(bsize, 0, xd, &block_width, &block_height, &rows,
                           &cols);

  assert(cpi->common.allow_screen_content_tools);

  // Per-value histogram of the source block, filled by av1_count_colors*().
  int count_buf[1 << 12];  // Maximum (1 << 12) color levels.
#if CONFIG_HIGHBITDEPTH
  if (cpi->common.use_highbitdepth)
    colors = av1_count_colors_highbd(src, src_stride, rows, cols,
                                     cpi->common.bit_depth, count_buf);
  else
#endif  // CONFIG_HIGHBITDEPTH
    colors = av1_count_colors(src, src_stride, rows, cols, count_buf);
#if CONFIG_FILTER_INTRA
  mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
#endif  // CONFIG_FILTER_INTRA

  // Palette only makes sense for blocks with few (but >1) distinct colors.
  if (colors > 1 && colors <= 64) {
    aom_clear_system_state();
    int r, c, i;
    const int max_itr = 50;
    float *const data = x->palette_buffer->kmeans_data_buf;
    float centroids[PALETTE_MAX_SIZE];
    float lb, ub, val;
#if CONFIG_HIGHBITDEPTH
    uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
    if (cpi->common.use_highbitdepth)
      lb = ub = src16[0];
    else
#endif  // CONFIG_HIGHBITDEPTH
      lb = ub = src[0];

    // Copy source pixels into the float k-means buffer while tracking the
    // min (lb) and max (ub) pixel values.
#if CONFIG_HIGHBITDEPTH
    if (cpi->common.use_highbitdepth) {
      for (r = 0; r < rows; ++r) {
        for (c = 0; c < cols; ++c) {
          val = src16[r * src_stride + c];
          data[r * cols + c] = val;
          if (val < lb)
            lb = val;
          else if (val > ub)
            ub = val;
        }
      }
    } else {
#endif  // CONFIG_HIGHBITDEPTH
      for (r = 0; r < rows; ++r) {
        for (c = 0; c < cols; ++c) {
          val = src[r * src_stride + c];
          data[r * cols + c] = val;
          if (val < lb)
            lb = val;
          else if (val > ub)
            ub = val;
        }
      }
#if CONFIG_HIGHBITDEPTH
    }
#endif  // CONFIG_HIGHBITDEPTH

    mbmi->mode = DC_PRED;
#if CONFIG_FILTER_INTRA
    mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
#endif  // CONFIG_FILTER_INTRA

    if (rows * cols > MAX_PALETTE_SQUARE) return 0;

#if CONFIG_PALETTE_DELTA_ENCODING
    uint16_t color_cache[2 * PALETTE_MAX_SIZE];
    const int n_cache = av1_get_palette_cache(xd, 0, color_cache);
#endif  // CONFIG_PALETTE_DELTA_ENCODING

    // Find the dominant colors, stored in top_colors[].
    // Repeatedly take the histogram peak and zero it out.
    int top_colors[PALETTE_MAX_SIZE] = { 0 };
    for (i = 0; i < AOMMIN(colors, PALETTE_MAX_SIZE); ++i) {
      int max_count = 0;
      for (int j = 0; j < (1 << cpi->common.bit_depth); ++j) {
        if (count_buf[j] > max_count) {
          max_count = count_buf[j];
          top_colors[i] = j;
        }
      }
      assert(max_count > 0);
      count_buf[top_colors[i]] = 0;
    }

    // Try the dominant colors directly.
    // TODO(huisu@google.com): Try to avoid duplicate computation in cases
    // where the dominant colors and the k-means results are similar.
    for (n = AOMMIN(colors, PALETTE_MAX_SIZE); n >= 2; --n) {
      for (i = 0; i < n; ++i) centroids[i] = top_colors[i];
      palette_rd_y(cpi, x, mbmi, bsize, palette_ctx, dc_mode_cost, data,
                   centroids, n,
#if CONFIG_PALETTE_DELTA_ENCODING
                   color_cache, n_cache,
#endif  // CONFIG_PALETTE_DELTA_ENCODING
                   best_mbmi, best_palette_color_map, best_rd, best_model_rd,
                   rate, rate_tokenonly, &rate_overhead, distortion, skippable);
    }

    // K-means clustering.
    for (n = AOMMIN(colors, PALETTE_MAX_SIZE); n >= 2; --n) {
      if (colors == PALETTE_MIN_SIZE) {
        // Special case: These colors automatically become the centroids.
        assert(colors == n);
        assert(colors == 2);
        centroids[0] = lb;
        centroids[1] = ub;
      } else {
        // Seed centroids evenly across [lb, ub], then refine with k-means.
        for (i = 0; i < n; ++i) {
          centroids[i] = lb + (2 * i + 1) * (ub - lb) / n / 2;
        }
        av1_k_means(data, centroids, color_map, rows * cols, n, 1, max_itr);
      }

      palette_rd_y(cpi, x, mbmi, bsize, palette_ctx, dc_mode_cost, data,
                   centroids, n,
#if CONFIG_PALETTE_DELTA_ENCODING
                   color_cache, n_cache,
#endif  // CONFIG_PALETTE_DELTA_ENCODING
                   best_mbmi, best_palette_color_map, best_rd, best_model_rd,
                   rate, rate_tokenonly, &rate_overhead, distortion, skippable);
    }
  }

  // Restore the winning color map and mode info before returning.
  if (best_mbmi->palette_mode_info.palette_size[0] > 0) {
    memcpy(color_map, best_palette_color_map,
           block_width * block_height * sizeof(best_palette_color_map[0]));
  }
  *mbmi = *best_mbmi;
  return rate_overhead;
}
3038
hui su5db97432016-10-14 16:10:14 -07003039#if CONFIG_FILTER_INTRA
// Return 1 if a filter intra mode is selected; return 0 otherwise.
// Tries every FILTER_INTRA_MODE on top of DC_PRED: each candidate is first
// screened with the cheap model RD (skipped if >1.5x worse than the best
// model RD seen), then fully evaluated with super_block_yrd(). When a
// candidate beats *best_rd, the outputs (*rate, *rate_tokenonly,
// *distortion, *skippable, *best_rd, *best_model_rd) are updated and, at
// the end, the winning configuration is written back into the block's mbmi.
static int rd_pick_filter_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
                                    int *rate, int *rate_tokenonly,
                                    int64_t *distortion, int *skippable,
                                    BLOCK_SIZE bsize, int mode_cost,
                                    int64_t *best_rd, int64_t *best_model_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  MB_MODE_INFO *mbmi = &mic->mbmi;
  int filter_intra_selected_flag = 0;
  FILTER_INTRA_MODE mode;
  TX_SIZE best_tx_size = TX_8X8;
  FILTER_INTRA_MODE_INFO filter_intra_mode_info;
  // Only read when filter_intra_selected_flag is set, which guarantees it
  // was assigned in the winning iteration below.
  TX_TYPE best_tx_type;

  av1_zero(filter_intra_mode_info);
  // Evaluate all candidates with the filter-intra flag enabled on DC_PRED.
  mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 1;
  mbmi->mode = DC_PRED;
  mbmi->palette_mode_info.palette_size[0] = 0;

  for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) {
    int this_rate;
    int64_t this_rd, this_model_rd;
    RD_STATS tokenonly_rd_stats;
    mbmi->filter_intra_mode_info.filter_intra_mode[0] = mode;
    // Cheap model-based screen before the full transform search.
    this_model_rd = intra_model_yrd(cpi, x, bsize, mode_cost);
    if (*best_model_rd != INT64_MAX &&
        this_model_rd > *best_model_rd + (*best_model_rd >> 1))
      continue;
    if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd;
    super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
    if (tokenonly_rd_stats.rate == INT_MAX) continue;
    this_rate = tokenonly_rd_stats.rate +
                x->filter_intra_cost[mbmi->tx_size][1] +
                x->filter_intra_mode_cost[0][mode] + mode_cost;
    this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);

    if (this_rd < *best_rd) {
      *best_rd = this_rd;
      best_tx_size = mic->mbmi.tx_size;
      filter_intra_mode_info = mbmi->filter_intra_mode_info;
      best_tx_type = mic->mbmi.tx_type;
      *rate = this_rate;
      *rate_tokenonly = tokenonly_rd_stats.rate;
      *distortion = tokenonly_rd_stats.dist;
      *skippable = tokenonly_rd_stats.skip;
      filter_intra_selected_flag = 1;
    }
  }

  if (filter_intra_selected_flag) {
    // Commit the winning filter-intra configuration to the block.
    mbmi->mode = DC_PRED;
    mbmi->tx_size = best_tx_size;
    mbmi->filter_intra_mode_info.use_filter_intra_mode[0] =
        filter_intra_mode_info.use_filter_intra_mode[0];
    mbmi->filter_intra_mode_info.filter_intra_mode[0] =
        filter_intra_mode_info.filter_intra_mode[0];
    mbmi->tx_type = best_tx_type;
    return 1;
  } else {
    return 0;
  }
}
hui su5db97432016-10-14 16:10:14 -07003103#endif // CONFIG_FILTER_INTRA
Yaowu Xuc27fc142016-08-22 16:08:15 -07003104
hui su5db97432016-10-14 16:10:14 -07003105#if CONFIG_EXT_INTRA
hui su45dc5972016-12-08 17:42:50 -08003106// Run RD calculation with given luma intra prediction angle., and return
3107// the RD cost. Update the best mode info. if the RD cost is the best so far.
3108static int64_t calc_rd_given_intra_angle(
3109 const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mode_cost,
3110 int64_t best_rd_in, int8_t angle_delta, int max_angle_delta, int *rate,
3111 RD_STATS *rd_stats, int *best_angle_delta, TX_SIZE *best_tx_size,
Angie Chiang53bf1e92017-11-29 16:53:07 -08003112 TX_TYPE *best_tx_type, int64_t *best_rd, int64_t *best_model_rd,
3113 TX_TYPE *best_txk_type) {
Angie Chiang0e9a2e92016-11-08 09:45:40 -08003114 int this_rate;
3115 RD_STATS tokenonly_rd_stats;
hui su9a416f52017-01-13 11:37:53 -08003116 int64_t this_rd, this_model_rd;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003117 MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
Urvang Joshi330aec82017-05-08 15:37:42 -07003118 assert(!is_inter_block(mbmi));
Yaowu Xuc27fc142016-08-22 16:08:15 -07003119
Angie Chiang53bf1e92017-11-29 16:53:07 -08003120#if !CONFIG_TXK_SEL
3121 (void)best_txk_type;
3122#endif
3123
hui su45dc5972016-12-08 17:42:50 -08003124 mbmi->angle_delta[0] = angle_delta;
hui su9a416f52017-01-13 11:37:53 -08003125 this_model_rd = intra_model_yrd(cpi, x, bsize, mode_cost);
3126 if (*best_model_rd != INT64_MAX &&
3127 this_model_rd > *best_model_rd + (*best_model_rd >> 1))
3128 return INT64_MAX;
3129 if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd;
hui su45dc5972016-12-08 17:42:50 -08003130 super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd_in);
3131 if (tokenonly_rd_stats.rate == INT_MAX) return INT64_MAX;
3132
3133 this_rate = tokenonly_rd_stats.rate + mode_cost +
Joe Young3ca43bf2017-10-06 15:12:46 -07003134#if CONFIG_EXT_INTRA_MOD
3135 x->angle_delta_cost[mbmi->mode - V_PRED]
3136 [max_angle_delta + mbmi->angle_delta[0]];
3137#else
hui su45dc5972016-12-08 17:42:50 -08003138 write_uniform_cost(2 * max_angle_delta + 1,
3139 mbmi->angle_delta[0] + max_angle_delta);
Joe Young3ca43bf2017-10-06 15:12:46 -07003140#endif // CONFIG_EXT_INTRA_MOD
Urvang Joshi70006e42017-06-14 16:08:55 -07003141 this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
Yaowu Xuc27fc142016-08-22 16:08:15 -07003142
3143 if (this_rd < *best_rd) {
Angie Chiang53bf1e92017-11-29 16:53:07 -08003144#if CONFIG_TXK_SEL
3145 memcpy(best_txk_type, mbmi->txk_type,
3146 sizeof(*best_txk_type) *
3147 (MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)));
3148#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -07003149 *best_rd = this_rd;
3150 *best_angle_delta = mbmi->angle_delta[0];
3151 *best_tx_size = mbmi->tx_size;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003152 *best_tx_type = mbmi->tx_type;
3153 *rate = this_rate;
hui su45dc5972016-12-08 17:42:50 -08003154 rd_stats->rate = tokenonly_rd_stats.rate;
3155 rd_stats->dist = tokenonly_rd_stats.dist;
3156 rd_stats->skip = tokenonly_rd_stats.skip;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003157 }
hui su45dc5972016-12-08 17:42:50 -08003158 return this_rd;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003159}
3160
hui su45dc5972016-12-08 17:42:50 -08003161// With given luma directional intra prediction mode, pick the best angle delta
3162// Return the RD cost corresponding to the best angle delta.
Urvang Joshi52648442016-10-13 17:27:51 -07003163static int64_t rd_pick_intra_angle_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
hui su45dc5972016-12-08 17:42:50 -08003164 int *rate, RD_STATS *rd_stats,
3165 BLOCK_SIZE bsize, int mode_cost,
hui su9a416f52017-01-13 11:37:53 -08003166 int64_t best_rd,
3167 int64_t *best_model_rd) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07003168 MACROBLOCKD *const xd = &x->e_mbd;
3169 MODE_INFO *const mic = xd->mi[0];
3170 MB_MODE_INFO *mbmi = &mic->mbmi;
Urvang Joshi330aec82017-05-08 15:37:42 -07003171 assert(!is_inter_block(mbmi));
hui su45dc5972016-12-08 17:42:50 -08003172 int i, angle_delta, best_angle_delta = 0;
hui su45dc5972016-12-08 17:42:50 -08003173 int first_try = 1;
Debargha Mukherjeedf0e0d72017-04-27 15:16:53 -07003174 int64_t this_rd, best_rd_in, rd_cost[2 * (MAX_ANGLE_DELTA + 2)];
Yaowu Xuc27fc142016-08-22 16:08:15 -07003175 TX_SIZE best_tx_size = mic->mbmi.tx_size;
3176 TX_TYPE best_tx_type = mbmi->tx_type;
Angie Chiang53bf1e92017-11-29 16:53:07 -08003177#if CONFIG_TXK_SEL
3178 TX_TYPE best_txk_type[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
3179#else
3180 TX_TYPE *best_txk_type = NULL;
3181#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -07003182
hui su0a6731f2017-04-26 15:23:47 -07003183 for (i = 0; i < 2 * (MAX_ANGLE_DELTA + 2); ++i) rd_cost[i] = INT64_MAX;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003184
hui su0a6731f2017-04-26 15:23:47 -07003185 for (angle_delta = 0; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
Hui Su259d4422017-10-13 10:08:17 -07003186 for (i = 0; i < 2; ++i) {
3187 best_rd_in = (best_rd == INT64_MAX)
3188 ? INT64_MAX
3189 : (best_rd + (best_rd >> (first_try ? 3 : 5)));
3190 this_rd = calc_rd_given_intra_angle(
3191 cpi, x, bsize, mode_cost, best_rd_in, (1 - 2 * i) * angle_delta,
3192 MAX_ANGLE_DELTA, rate, rd_stats, &best_angle_delta, &best_tx_size,
Angie Chiang53bf1e92017-11-29 16:53:07 -08003193 &best_tx_type, &best_rd, best_model_rd, best_txk_type);
Hui Su259d4422017-10-13 10:08:17 -07003194 rd_cost[2 * angle_delta + i] = this_rd;
3195 if (first_try && this_rd == INT64_MAX) return best_rd;
3196 first_try = 0;
3197 if (angle_delta == 0) {
3198 rd_cost[1] = this_rd;
3199 break;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003200 }
3201 }
hui su45dc5972016-12-08 17:42:50 -08003202 }
3203
3204 assert(best_rd != INT64_MAX);
hui su0a6731f2017-04-26 15:23:47 -07003205 for (angle_delta = 1; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
hui su45dc5972016-12-08 17:42:50 -08003206 int64_t rd_thresh;
Hui Su259d4422017-10-13 10:08:17 -07003207 for (i = 0; i < 2; ++i) {
3208 int skip_search = 0;
3209 rd_thresh = best_rd + (best_rd >> 5);
3210 if (rd_cost[2 * (angle_delta + 1) + i] > rd_thresh &&
3211 rd_cost[2 * (angle_delta - 1) + i] > rd_thresh)
3212 skip_search = 1;
3213 if (!skip_search) {
Sebastien Alaiwan7fc6b2a2017-11-02 18:14:50 +01003214 calc_rd_given_intra_angle(
3215 cpi, x, bsize, mode_cost, best_rd, (1 - 2 * i) * angle_delta,
3216 MAX_ANGLE_DELTA, rate, rd_stats, &best_angle_delta, &best_tx_size,
Angie Chiang53bf1e92017-11-29 16:53:07 -08003217 &best_tx_type, &best_rd, best_model_rd, best_txk_type);
Yaowu Xuc27fc142016-08-22 16:08:15 -07003218 }
3219 }
3220 }
3221
3222 mbmi->tx_size = best_tx_size;
3223 mbmi->angle_delta[0] = best_angle_delta;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003224 mbmi->tx_type = best_tx_type;
Angie Chiang53bf1e92017-11-29 16:53:07 -08003225#if CONFIG_TXK_SEL
3226 memcpy(mbmi->txk_type, best_txk_type,
3227 sizeof(*best_txk_type) *
3228 (MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)));
3229#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -07003230 return best_rd;
3231}
3232
// Indices are sign, integer, and fractional part of the gradient value, as
// computed in angle_estimation()/highbd_angle_estimation():
//   [(dx > 0) ^ (dy > 0)]                    // gradient sign disagreement
//   [min(|dx| / |dy|, 6)]                    // integer part of |dx|/|dy|
//   [min(16 * (|dx| % |dy|) / |dy|, 15)]     // fractional part, in 1/16ths
// Each entry is one of the 8 directional angle bins (0..7).
static const uint8_t gradient_to_angle_bin[2][7][16] = {
  {
      { 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0 },
      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 },
      { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
      { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
      { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
      { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
      { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
  },
  {
      { 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4 },
      { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3 },
      { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
      { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
      { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 },
      { 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
      { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
  },
};
3254
Yue Chen56e226e2017-05-02 16:21:40 -07003255/* clang-format off */
Yaowu Xuc27fc142016-08-22 16:08:15 -07003256static const uint8_t mode_to_angle_bin[INTRA_MODES] = {
3257 0, 2, 6, 0, 4, 3, 5, 7, 1, 0,
hui su9cc10652017-04-27 17:22:07 -07003258 0,
Yaowu Xuc27fc142016-08-22 16:08:15 -07003259};
Yue Chen56e226e2017-05-02 16:21:40 -07003260/* clang-format on */
Yaowu Xuc27fc142016-08-22 16:08:15 -07003261
3262static void angle_estimation(const uint8_t *src, int src_stride, int rows,
hui su9cc10652017-04-27 17:22:07 -07003263 int cols, BLOCK_SIZE bsize,
3264 uint8_t *directional_mode_skip_mask) {
3265 memset(directional_mode_skip_mask, 0,
3266 INTRA_MODES * sizeof(*directional_mode_skip_mask));
Joe Young830d4ce2017-05-30 17:48:13 -07003267 // Check if angle_delta is used
3268 if (!av1_use_angle_delta(bsize)) return;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003269 uint64_t hist[DIRECTIONAL_MODES];
Yaowu Xuc27fc142016-08-22 16:08:15 -07003270 memset(hist, 0, DIRECTIONAL_MODES * sizeof(hist[0]));
3271 src += src_stride;
hui su9cc10652017-04-27 17:22:07 -07003272 int r, c, dx, dy;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003273 for (r = 1; r < rows; ++r) {
3274 for (c = 1; c < cols; ++c) {
3275 dx = src[c] - src[c - 1];
3276 dy = src[c] - src[c - src_stride];
hui su9cc10652017-04-27 17:22:07 -07003277 int index;
3278 const int temp = dx * dx + dy * dy;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003279 if (dy == 0) {
3280 index = 2;
3281 } else {
hui su9cc10652017-04-27 17:22:07 -07003282 const int sn = (dx > 0) ^ (dy > 0);
Yaowu Xuc27fc142016-08-22 16:08:15 -07003283 dx = abs(dx);
3284 dy = abs(dy);
hui su9cc10652017-04-27 17:22:07 -07003285 const int remd = (dx % dy) * 16 / dy;
3286 const int quot = dx / dy;
Yaowu Xuf883b422016-08-30 14:01:10 -07003287 index = gradient_to_angle_bin[sn][AOMMIN(quot, 6)][AOMMIN(remd, 15)];
Yaowu Xuc27fc142016-08-22 16:08:15 -07003288 }
3289 hist[index] += temp;
3290 }
3291 src += src_stride;
3292 }
3293
hui su9cc10652017-04-27 17:22:07 -07003294 int i;
3295 uint64_t hist_sum = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003296 for (i = 0; i < DIRECTIONAL_MODES; ++i) hist_sum += hist[i];
3297 for (i = 0; i < INTRA_MODES; ++i) {
hui su9cc10652017-04-27 17:22:07 -07003298 if (av1_is_directional_mode(i, bsize)) {
Urvang Joshida70e7b2016-10-19 11:48:54 -07003299 const uint8_t angle_bin = mode_to_angle_bin[i];
3300 uint64_t score = 2 * hist[angle_bin];
Yaowu Xuc27fc142016-08-22 16:08:15 -07003301 int weight = 2;
Urvang Joshida70e7b2016-10-19 11:48:54 -07003302 if (angle_bin > 0) {
3303 score += hist[angle_bin - 1];
3304 ++weight;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003305 }
Urvang Joshida70e7b2016-10-19 11:48:54 -07003306 if (angle_bin < DIRECTIONAL_MODES - 1) {
3307 score += hist[angle_bin + 1];
3308 ++weight;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003309 }
3310 if (score * ANGLE_SKIP_THRESH < hist_sum * weight)
3311 directional_mode_skip_mask[i] = 1;
3312 }
3313 }
3314}
3315
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02003316#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07003317static void highbd_angle_estimation(const uint8_t *src8, int src_stride,
hui su9cc10652017-04-27 17:22:07 -07003318 int rows, int cols, BLOCK_SIZE bsize,
Yaowu Xuc27fc142016-08-22 16:08:15 -07003319 uint8_t *directional_mode_skip_mask) {
hui su9cc10652017-04-27 17:22:07 -07003320 memset(directional_mode_skip_mask, 0,
3321 INTRA_MODES * sizeof(*directional_mode_skip_mask));
Joe Young830d4ce2017-05-30 17:48:13 -07003322 // Check if angle_delta is used
3323 if (!av1_use_angle_delta(bsize)) return;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003324 uint16_t *src = CONVERT_TO_SHORTPTR(src8);
hui su9cc10652017-04-27 17:22:07 -07003325 uint64_t hist[DIRECTIONAL_MODES];
Yaowu Xuc27fc142016-08-22 16:08:15 -07003326 memset(hist, 0, DIRECTIONAL_MODES * sizeof(hist[0]));
3327 src += src_stride;
hui su9cc10652017-04-27 17:22:07 -07003328 int r, c, dx, dy;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003329 for (r = 1; r < rows; ++r) {
3330 for (c = 1; c < cols; ++c) {
3331 dx = src[c] - src[c - 1];
3332 dy = src[c] - src[c - src_stride];
hui su9cc10652017-04-27 17:22:07 -07003333 int index;
3334 const int temp = dx * dx + dy * dy;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003335 if (dy == 0) {
3336 index = 2;
3337 } else {
hui su9cc10652017-04-27 17:22:07 -07003338 const int sn = (dx > 0) ^ (dy > 0);
Yaowu Xuc27fc142016-08-22 16:08:15 -07003339 dx = abs(dx);
3340 dy = abs(dy);
hui su9cc10652017-04-27 17:22:07 -07003341 const int remd = (dx % dy) * 16 / dy;
3342 const int quot = dx / dy;
Yaowu Xuf883b422016-08-30 14:01:10 -07003343 index = gradient_to_angle_bin[sn][AOMMIN(quot, 6)][AOMMIN(remd, 15)];
Yaowu Xuc27fc142016-08-22 16:08:15 -07003344 }
3345 hist[index] += temp;
3346 }
3347 src += src_stride;
3348 }
3349
hui su9cc10652017-04-27 17:22:07 -07003350 int i;
3351 uint64_t hist_sum = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003352 for (i = 0; i < DIRECTIONAL_MODES; ++i) hist_sum += hist[i];
3353 for (i = 0; i < INTRA_MODES; ++i) {
hui su9cc10652017-04-27 17:22:07 -07003354 if (av1_is_directional_mode(i, bsize)) {
Urvang Joshida70e7b2016-10-19 11:48:54 -07003355 const uint8_t angle_bin = mode_to_angle_bin[i];
3356 uint64_t score = 2 * hist[angle_bin];
Yaowu Xuc27fc142016-08-22 16:08:15 -07003357 int weight = 2;
Urvang Joshida70e7b2016-10-19 11:48:54 -07003358 if (angle_bin > 0) {
3359 score += hist[angle_bin - 1];
3360 ++weight;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003361 }
Urvang Joshida70e7b2016-10-19 11:48:54 -07003362 if (angle_bin < DIRECTIONAL_MODES - 1) {
3363 score += hist[angle_bin + 1];
3364 ++weight;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003365 }
3366 if (score * ANGLE_SKIP_THRESH < hist_sum * weight)
3367 directional_mode_skip_mask[i] = 1;
3368 }
3369 }
3370}
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02003371#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07003372#endif // CONFIG_EXT_INTRA
3373
// This function is used only for intra_only frames.
// Exhaustive luma intra mode search: tries every intra prediction mode (plus,
// when the corresponding CONFIG_* features are on, angle deltas, palette and
// filter-intra), and returns the best RD cost found (or the passed-in best_rd
// if nothing beats it). When a better mode is found, *mbmi and the output
// pointers (*rate, *rate_tokenonly, *distortion, *skippable) are updated to
// describe the winner.
static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
                                      int *rate, int *rate_tokenonly,
                                      int64_t *distortion, int *skippable,
                                      BLOCK_SIZE bsize, int64_t best_rd) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  MB_MODE_INFO *const mbmi = &mic->mbmi;
  assert(!is_inter_block(mbmi));
  MB_MODE_INFO best_mbmi = *mbmi;
  int64_t best_model_rd = INT64_MAX;
#if CONFIG_EXT_INTRA
  const int rows = block_size_high[bsize];
  const int cols = block_size_wide[bsize];
  int is_directional_mode;
  uint8_t directional_mode_skip_mask[INTRA_MODES];
  const int src_stride = x->plane[0].src.stride;
  const uint8_t *src = x->plane[0].src.buf;
#endif  // CONFIG_EXT_INTRA
#if CONFIG_FILTER_INTRA
  int beat_best_rd = 0;
#endif  // CONFIG_FILTER_INTRA
  const int *bmode_costs;
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
  int palette_y_mode_ctx = 0;
  const int try_palette =
      av1_allow_palette(cpi->common.allow_screen_content_tools, mbmi->sb_type);
  uint8_t *best_palette_color_map =
      try_palette ? x->palette_buffer->best_palette_color_map : NULL;
  const MODE_INFO *above_mi = xd->above_mi;
  const MODE_INFO *left_mi = xd->left_mi;
  const PREDICTION_MODE A = av1_above_block_mode(mic, above_mi, 0);
  const PREDICTION_MODE L = av1_left_block_mode(mic, left_mi, 0);
  // Sentinel mode index: one extra loop pass that re-runs the best mode
  // with the full (non-default) tx-type search.
  const PREDICTION_MODE FINAL_MODE_SEARCH = PAETH_PRED + 1;

  // Mode signalling cost depends on the above/left neighbour modes.
#if CONFIG_KF_CTX
  const int above_ctx = intra_mode_context[A];
  const int left_ctx = intra_mode_context[L];
  bmode_costs = x->y_mode_costs[above_ctx][left_ctx];
#else
  bmode_costs = x->y_mode_costs[A][L];
#endif

#if CONFIG_EXT_INTRA
  mbmi->angle_delta[0] = 0;
  // Pre-compute which directional modes can be skipped based on the
  // gradient-direction statistics of the source block.
#if CONFIG_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
    highbd_angle_estimation(src, src_stride, rows, cols, bsize,
                            directional_mode_skip_mask);
  else
#endif  // CONFIG_HIGHBITDEPTH
    angle_estimation(src, src_stride, rows, cols, bsize,
                     directional_mode_skip_mask);
#endif  // CONFIG_EXT_INTRA
#if CONFIG_FILTER_INTRA
  mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
#endif  // CONFIG_FILTER_INTRA
  pmi->palette_size[0] = 0;
  // Palette-mode signalling context: number of neighbours using palette.
  if (try_palette) {
    if (above_mi) {
      palette_y_mode_ctx +=
          (above_mi->mbmi.palette_mode_info.palette_size[0] > 0);
    }
    if (left_mi) {
      palette_y_mode_ctx +=
          (left_mi->mbmi.palette_mode_info.palette_size[0] > 0);
    }
  }

  if (cpi->sf.tx_type_search.fast_intra_tx_type_search)
    x->use_default_intra_tx_type = 1;
  else
    x->use_default_intra_tx_type = 0;

  /* Y Search for intra prediction mode */
  for (int mode_idx = DC_PRED; mode_idx <= FINAL_MODE_SEARCH; ++mode_idx) {
    RD_STATS this_rd_stats;
    int this_rate, this_rate_tokenonly, s;
    int64_t this_distortion, this_rd, this_model_rd;
    if (mode_idx == FINAL_MODE_SEARCH) {
      // Extra pass: redo the best mode with the full tx-type search
      // (only needed if the main loop used the fast default tx type).
      if (x->use_default_intra_tx_type == 0) break;
      mbmi->mode = best_mbmi.mode;
      x->use_default_intra_tx_type = 0;
    } else {
      assert(mode_idx < INTRA_MODES);
      mbmi->mode = intra_rd_search_mode_order[mode_idx];
    }

#if CONFIG_EXT_INTRA
    mbmi->angle_delta[0] = 0;
#endif  // CONFIG_EXT_INTRA
    // Model-based early termination: skip modes whose estimate is more
    // than 1.5x the best model RD seen so far.
    this_model_rd = intra_model_yrd(cpi, x, bsize, bmode_costs[mbmi->mode]);
    if (best_model_rd != INT64_MAX &&
        this_model_rd > best_model_rd + (best_model_rd >> 1))
      continue;
    if (this_model_rd < best_model_rd) best_model_rd = this_model_rd;
#if CONFIG_EXT_INTRA
    is_directional_mode = av1_is_directional_mode(mbmi->mode, bsize);
    if (is_directional_mode && directional_mode_skip_mask[mbmi->mode]) continue;
    if (is_directional_mode && av1_use_angle_delta(bsize)) {
      // Directional mode: also search over angle deltas.
      this_rd_stats.rate = INT_MAX;
      rd_pick_intra_angle_sby(cpi, x, &this_rate, &this_rd_stats, bsize,
                              bmode_costs[mbmi->mode], best_rd, &best_model_rd);
    } else {
      super_block_yrd(cpi, x, &this_rd_stats, bsize, best_rd);
    }
#else
    super_block_yrd(cpi, x, &this_rd_stats, bsize, best_rd);
#endif  // CONFIG_EXT_INTRA
    this_rate_tokenonly = this_rd_stats.rate;
    this_distortion = this_rd_stats.dist;
    s = this_rd_stats.skip;

    if (this_rate_tokenonly == INT_MAX) continue;

    this_rate = this_rate_tokenonly + bmode_costs[mbmi->mode];

    if (!xd->lossless[mbmi->segment_id] &&
        block_signals_txsize(mbmi->sb_type)) {
      // super_block_yrd above includes the cost of the tx_size in the
      // tokenonly rate, but for intra blocks, tx_size is always coded
      // (prediction granularity), so we account for it in the full rate,
      // not the tokenonly rate.
      this_rate_tokenonly -=
          tx_size_cost(&cpi->common, x, bsize, mbmi->tx_size);
    }
    // Cost of signalling "palette not used" when palette is available.
    if (try_palette && mbmi->mode == DC_PRED) {
      this_rate +=
          x->palette_y_mode_cost[bsize - BLOCK_8X8][palette_y_mode_ctx][0];
    }
#if CONFIG_FILTER_INTRA
    // Cost of signalling "filter-intra not used" where it is available.
    if (mbmi->mode == DC_PRED && av1_filter_intra_allowed_txsize(mbmi->tx_size))
      this_rate += x->filter_intra_cost[mbmi->tx_size][0];
#endif  // CONFIG_FILTER_INTRA
#if CONFIG_EXT_INTRA
    if (is_directional_mode) {
      if (av1_use_angle_delta(bsize)) {
        // Cost of signalling the chosen angle delta.
#if CONFIG_EXT_INTRA_MOD
        this_rate +=
            x->angle_delta_cost[mbmi->mode - V_PRED]
                               [MAX_ANGLE_DELTA + mbmi->angle_delta[0]];
#else
        this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
                                        MAX_ANGLE_DELTA + mbmi->angle_delta[0]);
#endif  // CONFIG_EXT_INTRA_MOD
      }
    }
#endif  // CONFIG_EXT_INTRA
#if CONFIG_INTRABC
    // Cost of signalling "intrabc not used" on screen-content frames.
    if (bsize >= BLOCK_8X8 && cpi->common.allow_screen_content_tools)
      this_rate += x->intrabc_cost[0];
#endif  // CONFIG_INTRABC
    this_rd = RDCOST(x->rdmult, this_rate, this_distortion);

    if (this_rd < best_rd) {
      best_mbmi = *mbmi;
      best_rd = this_rd;
#if CONFIG_FILTER_INTRA
      beat_best_rd = 1;
#endif  // CONFIG_FILTER_INTRA
      *rate = this_rate;
      *rate_tokenonly = this_rate_tokenonly;
      *distortion = this_distortion;
      *skippable = s;
    }
  }

  // Palette search; updates best_mbmi/outputs via pointers if it wins.
  if (try_palette) {
    rd_pick_palette_intra_sby(cpi, x, bsize, palette_y_mode_ctx,
                              bmode_costs[DC_PRED], &best_mbmi,
                              best_palette_color_map, &best_rd, &best_model_rd,
                              rate, rate_tokenonly, distortion, skippable);
  }

#if CONFIG_FILTER_INTRA
  // Filter-intra search; only tried if the main loop improved on the
  // incoming best_rd.
  if (beat_best_rd && av1_filter_intra_allowed_bsize(bsize) &&
      !xd->lossless[mbmi->segment_id]) {
    if (rd_pick_filter_intra_sby(cpi, x, rate, rate_tokenonly, distortion,
                                 skippable, bsize, bmode_costs[DC_PRED],
                                 &best_rd, &best_model_rd)) {
      best_mbmi = *mbmi;
    }
  }
#endif  // CONFIG_FILTER_INTRA

  *mbmi = best_mbmi;
  return best_rd;
}
3562
Yue Chena1e48dc2016-08-29 17:29:33 -07003563// Return value 0: early termination triggered, no valid rd cost available;
3564// 1: rd cost values are valid.
Angie Chiang284d7772016-11-08 11:06:45 -08003565static int super_block_uvrd(const AV1_COMP *const cpi, MACROBLOCK *x,
3566 RD_STATS *rd_stats, BLOCK_SIZE bsize,
3567 int64_t ref_best_rd) {
Yue Chena1e48dc2016-08-29 17:29:33 -07003568 MACROBLOCKD *const xd = &x->e_mbd;
3569 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
Luc Trudeau52301a22017-11-29 11:02:55 -05003570 struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_U];
3571 const TX_SIZE uv_tx_size = av1_get_uv_tx_size(mbmi, pd);
Yue Chena1e48dc2016-08-29 17:29:33 -07003572 int plane;
Yue Chena1e48dc2016-08-29 17:29:33 -07003573 int is_cost_valid = 1;
Angie Chiang284d7772016-11-08 11:06:45 -08003574 av1_init_rd_stats(rd_stats);
Yue Chena1e48dc2016-08-29 17:29:33 -07003575
3576 if (ref_best_rd < 0) is_cost_valid = 0;
Jingning Han9ce464c2017-02-20 15:36:30 -08003577
Jingning Han9ce464c2017-02-20 15:36:30 -08003578 if (x->skip_chroma_rd) return is_cost_valid;
Jingning Han2d2dac22017-04-11 09:41:10 -07003579
Luc Trudeau52301a22017-11-29 11:02:55 -05003580 bsize = scale_chroma_bsize(bsize, pd->subsampling_x, pd->subsampling_y);
Jingning Han9ce464c2017-02-20 15:36:30 -08003581
Yue Chena1e48dc2016-08-29 17:29:33 -07003582 if (is_inter_block(mbmi) && is_cost_valid) {
Yue Chena1e48dc2016-08-29 17:29:33 -07003583 for (plane = 1; plane < MAX_MB_PLANE; ++plane)
3584 av1_subtract_plane(x, bsize, plane);
3585 }
Yue Chena1e48dc2016-08-29 17:29:33 -07003586
Yushin Cho09de28b2016-06-21 14:51:23 -07003587 if (is_cost_valid) {
3588 for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
Angie Chiang7c2b7f22016-11-07 16:00:00 -08003589 RD_STATS pn_rd_stats;
3590 txfm_rd_in_plane(x, cpi, &pn_rd_stats, ref_best_rd, plane, bsize,
3591 uv_tx_size, cpi->sf.use_fast_coef_costing);
3592 if (pn_rd_stats.rate == INT_MAX) {
Yushin Cho09de28b2016-06-21 14:51:23 -07003593 is_cost_valid = 0;
3594 break;
3595 }
Angie Chiang284d7772016-11-08 11:06:45 -08003596 av1_merge_rd_stats(rd_stats, &pn_rd_stats);
Urvang Joshi70006e42017-06-14 16:08:55 -07003597 if (RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) > ref_best_rd &&
3598 RDCOST(x->rdmult, 0, rd_stats->sse) > ref_best_rd) {
Yushin Cho09de28b2016-06-21 14:51:23 -07003599 is_cost_valid = 0;
3600 break;
3601 }
Yue Chena1e48dc2016-08-29 17:29:33 -07003602 }
3603 }
3604
3605 if (!is_cost_valid) {
3606 // reset cost value
Angie Chiang284d7772016-11-08 11:06:45 -08003607 av1_invalid_rd_stats(rd_stats);
Yue Chena1e48dc2016-08-29 17:29:33 -07003608 }
3609
3610 return is_cost_valid;
3611}
3612
Yaowu Xuf883b422016-08-30 14:01:10 -07003613void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
3614 int blk_row, int blk_col, int plane, int block,
Angie Chiang77368af2017-03-23 16:22:07 -07003615 int plane_bsize, const ENTROPY_CONTEXT *a,
Alexander Bokovc5ddf062017-10-17 16:41:46 -07003616 const ENTROPY_CONTEXT *l, RD_STATS *rd_stats, int fast,
3617 TX_SIZE_RD_INFO *rd_info_array) {
Angie Chiang22ba7512016-10-20 17:10:33 -07003618 const AV1_COMMON *const cm = &cpi->common;
Yaowu Xuc27fc142016-08-22 16:08:15 -07003619 MACROBLOCKD *xd = &x->e_mbd;
3620 const struct macroblock_plane *const p = &x->plane[plane];
3621 struct macroblockd_plane *const pd = &xd->plane[plane];
Yaowu Xu5f2749b2017-11-02 09:12:10 -07003622#if DISABLE_TRELLISQ_SEARCH
3623 (void)fast;
3624#endif
Jingning Han243b66b2017-06-23 12:11:47 -07003625#if CONFIG_TXK_SEL
3626 av1_search_txk_type(cpi, x, plane, block, blk_row, blk_col, plane_bsize,
3627 tx_size, a, l, 0, rd_stats);
3628 return;
3629#endif
Debargha Mukherjeed2cfbef2017-12-03 16:15:27 -08003630 // This function is used only for inter
3631 assert(is_inter_block(&xd->mi[0]->mbmi));
Yaowu Xuc27fc142016-08-22 16:08:15 -07003632 int64_t tmp;
3633 tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
Luc Trudeau005feb62017-02-22 13:34:01 -05003634 PLANE_TYPE plane_type = get_plane_type(plane);
Jingning Han19b5c8f2017-07-06 15:10:12 -07003635 TX_TYPE tx_type =
3636 av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size);
Urvang Joshi03f6fdc2016-10-14 15:53:39 -07003637 const SCAN_ORDER *const scan_order =
Angie Chiangbd99b382017-06-20 15:11:16 -07003638 get_scan(cm, tx_size, tx_type, &xd->mi[0]->mbmi);
Yaowu Xuc27fc142016-08-22 16:08:15 -07003639 BLOCK_SIZE txm_bsize = txsize_to_bsize[tx_size];
Jingning Han9fdc4222016-10-27 21:32:19 -07003640 int bh = block_size_high[txm_bsize];
3641 int bw = block_size_wide[txm_bsize];
Yaowu Xuc27fc142016-08-22 16:08:15 -07003642 int src_stride = p->src.stride;
Jingning Han9ca05b72017-01-03 14:41:36 -08003643 uint8_t *src =
3644 &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
3645 uint8_t *dst =
3646 &pd->dst
3647 .buf[(blk_row * pd->dst.stride + blk_col) << tx_size_wide_log2[0]];
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02003648#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07003649 DECLARE_ALIGNED(16, uint16_t, rec_buffer16[MAX_TX_SQUARE]);
3650 uint8_t *rec_buffer;
3651#else
3652 DECLARE_ALIGNED(16, uint8_t, rec_buffer[MAX_TX_SQUARE]);
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02003653#endif // CONFIG_HIGHBITDEPTH
Timothy B. Terriberryd62e2a32017-06-10 16:04:21 -07003654 const int diff_stride = block_size_wide[plane_bsize];
Jingning Han9ca05b72017-01-03 14:41:36 -08003655 const int16_t *diff =
3656 &p->src_diff[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
Angie Chiangd81fdb42016-11-03 12:20:58 -07003657 int txb_coeff_cost;
Jingning Hand3fada82016-11-22 10:46:55 -08003658
3659 assert(tx_size < TX_SIZES_ALL);
Yaowu Xuc27fc142016-08-22 16:08:15 -07003660
Jingning Han45027c62017-12-11 11:47:15 -08003661#if CONFIG_LV_MAP
3662 TXB_CTX txb_ctx;
3663 get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx);
3664 uint16_t cur_joint_ctx = (txb_ctx.dc_sign_ctx << 8) + txb_ctx.txb_skip_ctx;
3665#else
Alexander Bokovc5ddf062017-10-17 16:41:46 -07003666 const int coeff_ctx = get_entropy_context(tx_size, a, l);
3667 const int coeff_ctx_one_byte = combine_entropy_contexts(*a, *l);
3668 const uint8_t cur_joint_ctx = (coeff_ctx << 2) + coeff_ctx_one_byte;
Jingning Han45027c62017-12-11 11:47:15 -08003669#endif
Angie Chiang77368af2017-03-23 16:22:07 -07003670
Monty Montgomery4a05a582017-11-01 21:21:07 -04003671 // Note: tmp below is pixel distortion, not TX domain
Jingning Han1a7f0a82017-07-27 09:48:05 -07003672 tmp = pixel_diff_dist(x, plane, diff, diff_stride, blk_row, blk_col,
3673 plane_bsize, txm_bsize);
3674
3675#if CONFIG_HIGHBITDEPTH
3676 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
3677 tmp = ROUND_POWER_OF_TWO(tmp, (xd->bd - 8) * 2);
3678#endif // CONFIG_HIGHBITDEPTH
Alexander Bokovc5ddf062017-10-17 16:41:46 -07003679
Jingning Han1a7f0a82017-07-27 09:48:05 -07003680 rd_stats->sse += tmp << 4;
3681
3682 if (rd_stats->invalid_rate) {
3683 rd_stats->dist += tmp << 4;
3684 rd_stats->rate += rd_stats->zero_rate;
3685 rd_stats->skip = 1;
3686 return;
3687 }
3688
Alexander Bokovc5ddf062017-10-17 16:41:46 -07003689 // Look up RD and terminate early in case when we've already processed exactly
3690 // the same residual with exactly the same entropy context.
3691 if (rd_info_array != NULL && rd_info_array[tx_type].valid &&
3692 rd_info_array[tx_type].entropy_context == cur_joint_ctx &&
3693 rd_info_array[tx_type].fast == fast) {
3694 rd_stats->dist += rd_info_array[tx_type].dist;
3695 rd_stats->rate += rd_info_array[tx_type].rate;
Hui Su8c2b9132017-12-09 10:40:15 -08003696 rd_stats->skip &= rd_info_array[tx_type].eob == 0;
3697 p->eobs[block] = rd_info_array[tx_type].eob;
Alexander Bokovc5ddf062017-10-17 16:41:46 -07003698 return;
3699 }
3700
3701 int64_t cur_dist = 0;
3702 int cur_rate = 0;
3703 uint8_t cur_skip = 1;
3704
Angie Chiang808d8592017-04-06 18:36:55 -07003705// TODO(any): Use av1_dist_block to compute distortion
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02003706#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07003707 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
3708 rec_buffer = CONVERT_TO_BYTEPTR(rec_buffer16);
Yaowu Xuf883b422016-08-30 14:01:10 -07003709 aom_highbd_convolve_copy(dst, pd->dst.stride, rec_buffer, MAX_TX_SIZE, NULL,
Jingning Han9fdc4222016-10-27 21:32:19 -07003710 0, NULL, 0, bw, bh, xd->bd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07003711 } else {
3712 rec_buffer = (uint8_t *)rec_buffer16;
Yaowu Xuf883b422016-08-30 14:01:10 -07003713 aom_convolve_copy(dst, pd->dst.stride, rec_buffer, MAX_TX_SIZE, NULL, 0,
Jingning Han9fdc4222016-10-27 21:32:19 -07003714 NULL, 0, bw, bh);
Yaowu Xuc27fc142016-08-22 16:08:15 -07003715 }
3716#else
Yaowu Xuf883b422016-08-30 14:01:10 -07003717 aom_convolve_copy(dst, pd->dst.stride, rec_buffer, MAX_TX_SIZE, NULL, 0, NULL,
Jingning Han9fdc4222016-10-27 21:32:19 -07003718 0, bw, bh);
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02003719#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07003720
Jingning Han3bce7542017-07-25 10:53:57 -07003721#if DISABLE_TRELLISQ_SEARCH
3722 av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
Sarah Parkere8d0d4c2017-12-06 15:11:37 -08003723 AV1_XFORM_QUANT_B);
Jingning Han3bce7542017-07-25 10:53:57 -07003724
3725#else
3726 av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size,
Sarah Parkere8d0d4c2017-12-06 15:11:37 -08003727 AV1_XFORM_QUANT_FP);
Jingning Han3bce7542017-07-25 10:53:57 -07003728
Monty Montgomery4a05a582017-11-01 21:21:07 -04003729// TX-domain results need to shift down to Q2/D10 to match pixel
3730// domain distortion values which are in Q2^2
3731#if CONFIG_DAALA_TX
3732 const int shift = (TX_COEFF_DEPTH - 10) * 2;
3733#else
Jingning Han3bce7542017-07-25 10:53:57 -07003734 const int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2;
Monty Montgomery4a05a582017-11-01 21:21:07 -04003735#endif
Jingning Han3bce7542017-07-25 10:53:57 -07003736 tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
Urvang Joshi80893152017-10-27 11:51:14 -07003737 const int buffer_length = av1_get_max_eob(tx_size);
Jingning Han1a7f0a82017-07-27 09:48:05 -07003738 int64_t tmp_dist, tmp_sse;
Yunqing Wang24d2d5d2017-09-20 09:45:13 -07003739#if CONFIG_DIST_8X8
Yushin Chof9970a52017-10-13 12:57:13 -07003740 int blk_w = block_size_wide[plane_bsize];
3741 int blk_h = block_size_high[plane_bsize];
Yushin Choc00769a2017-09-14 14:44:30 -07003742 int disable_early_skip =
Yushin Chof9970a52017-10-13 12:57:13 -07003743 x->using_dist_8x8 && plane == 0 && blk_w >= 8 && blk_h >= 8 &&
Yushin Choc00769a2017-09-14 14:44:30 -07003744 (tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4) &&
3745 x->tune_metric != AOM_TUNE_PSNR;
Yunqing Wang24d2d5d2017-09-20 09:45:13 -07003746#endif // CONFIG_DIST_8X8
3747
Monty Montgomerya26262c2017-10-31 07:32:13 -04003748#if CONFIG_DAALA_TX
3749 tmp_dist =
3750 av1_highbd_block_error(coeff, dqcoeff, buffer_length, &tmp_sse, xd->bd);
3751#else
Jingning Han3bce7542017-07-25 10:53:57 -07003752#if CONFIG_HIGHBITDEPTH
3753 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
Debargha Mukherjeeb02d2f32017-10-03 11:06:40 -07003754 tmp_dist =
3755 av1_highbd_block_error(coeff, dqcoeff, buffer_length, &tmp_sse, xd->bd);
Jingning Han3bce7542017-07-25 10:53:57 -07003756 else
3757#endif
Debargha Mukherjeeb02d2f32017-10-03 11:06:40 -07003758 tmp_dist = av1_block_error(coeff, dqcoeff, buffer_length, &tmp_sse);
Monty Montgomerya26262c2017-10-31 07:32:13 -04003759#endif
Debargha Mukherjeeb02d2f32017-10-03 11:06:40 -07003760
3761 tmp_dist = RIGHT_SIGNED_SHIFT(tmp_dist, shift);
Jingning Han3bce7542017-07-25 10:53:57 -07003762
Yushin Choa4817a62017-07-27 13:09:43 -07003763 if (
3764#if CONFIG_DIST_8X8
Yushin Choc00769a2017-09-14 14:44:30 -07003765 disable_early_skip ||
Yushin Choa4817a62017-07-27 13:09:43 -07003766#endif
3767 RDCOST(x->rdmult, 0, tmp_dist) < rd_stats->ref_rdcost) {
Jingning Han3bce7542017-07-25 10:53:57 -07003768 av1_optimize_b(cm, x, plane, blk_row, blk_col, block, plane_bsize, tx_size,
Debargha Mukherjee51666862017-10-24 14:29:13 -07003769 a, l, fast);
Jingning Han1a7f0a82017-07-27 09:48:05 -07003770 } else {
3771 rd_stats->rate += rd_stats->zero_rate;
Yushin Cho952eae22017-10-03 16:21:06 -07003772 rd_stats->dist += tmp << 4;
Jingning Han1a7f0a82017-07-27 09:48:05 -07003773 rd_stats->skip = 1;
3774 rd_stats->invalid_rate = 1;
3775 return;
Jingning Han3bce7542017-07-25 10:53:57 -07003776 }
3777#endif // DISABLE_TRELLISQ_SEARCH
3778
Angie Chiang41fffae2017-04-03 10:33:18 -07003779 const int eob = p->eobs[block];
Yaowu Xuc27fc142016-08-22 16:08:15 -07003780
Sebastien Alaiwan9f001f32017-11-28 16:32:33 +01003781 av1_inverse_transform_block(xd, dqcoeff, plane, tx_type, tx_size, rec_buffer,
3782 MAX_TX_SIZE, eob, cm->reduced_tx_set_used);
Angie Chiang41fffae2017-04-03 10:33:18 -07003783 if (eob > 0) {
Yushin Chob7b60c52017-07-14 16:18:52 -07003784#if CONFIG_DIST_8X8
Yushin Cho55104332017-08-14 16:15:43 -07003785 if (x->using_dist_8x8 && plane == 0 && (bw < 8 && bh < 8)) {
Yushin Cho04749122017-05-25 14:19:07 -07003786 // Save sub8x8 luma decoded pixels
3787 // since 8x8 luma decoded pixels are not available for daala-dist
3788 // after recursive split of BLOCK_8x8 is done.
3789 const int pred_stride = block_size_wide[plane_bsize];
3790 const int pred_idx = (blk_row * pred_stride + blk_col)
3791 << tx_size_wide_log2[0];
3792 int16_t *decoded = &pd->pred[pred_idx];
3793 int i, j;
3794
Yushin Cho8ab875d2017-06-23 14:47:21 -07003795#if CONFIG_HIGHBITDEPTH
3796 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
3797 for (j = 0; j < bh; j++)
3798 for (i = 0; i < bw; i++)
3799 decoded[j * pred_stride + i] =
3800 CONVERT_TO_SHORTPTR(rec_buffer)[j * MAX_TX_SIZE + i];
3801 } else {
3802#endif
3803 for (j = 0; j < bh; j++)
3804 for (i = 0; i < bw; i++)
3805 decoded[j * pred_stride + i] = rec_buffer[j * MAX_TX_SIZE + i];
3806#if CONFIG_HIGHBITDEPTH
3807 }
3808#endif // CONFIG_HIGHBITDEPTH
Yushin Cho04749122017-05-25 14:19:07 -07003809 }
Yushin Chob7b60c52017-07-14 16:18:52 -07003810#endif // CONFIG_DIST_8X8
Yushin Cho75b01002017-06-21 13:43:57 -07003811 tmp = pixel_dist(cpi, x, plane, src, src_stride, rec_buffer, MAX_TX_SIZE,
3812 blk_row, blk_col, plane_bsize, txm_bsize);
Yaowu Xuc27fc142016-08-22 16:08:15 -07003813 }
Alexander Bokovc5ddf062017-10-17 16:41:46 -07003814 cur_dist = tmp * 16;
Jingning Han7eab9ff2017-07-06 10:12:54 -07003815 txb_coeff_cost = av1_cost_coeffs(cpi, x, plane, blk_row, blk_col, block,
3816 tx_size, scan_order, a, l, 0);
Alexander Bokovc5ddf062017-10-17 16:41:46 -07003817 cur_rate = txb_coeff_cost;
3818 cur_skip = (eob == 0);
3819
3820 // Save RD results for possible reuse in future.
3821 if (rd_info_array != NULL) {
3822 rd_info_array[tx_type].valid = 1;
3823 rd_info_array[tx_type].entropy_context = cur_joint_ctx;
3824 rd_info_array[tx_type].fast = fast;
3825 rd_info_array[tx_type].dist = cur_dist;
3826 rd_info_array[tx_type].rate = cur_rate;
Hui Su8c2b9132017-12-09 10:40:15 -08003827 rd_info_array[tx_type].eob = eob;
Alexander Bokovc5ddf062017-10-17 16:41:46 -07003828 }
3829
3830 rd_stats->dist += cur_dist;
3831 rd_stats->rate += cur_rate;
3832 rd_stats->skip &= cur_skip;
Jingning Han63cbf342016-11-09 15:37:48 -08003833
Angie Chiangd81fdb42016-11-03 12:20:58 -07003834#if CONFIG_RD_DEBUG
Angie Chiange94556b2016-11-09 10:59:30 -08003835 av1_update_txb_coeff_cost(rd_stats, plane, tx_size, blk_row, blk_col,
3836 txb_coeff_cost);
Fergus Simpson4063a682017-02-28 16:52:22 -08003837#endif // CONFIG_RD_DEBUG
Yaowu Xuc27fc142016-08-22 16:08:15 -07003838}
3839
// Recursively searches the rate-distortion optimal transform partitioning for
// one transform block of an inter-coded partition. The block is evaluated two
// ways: (1) coded whole at the current tx_size, and (2) split into sub-blocks
// of size sub_tx_size_map[1][tx_size], each searched recursively at depth + 1.
// Whichever alternative has the lower RD cost is kept, and the entropy/TXFM
// contexts, inter_tx_size[][] map, eobs and blk_skip flags are updated to
// reflect that choice.
//
// On return *rd_stats holds the stats of the winning alternative;
// *is_cost_valid is cleared when no valid coding below ref_best_rd was found.
// tx_split_prune_flag != 0 suppresses the split branch entirely.
// rd_info_node, when non-NULL, caches per-tx_type RD results for reuse.
static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
                            int blk_col, int plane, int block, TX_SIZE tx_size,
                            int depth, BLOCK_SIZE plane_bsize,
                            ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl,
                            TXFM_CONTEXT *tx_above, TXFM_CONTEXT *tx_left,
                            RD_STATS *rd_stats, int64_t ref_best_rd,
                            int *is_cost_valid, int fast,
                            int tx_split_prune_flag,
                            TX_SIZE_RD_INFO_NODE *rd_info_node) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  const int tx_row = blk_row >> (1 - pd->subsampling_y);
  const int tx_col = blk_col >> (1 - pd->subsampling_x);
  // View into the mi-grid transform-size map, anchored at this block's
  // top-left mi unit, so inter_tx_size[idy][idx] addresses its sub-units.
  TX_SIZE(*const inter_tx_size)
  [MAX_MIB_SIZE] =
      (TX_SIZE(*)[MAX_MIB_SIZE]) & mbmi->inter_tx_size[tx_row][tx_col];
  const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
  const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
  const int bw = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
  int64_t this_rd = INT64_MAX;   // RD cost of coding the block whole.
  ENTROPY_CONTEXT *pta = ta + blk_col;
  ENTROPY_CONTEXT *ptl = tl + blk_row;
  int ctx = txfm_partition_context(tx_above + blk_col, tx_left + blk_row,
                                   mbmi->sb_type, tx_size);
  int64_t sum_rd = INT64_MAX;    // RD cost of the four-way split.
  int tmp_eob = 0;               // eob (or LV_MAP entropy ctx) of whole-block coding.
  int zero_blk_rate;             // Rate of signalling an all-zero block.
  RD_STATS sum_rd_stats;
#if CONFIG_TXK_SEL
  TX_TYPE best_tx_type = TX_TYPES;
  int txk_idx = (blk_row << MAX_MIB_SIZE_LOG2) + blk_col;
#endif

  av1_init_rd_stats(&sum_rd_stats);

  assert(tx_size < TX_SIZES_ALL);

  if (ref_best_rd < 0) {
    *is_cost_valid = 0;
    return;
  }

  av1_init_rd_stats(rd_stats);

  // Nothing to do for units entirely outside the visible frame.
  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;

  // Cost of coding this block as all-zero (skip), under the current contexts.
#if CONFIG_LV_MAP
  TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
  TXB_CTX txb_ctx;
  get_txb_ctx(plane_bsize, tx_size, plane, pta, ptl, &txb_ctx);

  zero_blk_rate = x->coeff_costs[txs_ctx][get_plane_type(plane)]
                      .txb_skip_cost[txb_ctx.txb_skip_ctx][1];
#else
  TX_SIZE tx_size_ctx = get_txsize_entropy_ctx(tx_size);
  int coeff_ctx = get_entropy_context(tx_size, pta, ptl);
  zero_blk_rate =
      x->token_head_costs[tx_size_ctx][pd->plane_type][1][0][coeff_ctx][0];
#endif

  rd_stats->ref_rdcost = ref_best_rd;
  rd_stats->zero_rate = zero_blk_rate;
  // Alternative 1: code the block whole at tx_size (always tried when the
  // frame allows per-block tx selection, or at the smallest size).
  if (cpi->common.tx_mode == TX_MODE_SELECT || tx_size == TX_4X4) {
    inter_tx_size[0][0] = tx_size;
    av1_tx_block_rd_b(
        cpi, x, tx_size, blk_row, blk_col, plane, block, plane_bsize, pta, ptl,
        rd_stats, fast,
        rd_info_node != NULL ? rd_info_node->rd_info_array : NULL);
    if (rd_stats->rate == INT_MAX) return;

    // Force the block to all-zero when that is no worse in RD terms
    // (not in lossless mode, where coefficients must be preserved).
    if ((RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) >=
             RDCOST(x->rdmult, zero_blk_rate, rd_stats->sse) ||
         rd_stats->skip == 1) &&
        !xd->lossless[mbmi->segment_id]) {
#if CONFIG_RD_DEBUG
      av1_update_txb_coeff_cost(rd_stats, plane, tx_size, blk_row, blk_col,
                                zero_blk_rate - rd_stats->rate);
#endif  // CONFIG_RD_DEBUG
      rd_stats->rate = zero_blk_rate;
      rd_stats->dist = rd_stats->sse;
      rd_stats->skip = 1;
      x->blk_skip[plane][blk_row * bw + blk_col] = 1;
      p->eobs[block] = 0;
#if CONFIG_TXK_SEL
      mbmi->txk_type[txk_idx] = DCT_DCT;
#endif
    } else {
      x->blk_skip[plane][blk_row * bw + blk_col] = 0;
      rd_stats->skip = 0;
    }

    // Add the cost of signalling "no split" where a split was possible.
    if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH)
      rd_stats->rate += x->txfm_partition_cost[ctx][0];
    this_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
    // Remember the whole-block coding's eob/entropy state; it must be
    // restored if "no split" wins after the recursion clobbers p->eobs.
#if CONFIG_LV_MAP
    tmp_eob = p->txb_entropy_ctx[block];
#else
    tmp_eob = p->eobs[block];
#endif

#if CONFIG_TXK_SEL
    best_tx_type = mbmi->txk_type[txk_idx];
#endif
  }

  // Alternative 2: split into sub-transforms and recurse, unless pruned.
  if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH && tx_split_prune_flag == 0) {
    const TX_SIZE sub_txs = sub_tx_size_map[1][tx_size];
    const int bsw = tx_size_wide_unit[sub_txs];
    const int bsh = tx_size_high_unit[sub_txs];
    int sub_step = bsw * bsh;  // Coefficient-block index stride per sub-block.
    RD_STATS this_rd_stats;
    int this_cost_valid = 1;
    int64_t tmp_rd = 0;
#if CONFIG_DIST_8X8
    int sub8x8_eob[4] = { 0, 0, 0, 0 };
#endif
    // Cost of signalling "split" in this partition context.
    sum_rd_stats.rate = x->txfm_partition_cost[ctx][1];

    assert(tx_size < TX_SIZES_ALL);

    // Tighten the budget: a split is only useful if it beats the whole block.
    ref_best_rd = AOMMIN(this_rd, ref_best_rd);

    int blk_idx = 0;
    for (int r = 0; r < tx_size_high_unit[tx_size]; r += bsh) {
      for (int c = 0; c < tx_size_wide_unit[tx_size]; c += bsw, ++blk_idx) {
        const int offsetr = blk_row + r;
        const int offsetc = blk_col + c;
        if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
        assert(blk_idx < 4);
        select_tx_block(
            cpi, x, offsetr, offsetc, plane, block, sub_txs, depth + 1,
            plane_bsize, ta, tl, tx_above, tx_left, &this_rd_stats,
            ref_best_rd - tmp_rd, &this_cost_valid, fast, 0,
            (rd_info_node != NULL) ? rd_info_node->children[blk_idx] : NULL);

        // With dist-8x8 tuning, all four sub-blocks must be coded so the
        // 8x8 distortion below can be measured; otherwise bail out early.
#if CONFIG_DIST_8X8
        if (!x->using_dist_8x8)
#endif
          if (!this_cost_valid) break;
#if CONFIG_DIST_8X8
        if (x->using_dist_8x8 && plane == 0 && tx_size == TX_8X8) {
          sub8x8_eob[2 * (r / bsh) + (c / bsw)] = p->eobs[block];
        }
#endif  // CONFIG_DIST_8X8
        av1_merge_rd_stats(&sum_rd_stats, &this_rd_stats);

        tmp_rd = RDCOST(x->rdmult, sum_rd_stats.rate, sum_rd_stats.dist);
#if CONFIG_DIST_8X8
        if (!x->using_dist_8x8)
#endif
          // Early out once the running split cost already exceeds "no split".
          if (this_rd < tmp_rd) break;
        block += sub_step;
      }
    }
#if CONFIG_DIST_8X8
    // Recompute the split's SSE/distortion over the full 8x8 region with the
    // perceptual dist-8x8 metric, replacing the per-sub-block sums.
    if (x->using_dist_8x8 && this_cost_valid && plane == 0 &&
        tx_size == TX_8X8) {
      const int src_stride = p->src.stride;
      const int dst_stride = pd->dst.stride;

      const uint8_t *src =
          &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
      const uint8_t *dst =
          &pd->dst
               .buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];

      int64_t dist_8x8;
      int qindex = x->qindex;
      const int pred_stride = block_size_wide[plane_bsize];
      const int pred_idx = (blk_row * pred_stride + blk_col)
                           << tx_size_wide_log2[0];
      int16_t *pred = &pd->pred[pred_idx];
      int i, j;
      int row, col;

#if CONFIG_HIGHBITDEPTH
      uint8_t *pred8;
      DECLARE_ALIGNED(16, uint16_t, pred8_16[8 * 8]);
#else
      DECLARE_ALIGNED(16, uint8_t, pred8[8 * 8]);
#endif  // CONFIG_HIGHBITDEPTH

      dist_8x8 = av1_dist_8x8(cpi, x, src, src_stride, dst, dst_stride,
                              BLOCK_8X8, 8, 8, 8, 8, qindex) *
                 16;
      // For PSNR tuning at 8-bit the metric must degenerate to plain SSE.
      if (x->tune_metric == AOM_TUNE_PSNR && xd->bd == 8)
        assert(sum_rd_stats.sse == dist_8x8);
      sum_rd_stats.sse = dist_8x8;

#if CONFIG_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
        pred8 = CONVERT_TO_BYTEPTR(pred8_16);
      else
        pred8 = (uint8_t *)pred8_16;
#endif

      // Assemble the 8x8 reconstruction: sub-blocks with coefficients use the
      // saved prediction buffer, all-zero sub-blocks use the dst (recon).
#if CONFIG_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        for (row = 0; row < 2; ++row) {
          for (col = 0; col < 2; ++col) {
            int idx = row * 2 + col;
            int eob = sub8x8_eob[idx];

            if (eob > 0) {
              for (j = 0; j < 4; j++)
                for (i = 0; i < 4; i++)
                  CONVERT_TO_SHORTPTR(pred8)
                  [(row * 4 + j) * 8 + 4 * col + i] =
                      pred[(row * 4 + j) * pred_stride + 4 * col + i];
            } else {
              for (j = 0; j < 4; j++)
                for (i = 0; i < 4; i++)
                  CONVERT_TO_SHORTPTR(pred8)
                  [(row * 4 + j) * 8 + 4 * col + i] = CONVERT_TO_SHORTPTR(
                      dst)[(row * 4 + j) * dst_stride + 4 * col + i];
            }
          }
        }
      } else {
#endif
        for (row = 0; row < 2; ++row) {
          for (col = 0; col < 2; ++col) {
            int idx = row * 2 + col;
            int eob = sub8x8_eob[idx];

            if (eob > 0) {
              for (j = 0; j < 4; j++)
                for (i = 0; i < 4; i++)
                  pred8[(row * 4 + j) * 8 + 4 * col + i] =
                      (uint8_t)pred[(row * 4 + j) * pred_stride + 4 * col + i];
            } else {
              for (j = 0; j < 4; j++)
                for (i = 0; i < 4; i++)
                  pred8[(row * 4 + j) * 8 + 4 * col + i] =
                      dst[(row * 4 + j) * dst_stride + 4 * col + i];
            }
          }
        }
#if CONFIG_HIGHBITDEPTH
      }
#endif  // CONFIG_HIGHBITDEPTH
      dist_8x8 = av1_dist_8x8(cpi, x, src, src_stride, pred8, 8, BLOCK_8X8, 8,
                              8, 8, 8, qindex) *
                 16;
      if (x->tune_metric == AOM_TUNE_PSNR && xd->bd == 8)
        assert(sum_rd_stats.dist == dist_8x8);
      sum_rd_stats.dist = dist_8x8;
      tmp_rd = RDCOST(x->rdmult, sum_rd_stats.rate, sum_rd_stats.dist);
    }
#endif  // CONFIG_DIST_8X8
    if (this_cost_valid) sum_rd = tmp_rd;
  }

  // Commit whichever alternative won.
  if (this_rd < sum_rd) {
    // "No split" wins: restore the whole-block state that the recursion may
    // have overwritten and stamp tx_size over all covered mi units.
    int idx, idy;
    TX_SIZE tx_size_selected = tx_size;

#if CONFIG_LV_MAP
    p->txb_entropy_ctx[block] = tmp_eob;
#else
    p->eobs[block] = tmp_eob;
#endif

    av1_set_txb_context(x, plane, block, tx_size_selected, pta, ptl);

    txfm_partition_update(tx_above + blk_col, tx_left + blk_row, tx_size,
                          tx_size);
    inter_tx_size[0][0] = tx_size_selected;
    for (idy = 0; idy < AOMMAX(1, tx_size_high_unit[tx_size] / 2); ++idy)
      for (idx = 0; idx < AOMMAX(1, tx_size_wide_unit[tx_size] / 2); ++idx)
        inter_tx_size[idy][idx] = tx_size_selected;
    mbmi->tx_size = tx_size_selected;
#if CONFIG_TXK_SEL
    mbmi->txk_type[txk_idx] = best_tx_type;
#endif
    if (this_rd == INT64_MAX) *is_cost_valid = 0;
    x->blk_skip[plane][blk_row * bw + blk_col] = rd_stats->skip;
  } else {
    // "Split" wins: the recursive calls already updated contexts and maps;
    // just report the accumulated stats.
    *rd_stats = sum_rd_stats;
    if (sum_rd == INT64_MAX) *is_cost_valid = 0;
  }
}
4124
Debargha Mukherjeeedc73462017-10-31 15:13:32 -07004125static int get_search_init_depth(int mi_width, int mi_height,
4126 const SPEED_FEATURES *sf) {
4127 if (sf->tx_size_search_method == USE_LARGESTALL) return MAX_VARTX_DEPTH;
4128 return (mi_height != mi_width) ? sf->tx_size_search_init_depth_rect
4129 : sf->tx_size_search_init_depth_sqr;
4130}
4131
// Searches the transform-size partitioning for the luma plane of an inter
// block. The plane is tiled with the largest admissible rectangular transform
// and select_tx_block() recursively refines each tile. Accumulated stats are
// returned in *rd_stats; when no coding under ref_best_rd is found, *rd_stats
// is set invalid. rd_info_tree (optional) supplies one cached RD-info node
// per max-size tile, advanced in scan order.
static void select_inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
                                   RD_STATS *rd_stats, BLOCK_SIZE bsize,
                                   int64_t ref_best_rd, int fast,
                                   int tx_split_prune_flag,
                                   TX_SIZE_RD_INFO_NODE *rd_info_tree) {
  MACROBLOCKD *const xd = &x->e_mbd;
  int is_cost_valid = 1;
  int64_t this_rd = 0;

  if (ref_best_rd < 0) is_cost_valid = 0;

  av1_init_rd_stats(rd_stats);

  if (is_cost_valid) {
    const struct macroblockd_plane *const pd = &xd->plane[0];
    const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
    const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
    const int mi_height = block_size_high[plane_bsize] >> tx_size_high_log2[0];
    const TX_SIZE max_tx_size = get_max_rect_tx_size(plane_bsize, 1);
    const int bh = tx_size_high_unit[max_tx_size];
    const int bw = tx_size_wide_unit[max_tx_size];
    int idx, idy;
    int block = 0;
    int step = tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size];
    // Local copies of the entropy and transform-partition contexts: the
    // recursive search mutates them as it commits decisions tile by tile.
    ENTROPY_CONTEXT ctxa[2 * MAX_MIB_SIZE];
    ENTROPY_CONTEXT ctxl[2 * MAX_MIB_SIZE];
    TXFM_CONTEXT tx_above[MAX_MIB_SIZE * 2];
    TXFM_CONTEXT tx_left[MAX_MIB_SIZE * 2];

    RD_STATS pn_rd_stats;
    const int init_depth = get_search_init_depth(mi_width, mi_height, &cpi->sf);
    av1_init_rd_stats(&pn_rd_stats);

    av1_get_entropy_contexts(bsize, 0, pd, ctxa, ctxl);
    memcpy(tx_above, xd->above_txfm_context, sizeof(TXFM_CONTEXT) * mi_width);
    memcpy(tx_left, xd->left_txfm_context, sizeof(TXFM_CONTEXT) * mi_height);

    // Visit each max-tx-size tile in raster order.
    for (idy = 0; idy < mi_height; idy += bh) {
      for (idx = 0; idx < mi_width; idx += bw) {
        select_tx_block(cpi, x, idy, idx, 0, block, max_tx_size, init_depth,
                        plane_bsize, ctxa, ctxl, tx_above, tx_left,
                        &pn_rd_stats, ref_best_rd - this_rd, &is_cost_valid,
                        fast, tx_split_prune_flag, rd_info_tree);
        if (!is_cost_valid || pn_rd_stats.rate == INT_MAX) {
          av1_invalid_rd_stats(rd_stats);
          return;
        }
        av1_merge_rd_stats(rd_stats, &pn_rd_stats);
        // Budget consumed so far: per tile, the cheaper of coding it as-is
        // and coding it as all-zero.
        this_rd +=
            AOMMIN(RDCOST(x->rdmult, pn_rd_stats.rate, pn_rd_stats.dist),
                   RDCOST(x->rdmult, pn_rd_stats.zero_rate, pn_rd_stats.sse));
        block += step;
        if (rd_info_tree != NULL) rd_info_tree += 1;
      }
    }
  }
  // If signalling the whole plane as all-zero beats the searched result,
  // prefer the all-zero (skip) outcome.
  int64_t zero_rd = RDCOST(x->rdmult, rd_stats->zero_rate, rd_stats->sse);
  this_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
  if (zero_rd < this_rd) {
    this_rd = zero_rd;
    rd_stats->rate = rd_stats->zero_rate;
    rd_stats->dist = rd_stats->sse;
    rd_stats->skip = 1;
  }
  if (this_rd > ref_best_rd) is_cost_valid = 0;

  if (!is_cost_valid) {
    // reset cost value
    av1_invalid_rd_stats(rd_stats);
  }
}
4203
// Computes the luma RD cost of an inter block for a fixed transform type
// tx_type, searching the transform-size partitioning. Updates mbmi->tx_type,
// mbmi->inter_tx_size[][] and mbmi->min_tx_size as side effects, fills
// *rd_stats, and returns the final RD cost (INT64_MAX on failure). The cost
// includes the skip-flag rate and, without TXK_SEL, the per-block tx-type
// signalling rate.
static int64_t select_tx_size_fix_type(const AV1_COMP *cpi, MACROBLOCK *x,
                                       RD_STATS *rd_stats, BLOCK_SIZE bsize,
                                       int mi_row, int mi_col,
                                       int64_t ref_best_rd, TX_TYPE tx_type,
                                       int tx_split_prune_flag,
                                       TX_SIZE_RD_INFO_NODE *rd_info_tree) {
  // Fast mode: the partition search runs simplified, followed by a full
  // re-evaluation pass below once the partitioning is fixed.
  const int fast = cpi->sf.tx_size_search_method > USE_FULL_RD;
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const int is_inter = is_inter_block(mbmi);
  const int skip_ctx = av1_get_skip_context(xd);
  int s0 = x->skip_cost[skip_ctx][0];  // Rate of signalling skip = 0.
  int s1 = x->skip_cost[skip_ctx][1];  // Rate of signalling skip = 1.
  int64_t rd;
  int row, col;
  const int max_blocks_high = max_block_high(xd, bsize, 0);
  const int max_blocks_wide = max_block_wide(xd, bsize, 0);

  // TODO(debargha): enable this as a speed feature where the
  // select_inter_block_yrd() function above will use a simplified search
  // such as not using full optimize, but the inter_block_yrd() function
  // will use more complex search given that the transform partitions have
  // already been decided.

  (void)cm;
  (void)mi_row;
  (void)mi_col;

  mbmi->tx_type = tx_type;
  select_inter_block_yrd(cpi, x, rd_stats, bsize, ref_best_rd, fast,
                         tx_split_prune_flag, rd_info_tree);
  if (rd_stats->rate == INT_MAX) return INT64_MAX;

  // Track the smallest transform size chosen anywhere in the block; it
  // determines the ext-tx set used for tx-type signalling below.
  mbmi->min_tx_size = get_min_tx_size(mbmi->inter_tx_size[0][0]);
  for (row = 0; row < max_blocks_high / 2; ++row)
    for (col = 0; col < max_blocks_wide / 2; ++col)
      mbmi->min_tx_size = AOMMIN(
          mbmi->min_tx_size, get_min_tx_size(mbmi->inter_tx_size[row][col]));

  if (fast) {
    // Do a better (non-fast) search with tx sizes already decided.
    // Currently, trellis optimization is turned on only for this pass, and
    // the function below performs a more accurate rd cost calculation based
    // on that.
    if (!inter_block_yrd(cpi, x, rd_stats, bsize, ref_best_rd, 0))
      return INT64_MAX;
  }

#if !CONFIG_TXK_SEL
  // Add the rate of signalling tx_type when more than one type is allowed
  // for this size/mode (skipped in lossless, where only one type applies).
  if (get_ext_tx_types(mbmi->min_tx_size, bsize, is_inter,
                       cm->reduced_tx_set_used) > 1 &&
      !xd->lossless[xd->mi[0]->mbmi.segment_id]) {
    const int ext_tx_set = get_ext_tx_set(mbmi->min_tx_size, bsize, is_inter,
                                          cm->reduced_tx_set_used);
    if (ext_tx_set > 0)
      rd_stats->rate +=
          x->inter_tx_type_costs[ext_tx_set][txsize_sqr_map[mbmi->min_tx_size]]
                                [mbmi->tx_type];
  }
#endif  // CONFIG_TXK_SEL

  // Fold in the skip-flag cost; skipped blocks pay only s1 + SSE distortion.
  if (rd_stats->skip)
    rd = RDCOST(x->rdmult, s1, rd_stats->sse);
  else
    rd = RDCOST(x->rdmult, rd_stats->rate + s0, rd_stats->dist);

  // Non-skip inter blocks may still be cheaper if forced to skip.
  if (is_inter && !xd->lossless[xd->mi[0]->mbmi.segment_id] &&
      !(rd_stats->skip))
    rd = AOMMIN(rd, RDCOST(x->rdmult, s1, rd_stats->sse));

  return rd;
}
4277
// Finds rd cost for a y block, given the transform size partitions
// Recursively descends the TX-size partition tree rooted at
// (blk_row, blk_col): when tx_size equals the size selected for this
// position (a leaf), the rate/distortion of coding the block with one
// transform is computed; otherwise the cost is accumulated over the four
// sub-transforms one level down. Results are returned in *rd_stats.
// `above_ctx`/`left_ctx` are coefficient entropy contexts and
// `tx_above`/`tx_left` are TX partition contexts; both are updated in the
// leaf case so subsequent blocks see correct contexts.
static void tx_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
                         int blk_col, int plane, int block, TX_SIZE tx_size,
                         BLOCK_SIZE plane_bsize, int depth,
                         ENTROPY_CONTEXT *above_ctx, ENTROPY_CONTEXT *left_ctx,
                         TXFM_CONTEXT *tx_above, TXFM_CONTEXT *tx_left,
                         int64_t ref_best_rd, RD_STATS *rd_stats, int fast) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  struct macroblockd_plane *const pd = &xd->plane[plane];
  BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
  // Position in the inter_tx_size grid, adjusted for chroma subsampling.
  const int tx_row = blk_row >> (1 - pd->subsampling_y);
  const int tx_col = blk_col >> (1 - pd->subsampling_x);
  TX_SIZE plane_tx_size;
  const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
  const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);

  assert(tx_size < TX_SIZES_ALL);

  // Blocks that fall outside the visible frame contribute nothing.
  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;

  // The TX size chosen for this position; chroma derives its size from the
  // co-located luma choice via a lookup.
  plane_tx_size =
      plane ? uv_txsize_lookup[bsize][mbmi->inter_tx_size[tx_row][tx_col]][0][0]
            : mbmi->inter_tx_size[tx_row][tx_col];

  int ctx = txfm_partition_context(tx_above + blk_col, tx_left + blk_row,
                                   mbmi->sb_type, tx_size);

  av1_init_rd_stats(rd_stats);
  // Leaf case: this position is coded with a single transform of `tx_size`.
  // When DISABLE_VARTX_FOR_CHROMA is set, chroma planes are always leaves.
  if (tx_size == plane_tx_size
#if DISABLE_VARTX_FOR_CHROMA
      || pd->subsampling_x || pd->subsampling_y
#endif  // DISABLE_VARTX_FOR_CHROMA
      ) {
    ENTROPY_CONTEXT *ta = above_ctx + blk_col;
    ENTROPY_CONTEXT *tl = left_ctx + blk_row;
#if CONFIG_LV_MAP
    const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size);
    TXB_CTX txb_ctx;
    get_txb_ctx(plane_bsize, tx_size, plane, ta, tl, &txb_ctx);

    // Rate of signalling this transform block as all-zero (skipped).
    const int zero_blk_rate = x->coeff_costs[txs_ctx][get_plane_type(plane)]
                                  .txb_skip_cost[txb_ctx.txb_skip_ctx][1];
#else
    const int coeff_ctx = get_entropy_context(tx_size, ta, tl);
    const TX_SIZE tx_size_ctx = get_txsize_entropy_ctx(tx_size);
    // Rate of signalling this transform block as all-zero (skipped).
    const int zero_blk_rate =
        x->token_head_costs[tx_size_ctx][pd->plane_type][1][0][coeff_ctx][0];
#endif  // CONFIG_LV_MAP
    rd_stats->zero_rate = zero_blk_rate;
    rd_stats->ref_rdcost = ref_best_rd;
    av1_tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
                      plane_bsize, ta, tl, rd_stats, fast, NULL);
    const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
    // If coding the coefficients costs at least as much as signalling an
    // all-zero block, force the block to skip (zero residual).
    if (RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) >=
            RDCOST(x->rdmult, zero_blk_rate, rd_stats->sse) ||
        rd_stats->skip == 1) {
      rd_stats->rate = zero_blk_rate;
      rd_stats->dist = rd_stats->sse;
      rd_stats->skip = 1;
      x->blk_skip[plane][blk_row * mi_width + blk_col] = 1;
      x->plane[plane].eobs[block] = 0;
#if CONFIG_LV_MAP
      x->plane[plane].txb_entropy_ctx[block] = 0;
#endif  // CONFIG_LV_MAP
    } else {
      rd_stats->skip = 0;
      x->blk_skip[plane][blk_row * mi_width + blk_col] = 0;
    }
    // Cost of signalling "no further split" at this tree depth.
    if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH)
      rd_stats->rate += x->txfm_partition_cost[ctx][0];
    av1_set_txb_context(x, plane, block, tx_size, ta, tl);
    txfm_partition_update(tx_above + blk_col, tx_left + blk_row, tx_size,
                          tx_size);
  } else {
    // Split case: recurse into the four sub-transforms one level down and
    // accumulate their costs.
    const TX_SIZE sub_txs = sub_tx_size_map[1][tx_size];
    const int bsw = tx_size_wide_unit[sub_txs];
    const int bsh = tx_size_high_unit[sub_txs];
    const int step = bsh * bsw;
    RD_STATS pn_rd_stats;
    int64_t this_rd = 0;
    assert(bsw > 0 && bsh > 0);

    for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) {
      for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) {
        const int offsetr = blk_row + row;
        const int offsetc = blk_col + col;

        if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;

        av1_init_rd_stats(&pn_rd_stats);
        // Tighten the RD budget by the cost already accumulated.
        tx_block_yrd(cpi, x, offsetr, offsetc, plane, block, sub_txs,
                     plane_bsize, depth + 1, above_ctx, left_ctx, tx_above,
                     tx_left, ref_best_rd - this_rd, &pn_rd_stats, fast);
        if (pn_rd_stats.rate == INT_MAX) {
          av1_invalid_rd_stats(rd_stats);
          return;
        }
        av1_merge_rd_stats(rd_stats, &pn_rd_stats);
        this_rd += RDCOST(x->rdmult, pn_rd_stats.rate, pn_rd_stats.dist);
        block += step;
      }
    }

    // Cost of signalling the split decision at this tree depth.
    if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH)
      rd_stats->rate += x->txfm_partition_cost[ctx][1];
  }
}
4386
// Return value 0: early termination triggered, no valid rd cost available;
//              1: rd cost values are valid.
// Computes the luma rate/distortion of an inter block given the TX size
// partitioning already recorded in the mode info, by walking the block in
// max-TX-size steps and invoking tx_block_yrd() on each. After accumulation,
// coding the whole block as zero residual is tried and kept if cheaper.
// `fast` is forwarded to tx_block_yrd() — presumably selects a faster,
// less exact cost computation path; confirm against av1_tx_block_rd_b.
int inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_stats,
                    BLOCK_SIZE bsize, int64_t ref_best_rd, int fast) {
  MACROBLOCKD *const xd = &x->e_mbd;
  int is_cost_valid = 1;
  int64_t this_rd = 0;

  if (ref_best_rd < 0) is_cost_valid = 0;

  av1_init_rd_stats(rd_stats);

  if (is_cost_valid) {
    const struct macroblockd_plane *const pd = &xd->plane[0];
    const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
    const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
    const int mi_height = block_size_high[plane_bsize] >> tx_size_high_log2[0];
    const TX_SIZE max_tx_size = get_vartx_max_txsize(
        xd, plane_bsize, pd->subsampling_x || pd->subsampling_y);
    const int bh = tx_size_high_unit[max_tx_size];
    const int bw = tx_size_wide_unit[max_tx_size];
    const int init_depth = get_search_init_depth(mi_width, mi_height, &cpi->sf);
    int idx, idy;
    int block = 0;
    int step = tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size];
    // Local copies of entropy / TX partition contexts; tx_block_yrd mutates
    // them as it walks the block.
    ENTROPY_CONTEXT ctxa[2 * MAX_MIB_SIZE];
    ENTROPY_CONTEXT ctxl[2 * MAX_MIB_SIZE];
    TXFM_CONTEXT tx_above[MAX_MIB_SIZE * 2];
    TXFM_CONTEXT tx_left[MAX_MIB_SIZE * 2];
    RD_STATS pn_rd_stats;

    av1_get_entropy_contexts(bsize, 0, pd, ctxa, ctxl);
    memcpy(tx_above, xd->above_txfm_context, sizeof(TXFM_CONTEXT) * mi_width);
    memcpy(tx_left, xd->left_txfm_context, sizeof(TXFM_CONTEXT) * mi_height);

    // Evaluate each max-TX-size unit; abort as soon as one reports an
    // invalid (INT_MAX) rate.
    for (idy = 0; idy < mi_height; idy += bh) {
      for (idx = 0; idx < mi_width; idx += bw) {
        av1_init_rd_stats(&pn_rd_stats);
        tx_block_yrd(cpi, x, idy, idx, 0, block, max_tx_size, plane_bsize,
                     init_depth, ctxa, ctxl, tx_above, tx_left,
                     ref_best_rd - this_rd, &pn_rd_stats, fast);
        if (pn_rd_stats.rate == INT_MAX) {
          av1_invalid_rd_stats(rd_stats);
          return 0;
        }
        av1_merge_rd_stats(rd_stats, &pn_rd_stats);
        // Budget tracking uses the cheaper of coding the unit normally or
        // as all-zero, so the early-exit threshold is not overly tight.
        this_rd +=
            AOMMIN(RDCOST(x->rdmult, pn_rd_stats.rate, pn_rd_stats.dist),
                   RDCOST(x->rdmult, pn_rd_stats.zero_rate, pn_rd_stats.sse));
        block += step;
      }
    }
  }
  // If signalling the whole block as zero residual is cheaper than the
  // accumulated coefficient cost, take the skip decision.
  int64_t zero_rd = RDCOST(x->rdmult, rd_stats->zero_rate, rd_stats->sse);
  this_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
  if (zero_rd < this_rd) {
    this_rd = zero_rd;
    rd_stats->rate = rd_stats->zero_rate;
    rd_stats->dist = rd_stats->sse;
    rd_stats->skip = 1;
  }
  if (this_rd > ref_best_rd) is_cost_valid = 0;

  if (!is_cost_valid) {
    // reset cost value
    av1_invalid_rd_stats(rd_stats);
  }
  return is_cost_valid;
}
4456
Hui Su1ddf2312017-08-19 15:21:34 -07004457static uint32_t get_block_residue_hash(MACROBLOCK *x, BLOCK_SIZE bsize) {
4458 const int rows = block_size_high[bsize];
4459 const int cols = block_size_wide[bsize];
Hui Su1ddf2312017-08-19 15:21:34 -07004460 const struct macroblock_plane *const p = &x->plane[0];
4461 const int16_t *diff = &p->src_diff[0];
Debargha Mukherjeefd65c8d2017-11-07 15:45:55 -08004462 uint16_t hash_data[MAX_SB_SQUARE];
4463 memcpy(hash_data, diff, sizeof(*hash_data) * rows * cols);
4464 return (av1_get_crc_value(&x->tx_rd_record.crc_calculator,
4465 (uint8_t *)hash_data, 2 * rows * cols)
Hui Su1ddf2312017-08-19 15:21:34 -07004466 << 7) +
4467 bsize;
4468}
4469
4470static void save_tx_rd_info(int n4, uint32_t hash, const MACROBLOCK *const x,
4471 const RD_STATS *const rd_stats,
Hui Su89ef4932017-11-28 10:54:31 -08004472 TX_RD_RECORD *tx_rd_record) {
4473 int index;
4474 if (tx_rd_record->num < RD_RECORD_BUFFER_LEN) {
4475 index =
4476 (tx_rd_record->index_start + tx_rd_record->num) % RD_RECORD_BUFFER_LEN;
4477 ++tx_rd_record->num;
4478 } else {
4479 index = tx_rd_record->index_start;
4480 tx_rd_record->index_start =
4481 (tx_rd_record->index_start + 1) % RD_RECORD_BUFFER_LEN;
4482 }
4483 TX_RD_INFO *const tx_rd_info = &tx_rd_record->tx_rd_info[index];
Hui Su1ddf2312017-08-19 15:21:34 -07004484 const MACROBLOCKD *const xd = &x->e_mbd;
4485 const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
4486 tx_rd_info->hash_value = hash;
4487 tx_rd_info->tx_type = mbmi->tx_type;
4488 tx_rd_info->tx_size = mbmi->tx_size;
Hui Su1ddf2312017-08-19 15:21:34 -07004489 tx_rd_info->min_tx_size = mbmi->min_tx_size;
4490 memcpy(tx_rd_info->blk_skip, x->blk_skip[0],
4491 sizeof(tx_rd_info->blk_skip[0]) * n4);
4492 for (int idy = 0; idy < xd->n8_h; ++idy)
4493 for (int idx = 0; idx < xd->n8_w; ++idx)
4494 tx_rd_info->inter_tx_size[idy][idx] = mbmi->inter_tx_size[idy][idx];
Hui Su1ddf2312017-08-19 15:21:34 -07004495#if CONFIG_TXK_SEL
4496 av1_copy(tx_rd_info->txk_type, mbmi->txk_type);
4497#endif // CONFIG_TXK_SEL
4498 tx_rd_info->rd_stats = *rd_stats;
4499}
4500
4501static void fetch_tx_rd_info(int n4, const TX_RD_INFO *const tx_rd_info,
4502 RD_STATS *const rd_stats, MACROBLOCK *const x) {
4503 MACROBLOCKD *const xd = &x->e_mbd;
4504 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
4505 mbmi->tx_type = tx_rd_info->tx_type;
4506 mbmi->tx_size = tx_rd_info->tx_size;
Hui Su1ddf2312017-08-19 15:21:34 -07004507 mbmi->min_tx_size = tx_rd_info->min_tx_size;
4508 memcpy(x->blk_skip[0], tx_rd_info->blk_skip,
4509 sizeof(tx_rd_info->blk_skip[0]) * n4);
4510 for (int idy = 0; idy < xd->n8_h; ++idy)
4511 for (int idx = 0; idx < xd->n8_w; ++idx)
4512 mbmi->inter_tx_size[idy][idx] = tx_rd_info->inter_tx_size[idy][idx];
Hui Su1ddf2312017-08-19 15:21:34 -07004513#if CONFIG_TXK_SEL
4514 av1_copy(mbmi->txk_type, tx_rd_info->txk_type);
4515#endif // CONFIG_TXK_SEL
4516 *rd_stats = tx_rd_info->rd_stats;
4517}
4518
Alexander Bokovc5ddf062017-10-17 16:41:46 -07004519static int find_tx_size_rd_info(TX_SIZE_RD_RECORD *cur_record,
4520 const uint32_t hash) {
4521 // Linear search through the circular buffer to find matching hash.
4522 int index;
4523 for (int i = cur_record->num - 1; i >= 0; i--) {
4524 index = (cur_record->index_start + i) % TX_SIZE_RD_RECORD_BUFFER_LEN;
4525 if (cur_record->hash_vals[index] == hash) return index;
4526 }
4527
4528 // If not found - add new RD info into the buffer and return its index
4529 if (cur_record->num < TX_SIZE_RD_RECORD_BUFFER_LEN) {
4530 index = (cur_record->index_start + cur_record->num) %
4531 TX_SIZE_RD_RECORD_BUFFER_LEN;
4532 cur_record->num++;
4533 } else {
4534 index = cur_record->index_start;
4535 cur_record->index_start =
4536 (cur_record->index_start + 1) % TX_SIZE_RD_RECORD_BUFFER_LEN;
4537 }
4538
4539 cur_record->hash_vals[index] = hash;
4540 av1_zero(cur_record->tx_rd_info[index]);
4541 return index;
4542}
4543
// Go through all TX blocks that could be used in TX size search, compute
// residual hash values for them and find matching RD info that stores previous
// RD search results for these TX blocks. The idea is to prevent repeated
// rate/distortion computations that happen because of the combination of
// partition and TX size search. The resulting RD info records are returned in
// the form of a quadtree for easier access in actual TX size search.
// Returns 0 when hashing is not applicable (non-square block, or max TX size
// below 8x8); returns 1 when dst_rd_info has been populated.
static int find_tx_size_rd_records(MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row,
                                   int mi_col,
                                   TX_SIZE_RD_INFO_NODE *dst_rd_info) {
  // Per-TX-size record tables, indexed by (tx_size - TX_8X8).
#if CONFIG_TX64X64
  TX_SIZE_RD_RECORD *rd_records_table[4] = { x->tx_size_rd_record_8X8,
                                             x->tx_size_rd_record_16X16,
                                             x->tx_size_rd_record_32X32,
                                             x->tx_size_rd_record_64X64 };
#else
  TX_SIZE_RD_RECORD *rd_records_table[3] = { x->tx_size_rd_record_8X8,
                                             x->tx_size_rd_record_16X16,
                                             x->tx_size_rd_record_32X32 };
#endif
  const TX_SIZE max_square_tx_size = max_txsize_lookup[bsize];
  const int bw = block_size_wide[bsize];
  const int bh = block_size_high[bsize];

  // Hashing is performed only for square TX sizes larger than TX_4X4
  if (max_square_tx_size < TX_8X8 || bw != bh) return 0;

  const int bw_mi = mi_size_wide[bsize];
  const int diff_stride = bw;
  const struct macroblock_plane *const p = &x->plane[0];
  const int16_t *diff = &p->src_diff[0];

  // Coordinates of the top-left corner of current block within the superblock
  // measured in pixels:
  const int mi_row_in_sb = (mi_row % MAX_MIB_SIZE) << MI_SIZE_LOG2;
  const int mi_col_in_sb = (mi_col % MAX_MIB_SIZE) << MI_SIZE_LOG2;
  int cur_rd_info_idx = 0;
  int cur_tx_depth = 0;
  // Maps each mi unit to the dst_rd_info index of its parent node, so child
  // nodes at the next depth can be linked into the quadtree.
  uint8_t parent_idx_buf[MAX_MIB_SIZE * MAX_MIB_SIZE] = { 0 };

  // Walk the TX sizes from largest to smallest, one tree depth per step.
  int cur_tx_size = max_txsize_rect_lookup[1][bsize];
  while (cur_tx_depth <= MAX_VARTX_DEPTH) {
    const int cur_tx_bw = tx_size_wide[cur_tx_size];
    const int cur_tx_bh = tx_size_high[cur_tx_size];
    if (cur_tx_bw < 8 || cur_tx_bh < 8) break;

    for (int row = 0; row < bh; row += cur_tx_bh) {
      for (int col = 0; col < bw; col += cur_tx_bw) {
        if (cur_tx_bw != cur_tx_bh) {
          // Use dummy nodes for all rectangular transforms within the
          // TX size search tree.
          dst_rd_info[cur_rd_info_idx].rd_info_array = NULL;
        } else {
          // Get spatial location of this TX block within the superblock
          // (measured in cur_tx_bsize units).
          const int row_in_sb = (mi_row_in_sb + row) / cur_tx_bh;
          const int col_in_sb = (mi_col_in_sb + col) / cur_tx_bw;

          // Compute FNV-1a hash for this TX block.
          uint32_t hash = 2166136261;
          for (int i = 0; i < cur_tx_bh; i++) {
            const int16_t *cur_diff_row = diff + (row + i) * diff_stride + col;
            for (int j = 0; j < cur_tx_bw; j++) {
              // Residue values are clipped to byte range before hashing.
              hash = hash ^ clip_pixel(cur_diff_row[j] + 128);
              hash = (uint32_t)((int64_t)hash * 16777619);
            }
          }

          // Find corresponding RD info based on the hash value.
          const int rd_record_idx =
              row_in_sb * (MAX_MIB_SIZE >> (cur_tx_size + 1 - TX_8X8)) +
              col_in_sb;
          int idx = find_tx_size_rd_info(
              &rd_records_table[cur_tx_size - TX_8X8][rd_record_idx], hash);
          dst_rd_info[cur_rd_info_idx].rd_info_array =
              rd_records_table[cur_tx_size - TX_8X8][rd_record_idx]
                  .tx_rd_info[idx];
        }

        // Update the output quadtree RD info structure.
        av1_zero(dst_rd_info[cur_rd_info_idx].children);
        const int block_mi_row = row / MI_SIZE;
        const int block_mi_col = col / MI_SIZE;
        if (cur_tx_depth > 0) {
          // Link this node into its parent's children[0..3] slot, chosen by
          // the node's parity within the parent (raster order).
          const int y_odd = (row / cur_tx_bh) % 2;
          const int x_odd = (col / cur_tx_bw) % 2;
          const int child_idx = y_odd ? (x_odd ? 3 : 2) : (x_odd ? 1 : 0);
          const int mi_index = block_mi_row * bw_mi + block_mi_col;
          dst_rd_info[parent_idx_buf[mi_index]].children[child_idx] =
              &dst_rd_info[cur_rd_info_idx];
        }
        // Record this node as the parent for all mi units it covers, for use
        // at the next (deeper) level.
        const int tx_bh_mi = cur_tx_bh / MI_SIZE;
        const int tx_bw_mi = cur_tx_bw / MI_SIZE;
        for (int i = block_mi_row; i < block_mi_row + tx_bh_mi; ++i) {
          memset(parent_idx_buf + i * bw_mi + block_mi_col, cur_rd_info_idx,
                 tx_bw_mi);
        }
        ++cur_rd_info_idx;
      }
    }
    cur_tx_size = sub_tx_size_map[1][cur_tx_size];
    ++cur_tx_depth;
  }
  return 1;
}
4648
// Thresholds used by predict_skip_flag(): an entry is compared against the
// largest per-coefficient value of (100 * |quantized DCT coefficient|) /
// quantizer step found in the block. Indexed by
// [bit-depth index (0: 8-bit, 1: 10-bit, 2: 12-bit)][block size].
static const uint32_t skip_pred_threshold[3][BLOCK_SIZES_ALL] = {
  {
      0, 0, 0, 50, 50, 50, 55, 47, 47, 53, 53, 53, 53, 53, 53, 53,
#if CONFIG_EXT_PARTITION
      53, 53, 53,
#endif
      50, 50, 55, 55, 53, 53,
#if CONFIG_EXT_PARTITION
      53, 53,
#endif
  },
  {
      0, 0, 0, 69, 69, 69, 67, 68, 68, 53, 53, 53, 53, 53, 53, 53,
#if CONFIG_EXT_PARTITION
      53, 53, 53,
#endif
      69, 69, 67, 67, 53, 53,
#if CONFIG_EXT_PARTITION
      53, 53,
#endif
  },
  {
      0, 0, 0, 70, 73, 73, 70, 73, 73, 58, 58, 58, 58, 58, 58, 58,
#if CONFIG_EXT_PARTITION
      58, 58, 58,
#endif
      70, 70, 70, 70, 58, 58,
#if CONFIG_EXT_PARTITION
      58, 58,
#endif
  }
};
4681
// Uses simple features on top of DCT coefficients to quickly predict
// whether optimal RD decision is to skip encoding the residual.
// The sse value is stored in dist.
// Returns 1 when the block is predicted to be coded as skip (all-zero
// residual), 0 otherwise. Two gates are applied: a cheap mean-squared-error
// test against the DC quantizer, then a scan of forward-transformed
// residue coefficients against skip_pred_threshold[].
static int predict_skip_flag(MACROBLOCK *x, BLOCK_SIZE bsize, int64_t *dist) {
  // Probe with the largest rectangular TX size, capped at 16x16.
  int max_tx_size =
      get_max_rect_tx_size(bsize, is_inter_block(&x->e_mbd.mi[0]->mbmi));
  if (tx_size_high[max_tx_size] > 16 || tx_size_wide[max_tx_size] > 16)
    max_tx_size = AOMMIN(max_txsize_lookup[bsize], TX_16X16);
  const int tx_h = tx_size_high[max_tx_size];
  const int tx_w = tx_size_wide[max_tx_size];
  const int bw = block_size_wide[bsize];
  const int bh = block_size_high[bsize];
  const MACROBLOCKD *xd = &x->e_mbd;
  const uint32_t dc_q = (uint32_t)av1_dc_quant_QTX(x->qindex, 0, xd->bd);

  *dist = pixel_diff_dist(x, 0, x->plane[0].src_diff, bw, 0, 0, bsize, bsize);
  const int64_t mse = *dist / bw / bh;
  // Normalized quantizer takes the transform upscaling factor (8 for tx size
  // smaller than 32) into account.
  const uint32_t normalized_dc_q = dc_q >> 3;
  const int64_t mse_thresh = (int64_t)normalized_dc_q * normalized_dc_q / 8;
  // Predict not to skip when mse is larger than threshold.
  if (mse > mse_thresh) return 0;

  DECLARE_ALIGNED(32, tran_low_t, DCT_coefs[32 * 32]);
  TxfmParam param;
  param.tx_type = DCT_DCT;
  param.tx_size = max_tx_size;
  param.bd = xd->bd;
  param.is_hbd = get_bitdepth_data_path_index(xd);
  param.lossless = 0;
  const struct macroblockd_plane *const pd = &xd->plane[0];
  const BLOCK_SIZE plane_bsize =
      get_plane_block_size(xd->mi[0]->mbmi.sb_type, pd);
  // TODO(sarahparker) This assumes reduced_tx_set_used == 0. I will do a
  // follow up refactor to make the actual value of reduced_tx_set_used
  // within this function.
  param.tx_set_type = get_ext_tx_set_type(param.tx_size, plane_bsize,
                                          is_inter_block(&xd->mi[0]->mbmi), 0);
  const uint32_t ac_q = (uint32_t)av1_ac_quant_QTX(x->qindex, 0, xd->bd);
  uint32_t max_quantized_coef = 0;
  const int bd_idx = (xd->bd == 8) ? 0 : ((xd->bd == 10) ? 1 : 2);
  const uint32_t max_qcoef_thresh = skip_pred_threshold[bd_idx][bsize];
  const int16_t *src_diff = x->plane[0].src_diff;
  // Forward-transform each probe-sized tile of the residue and track the
  // largest scaled coefficient-to-quantizer ratio.
  for (int row = 0; row < bh; row += tx_h) {
    for (int col = 0; col < bw; col += tx_w) {
#if CONFIG_TXMG
      av1_highbd_fwd_txfm(src_diff + col, DCT_coefs, bw, &param);
#else   // CONFIG_TXMG
      if (param.is_hbd)
        av1_highbd_fwd_txfm(src_diff + col, DCT_coefs, bw, &param);
      else
        av1_fwd_txfm(src_diff + col, DCT_coefs, bw, &param);
#endif  // CONFIG_TXMG

      // Operating on TX domain, not pixels; we want the QTX quantizers
      for (int i = 0; i < tx_w * tx_h; ++i) {
        // Coefficient 0 is DC; all others use the AC quantizer.
        uint32_t cur_quantized_coef =
            (100 * (uint32_t)abs(DCT_coefs[i])) / (i ? ac_q : dc_q);
        if (cur_quantized_coef > max_quantized_coef) {
          max_quantized_coef = cur_quantized_coef;
          // Early out: once the threshold is reached, skip is ruled out.
          if (max_quantized_coef >= max_qcoef_thresh) return 0;
        }
      }
    }
    src_diff += tx_h * bw;
  }
  return max_quantized_coef < max_qcoef_thresh;
}
4751
// Used to set proper context for early termination with skip = 1.
// Fills the mode info with the all-skip decision (DCT_DCT, max rectangular
// TX size everywhere, every block marked skipped) and computes the matching
// rate and distortion into *rd_stats. `dist` is the pixel-domain SSE
// computed by predict_skip_flag().
static void set_skip_flag(const AV1_COMP *cpi, MACROBLOCK *x,
                          RD_STATS *rd_stats, int bsize, int64_t dist) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const int n4 = bsize_to_num_blk(bsize);
  const TX_SIZE tx_size = get_max_rect_tx_size(bsize, is_inter_block(mbmi));
  mbmi->tx_type = DCT_DCT;
#if CONFIG_TXK_SEL
  memset(mbmi->txk_type, DCT_DCT,
         sizeof(mbmi->txk_type[0]) *
             (MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)));
#endif
  // Assign the (un-split) max TX size to every position in the block.
  for (int idy = 0; idy < xd->n8_h; ++idy)
    for (int idx = 0; idx < xd->n8_w; ++idx)
      mbmi->inter_tx_size[idy][idx] = tx_size;
  mbmi->tx_size = tx_size;
  mbmi->min_tx_size = get_min_tx_size(tx_size);
  memset(x->blk_skip[0], 1, sizeof(uint8_t) * n4);
  rd_stats->skip = 1;

  (void)cpi;

  // Rate.
  // Cost of signalling this TX block as all-zero (skipped).
  const int tx_size_ctx = get_txsize_entropy_ctx(tx_size);
  ENTROPY_CONTEXT ctxa[2 * MAX_MIB_SIZE];
  ENTROPY_CONTEXT ctxl[2 * MAX_MIB_SIZE];
  av1_get_entropy_contexts(bsize, 0, &xd->plane[0], ctxa, ctxl);
#if CONFIG_LV_MAP
  TXB_CTX txb_ctx;
  // Because plane is 0, plane_bsize equal to bsize
  get_txb_ctx(bsize, tx_size, 0, ctxa, ctxl, &txb_ctx);
  int rate = x->coeff_costs[tx_size_ctx][PLANE_TYPE_Y]
                 .txb_skip_cost[txb_ctx.txb_skip_ctx][1];
#else
  int coeff_ctx = get_entropy_context(tx_size, ctxa, ctxl);
  int rate = x->token_head_costs[tx_size_ctx][PLANE_TYPE_Y][1][0][coeff_ctx][0];
#endif
  // Cost of signalling "no TX split" at the top level.
  if (tx_size > TX_4X4) {
    int ctx = txfm_partition_context(
        xd->above_txfm_context, xd->left_txfm_context, mbmi->sb_type, tx_size);
    rate += x->txfm_partition_cost[ctx][0];
  }
#if !CONFIG_TXK_SEL
  // Cost of signalling the DCT_DCT transform type, when applicable.
  const AV1_COMMON *cm = &cpi->common;
  const int ext_tx_set = get_ext_tx_set(max_txsize_lookup[bsize], bsize, 1,
                                        cm->reduced_tx_set_used);
  if (get_ext_tx_types(mbmi->min_tx_size, bsize, 1, cm->reduced_tx_set_used) >
          1 &&
      !xd->lossless[xd->mi[0]->mbmi.segment_id]) {
    if (ext_tx_set > 0)
      rate +=
          x->inter_tx_type_costs[ext_tx_set][txsize_sqr_map[mbmi->min_tx_size]]
                                [mbmi->tx_type];
  }
#endif  // CONFIG_TXK_SEL
  rd_stats->rate = rate;
#if CONFIG_HIGHBITDEPTH
  // Scale SSE down to an 8-bit-equivalent range for high bit depth input.
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
    dist = ROUND_POWER_OF_TWO(dist, (xd->bd - 8) * 2);
#endif  // CONFIG_HIGHBITDEPTH
  rd_stats->dist = rd_stats->sse = (dist << 4);
}
4815
Angie Chiangb5dda482016-11-02 16:19:58 -07004816static void select_tx_type_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
Yue Chen25dc0702017-10-18 23:36:06 -07004817 RD_STATS *rd_stats, BLOCK_SIZE bsize, int mi_row,
4818 int mi_col, int64_t ref_best_rd) {
Jingning Han2b0eeb12017-02-23 15:55:37 -08004819 const AV1_COMMON *cm = &cpi->common;
Yaowu Xuc27fc142016-08-22 16:08:15 -07004820 const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
4821 MACROBLOCKD *const xd = &x->e_mbd;
4822 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
4823 int64_t rd = INT64_MAX;
4824 int64_t best_rd = INT64_MAX;
4825 TX_TYPE tx_type, best_tx_type = DCT_DCT;
4826 const int is_inter = is_inter_block(mbmi);
4827 TX_SIZE best_tx_size[MAX_MIB_SIZE][MAX_MIB_SIZE];
Debargha Mukherjeee4e18fc2017-12-06 23:43:24 -08004828 TX_SIZE best_tx = max_txsize_rect_lookup[1][bsize];
Jingning Hane67b38a2016-11-04 10:30:00 -07004829 TX_SIZE best_min_tx_size = TX_SIZES_ALL;
Jingning Han9ca05b72017-01-03 14:41:36 -08004830 uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 8];
Jingning Hane3b81bc2017-06-23 11:43:52 -07004831 TX_TYPE txk_start = DCT_DCT;
4832#if CONFIG_TXK_SEL
4833 TX_TYPE txk_end = DCT_DCT + 1;
4834#else
4835 TX_TYPE txk_end = TX_TYPES;
4836#endif
Angie Chiangf1cb0752017-04-10 16:01:20 -07004837 const int n4 = bsize_to_num_blk(bsize);
Yaowu Xuc27fc142016-08-22 16:08:15 -07004838 int idx, idy;
4839 int prune = 0;
Sarah Parker90024e42017-10-06 16:50:47 -07004840 // Get the tx_size 1 level down
Debargha Mukherjeee4e18fc2017-12-06 23:43:24 -08004841 TX_SIZE min_tx_size = sub_tx_size_map[1][max_txsize_rect_lookup[1][bsize]];
Hui Suddbcde22017-09-18 17:22:02 -07004842 const TxSetType tx_set_type = get_ext_tx_set_type(
Sarah Parker90024e42017-10-06 16:50:47 -07004843 min_tx_size, bsize, is_inter, cm->reduced_tx_set_used);
Jingning Han3de53532017-12-07 13:40:32 -08004844 int within_border = mi_row >= xd->tile.mi_row_start &&
4845 (mi_row + mi_size_high[bsize] < xd->tile.mi_row_end) &&
4846 mi_col >= xd->tile.mi_col_start &&
4847 (mi_col + mi_size_wide[bsize] < xd->tile.mi_col_end);
Yaowu Xuc27fc142016-08-22 16:08:15 -07004848
Angie Chiangc0feea82016-11-03 15:36:18 -07004849 av1_invalid_rd_stats(rd_stats);
Yaowu Xuc27fc142016-08-22 16:08:15 -07004850
Hui Su1ddf2312017-08-19 15:21:34 -07004851 const uint32_t hash = get_block_residue_hash(x, bsize);
4852 TX_RD_RECORD *tx_rd_record = &x->tx_rd_record;
4853
Yue Chen25dc0702017-10-18 23:36:06 -07004854 if (ref_best_rd != INT64_MAX && within_border) {
Hui Su1ddf2312017-08-19 15:21:34 -07004855 for (int i = 0; i < tx_rd_record->num; ++i) {
4856 const int index = (tx_rd_record->index_start + i) % RD_RECORD_BUFFER_LEN;
4857 // If there is a match in the tx_rd_record, fetch the RD decision and
4858 // terminate early.
4859 if (tx_rd_record->tx_rd_info[index].hash_value == hash) {
4860 TX_RD_INFO *tx_rd_info = &tx_rd_record->tx_rd_info[index];
4861 fetch_tx_rd_info(n4, tx_rd_info, rd_stats, x);
4862 return;
4863 }
4864 }
4865 }
4866
Alexander Bokov80eedf22017-11-02 12:48:52 -07004867 // If we predict that skip is the optimal RD decision - set the respective
4868 // context and terminate early.
Hui Su3889c6d2017-12-04 17:02:44 -08004869 int64_t dist;
Alexander Bokov80eedf22017-11-02 12:48:52 -07004870 if (is_inter && cpi->sf.tx_type_search.use_skip_flag_prediction &&
Hui Su3889c6d2017-12-04 17:02:44 -08004871 predict_skip_flag(x, bsize, &dist)) {
4872 set_skip_flag(cpi, x, rd_stats, bsize, dist);
Hui Su89ef4932017-11-28 10:54:31 -08004873 // Save the RD search results into tx_rd_record.
4874 if (within_border) save_tx_rd_info(n4, hash, x, rd_stats, tx_rd_record);
Alexander Bokov80eedf22017-11-02 12:48:52 -07004875 return;
Alexander Bokov8829a242017-08-31 18:07:05 -07004876 }
4877
Alexander Bokovc5ddf062017-10-17 16:41:46 -07004878 // Precompute residual hashes and find existing or add new RD records to
4879 // store and reuse rate and distortion values to speed up TX size search.
4880 TX_SIZE_RD_INFO_NODE matched_rd_info[16 + 64 + 256];
4881 int found_rd_info = 0;
4882 if (ref_best_rd != INT64_MAX && within_border) {
4883 found_rd_info =
4884 find_tx_size_rd_records(x, bsize, mi_row, mi_col, matched_rd_info);
4885 }
4886
Alexander Bokov0c7eb102017-09-07 18:49:00 -07004887 if (is_inter && cpi->sf.tx_type_search.prune_mode > NO_PRUNE &&
4888 !x->use_default_inter_tx_type && !xd->lossless[mbmi->segment_id]) {
Alexander Bokov79a37242017-09-29 11:25:55 -07004889 prune = prune_tx(cpi, bsize, x, xd, tx_set_type,
4890 cpi->sf.tx_type_search.use_tx_size_pruning);
Alexander Bokov0c7eb102017-09-07 18:49:00 -07004891 }
Alexander Bokov8829a242017-08-31 18:07:05 -07004892
Rupert Swarbrickde2ea942017-10-09 15:21:21 +01004893 int found = 0;
4894
Alexander Bokov79a37242017-09-29 11:25:55 -07004895 int tx_split_prune_flag = 0;
4896 if (is_inter && cpi->sf.tx_type_search.prune_mode >= PRUNE_2D_ACCURATE)
4897 tx_split_prune_flag = ((prune >> TX_TYPES) & 1);
4898
Jingning Hane3b81bc2017-06-23 11:43:52 -07004899 for (tx_type = txk_start; tx_type < txk_end; ++tx_type) {
Angie Chiangb5dda482016-11-02 16:19:58 -07004900 RD_STATS this_rd_stats;
Angie Chiangc0feea82016-11-03 15:36:18 -07004901 av1_init_rd_stats(&this_rd_stats);
Hui Suddbcde22017-09-18 17:22:02 -07004902 if (!av1_ext_tx_used[tx_set_type][tx_type]) continue;
Sarah Parker90024e42017-10-06 16:50:47 -07004903 (void)prune;
Sebastien Alaiwan3bac9922017-11-02 12:34:41 +01004904 // TODO(sarahparker) This speed feature has been temporarily disabled
4905 // with ext-tx because it is not compatible with the current
4906 // search method. It will be fixed in a followup.
4907 /*
4908 if (is_inter) {
4909 if (cpi->sf.tx_type_search.prune_mode > NO_PRUNE) {
4910 if (!do_tx_type_search(tx_type, prune,
4911 cpi->sf.tx_type_search.prune_mode))
4912 continue;
4913 }
4914 } else {
4915 if (!ALLOW_INTRA_EXT_TX && bsize >= BLOCK_8X8) {
4916 if (tx_type != intra_mode_to_tx_type_context[mbmi->mode]) continue;
4917 }
4918 }
4919 */
Yaowu Xuc27fc142016-08-22 16:08:15 -07004920 if (is_inter && x->use_default_inter_tx_type &&
4921 tx_type != get_default_tx_type(0, xd, 0, max_tx_size))
4922 continue;
4923
Jingning Hane67b38a2016-11-04 10:30:00 -07004924 if (xd->lossless[mbmi->segment_id])
4925 if (tx_type != DCT_DCT) continue;
4926
Debargha Mukherjee51666862017-10-24 14:29:13 -07004927 rd = select_tx_size_fix_type(cpi, x, &this_rd_stats, bsize, mi_row, mi_col,
Alexander Bokovc5ddf062017-10-17 16:41:46 -07004928 ref_best_rd, tx_type, tx_split_prune_flag,
4929 found_rd_info ? matched_rd_info : NULL);
Sarah Parker90024e42017-10-06 16:50:47 -07004930 // If the current tx_type is not included in the tx_set for the smallest
4931 // tx size found, then all vartx partitions were actually transformed with
4932 // DCT_DCT and we should avoid picking it.
4933 const TxSetType min_tx_set_type = get_ext_tx_set_type(
4934 mbmi->min_tx_size, bsize, is_inter, cm->reduced_tx_set_used);
4935 if (!av1_ext_tx_used[min_tx_set_type][tx_type]) continue;
Sarah Parker90024e42017-10-06 16:50:47 -07004936
Hui Suda816a12017-08-18 14:46:02 -07004937 ref_best_rd = AOMMIN(rd, ref_best_rd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07004938 if (rd < best_rd) {
4939 best_rd = rd;
Angie Chiangb5dda482016-11-02 16:19:58 -07004940 *rd_stats = this_rd_stats;
Yaowu Xuc27fc142016-08-22 16:08:15 -07004941 best_tx_type = mbmi->tx_type;
4942 best_tx = mbmi->tx_size;
Jingning Hane67b38a2016-11-04 10:30:00 -07004943 best_min_tx_size = mbmi->min_tx_size;
Yaowu Xuc27fc142016-08-22 16:08:15 -07004944 memcpy(best_blk_skip, x->blk_skip[0], sizeof(best_blk_skip[0]) * n4);
Rupert Swarbrickde2ea942017-10-09 15:21:21 +01004945 found = 1;
Yaowu Xuc27fc142016-08-22 16:08:15 -07004946 for (idy = 0; idy < xd->n8_h; ++idy)
4947 for (idx = 0; idx < xd->n8_w; ++idx)
4948 best_tx_size[idy][idx] = mbmi->inter_tx_size[idy][idx];
4949 }
4950 }
4951
Rupert Swarbrickde2ea942017-10-09 15:21:21 +01004952 // We should always find at least one candidate unless ref_best_rd is less
4953 // than INT64_MAX (in which case, all the calls to select_tx_size_fix_type
4954 // might have failed to find something better)
4955 assert(IMPLIES(!found, ref_best_rd != INT64_MAX));
4956 if (!found) return;
4957
4958 // We found a candidate transform to use. Copy our results from the "best"
4959 // array into mbmi.
Yaowu Xuc27fc142016-08-22 16:08:15 -07004960 mbmi->tx_type = best_tx_type;
4961 for (idy = 0; idy < xd->n8_h; ++idy)
4962 for (idx = 0; idx < xd->n8_w; ++idx)
4963 mbmi->inter_tx_size[idy][idx] = best_tx_size[idy][idx];
4964 mbmi->tx_size = best_tx;
Jingning Hane67b38a2016-11-04 10:30:00 -07004965 mbmi->min_tx_size = best_min_tx_size;
Yaowu Xuc27fc142016-08-22 16:08:15 -07004966 memcpy(x->blk_skip[0], best_blk_skip, sizeof(best_blk_skip[0]) * n4);
Hui Su1ddf2312017-08-19 15:21:34 -07004967
4968 // Save the RD search results into tx_rd_record.
Hui Su89ef4932017-11-28 10:54:31 -08004969 if (within_border) save_tx_rd_info(n4, hash, x, rd_stats, tx_rd_record);
Yaowu Xuc27fc142016-08-22 16:08:15 -07004970}
4971
Yaowu Xuf883b422016-08-30 14:01:10 -07004972static void tx_block_rd(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row,
Yaowu Xuc27fc142016-08-22 16:08:15 -07004973 int blk_col, int plane, int block, TX_SIZE tx_size,
4974 BLOCK_SIZE plane_bsize, ENTROPY_CONTEXT *above_ctx,
Debargha Mukherjee51666862017-10-24 14:29:13 -07004975 ENTROPY_CONTEXT *left_ctx, RD_STATS *rd_stats,
4976 int fast) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07004977 MACROBLOCKD *const xd = &x->e_mbd;
4978 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
Yaowu Xuc27fc142016-08-22 16:08:15 -07004979 struct macroblockd_plane *const pd = &xd->plane[plane];
4980 BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
4981 const int tx_row = blk_row >> (1 - pd->subsampling_y);
4982 const int tx_col = blk_col >> (1 - pd->subsampling_x);
4983 TX_SIZE plane_tx_size;
Jingning Han18482fe2016-11-02 17:01:58 -07004984 const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
4985 const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
Yaowu Xuc27fc142016-08-22 16:08:15 -07004986
Jingning Hand3fada82016-11-22 10:46:55 -08004987 assert(tx_size < TX_SIZES_ALL);
Yaowu Xuc27fc142016-08-22 16:08:15 -07004988
Yaowu Xuc27fc142016-08-22 16:08:15 -07004989 if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
4990
Debargha Mukherjee2f123402016-08-30 17:43:38 -07004991 plane_tx_size =
4992 plane ? uv_txsize_lookup[bsize][mbmi->inter_tx_size[tx_row][tx_col]][0][0]
4993 : mbmi->inter_tx_size[tx_row][tx_col];
Yaowu Xuc27fc142016-08-22 16:08:15 -07004994
Debargha Mukherjee891a8772017-11-22 10:09:37 -08004995 if (tx_size == plane_tx_size
4996#if DISABLE_VARTX_FOR_CHROMA
4997 || pd->subsampling_x || pd->subsampling_y
4998#endif // DISABLE_VARTX_FOR_CHROMA
4999 ) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07005000 ENTROPY_CONTEXT *ta = above_ctx + blk_col;
5001 ENTROPY_CONTEXT *tl = left_ctx + blk_row;
Yaowu Xuf883b422016-08-30 14:01:10 -07005002 av1_tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
Alexander Bokovc5ddf062017-10-17 16:41:46 -07005003 plane_bsize, ta, tl, rd_stats, fast, NULL);
Jingning Han328d57b2017-07-07 14:40:17 -07005004 av1_set_txb_context(x, plane, block, tx_size, ta, tl);
Yaowu Xuc27fc142016-08-22 16:08:15 -07005005 } else {
Debargha Mukherjeee4e18fc2017-12-06 23:43:24 -08005006 const TX_SIZE sub_txs = sub_tx_size_map[1][tx_size];
Debargha Mukherjee5577bd12017-11-20 16:04:26 -08005007 assert(IMPLIES(tx_size <= TX_4X4, sub_txs == tx_size));
5008 assert(IMPLIES(tx_size > TX_4X4, sub_txs < tx_size));
5009 const int bsw = tx_size_wide_unit[sub_txs];
5010 const int bsh = tx_size_high_unit[sub_txs];
5011 const int step = bsh * bsw;
5012 assert(bsw > 0 && bsh > 0);
5013 for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) {
5014 for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) {
5015 const int offsetr = blk_row + row;
5016 const int offsetc = blk_col + col;
5017 if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
5018 tx_block_rd(cpi, x, offsetr, offsetc, plane, block, sub_txs,
5019 plane_bsize, above_ctx, left_ctx, rd_stats, fast);
5020 block += step;
5021 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07005022 }
5023 }
5024}
5025
// Compute the rate/distortion cost of the chroma (U and V) planes of an
// inter-coded block, accumulating per-plane costs into *rd_stats.
// Return value 0: early termination triggered, no valid rd cost available;
//              1: rd cost values are valid.
int inter_block_uvrd(const AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_stats,
                     BLOCK_SIZE bsize, int64_t ref_best_rd, int fast) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int plane;
  int is_cost_valid = 1;
  int64_t this_rd = 0;

  if (ref_best_rd < 0) is_cost_valid = 0;

  av1_init_rd_stats(rd_stats);

  // Nothing to do when chroma RD search is skipped for this block.
  if (x->skip_chroma_rd) return is_cost_valid;
  // Make sure the block size is legal for the chroma subsampling in use.
  const BLOCK_SIZE bsizec = scale_chroma_bsize(
      bsize, xd->plane[1].subsampling_x, xd->plane[1].subsampling_y);

  // Compute the chroma prediction residuals before costing them.
  if (is_inter_block(mbmi) && is_cost_valid) {
    for (plane = 1; plane < MAX_MB_PLANE; ++plane)
      av1_subtract_plane(x, bsizec, plane);
  }

  if (is_cost_valid) {
    for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
      const struct macroblockd_plane *const pd = &xd->plane[plane];
      const BLOCK_SIZE plane_bsize = get_plane_block_size(bsizec, pd);
      const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
      const int mi_height =
          block_size_high[plane_bsize] >> tx_size_high_log2[0];
      TX_SIZE max_tx_size = get_vartx_max_txsize(
          xd, plane_bsize, pd->subsampling_x || pd->subsampling_y);
#if DISABLE_VARTX_FOR_CHROMA == 2
      // If the luma transform size is split at least one level, split the
      // chroma by one level. Otherwise use the largest possible transform size
      // for chroma.
      if (pd->subsampling_x || pd->subsampling_y) {
        const TX_SIZE l_max_tx_size = get_vartx_max_txsize(xd, bsizec, 0);
        const int is_split =
            (l_max_tx_size != mbmi->inter_tx_size[0][0] && bsize == bsizec &&
             txsize_to_bsize[l_max_tx_size] == bsizec);
        if (is_split) max_tx_size = sub_tx_size_map[1][max_tx_size];
      }
#endif  // DISABLE_VARTX_FOR_CHROMA == 2
      const int bh = tx_size_high_unit[max_tx_size];
      const int bw = tx_size_wide_unit[max_tx_size];
      int idx, idy;
      int block = 0;
      const int step = bh * bw;
      ENTROPY_CONTEXT ta[2 * MAX_MIB_SIZE];
      ENTROPY_CONTEXT tl[2 * MAX_MIB_SIZE];
      RD_STATS pn_rd_stats;
      av1_init_rd_stats(&pn_rd_stats);
      av1_get_entropy_contexts(bsizec, 0, pd, ta, tl);

      // Walk all max-size transform blocks of this plane and accumulate
      // their costs (tx_block_rd recurses into smaller splits as needed).
      for (idy = 0; idy < mi_height; idy += bh) {
        for (idx = 0; idx < mi_width; idx += bw) {
          tx_block_rd(cpi, x, idy, idx, plane, block, max_tx_size, plane_bsize,
                      ta, tl, &pn_rd_stats, fast);
          block += step;
        }
      }

      if (pn_rd_stats.rate == INT_MAX) {
        is_cost_valid = 0;
        break;
      }

      av1_merge_rd_stats(rd_stats, &pn_rd_stats);

      // Current best achievable cost: either code the coefficients, or skip
      // them (zero_rate/sse) — whichever is cheaper.
      this_rd = AOMMIN(RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist),
                       RDCOST(x->rdmult, rd_stats->zero_rate, rd_stats->sse));

      // Terminate early once the reference best RD cannot be beaten.
      if (this_rd > ref_best_rd) {
        is_cost_valid = 0;
        break;
      }
    }
  }

  if (!is_cost_valid) {
    // reset cost value
    av1_invalid_rd_stats(rd_stats);
  }

  return is_cost_valid;
}
Yaowu Xuc27fc142016-08-22 16:08:15 -07005113
hui su83c26632017-01-24 17:19:06 -08005114static void rd_pick_palette_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
5115 int dc_mode_cost,
5116 uint8_t *best_palette_color_map,
5117 MB_MODE_INFO *const best_mbmi,
5118 int64_t *best_rd, int *rate,
5119 int *rate_tokenonly, int64_t *distortion,
5120 int *skippable) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07005121 MACROBLOCKD *const xd = &x->e_mbd;
5122 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
Urvang Joshi330aec82017-05-08 15:37:42 -07005123 assert(!is_inter_block(mbmi));
hui sude0c70a2017-01-09 17:12:17 -08005124 PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
Yaowu Xuc27fc142016-08-22 16:08:15 -07005125 const BLOCK_SIZE bsize = mbmi->sb_type;
Urvang Joshic9e71d42017-08-09 18:58:33 -07005126 assert(bsize >= BLOCK_8X8);
Angie Chiang284d7772016-11-08 11:06:45 -08005127 int this_rate;
5128 int64_t this_rd;
Yaowu Xuc27fc142016-08-22 16:08:15 -07005129 int colors_u, colors_v, colors;
5130 const int src_stride = x->plane[1].src.stride;
5131 const uint8_t *const src_u = x->plane[1].src.buf;
5132 const uint8_t *const src_v = x->plane[2].src.buf;
hui sude0c70a2017-01-09 17:12:17 -08005133 uint8_t *const color_map = xd->plane[1].color_index_map;
Angie Chiang284d7772016-11-08 11:06:45 -08005134 RD_STATS tokenonly_rd_stats;
Urvang Joshi56ba91b2017-01-10 13:22:09 -08005135 int plane_block_width, plane_block_height, rows, cols;
5136 av1_get_block_dimensions(bsize, 1, xd, &plane_block_width,
5137 &plane_block_height, &rows, &cols);
Hui Su473cf892017-11-08 18:14:31 -08005138 if (rows * cols > MAX_PALETTE_SQUARE) return;
Yaowu Xuc27fc142016-08-22 16:08:15 -07005139
Luc Trudeaud6d9eee2017-07-12 12:36:50 -04005140 mbmi->uv_mode = UV_DC_PRED;
hui su5db97432016-10-14 16:10:14 -07005141#if CONFIG_FILTER_INTRA
5142 mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
5143#endif // CONFIG_FILTER_INTRA
Yaowu Xuc27fc142016-08-22 16:08:15 -07005144
Hui Su4d51bed2017-11-29 15:52:40 -08005145 int count_buf[1 << 12]; // Maximum (1 << 12) color levels.
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02005146#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07005147 if (cpi->common.use_highbitdepth) {
Yaowu Xuf883b422016-08-30 14:01:10 -07005148 colors_u = av1_count_colors_highbd(src_u, src_stride, rows, cols,
Hui Su4d51bed2017-11-29 15:52:40 -08005149 cpi->common.bit_depth, count_buf);
Yaowu Xuf883b422016-08-30 14:01:10 -07005150 colors_v = av1_count_colors_highbd(src_v, src_stride, rows, cols,
Hui Su4d51bed2017-11-29 15:52:40 -08005151 cpi->common.bit_depth, count_buf);
Yaowu Xuc27fc142016-08-22 16:08:15 -07005152 } else {
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02005153#endif // CONFIG_HIGHBITDEPTH
Hui Su4d51bed2017-11-29 15:52:40 -08005154 colors_u = av1_count_colors(src_u, src_stride, rows, cols, count_buf);
5155 colors_v = av1_count_colors(src_v, src_stride, rows, cols, count_buf);
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02005156#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07005157 }
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02005158#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07005159
hui su33567b22017-04-30 16:40:19 -07005160#if CONFIG_PALETTE_DELTA_ENCODING
hui su33567b22017-04-30 16:40:19 -07005161 uint16_t color_cache[2 * PALETTE_MAX_SIZE];
Hui Su3748bc22017-08-23 11:30:41 -07005162 const int n_cache = av1_get_palette_cache(xd, 1, color_cache);
hui su33567b22017-04-30 16:40:19 -07005163#endif // CONFIG_PALETTE_DELTA_ENCODING
5164
Yaowu Xuc27fc142016-08-22 16:08:15 -07005165 colors = colors_u > colors_v ? colors_u : colors_v;
5166 if (colors > 1 && colors <= 64) {
Hui Suc3769e52017-11-16 23:13:42 -08005167 aom_clear_system_state();
Yaowu Xuc27fc142016-08-22 16:08:15 -07005168 int r, c, n, i, j;
5169 const int max_itr = 50;
Yaowu Xuc27fc142016-08-22 16:08:15 -07005170 float lb_u, ub_u, val_u;
5171 float lb_v, ub_v, val_v;
5172 float *const data = x->palette_buffer->kmeans_data_buf;
5173 float centroids[2 * PALETTE_MAX_SIZE];
Yaowu Xuc27fc142016-08-22 16:08:15 -07005174
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02005175#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07005176 uint16_t *src_u16 = CONVERT_TO_SHORTPTR(src_u);
5177 uint16_t *src_v16 = CONVERT_TO_SHORTPTR(src_v);
5178 if (cpi->common.use_highbitdepth) {
5179 lb_u = src_u16[0];
5180 ub_u = src_u16[0];
5181 lb_v = src_v16[0];
5182 ub_v = src_v16[0];
5183 } else {
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02005184#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07005185 lb_u = src_u[0];
5186 ub_u = src_u[0];
5187 lb_v = src_v[0];
5188 ub_v = src_v[0];
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02005189#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07005190 }
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02005191#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07005192
Yaowu Xuc27fc142016-08-22 16:08:15 -07005193 for (r = 0; r < rows; ++r) {
5194 for (c = 0; c < cols; ++c) {
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02005195#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07005196 if (cpi->common.use_highbitdepth) {
5197 val_u = src_u16[r * src_stride + c];
5198 val_v = src_v16[r * src_stride + c];
5199 data[(r * cols + c) * 2] = val_u;
5200 data[(r * cols + c) * 2 + 1] = val_v;
5201 } else {
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02005202#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07005203 val_u = src_u[r * src_stride + c];
5204 val_v = src_v[r * src_stride + c];
5205 data[(r * cols + c) * 2] = val_u;
5206 data[(r * cols + c) * 2 + 1] = val_v;
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02005207#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07005208 }
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02005209#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07005210 if (val_u < lb_u)
5211 lb_u = val_u;
5212 else if (val_u > ub_u)
5213 ub_u = val_u;
5214 if (val_v < lb_v)
5215 lb_v = val_v;
5216 else if (val_v > ub_v)
5217 ub_v = val_v;
5218 }
5219 }
5220
5221 for (n = colors > PALETTE_MAX_SIZE ? PALETTE_MAX_SIZE : colors; n >= 2;
5222 --n) {
5223 for (i = 0; i < n; ++i) {
5224 centroids[i * 2] = lb_u + (2 * i + 1) * (ub_u - lb_u) / n / 2;
5225 centroids[i * 2 + 1] = lb_v + (2 * i + 1) * (ub_v - lb_v) / n / 2;
5226 }
Yaowu Xuf883b422016-08-30 14:01:10 -07005227 av1_k_means(data, centroids, color_map, rows * cols, n, 2, max_itr);
hui sud13c24a2017-04-07 16:13:07 -07005228#if CONFIG_PALETTE_DELTA_ENCODING
hui su33567b22017-04-30 16:40:19 -07005229 optimize_palette_colors(color_cache, n_cache, n, 2, centroids);
hui sud13c24a2017-04-07 16:13:07 -07005230 // Sort the U channel colors in ascending order.
5231 for (i = 0; i < 2 * (n - 1); i += 2) {
5232 int min_idx = i;
5233 float min_val = centroids[i];
5234 for (j = i + 2; j < 2 * n; j += 2)
5235 if (centroids[j] < min_val) min_val = centroids[j], min_idx = j;
5236 if (min_idx != i) {
5237 float temp_u = centroids[i], temp_v = centroids[i + 1];
5238 centroids[i] = centroids[min_idx];
5239 centroids[i + 1] = centroids[min_idx + 1];
5240 centroids[min_idx] = temp_u, centroids[min_idx + 1] = temp_v;
5241 }
5242 }
5243 av1_calc_indices(data, centroids, color_map, rows * cols, n, 2);
5244#endif // CONFIG_PALETTE_DELTA_ENCODING
Urvang Joshi56ba91b2017-01-10 13:22:09 -08005245 extend_palette_color_map(color_map, cols, rows, plane_block_width,
5246 plane_block_height);
Yaowu Xuc27fc142016-08-22 16:08:15 -07005247 pmi->palette_size[1] = n;
5248 for (i = 1; i < 3; ++i) {
5249 for (j = 0; j < n; ++j) {
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02005250#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07005251 if (cpi->common.use_highbitdepth)
5252 pmi->palette_colors[i * PALETTE_MAX_SIZE + j] = clip_pixel_highbd(
5253 (int)centroids[j * 2 + i - 1], cpi->common.bit_depth);
5254 else
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02005255#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07005256 pmi->palette_colors[i * PALETTE_MAX_SIZE + j] =
5257 clip_pixel((int)centroids[j * 2 + i - 1]);
5258 }
5259 }
5260
Angie Chiang284d7772016-11-08 11:06:45 -08005261 super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd);
5262 if (tokenonly_rd_stats.rate == INT_MAX) continue;
Yaowu Xuc27fc142016-08-22 16:08:15 -07005263 this_rate =
Angie Chiang284d7772016-11-08 11:06:45 -08005264 tokenonly_rd_stats.rate + dc_mode_cost +
Yue Chenb23d00a2017-07-28 17:01:21 -07005265 x->palette_uv_size_cost[bsize - BLOCK_8X8][n - PALETTE_MIN_SIZE] +
Yaowu Xuc27fc142016-08-22 16:08:15 -07005266 write_uniform_cost(n, color_map[0]) +
Yue Chendab2ca92017-10-16 17:48:48 -07005267 x->palette_uv_mode_cost[pmi->palette_size[0] > 0][1];
hui su33567b22017-04-30 16:40:19 -07005268 this_rate += av1_palette_color_cost_uv(pmi,
5269#if CONFIG_PALETTE_DELTA_ENCODING
5270 color_cache, n_cache,
5271#endif // CONFIG_PALETTE_DELTA_ENCODING
5272 cpi->common.bit_depth);
Sarah Parker99e7daa2017-08-29 10:30:13 -07005273 this_rate +=
5274 av1_cost_color_map(x, 1, 0, bsize, mbmi->tx_size, PALETTE_MAP);
Urvang Joshi70006e42017-06-14 16:08:55 -07005275 this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
Yaowu Xuc27fc142016-08-22 16:08:15 -07005276 if (this_rd < *best_rd) {
5277 *best_rd = this_rd;
hui su83c26632017-01-24 17:19:06 -08005278 *best_mbmi = *mbmi;
Yaowu Xuc27fc142016-08-22 16:08:15 -07005279 memcpy(best_palette_color_map, color_map,
Urvang Joshi56ba91b2017-01-10 13:22:09 -08005280 plane_block_width * plane_block_height *
5281 sizeof(best_palette_color_map[0]));
Yaowu Xuc27fc142016-08-22 16:08:15 -07005282 *rate = this_rate;
Angie Chiang284d7772016-11-08 11:06:45 -08005283 *distortion = tokenonly_rd_stats.dist;
5284 *rate_tokenonly = tokenonly_rd_stats.rate;
5285 *skippable = tokenonly_rd_stats.skip;
Yaowu Xuc27fc142016-08-22 16:08:15 -07005286 }
5287 }
5288 }
hui su83c26632017-01-24 17:19:06 -08005289 if (best_mbmi->palette_mode_info.palette_size[1] > 0) {
hui sude0c70a2017-01-09 17:12:17 -08005290 memcpy(color_map, best_palette_color_map,
Luc Trudeau0401e892017-08-31 00:37:11 -04005291 plane_block_width * plane_block_height *
5292 sizeof(best_palette_color_map[0]));
hui sude0c70a2017-01-09 17:12:17 -08005293 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07005294}
5295
hui su5db97432016-10-14 16:10:14 -07005296#if CONFIG_EXT_INTRA
hui su45dc5972016-12-08 17:42:50 -08005297// Run RD calculation with given chroma intra prediction angle., and return
5298// the RD cost. Update the best mode info. if the RD cost is the best so far.
5299static int64_t pick_intra_angle_routine_sbuv(
5300 const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
5301 int rate_overhead, int64_t best_rd_in, int *rate, RD_STATS *rd_stats,
5302 int *best_angle_delta, int64_t *best_rd) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07005303 MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
Urvang Joshi330aec82017-05-08 15:37:42 -07005304 assert(!is_inter_block(mbmi));
Angie Chiang284d7772016-11-08 11:06:45 -08005305 int this_rate;
5306 int64_t this_rd;
5307 RD_STATS tokenonly_rd_stats;
Yaowu Xuc27fc142016-08-22 16:08:15 -07005308
hui su45dc5972016-12-08 17:42:50 -08005309 if (!super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd_in))
5310 return INT64_MAX;
Angie Chiang284d7772016-11-08 11:06:45 -08005311 this_rate = tokenonly_rd_stats.rate + rate_overhead;
Joe Young3ca43bf2017-10-06 15:12:46 -07005312#if CONFIG_EXT_INTRA_MOD
5313 this_rate += x->angle_delta_cost[mbmi->uv_mode - V_PRED]
5314 [mbmi->angle_delta[1] + MAX_ANGLE_DELTA];
5315#endif // CONFIG_EXT_INTRA_MOD
Urvang Joshi70006e42017-06-14 16:08:55 -07005316 this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
Yaowu Xuc27fc142016-08-22 16:08:15 -07005317 if (this_rd < *best_rd) {
5318 *best_rd = this_rd;
5319 *best_angle_delta = mbmi->angle_delta[1];
5320 *rate = this_rate;
hui su45dc5972016-12-08 17:42:50 -08005321 rd_stats->rate = tokenonly_rd_stats.rate;
5322 rd_stats->dist = tokenonly_rd_stats.dist;
5323 rd_stats->skip = tokenonly_rd_stats.skip;
Yaowu Xuc27fc142016-08-22 16:08:15 -07005324 }
hui su45dc5972016-12-08 17:42:50 -08005325 return this_rd;
Yaowu Xuc27fc142016-08-22 16:08:15 -07005326}
5327
hui su45dc5972016-12-08 17:42:50 -08005328// With given chroma directional intra prediction mode, pick the best angle
5329// delta. Return true if a RD cost that is smaller than the input one is found.
Urvang Joshi52648442016-10-13 17:27:51 -07005330static int rd_pick_intra_angle_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
Urvang Joshi52648442016-10-13 17:27:51 -07005331 BLOCK_SIZE bsize, int rate_overhead,
hui su45dc5972016-12-08 17:42:50 -08005332 int64_t best_rd, int *rate,
5333 RD_STATS *rd_stats) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07005334 MACROBLOCKD *const xd = &x->e_mbd;
5335 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
Urvang Joshi330aec82017-05-08 15:37:42 -07005336 assert(!is_inter_block(mbmi));
hui su45dc5972016-12-08 17:42:50 -08005337 int i, angle_delta, best_angle_delta = 0;
hui su0a6731f2017-04-26 15:23:47 -07005338 int64_t this_rd, best_rd_in, rd_cost[2 * (MAX_ANGLE_DELTA + 2)];
Yaowu Xuc27fc142016-08-22 16:08:15 -07005339
hui su45dc5972016-12-08 17:42:50 -08005340 rd_stats->rate = INT_MAX;
5341 rd_stats->skip = 0;
5342 rd_stats->dist = INT64_MAX;
hui su0a6731f2017-04-26 15:23:47 -07005343 for (i = 0; i < 2 * (MAX_ANGLE_DELTA + 2); ++i) rd_cost[i] = INT64_MAX;
Yaowu Xuc27fc142016-08-22 16:08:15 -07005344
hui su0a6731f2017-04-26 15:23:47 -07005345 for (angle_delta = 0; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
hui su45dc5972016-12-08 17:42:50 -08005346 for (i = 0; i < 2; ++i) {
5347 best_rd_in = (best_rd == INT64_MAX)
5348 ? INT64_MAX
5349 : (best_rd + (best_rd >> ((angle_delta == 0) ? 3 : 5)));
5350 mbmi->angle_delta[1] = (1 - 2 * i) * angle_delta;
5351 this_rd = pick_intra_angle_routine_sbuv(cpi, x, bsize, rate_overhead,
5352 best_rd_in, rate, rd_stats,
5353 &best_angle_delta, &best_rd);
5354 rd_cost[2 * angle_delta + i] = this_rd;
5355 if (angle_delta == 0) {
5356 if (this_rd == INT64_MAX) return 0;
5357 rd_cost[1] = this_rd;
5358 break;
Yaowu Xuc27fc142016-08-22 16:08:15 -07005359 }
5360 }
hui su45dc5972016-12-08 17:42:50 -08005361 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07005362
hui su45dc5972016-12-08 17:42:50 -08005363 assert(best_rd != INT64_MAX);
hui su0a6731f2017-04-26 15:23:47 -07005364 for (angle_delta = 1; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
hui su45dc5972016-12-08 17:42:50 -08005365 int64_t rd_thresh;
5366 for (i = 0; i < 2; ++i) {
5367 int skip_search = 0;
5368 rd_thresh = best_rd + (best_rd >> 5);
5369 if (rd_cost[2 * (angle_delta + 1) + i] > rd_thresh &&
5370 rd_cost[2 * (angle_delta - 1) + i] > rd_thresh)
5371 skip_search = 1;
5372 if (!skip_search) {
5373 mbmi->angle_delta[1] = (1 - 2 * i) * angle_delta;
Yue Chenb0f808b2017-04-26 11:55:14 -07005374 pick_intra_angle_routine_sbuv(cpi, x, bsize, rate_overhead, best_rd,
5375 rate, rd_stats, &best_angle_delta,
5376 &best_rd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07005377 }
5378 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07005379 }
5380
5381 mbmi->angle_delta[1] = best_angle_delta;
hui su45dc5972016-12-08 17:42:50 -08005382 return rd_stats->rate != INT_MAX;
Yaowu Xuc27fc142016-08-22 16:08:15 -07005383}
5384#endif // CONFIG_EXT_INTRA
5385
David Michael Barr2510f642017-07-11 23:39:20 +09005386#if CONFIG_CFL
David Michael Barr1f8d0952017-10-11 17:46:39 +09005387static void txfm_rd_in_plane_once(MACROBLOCK *const x,
5388 const AV1_COMP *const cpi, BLOCK_SIZE bsize,
David Michael Barr91345862017-10-14 22:20:36 +09005389 TX_SIZE tx_size, int plane, int64_t best_rd,
5390 int64_t *dist, int *rate) {
David Michael Barr1f8d0952017-10-11 17:46:39 +09005391 RD_STATS rd_stats;
5392 av1_init_rd_stats(&rd_stats);
David Michael Barr91345862017-10-14 22:20:36 +09005393 txfm_rd_in_plane(x, cpi, &rd_stats, best_rd, plane, bsize, tx_size,
David Michael Barr1f8d0952017-10-11 17:46:39 +09005394 cpi->sf.use_fast_coef_costing);
5395 *dist = rd_stats.dist;
5396 *rate = rd_stats.rate;
Luc Trudeau056d1f42017-09-15 17:38:14 -04005397}
David Michael Barr2510f642017-07-11 23:39:20 +09005398
David Michael Barr1f8d0952017-10-11 17:46:39 +09005399static int cfl_rd_pick_alpha(MACROBLOCK *const x, const AV1_COMP *const cpi,
David Michael Barr91345862017-10-14 22:20:36 +09005400 BLOCK_SIZE bsize, TX_SIZE tx_size,
5401 int64_t best_rd) {
David Michael Barr2510f642017-07-11 23:39:20 +09005402 MACROBLOCKD *const xd = &x->e_mbd;
David Michael Barr1f8d0952017-10-11 17:46:39 +09005403 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
5404 bsize = scale_chroma_bsize(bsize, xd->plane[AOM_PLANE_U].subsampling_x,
5405 xd->plane[AOM_PLANE_U].subsampling_y);
David Michael Barr2510f642017-07-11 23:39:20 +09005406
David Michael Barr1f8d0952017-10-11 17:46:39 +09005407 int rates[CFL_PRED_PLANES][CFL_MAGS_SIZE];
5408 int64_t dists[CFL_PRED_PLANES][CFL_MAGS_SIZE];
5409 mbmi->cfl_alpha_idx = 0;
5410 mbmi->cfl_alpha_signs = CFL_SIGN_ZERO * CFL_SIGNS + CFL_SIGN_POS - 1;
David Michael Barr91345862017-10-14 22:20:36 +09005411 txfm_rd_in_plane_once(x, cpi, bsize, tx_size, AOM_PLANE_U, best_rd,
David Michael Barr1f8d0952017-10-11 17:46:39 +09005412 &dists[CFL_PRED_U][0], &rates[CFL_PRED_U][0]);
5413 mbmi->cfl_alpha_signs = CFL_SIGN_POS * CFL_SIGNS + CFL_SIGN_ZERO - 1;
David Michael Barr91345862017-10-14 22:20:36 +09005414 txfm_rd_in_plane_once(x, cpi, bsize, tx_size, AOM_PLANE_V, best_rd,
David Michael Barr1f8d0952017-10-11 17:46:39 +09005415 &dists[CFL_PRED_V][0], &rates[CFL_PRED_V][0]);
David Michael Barr2510f642017-07-11 23:39:20 +09005416
David Michael Barrf6eaa152017-07-19 19:42:28 +09005417 for (int c = 0; c < CFL_ALPHABET_SIZE; c++) {
David Michael Barr1f8d0952017-10-11 17:46:39 +09005418 mbmi->cfl_alpha_idx = (c << CFL_ALPHABET_SIZE_LOG2) + c;
5419 for (int sign = CFL_SIGN_NEG; sign < CFL_SIGNS; sign++) {
5420 const int m = c * 2 + 1 + (sign == CFL_SIGN_NEG);
5421 mbmi->cfl_alpha_signs = sign * CFL_SIGNS + sign - 1;
David Michael Barr91345862017-10-14 22:20:36 +09005422 txfm_rd_in_plane_once(x, cpi, bsize, tx_size, AOM_PLANE_U, best_rd,
David Michael Barr1f8d0952017-10-11 17:46:39 +09005423 &dists[CFL_PRED_U][m], &rates[CFL_PRED_U][m]);
David Michael Barr91345862017-10-14 22:20:36 +09005424 txfm_rd_in_plane_once(x, cpi, bsize, tx_size, AOM_PLANE_V, best_rd,
David Michael Barr1f8d0952017-10-11 17:46:39 +09005425 &dists[CFL_PRED_V][m], &rates[CFL_PRED_V][m]);
5426 }
David Michael Barr2510f642017-07-11 23:39:20 +09005427 }
5428
Luc Trudeau4c5df102017-07-08 14:43:27 -04005429 int64_t dist;
David Michael Barr2510f642017-07-11 23:39:20 +09005430 int64_t cost;
David Michael Barrf6eaa152017-07-19 19:42:28 +09005431 int64_t best_cost = INT64_MAX;
David Michael Barr91345862017-10-14 22:20:36 +09005432 int best_rate_overhead = INT_MAX;
David Michael Barr1f8d0952017-10-11 17:46:39 +09005433#if CONFIG_DEBUG
David Michael Barr91345862017-10-14 22:20:36 +09005434 int best_rate = INT_MAX;
David Michael Barr1f8d0952017-10-11 17:46:39 +09005435#endif // CONFIG_DEBUG
David Michael Barr2510f642017-07-11 23:39:20 +09005436
David Michael Barr2510f642017-07-11 23:39:20 +09005437 int ind = 0;
David Michael Barrf6eaa152017-07-19 19:42:28 +09005438 int signs = 0;
David Michael Barr2510f642017-07-11 23:39:20 +09005439
David Michael Barrf6eaa152017-07-19 19:42:28 +09005440 for (int joint_sign = 0; joint_sign < CFL_JOINT_SIGNS; joint_sign++) {
5441 const int sign_u = CFL_SIGN_U(joint_sign);
5442 const int sign_v = CFL_SIGN_V(joint_sign);
5443 const int size_u = (sign_u == CFL_SIGN_ZERO) ? 1 : CFL_ALPHABET_SIZE;
5444 const int size_v = (sign_v == CFL_SIGN_ZERO) ? 1 : CFL_ALPHABET_SIZE;
5445 for (int u = 0; u < size_u; u++) {
David Michael Barr91345862017-10-14 22:20:36 +09005446 const int idx_u = ((sign_u == CFL_SIGN_ZERO) ? 0 : u * 2 + 1) +
5447 (sign_u == CFL_SIGN_NEG);
5448 if (rates[CFL_PRED_U][idx_u] == INT_MAX) continue;
David Michael Barrf6eaa152017-07-19 19:42:28 +09005449 for (int v = 0; v < size_v; v++) {
David Michael Barr91345862017-10-14 22:20:36 +09005450 const int idx_v = ((sign_v == CFL_SIGN_ZERO) ? 0 : v * 2 + 1) +
5451 (sign_v == CFL_SIGN_NEG);
5452 if (rates[CFL_PRED_V][idx_v] == INT_MAX) continue;
5453 dist = dists[CFL_PRED_U][idx_u] + dists[CFL_PRED_V][idx_v];
David Michael Barr1f8d0952017-10-11 17:46:39 +09005454 int rate_overhead = x->cfl_cost[joint_sign][CFL_PRED_U][u] +
5455 x->cfl_cost[joint_sign][CFL_PRED_V][v];
5456 int rate = x->intra_uv_mode_cost[mbmi->mode][UV_CFL_PRED] +
David Michael Barr91345862017-10-14 22:20:36 +09005457 rate_overhead + rates[CFL_PRED_U][idx_u] +
5458 rates[CFL_PRED_V][idx_v];
David Michael Barrf6eaa152017-07-19 19:42:28 +09005459 cost = RDCOST(x->rdmult, rate, dist);
David Michael Barr2510f642017-07-11 23:39:20 +09005460 if (cost < best_cost) {
5461 best_cost = cost;
David Michael Barr1f8d0952017-10-11 17:46:39 +09005462 best_rate_overhead = rate_overhead;
David Michael Barrf6eaa152017-07-19 19:42:28 +09005463 ind = (u << CFL_ALPHABET_SIZE_LOG2) + v;
5464 signs = joint_sign;
David Michael Barr1f8d0952017-10-11 17:46:39 +09005465#if CONFIG_DEBUG
5466 best_rate = rate;
5467#endif // CONFIG_DEBUG
David Michael Barr2510f642017-07-11 23:39:20 +09005468 }
5469 }
5470 }
5471 }
5472
5473 mbmi->cfl_alpha_idx = ind;
David Michael Barrf6eaa152017-07-19 19:42:28 +09005474 mbmi->cfl_alpha_signs = signs;
David Michael Barr1f8d0952017-10-11 17:46:39 +09005475#if CONFIG_DEBUG
Luc Trudeau1e84af52017-11-25 15:00:28 -05005476 xd->cfl.rate = best_rate;
David Michael Barr1f8d0952017-10-11 17:46:39 +09005477#endif // CONFIG_DEBUG
5478 return best_rate_overhead;
David Michael Barr2510f642017-07-11 23:39:20 +09005479}
5480#endif // CONFIG_CFL
5481
hui sueaddeee2017-05-30 12:19:38 -07005482static void init_sbuv_mode(MB_MODE_INFO *const mbmi) {
Luc Trudeaud6d9eee2017-07-12 12:36:50 -04005483 mbmi->uv_mode = UV_DC_PRED;
hui sueaddeee2017-05-30 12:19:38 -07005484 mbmi->palette_mode_info.palette_size[1] = 0;
hui sueaddeee2017-05-30 12:19:38 -07005485#if CONFIG_FILTER_INTRA
5486 mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
5487#endif // CONFIG_FILTER_INTRA
5488}
5489
// Exhaustive rate-distortion search over the chroma (UV) intra prediction
// modes for one block.
//
// On return, the winning mode (and its palette/CfL/angle side information)
// is left in xd->mi[0]->mbmi, and the mode's rate, token-only rate,
// distortion and skip flag are written through the output pointers.
// Returns the best RD cost found; the final assert guarantees at least one
// mode was accepted (INT64_MAX is never returned).
static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
                                       int *rate, int *rate_tokenonly,
                                       int64_t *distortion, int *skippable,
                                       BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  assert(!is_inter_block(mbmi));
  // Snapshot of the current block state; updated whenever a mode improves
  // on best_rd and restored into *mbmi before returning.
  MB_MODE_INFO best_mbmi = *mbmi;
  int64_t best_rd = INT64_MAX, this_rd;
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
  const int try_palette =
      av1_allow_palette(cpi->common.allow_screen_content_tools, mbmi->sb_type);

  // Try each chroma mode in the fixed search order.
  for (int mode_idx = 0; mode_idx < UV_INTRA_MODES; ++mode_idx) {
    int this_rate;
    RD_STATS tokenonly_rd_stats;
    UV_PREDICTION_MODE mode = uv_rd_search_mode_order[mode_idx];
#if CONFIG_EXT_INTRA
    const int is_directional_mode =
        av1_is_directional_mode(get_uv_mode(mode), mbmi->sb_type);
#endif  // CONFIG_EXT_INTRA
    // Skip modes pruned by the speed feature's per-tx-size mode mask.
    if (!(cpi->sf.intra_uv_mode_mask[txsize_sqr_up_map[max_tx_size]] &
          (1 << mode)))
      continue;

    mbmi->uv_mode = mode;
#if CONFIG_CFL
    int cfl_alpha_rate = 0;
    if (mode == UV_CFL_PRED) {
      // For CfL, first pick the best alpha index/signs; give up on this
      // mode if no usable alpha was found within the current best_rd.
      if (!is_cfl_allowed(mbmi)) continue;
      assert(!is_directional_mode);
      const TX_SIZE uv_tx_size = av1_get_uv_tx_size(mbmi, &xd->plane[1]);
      cfl_alpha_rate = cfl_rd_pick_alpha(x, cpi, bsize, uv_tx_size, best_rd);
      if (cfl_alpha_rate == INT_MAX) continue;
    }
#endif
#if CONFIG_EXT_INTRA
    mbmi->angle_delta[1] = 0;
    if (is_directional_mode && av1_use_angle_delta(mbmi->sb_type)) {
      // Directional modes additionally search over angle deltas; the
      // overhead below is the mode cost plus the angle-delta signalling
      // cost (zero under CONFIG_EXT_INTRA_MOD, where a separate cost
      // table is applied after the search instead).
      const int rate_overhead = x->intra_uv_mode_cost[mbmi->mode][mode] +
#if CONFIG_EXT_INTRA_MOD
                                0;
#else
                                write_uniform_cost(2 * MAX_ANGLE_DELTA + 1, 0);
#endif  // CONFIG_EXT_INTRA_MOD
      if (!rd_pick_intra_angle_sbuv(cpi, x, bsize, rate_overhead, best_rd,
                                    &this_rate, &tokenonly_rd_stats))
        continue;
    } else {
#endif  // CONFIG_EXT_INTRA
      if (!super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd)) {
        continue;
      }
#if CONFIG_EXT_INTRA
    }
#endif  // CONFIG_EXT_INTRA
    this_rate =
        tokenonly_rd_stats.rate + x->intra_uv_mode_cost[mbmi->mode][mode];

#if CONFIG_CFL
    if (mode == UV_CFL_PRED) {
      assert(is_cfl_allowed(mbmi));
      this_rate += cfl_alpha_rate;
#if CONFIG_DEBUG
      // Cross-check against the rate recorded during cfl_rd_pick_alpha().
      assert(xd->cfl.rate == this_rate);
#endif  // CONFIG_DEBUG
    }
#endif
#if CONFIG_EXT_INTRA
    if (is_directional_mode && av1_use_angle_delta(mbmi->sb_type)) {
      // Add the cost of signalling the chosen angle delta.
#if CONFIG_EXT_INTRA_MOD
      this_rate += x->angle_delta_cost[mode - V_PRED]
                                      [mbmi->angle_delta[1] + MAX_ANGLE_DELTA];
#else
      this_rate += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
                                      MAX_ANGLE_DELTA + mbmi->angle_delta[1]);
#endif  // CONFIG_EXT_INTRA_MOD
    }
#endif  // CONFIG_EXT_INTRA

    // When palette is allowed, DC_PRED also pays for signalling that no
    // chroma palette is used.
    if (try_palette && mode == UV_DC_PRED)
      this_rate += x->palette_uv_mode_cost[pmi->palette_size[0] > 0][0];

    this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);

    if (this_rd < best_rd) {
      best_mbmi = *mbmi;
      best_rd = this_rd;
      *rate = this_rate;
      *rate_tokenonly = tokenonly_rd_stats.rate;
      *distortion = tokenonly_rd_stats.dist;
      *skippable = tokenonly_rd_stats.skip;
    }
  }

  // Palette search runs after the regular modes and may further improve
  // best_rd / best_mbmi in place.
  if (try_palette) {
    uint8_t *best_palette_color_map = x->palette_buffer->best_palette_color_map;
    rd_pick_palette_intra_sbuv(cpi, x,
                               x->intra_uv_mode_cost[mbmi->mode][UV_DC_PRED],
                               best_palette_color_map, &best_mbmi, &best_rd,
                               rate, rate_tokenonly, distortion, skippable);
  }

  *mbmi = best_mbmi;
  // Make sure we actually chose a mode
  assert(best_rd < INT64_MAX);
  return best_rd;
}
5598
// Choose the chroma intra mode for one block and report its rate,
// token-only rate, distortion, skip flag and mode through the output
// pointers. When chroma RD is skipped for this block, short-circuits to
// DC_PRED with zeroed stats.
static void choose_intra_uv_mode(const AV1_COMP *const cpi, MACROBLOCK *const x,
                                 BLOCK_SIZE bsize, TX_SIZE max_tx_size,
                                 int *rate_uv, int *rate_uv_tokenonly,
                                 int64_t *dist_uv, int *skip_uv,
                                 UV_PREDICTION_MODE *mode_uv) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  // Use an estimated rd for uv_intra based on DC_PRED if the
  // appropriate speed flag is set.
  init_sbuv_mode(mbmi);
  if (x->skip_chroma_rd) {
    // No chroma RDO for this block: report a free, skipped DC_PRED.
    *rate_uv = 0;
    *rate_uv_tokenonly = 0;
    *dist_uv = 0;
    *skip_uv = 1;
    *mode_uv = UV_DC_PRED;
    return;
  }
  // Adjust the block size for chroma subsampling (see scale_chroma_bsize).
  bsize = scale_chroma_bsize(bsize, xd->plane[AOM_PLANE_U].subsampling_x,
                             xd->plane[AOM_PLANE_U].subsampling_y);
#if CONFIG_CFL
  // Only store reconstructed luma when there's chroma RDO. When there's no
  // chroma RDO, the reconstructed luma will be stored in encode_superblock().
  xd->cfl.store_y = !x->skip_chroma_rd;
  if (xd->cfl.store_y) {
    // Perform one extra call to txfm_rd_in_plane(), with the values chosen
    // during luma RDO, so we can store reconstructed luma values
    RD_STATS this_rd_stats;
    txfm_rd_in_plane(x, cpi, &this_rd_stats, INT64_MAX, AOM_PLANE_Y,
                     mbmi->sb_type, mbmi->tx_size,
                     cpi->sf.use_fast_coef_costing);
    // Turn storing back off so subsequent luma transforms don't overwrite
    // the stored reconstruction.
    xd->cfl.store_y = 0;
  }
#endif  // CONFIG_CFL
  rd_pick_intra_sbuv_mode(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
                          bsize, max_tx_size);
  // rd_pick_intra_sbuv_mode() leaves the winning mode in mbmi.
  *mode_uv = mbmi->uv_mode;
}
5637
Yue Chenb23d00a2017-07-28 17:01:21 -07005638static int cost_mv_ref(const MACROBLOCK *const x, PREDICTION_MODE mode,
Yaowu Xuc27fc142016-08-22 16:08:15 -07005639 int16_t mode_context) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07005640 if (is_inter_compound_mode(mode)) {
Yue Chenb23d00a2017-07-28 17:01:21 -07005641 return x
clang-format55ce9e02017-02-15 22:27:12 -08005642 ->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)];
Yaowu Xuc27fc142016-08-22 16:08:15 -07005643 }
David Barkercb03dc32017-04-07 13:05:09 +01005644
David Barkercb03dc32017-04-07 13:05:09 +01005645 int mode_cost = 0;
5646 int16_t mode_ctx = mode_context & NEWMV_CTX_MASK;
5647 int16_t is_all_zero_mv = mode_context & (1 << ALL_ZERO_FLAG_OFFSET);
5648
5649 assert(is_inter_mode(mode));
5650
5651 if (mode == NEWMV) {
Yue Chenb23d00a2017-07-28 17:01:21 -07005652 mode_cost = x->newmv_mode_cost[mode_ctx][0];
David Barkercb03dc32017-04-07 13:05:09 +01005653 return mode_cost;
5654 } else {
Yue Chenb23d00a2017-07-28 17:01:21 -07005655 mode_cost = x->newmv_mode_cost[mode_ctx][1];
Sarah Parker2b9ec2e2017-10-30 17:34:08 -07005656 mode_ctx = (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
David Barkercb03dc32017-04-07 13:05:09 +01005657
5658 if (is_all_zero_mv) return mode_cost;
5659
Sarah Parker2b9ec2e2017-10-30 17:34:08 -07005660 if (mode == GLOBALMV) {
Yue Chenb23d00a2017-07-28 17:01:21 -07005661 mode_cost += x->zeromv_mode_cost[mode_ctx][0];
David Barkercb03dc32017-04-07 13:05:09 +01005662 return mode_cost;
5663 } else {
Yue Chenb23d00a2017-07-28 17:01:21 -07005664 mode_cost += x->zeromv_mode_cost[mode_ctx][1];
David Barkercb03dc32017-04-07 13:05:09 +01005665 mode_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
5666
5667 if (mode_context & (1 << SKIP_NEARESTMV_OFFSET)) mode_ctx = 6;
5668 if (mode_context & (1 << SKIP_NEARMV_OFFSET)) mode_ctx = 7;
5669 if (mode_context & (1 << SKIP_NEARESTMV_SUB8X8_OFFSET)) mode_ctx = 8;
5670
Yue Chenb23d00a2017-07-28 17:01:21 -07005671 mode_cost += x->refmv_mode_cost[mode_ctx][mode != NEARESTMV];
David Barkercb03dc32017-04-07 13:05:09 +01005672 return mode_cost;
5673 }
5674 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07005675}
5676
Sarah Parker6fdc8532016-11-16 17:47:13 -08005677static int get_interinter_compound_type_bits(BLOCK_SIZE bsize,
5678 COMPOUND_TYPE comp_type) {
Debargha Mukherjeec5f735f2017-04-26 03:25:28 +00005679 (void)bsize;
Sarah Parker6fdc8532016-11-16 17:47:13 -08005680 switch (comp_type) {
5681 case COMPOUND_AVERAGE: return 0;
5682 case COMPOUND_WEDGE: return get_interinter_wedge_bits(bsize);
Sarah Parker569edda2016-12-14 14:57:38 -08005683 case COMPOUND_SEG: return 1;
Sarah Parker6fdc8532016-11-16 17:47:13 -08005684 default: assert(0); return 0;
5685 }
5686}
Sarah Parker6fdc8532016-11-16 17:47:13 -08005687
// Per-segment rate-distortion bookkeeping for the sub-8x8 motion search.
// Field semantics follow the b-prefix convention used elsewhere in this
// file (block-level rate/distortion) — confirm against the consumers of
// BEST_SEG_INFO.rdstat below.
typedef struct {
  int eobs;        // End-of-block coefficient count for the segment.
  int brate;       // Total rate for the segment.
  int byrate;      // Luma-only (Y) rate.
  int64_t bdist;   // Distortion.
  int64_t bsse;    // Sum of squared error.
  int64_t brdcost; // Combined rate-distortion cost.
  int_mv mvs[2];      // Chosen motion vectors, one per reference.
  int_mv pred_mv[2];  // Predicted motion vectors.
  int_mv ref_mv[2];   // Reference motion vectors.

  // Entropy contexts (above/left) after coding the segment.
  ENTROPY_CONTEXT ta[2];
  ENTROPY_CONTEXT tl[2];
} SEG_RDSTAT;
5702
// Accumulated best result of a sub-8x8 segment search: overall RD totals,
// the winning mode per segment, and the per-mode statistics for each of
// the (up to) four segments.
typedef struct {
  int_mv *ref_mv[2]; // Reference MVs for up to two reference frames.
  int_mv mvp;        // Motion vector predictor.

  int64_t segment_rd;   // Best total RD cost across segments.
  int r;                // Total rate.
  int64_t d;            // Total distortion.
  int64_t sse;          // Total sum of squared error.
  int segment_yrate;    // Total luma rate.
  PREDICTION_MODE modes[4]; // Winning mode for each segment.
  // Per-segment, per-mode RD statistics (inter + compound inter modes).
  SEG_RDSTAT rdstat[4][INTER_MODES + INTER_COMPOUND_MODES];
  int mvthresh;         // MV threshold used by the search.
} BEST_SEG_INFO;
5716
Alex Converse0fa0f422017-04-24 12:51:14 -07005717static INLINE int mv_check_bounds(const MvLimits *mv_limits, const MV *mv) {
5718 return (mv->row >> 3) < mv_limits->row_min ||
5719 (mv->row >> 3) > mv_limits->row_max ||
5720 (mv->col >> 3) < mv_limits->col_min ||
5721 (mv->col >> 3) > mv_limits->col_max;
Yaowu Xuc27fc142016-08-22 16:08:15 -07005722}
5723
// Check if NEARESTMV/NEARMV/GLOBALMV is the cheapest way encode zero motion.
// Returns 0 when this_mode is redundant — i.e. its candidate MV(s) equal the
// (global-motion-aware) zero vector but a different mode would signal the
// same motion at lower or equal cost — and 1 when the mode is worth keeping.
// TODO(aconverse): Find out if this is still productive then clean up or remove
static int check_best_zero_mv(
    const AV1_COMP *const cpi, const MACROBLOCK *const x,
    const int16_t mode_context[TOTAL_REFS_PER_FRAME],
    const int16_t compound_mode_context[TOTAL_REFS_PER_FRAME],
    int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME], int this_mode,
    const MV_REFERENCE_FRAME ref_frames[2], const BLOCK_SIZE bsize, int block,
    int mi_row, int mi_col) {
  int_mv zeromv[2] = { {.as_int = 0 } };
  int comp_pred_mode = ref_frames[1] > INTRA_FRAME;
  (void)mi_row;
  (void)mi_col;
  (void)cpi;
  // For the global-motion modes, "zero" means the global motion vector of
  // each reference frame, not literally (0, 0).
  if (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV) {
    for (int cur_frm = 0; cur_frm < 1 + comp_pred_mode; cur_frm++) {
      zeromv[cur_frm].as_int =
          gm_get_motion_vector(&cpi->common.global_motion[ref_frames[cur_frm]],
                               cpi->common.allow_high_precision_mv, bsize,
                               mi_col, mi_row, block
#if CONFIG_AMVR
                               ,
                               cpi->common.cur_frame_force_integer_mv
#endif
                               )
              .as_int;
    }
  }

  // Single-reference modes (or a compound prediction with no second inter
  // reference) whose candidate MV(s) match the zero vector.
  if ((this_mode == NEARMV || this_mode == NEARESTMV ||
       this_mode == GLOBALMV) &&
      frame_mv[this_mode][ref_frames[0]].as_int == zeromv[0].as_int &&
      (ref_frames[1] <= INTRA_FRAME ||
       frame_mv[this_mode][ref_frames[1]].as_int == zeromv[1].as_int)) {
    int16_t rfc =
        av1_mode_context_analyzer(mode_context, ref_frames, bsize, block);
    // Signalling costs of the three competing single-reference modes
    // under this context.
    int c1 = cost_mv_ref(x, NEARMV, rfc);
    int c2 = cost_mv_ref(x, NEARESTMV, rfc);
    int c3 = cost_mv_ref(x, GLOBALMV, rfc);

    if (this_mode == NEARMV) {
      // NEARMV is redundant if GLOBALMV codes the same motion cheaper.
      if (c1 > c3) return 0;
    } else if (this_mode == NEARESTMV) {
      if (c2 > c3) return 0;
    } else {
      assert(this_mode == GLOBALMV);
      // GLOBALMV is redundant if a NEAREST/NEAR candidate is already the
      // literal zero vector and costs no more.
      if (ref_frames[1] <= INTRA_FRAME) {
        if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0) ||
            (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0))
          return 0;
      } else {
        if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0 &&
             frame_mv[NEARESTMV][ref_frames[1]].as_int == 0) ||
            (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0 &&
             frame_mv[NEARMV][ref_frames[1]].as_int == 0))
          return 0;
      }
    }
  } else if ((this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
              this_mode == GLOBAL_GLOBALMV) &&
             frame_mv[this_mode][ref_frames[0]].as_int == zeromv[0].as_int &&
             frame_mv[this_mode][ref_frames[1]].as_int == zeromv[1].as_int) {
    // Compound (two-reference) modes: same redundancy check against the
    // compound-mode cost table.
    int16_t rfc = compound_mode_context[ref_frames[0]];
    int c2 = cost_mv_ref(x, NEAREST_NEARESTMV, rfc);
    int c3 = cost_mv_ref(x, GLOBAL_GLOBALMV, rfc);
    int c5 = cost_mv_ref(x, NEAR_NEARMV, rfc);

    if (this_mode == NEAREST_NEARESTMV) {
      if (c2 > c3) return 0;
    } else if (this_mode == NEAR_NEARMV) {
      if (c5 > c3) return 0;
    } else {
      assert(this_mode == GLOBAL_GLOBALMV);
      if ((c3 >= c2 && frame_mv[NEAREST_NEARESTMV][ref_frames[0]].as_int == 0 &&
           frame_mv[NEAREST_NEARESTMV][ref_frames[1]].as_int == 0) ||
          (c3 >= c5 && frame_mv[NEAR_NEARMV][ref_frames[0]].as_int == 0 &&
           frame_mv[NEAR_NEARMV][ref_frames[1]].as_int == 0))
        return 0;
    }
  }
  return 1;
}
5806
Urvang Joshi52648442016-10-13 17:27:51 -07005807static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01005808 BLOCK_SIZE bsize, int_mv *frame_mv, int mi_row,
5809 int mi_col, int_mv *ref_mv_sub8x8[2],
5810 const uint8_t *mask, int mask_stride,
5811 int *rate_mv, const int block) {
Yaowu Xuf883b422016-08-30 14:01:10 -07005812 const AV1_COMMON *const cm = &cpi->common;
Jingning Hanae5cfde2016-11-30 12:01:44 -08005813 const int pw = block_size_wide[bsize];
5814 const int ph = block_size_high[bsize];
Yaowu Xuc27fc142016-08-22 16:08:15 -07005815 MACROBLOCKD *xd = &x->e_mbd;
5816 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01005817 // This function should only ever be called for compound modes
Sarah Parkerb3ebed12017-03-09 10:52:03 -08005818 assert(has_second_ref(mbmi));
Zoe Liu122f3942017-04-25 11:18:38 -07005819 const int refs[2] = { mbmi->ref_frame[0], mbmi->ref_frame[1] };
Yaowu Xuc27fc142016-08-22 16:08:15 -07005820 int_mv ref_mv[2];
5821 int ite, ref;
Yaowu Xuc27fc142016-08-22 16:08:15 -07005822 struct scale_factors sf;
James Zern89a015b2017-08-08 12:39:00 -04005823 // ic and ir are the 4x4 coordinates of the sub8x8 at index "block"
Sarah Parkerb3ebed12017-03-09 10:52:03 -08005824 const int ic = block & 1;
5825 const int ir = (block - ic) >> 1;
Jingning Hancb637672017-06-22 09:14:40 -07005826 struct macroblockd_plane *const pd = &xd->plane[0];
Sarah Parkerb3ebed12017-03-09 10:52:03 -08005827 const int p_col = ((mi_col * MI_SIZE) >> pd->subsampling_x) + 4 * ic;
5828 const int p_row = ((mi_row * MI_SIZE) >> pd->subsampling_y) + 4 * ir;
5829 int is_global[2];
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01005830 for (ref = 0; ref < 2; ++ref) {
Luc Trudeauf3bf8b12017-12-08 14:38:41 -05005831 const WarpedMotionParams *const wm =
Sarah Parkerb3ebed12017-03-09 10:52:03 -08005832 &xd->global_motion[xd->mi[0]->mbmi.ref_frame[ref]];
5833 is_global[ref] = is_global_mv_block(xd->mi[0], block, wm->wmtype);
5834 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07005835
5836 // Do joint motion search in compound mode to get more accurate mv.
5837 struct buf_2d backup_yv12[2][MAX_MB_PLANE];
5838 int last_besterr[2] = { INT_MAX, INT_MAX };
5839 const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
Zoe Liu122f3942017-04-25 11:18:38 -07005840 av1_get_scaled_ref_frame(cpi, refs[0]),
5841 av1_get_scaled_ref_frame(cpi, refs[1])
Yaowu Xuc27fc142016-08-22 16:08:15 -07005842 };
5843
5844// Prediction buffer from second frame.
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02005845#if CONFIG_HIGHBITDEPTH
Cheng Chenefc55fd2017-10-10 12:08:28 -07005846 DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[MAX_SB_SQUARE]);
5847 uint8_t *second_pred;
Cheng Chenefc55fd2017-10-10 12:08:28 -07005848#else // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07005849 DECLARE_ALIGNED(16, uint8_t, second_pred[MAX_SB_SQUARE]);
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02005850#endif // CONFIG_HIGHBITDEPTH
Jingning Han61418bb2017-01-23 17:12:48 -08005851 (void)ref_mv_sub8x8;
Jingning Han61418bb2017-01-23 17:12:48 -08005852
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01005853 for (ref = 0; ref < 2; ++ref) {
Debargha Mukherjee6ea917e2017-10-19 09:31:29 -07005854 ref_mv[ref] = x->mbmi_ext->ref_mvs[refs[ref]][0];
Yaowu Xuc27fc142016-08-22 16:08:15 -07005855
5856 if (scaled_ref_frame[ref]) {
5857 int i;
5858 // Swap out the reference frame for a version that's been scaled to
5859 // match the resolution of the current frame, allowing the existing
5860 // motion search code to be used without additional modifications.
5861 for (i = 0; i < MAX_MB_PLANE; i++)
5862 backup_yv12[ref][i] = xd->plane[i].pre[ref];
Yaowu Xuf883b422016-08-30 14:01:10 -07005863 av1_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col,
5864 NULL);
Yaowu Xuc27fc142016-08-22 16:08:15 -07005865 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07005866 }
5867
5868// Since we have scaled the reference frames to match the size of the current
5869// frame we must use a unit scaling factor during mode selection.
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02005870#if CONFIG_HIGHBITDEPTH
Yaowu Xuf883b422016-08-30 14:01:10 -07005871 av1_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
5872 cm->height, cm->use_highbitdepth);
Yaowu Xuc27fc142016-08-22 16:08:15 -07005873#else
Yaowu Xuf883b422016-08-30 14:01:10 -07005874 av1_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
5875 cm->height);
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02005876#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07005877
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01005878 // Allow joint search multiple times iteratively for each reference frame
5879 // and break out of the search loop if it couldn't find a better mv.
5880 for (ite = 0; ite < 4; ite++) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07005881 struct buf_2d ref_yv12[2];
5882 int bestsme = INT_MAX;
5883 int sadpb = x->sadperbit16;
5884 MV *const best_mv = &x->best_mv.as_mv;
5885 int search_range = 3;
5886
Alex Converse0fa0f422017-04-24 12:51:14 -07005887 MvLimits tmp_mv_limits = x->mv_limits;
Yaowu Xuc27fc142016-08-22 16:08:15 -07005888 int id = ite % 2; // Even iterations search in the first reference frame,
5889 // odd iterations search in the second. The predictor
5890 // found for the 'other' reference frame is factored in.
Angie Chiange3a4c1c2017-02-10 16:26:49 -08005891 const int plane = 0;
David Barkere64d51a2017-06-09 14:52:42 +01005892 ConvolveParams conv_params = get_conv_params(!id, 0, plane);
Cheng Chenefc55fd2017-10-10 12:08:28 -07005893#if CONFIG_JNT_COMP
Cheng Chen8b1732a2017-11-22 18:38:49 -08005894 conv_params.use_jnt_comp_avg = 0;
Cheng Chenefc55fd2017-10-10 12:08:28 -07005895#endif
Sarah Parker4c10a3c2017-04-10 19:37:59 -07005896 WarpTypesAllowed warp_types;
Sarah Parker4c10a3c2017-04-10 19:37:59 -07005897 warp_types.global_warp_allowed = is_global[!id];
Sarah Parker4c10a3c2017-04-10 19:37:59 -07005898 warp_types.local_warp_allowed = mbmi->motion_mode == WARPED_CAUSAL;
Yaowu Xuc27fc142016-08-22 16:08:15 -07005899
5900 // Initialized here because of compiler problem in Visual Studio.
Angie Chiange3a4c1c2017-02-10 16:26:49 -08005901 ref_yv12[0] = xd->plane[plane].pre[0];
5902 ref_yv12[1] = xd->plane[plane].pre[1];
Yaowu Xuc27fc142016-08-22 16:08:15 -07005903
Yaowu Xuc27fc142016-08-22 16:08:15 -07005904// Get the prediction block from the 'other' reference frame.
Cheng Chenefc55fd2017-10-10 12:08:28 -07005905#if CONFIG_JNT_COMP
5906 InterpFilters interp_filters = EIGHTTAP_REGULAR;
5907#endif // CONFIG_JNT_COMP
Zoe Liu85b66462017-04-20 14:28:19 -07005908
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02005909#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07005910 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
5911 second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16);
Yaowu Xuf883b422016-08-30 14:01:10 -07005912 av1_highbd_build_inter_predictor(
Yaowu Xuc27fc142016-08-22 16:08:15 -07005913 ref_yv12[!id].buf, ref_yv12[!id].stride, second_pred, pw,
Zoe Liu85b66462017-04-20 14:28:19 -07005914 &frame_mv[refs[!id]].as_mv,
Cheng Chenefc55fd2017-10-10 12:08:28 -07005915#if CONFIG_JNT_COMP
5916 &sf, pw, ph, 0, interp_filters,
5917#else
Rupert Swarbrick27e90292017-09-28 17:46:50 +01005918 &sf, pw, ph, 0, mbmi->interp_filters,
Cheng Chenefc55fd2017-10-10 12:08:28 -07005919#endif // CONFIG_JNT_COMP
Sebastien Alaiwan48795802017-10-30 12:07:13 +01005920 &warp_types, p_col, p_row, plane, MV_PRECISION_Q3, mi_col * MI_SIZE,
5921 mi_row * MI_SIZE, xd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07005922 } else {
5923 second_pred = (uint8_t *)second_pred_alloc_16;
Zoe Liu76fcff72017-04-24 17:50:53 -07005924#endif // CONFIG_HIGHBITDEPTH
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01005925 av1_build_inter_predictor(ref_yv12[!id].buf, ref_yv12[!id].stride,
5926 second_pred, pw, &frame_mv[refs[!id]].as_mv,
Cheng Chenefc55fd2017-10-10 12:08:28 -07005927#if CONFIG_JNT_COMP
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01005928 &sf, pw, ph, &conv_params, interp_filters,
Cheng Chenefc55fd2017-10-10 12:08:28 -07005929#else
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01005930 &sf, pw, ph, &conv_params, mbmi->interp_filters,
Cheng Chenefc55fd2017-10-10 12:08:28 -07005931#endif // CONFIG_JNT_COMP
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01005932 &warp_types, p_col, p_row, plane, !id,
5933 MV_PRECISION_Q3, mi_col * MI_SIZE,
5934 mi_row * MI_SIZE, xd);
Zoe Liu76fcff72017-04-24 17:50:53 -07005935#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07005936 }
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02005937#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07005938
Cheng Chenefc55fd2017-10-10 12:08:28 -07005939#if CONFIG_JNT_COMP
5940 const int order_idx = id != 0;
Cheng Chenf78632e2017-10-20 15:30:51 -07005941 av1_jnt_comp_weight_assign(cm, mbmi, order_idx, &xd->jcp_param.fwd_offset,
Cheng Chen8263f802017-11-14 15:50:00 -08005942 &xd->jcp_param.bck_offset,
5943 &xd->jcp_param.use_jnt_comp_avg, 1);
Cheng Chenefc55fd2017-10-10 12:08:28 -07005944#endif // CONFIG_JNT_COMP
5945
Yaowu Xuc27fc142016-08-22 16:08:15 -07005946 // Do compound motion search on the current reference frame.
Angie Chiange3a4c1c2017-02-10 16:26:49 -08005947 if (id) xd->plane[plane].pre[0] = ref_yv12[id];
Alex Converse0fa0f422017-04-24 12:51:14 -07005948 av1_set_mv_search_range(&x->mv_limits, &ref_mv[id].as_mv);
Yaowu Xuc27fc142016-08-22 16:08:15 -07005949
    // Use the mv result from the single mode as mv predictor.
5952 *best_mv = frame_mv[refs[id]].as_mv;
Yaowu Xuc27fc142016-08-22 16:08:15 -07005953
5954 best_mv->col >>= 3;
5955 best_mv->row >>= 3;
5956
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01005957 av1_set_mvcost(x, refs[id], id, mbmi->ref_mv_idx);
Yaowu Xuc27fc142016-08-22 16:08:15 -07005958
5959 // Small-range full-pixel motion search.
Sebastien Alaiwan0bdea0d2017-10-02 15:15:05 +02005960 bestsme = av1_refining_search_8p_c(x, sadpb, search_range,
5961 &cpi->fn_ptr[bsize], mask, mask_stride,
5962 id, &ref_mv[id].as_mv, second_pred);
David Barkerc155e012017-05-11 13:54:54 +01005963 if (bestsme < INT_MAX) {
David Barkerc155e012017-05-11 13:54:54 +01005964 if (mask)
5965 bestsme = av1_get_mvpred_mask_var(x, best_mv, &ref_mv[id].as_mv,
5966 second_pred, mask, mask_stride, id,
5967 &cpi->fn_ptr[bsize], 1);
5968 else
David Barkerc155e012017-05-11 13:54:54 +01005969 bestsme = av1_get_mvpred_av_var(x, best_mv, &ref_mv[id].as_mv,
5970 second_pred, &cpi->fn_ptr[bsize], 1);
5971 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07005972
Alex Converse0fa0f422017-04-24 12:51:14 -07005973 x->mv_limits = tmp_mv_limits;
Yaowu Xuc27fc142016-08-22 16:08:15 -07005974
RogerZhou3b635242017-09-19 10:06:46 -07005975#if CONFIG_AMVR
RogerZhou10a03802017-10-26 11:49:48 -07005976 if (cpi->common.cur_frame_force_integer_mv) {
RogerZhou3b635242017-09-19 10:06:46 -07005977 x->best_mv.as_mv.row *= 8;
5978 x->best_mv.as_mv.col *= 8;
5979 }
RogerZhou10a03802017-10-26 11:49:48 -07005980 if (bestsme < INT_MAX && cpi->common.cur_frame_force_integer_mv == 0)
RogerZhou3b635242017-09-19 10:06:46 -07005981#else
Cheng Chen1483a712017-10-08 13:07:02 -07005982 if (bestsme < INT_MAX)
RogerZhou3b635242017-09-19 10:06:46 -07005983#endif
Cheng Chen1483a712017-10-08 13:07:02 -07005984 {
Yaowu Xuc27fc142016-08-22 16:08:15 -07005985 int dis; /* TODO: use dis in distortion calculation later. */
5986 unsigned int sse;
Timothy B. Terriberry5d24b6f2017-06-15 13:39:35 -07005987 bestsme = cpi->find_fractional_mv_step(
5988 x, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv,
5989 x->errorperbit, &cpi->fn_ptr[bsize], 0,
5990 cpi->sf.mv.subpel_iters_per_step, NULL, x->nmvjointcost, x->mvcost,
Sebastien Alaiwan0bdea0d2017-10-02 15:15:05 +02005991 &dis, &sse, second_pred, mask, mask_stride, id, pw, ph,
5992 cpi->sf.use_upsampled_references);
Yaowu Xuc27fc142016-08-22 16:08:15 -07005993 }
5994
5995 // Restore the pointer to the first (possibly scaled) prediction buffer.
Angie Chiange3a4c1c2017-02-10 16:26:49 -08005996 if (id) xd->plane[plane].pre[0] = ref_yv12[0];
Yaowu Xuc27fc142016-08-22 16:08:15 -07005997
5998 if (bestsme < last_besterr[id]) {
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01005999 frame_mv[refs[id]].as_mv = *best_mv;
Yaowu Xuc27fc142016-08-22 16:08:15 -07006000 last_besterr[id] = bestsme;
6001 } else {
6002 break;
6003 }
6004 }
6005
6006 *rate_mv = 0;
6007
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01006008 for (ref = 0; ref < 2; ++ref) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07006009 if (scaled_ref_frame[ref]) {
6010 // Restore the prediction frame pointers to their unscaled versions.
6011 int i;
6012 for (i = 0; i < MAX_MB_PLANE; i++)
6013 xd->plane[i].pre[ref] = backup_yv12[ref][i];
6014 }
Zoe Liu85b66462017-04-20 14:28:19 -07006015
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01006016 av1_set_mvcost(x, refs[ref], ref, mbmi->ref_mv_idx);
Zoe Liu85b66462017-04-20 14:28:19 -07006017
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01006018 *rate_mv += av1_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
6019 &x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv,
6020 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
Yaowu Xuc27fc142016-08-22 16:08:15 -07006021 }
6022}
6023
// Computes, for the current block context, the rate cost of signalling each
// possible reference-frame choice.
//
// Outputs:
//   ref_costs_single - cost of coding each single reference frame.
//   ref_costs_comp   - cost of coding each compound reference choice. Under
//                      CONFIG_EXT_COMP_REFS this is a 2-D table indexed by
//                      the (forward, backward) reference pair; otherwise a
//                      flat per-frame table.
//   comp_mode_p      - probability used to code the single-vs-compound
//                      reference mode flag. Set to 128 (even odds) whenever
//                      the flag is not actually coded, i.e. the reference
//                      mode is fixed at frame level or the segment pins the
//                      reference frame.
static void estimate_ref_frame_costs(
    const AV1_COMMON *cm, const MACROBLOCKD *xd, const MACROBLOCK *x,
    int segment_id, unsigned int *ref_costs_single,
#if CONFIG_EXT_COMP_REFS
    unsigned int (*ref_costs_comp)[TOTAL_REFS_PER_FRAME],
#else
    unsigned int *ref_costs_comp,
#endif  // CONFIG_EXT_COMP_REFS
    aom_prob *comp_mode_p) {
  int seg_ref_active =
      segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
  if (seg_ref_active) {
    // The segment dictates the reference frame, so no reference bits are
    // coded: every choice costs zero.
    memset(ref_costs_single, 0,
           TOTAL_REFS_PER_FRAME * sizeof(*ref_costs_single));
#if CONFIG_EXT_COMP_REFS
    int ref_frame;
    for (ref_frame = 0; ref_frame < TOTAL_REFS_PER_FRAME; ++ref_frame)
      memset(ref_costs_comp[ref_frame], 0,
             TOTAL_REFS_PER_FRAME * sizeof((*ref_costs_comp)[0]));
#else
    memset(ref_costs_comp, 0, TOTAL_REFS_PER_FRAME * sizeof(*ref_costs_comp));
#endif  // CONFIG_EXT_COMP_REFS

    *comp_mode_p = 128;
  } else {
    int intra_inter_ctx = av1_get_intra_inter_context(xd);

    if (cm->reference_mode == REFERENCE_MODE_SELECT) {
      *comp_mode_p = av1_get_reference_mode_prob(cm, xd);
    } else {
      *comp_mode_p = 128;
    }

    // Intra costs only the intra/inter flag; every inter choice starts from
    // the cost of coding "inter" and then adds reference-frame bits.
    ref_costs_single[INTRA_FRAME] = x->intra_inter_cost[intra_inter_ctx][0];

    unsigned int base_cost = x->intra_inter_cost[intra_inter_ctx][1];

#if !CONFIG_REF_ADAPT
    if (cm->reference_mode != COMPOUND_REFERENCE) {
#endif  // !CONFIG_REF_ADAPT
      // Probabilities for each node of the single-reference decision tree.
      aom_prob ref_single_p1 = av1_get_pred_prob_single_ref_p1(cm, xd);
      aom_prob ref_single_p2 = av1_get_pred_prob_single_ref_p2(cm, xd);
      aom_prob ref_single_p3 = av1_get_pred_prob_single_ref_p3(cm, xd);
      aom_prob ref_single_p4 = av1_get_pred_prob_single_ref_p4(cm, xd);
      aom_prob ref_single_p5 = av1_get_pred_prob_single_ref_p5(cm, xd);
      aom_prob ref_single_p6 = av1_get_pred_prob_single_ref_p6(cm, xd);

      ref_costs_single[LAST_FRAME] = ref_costs_single[LAST2_FRAME] =
          ref_costs_single[LAST3_FRAME] = ref_costs_single[BWDREF_FRAME] =
              ref_costs_single[ALTREF2_FRAME] = ref_costs_single[GOLDEN_FRAME] =
                  ref_costs_single[ALTREF_FRAME] = base_cost;

      // p1 splits the past references {LAST, LAST2, LAST3, GOLDEN} (bit 0)
      // from the future ones {BWDREF, ALTREF2, ALTREF} (bit 1).
      ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p1, 0);
      ref_costs_single[LAST2_FRAME] += av1_cost_bit(ref_single_p1, 0);
      ref_costs_single[LAST3_FRAME] += av1_cost_bit(ref_single_p1, 0);
      ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p1, 0);
      ref_costs_single[BWDREF_FRAME] += av1_cost_bit(ref_single_p1, 1);
      ref_costs_single[ALTREF2_FRAME] += av1_cost_bit(ref_single_p1, 1);
      ref_costs_single[ALTREF_FRAME] += av1_cost_bit(ref_single_p1, 1);

      // p3 splits {LAST, LAST2} from {LAST3, GOLDEN} within the past group.
      ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p3, 0);
      ref_costs_single[LAST2_FRAME] += av1_cost_bit(ref_single_p3, 0);
      ref_costs_single[LAST3_FRAME] += av1_cost_bit(ref_single_p3, 1);
      ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p3, 1);

      // p2 splits {BWDREF, ALTREF2} from ALTREF within the future group.
      ref_costs_single[BWDREF_FRAME] += av1_cost_bit(ref_single_p2, 0);
      ref_costs_single[ALTREF2_FRAME] += av1_cost_bit(ref_single_p2, 0);
      ref_costs_single[ALTREF_FRAME] += av1_cost_bit(ref_single_p2, 1);

      // p4/p5/p6 resolve the remaining two-way leaves.
      ref_costs_single[LAST_FRAME] += av1_cost_bit(ref_single_p4, 0);
      ref_costs_single[LAST2_FRAME] += av1_cost_bit(ref_single_p4, 1);

      ref_costs_single[LAST3_FRAME] += av1_cost_bit(ref_single_p5, 0);
      ref_costs_single[GOLDEN_FRAME] += av1_cost_bit(ref_single_p5, 1);

      ref_costs_single[BWDREF_FRAME] += av1_cost_bit(ref_single_p6, 0);
      ref_costs_single[ALTREF2_FRAME] += av1_cost_bit(ref_single_p6, 1);
#if !CONFIG_REF_ADAPT
    } else {
      // Compound-only frame: single-reference modes are not signalled;
      // 512 here acts as a flat placeholder cost.
      ref_costs_single[LAST_FRAME] = 512;
      ref_costs_single[LAST2_FRAME] = 512;
      ref_costs_single[LAST3_FRAME] = 512;
      ref_costs_single[BWDREF_FRAME] = 512;
      ref_costs_single[ALTREF2_FRAME] = 512;
      ref_costs_single[GOLDEN_FRAME] = 512;
      ref_costs_single[ALTREF_FRAME] = 512;
    }
#endif  // !CONFIG_REF_ADAPT

    if (cm->reference_mode != SINGLE_REFERENCE) {
      // Probabilities for the compound-reference decision trees: the
      // forward side (ref_comp_p*) and the backward side (bwdref_comp_p*).
      aom_prob ref_comp_p = av1_get_pred_prob_comp_ref_p(cm, xd);
      aom_prob ref_comp_p1 = av1_get_pred_prob_comp_ref_p1(cm, xd);
      aom_prob ref_comp_p2 = av1_get_pred_prob_comp_ref_p2(cm, xd);
      aom_prob bwdref_comp_p = av1_get_pred_prob_comp_bwdref_p(cm, xd);
      aom_prob bwdref_comp_p1 = av1_get_pred_prob_comp_bwdref_p1(cm, xd);

#if CONFIG_EXT_COMP_REFS
      aom_prob comp_ref_type_p = av1_get_comp_reference_type_prob(cm, xd);
      // Per-frame cost of each half of a bi-directional compound pair; the
      // pair cost is the sum of its two halves (computed below).
      unsigned int ref_bicomp_costs[TOTAL_REFS_PER_FRAME] = { 0 };

      // The forward side also carries the base inter cost plus the
      // "bi-directional compound" type bit.
      ref_bicomp_costs[LAST_FRAME] = ref_bicomp_costs[LAST2_FRAME] =
          ref_bicomp_costs[LAST3_FRAME] = ref_bicomp_costs[GOLDEN_FRAME] =
              base_cost + av1_cost_bit(comp_ref_type_p, 1);
      ref_bicomp_costs[BWDREF_FRAME] = ref_bicomp_costs[ALTREF2_FRAME] = 0;
      ref_bicomp_costs[ALTREF_FRAME] = 0;

      ref_bicomp_costs[LAST_FRAME] += av1_cost_bit(ref_comp_p, 0);
      ref_bicomp_costs[LAST2_FRAME] += av1_cost_bit(ref_comp_p, 0);
      ref_bicomp_costs[LAST3_FRAME] += av1_cost_bit(ref_comp_p, 1);
      ref_bicomp_costs[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p, 1);

      ref_bicomp_costs[LAST_FRAME] += av1_cost_bit(ref_comp_p1, 0);
      ref_bicomp_costs[LAST2_FRAME] += av1_cost_bit(ref_comp_p1, 1);

      ref_bicomp_costs[LAST3_FRAME] += av1_cost_bit(ref_comp_p2, 0);
      ref_bicomp_costs[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p2, 1);

      ref_bicomp_costs[BWDREF_FRAME] += av1_cost_bit(bwdref_comp_p, 0);
      ref_bicomp_costs[ALTREF2_FRAME] += av1_cost_bit(bwdref_comp_p, 0);
      ref_bicomp_costs[ALTREF_FRAME] += av1_cost_bit(bwdref_comp_p, 1);

      ref_bicomp_costs[BWDREF_FRAME] += av1_cost_bit(bwdref_comp_p1, 0);
      ref_bicomp_costs[ALTREF2_FRAME] += av1_cost_bit(bwdref_comp_p1, 1);

      // Assemble every (forward, backward) bi-directional pair cost.
      int ref0, ref1;
      for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
        for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1) {
          ref_costs_comp[ref0][ref1] =
              ref_bicomp_costs[ref0] + ref_bicomp_costs[ref1];
        }
      }

      // Uni-directional compound pairs are coded with their own small tree.
      aom_prob uni_comp_ref_p = av1_get_pred_prob_uni_comp_ref_p(cm, xd);
      aom_prob uni_comp_ref_p1 = av1_get_pred_prob_uni_comp_ref_p1(cm, xd);
      aom_prob uni_comp_ref_p2 = av1_get_pred_prob_uni_comp_ref_p2(cm, xd);

      ref_costs_comp[LAST_FRAME][LAST2_FRAME] =
          base_cost + av1_cost_bit(comp_ref_type_p, 0) +
          av1_cost_bit(uni_comp_ref_p, 0) + av1_cost_bit(uni_comp_ref_p1, 0);
      ref_costs_comp[LAST_FRAME][LAST3_FRAME] =
          base_cost + av1_cost_bit(comp_ref_type_p, 0) +
          av1_cost_bit(uni_comp_ref_p, 0) + av1_cost_bit(uni_comp_ref_p1, 1) +
          av1_cost_bit(uni_comp_ref_p2, 0);
      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] =
          base_cost + av1_cost_bit(comp_ref_type_p, 0) +
          av1_cost_bit(uni_comp_ref_p, 0) + av1_cost_bit(uni_comp_ref_p1, 1) +
          av1_cost_bit(uni_comp_ref_p2, 1);

      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] =
          base_cost + av1_cost_bit(comp_ref_type_p, 0) +
          av1_cost_bit(uni_comp_ref_p, 1);

#else  // !CONFIG_EXT_COMP_REFS

      ref_costs_comp[LAST_FRAME] = ref_costs_comp[LAST2_FRAME] =
          ref_costs_comp[LAST3_FRAME] = ref_costs_comp[GOLDEN_FRAME] =
              base_cost;

      ref_costs_comp[BWDREF_FRAME] = ref_costs_comp[ALTREF2_FRAME] =
          ref_costs_comp[ALTREF_FRAME] = 0;

      ref_costs_comp[LAST_FRAME] += av1_cost_bit(ref_comp_p, 0);
      ref_costs_comp[LAST2_FRAME] += av1_cost_bit(ref_comp_p, 0);
      ref_costs_comp[LAST3_FRAME] += av1_cost_bit(ref_comp_p, 1);
      ref_costs_comp[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p, 1);

      ref_costs_comp[LAST_FRAME] += av1_cost_bit(ref_comp_p1, 0);
      ref_costs_comp[LAST2_FRAME] += av1_cost_bit(ref_comp_p1, 1);

      ref_costs_comp[LAST3_FRAME] += av1_cost_bit(ref_comp_p2, 0);
      ref_costs_comp[GOLDEN_FRAME] += av1_cost_bit(ref_comp_p2, 1);

      // NOTE(zoeliu): BWDREF and ALTREF each add an extra cost by coding 1
      //              more bit.
      ref_costs_comp[BWDREF_FRAME] += av1_cost_bit(bwdref_comp_p, 0);
      ref_costs_comp[ALTREF2_FRAME] += av1_cost_bit(bwdref_comp_p, 0);
      ref_costs_comp[ALTREF_FRAME] += av1_cost_bit(bwdref_comp_p, 1);

      ref_costs_comp[BWDREF_FRAME] += av1_cost_bit(bwdref_comp_p1, 0);
      ref_costs_comp[ALTREF2_FRAME] += av1_cost_bit(bwdref_comp_p1, 1);
#endif  // CONFIG_EXT_COMP_REFS
    } else {
      // Single-reference-only frame: compound choices are not signalled;
      // 512 is the flat placeholder cost, matching the single-ref case.
#if CONFIG_EXT_COMP_REFS
      int ref0, ref1;
      for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
        for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1)
          ref_costs_comp[ref0][ref1] = 512;
      }
      ref_costs_comp[LAST_FRAME][LAST2_FRAME] = 512;
      ref_costs_comp[LAST_FRAME][LAST3_FRAME] = 512;
      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = 512;
      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = 512;
#else   // !CONFIG_EXT_COMP_REFS
      ref_costs_comp[LAST_FRAME] = 512;
      ref_costs_comp[LAST2_FRAME] = 512;
      ref_costs_comp[LAST3_FRAME] = 512;
      ref_costs_comp[BWDREF_FRAME] = 512;
      ref_costs_comp[ALTREF2_FRAME] = 512;
      ref_costs_comp[ALTREF_FRAME] = 512;
      ref_costs_comp[GOLDEN_FRAME] = 512;
#endif  // CONFIG_EXT_COMP_REFS
    }
  }
}
6228
6229static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
6230 int mode_index,
6231 int64_t comp_pred_diff[REFERENCE_MODES],
6232 int skippable) {
6233 MACROBLOCKD *const xd = &x->e_mbd;
6234
6235 // Take a snapshot of the coding context so it can be
6236 // restored if we decide to encode this way
6237 ctx->skip = x->skip;
6238 ctx->skippable = skippable;
6239 ctx->best_mode_index = mode_index;
6240 ctx->mic = *xd->mi[0];
6241 ctx->mbmi_ext = *x->mbmi_ext;
6242 ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
6243 ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE];
6244 ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
6245}
6246
// Prepares the prediction buffers and motion-vector candidates for
// searching |ref_frame| on the current block: sets up the (possibly scaled)
// reference planes in yv12_mb, fills the reference MV candidate list, and
// writes the refined nearest/near MVs into frame_nearest_mv/frame_near_mv.
static void setup_buffer_inter(
    const AV1_COMP *const cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
    BLOCK_SIZE block_size, int mi_row, int mi_col,
    int_mv frame_nearest_mv[TOTAL_REFS_PER_FRAME],
    int_mv frame_near_mv[TOTAL_REFS_PER_FRAME],
    struct buf_2d yv12_mb[TOTAL_REFS_PER_FRAME][MAX_MB_PLANE]) {
  const AV1_COMMON *cm = &cpi->common;
  const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
  // frame_refs is indexed from LAST_FRAME, hence the -1.
  const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
  MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;

  assert(yv12 != NULL);

  // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
  // use the UV scaling factors.
  av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf);

  // Gets an initial list of candidate vectors from neighbours and orders them
  av1_find_mv_refs(cm, xd, mi, ref_frame, &mbmi_ext->ref_mv_count[ref_frame],
                   mbmi_ext->ref_mv_stack[ref_frame],
                   mbmi_ext->compound_mode_context, candidates, mi_row, mi_col,
                   NULL, NULL, mbmi_ext->mode_context);

// Candidate refinement carried out at encoder and decoder
#if CONFIG_AMVR
  // With AMVR the refinement also rounds candidates to full-pel when the
  // frame forces integer MVs.
  av1_find_best_ref_mvs(cm->allow_high_precision_mv, candidates,
                        &frame_nearest_mv[ref_frame], &frame_near_mv[ref_frame],
                        cm->cur_frame_force_integer_mv);
#else
  av1_find_best_ref_mvs(cm->allow_high_precision_mv, candidates,
                        &frame_nearest_mv[ref_frame],
                        &frame_near_mv[ref_frame]);
#endif
  // Further refinement that is encode side only to test the top few candidates
  // in full and choose the best as the centre point for subsequent searches.
  // The current implementation doesn't support scaling.
  av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, ref_frame,
              block_size);
}
6289
// Performs motion search for a single reference of the current block:
// full-pixel search (normal or OBMC, depending on mbmi->motion_mode)
// followed, when the full-pel search succeeded, by sub-pixel refinement.
// The winning vector is left in x->best_mv and the rate needed to code it
// is written to *rate_mv. ref_idx selects which of the block's (up to two)
// references is searched.
static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
                                 BLOCK_SIZE bsize, int mi_row, int mi_col,
                                 int ref_idx, int *rate_mv) {
  MACROBLOCKD *xd = &x->e_mbd;
  const AV1_COMMON *cm = &cpi->common;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0, 0, 0, 0 } };
  int bestsme = INT_MAX;
  int step_param;
  int sadpb = x->sadperbit16;
  MV mvp_full;
  int ref = mbmi->ref_frame[ref_idx];
  MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;

  MvLimits tmp_mv_limits = x->mv_limits;
  int cost_list[5];

  const YV12_BUFFER_CONFIG *scaled_ref_frame =
      av1_get_scaled_ref_frame(cpi, ref);

  // Candidate search centres: the two best reference MVs plus the MV that
  // worked for this reference previously.
  MV pred_mv[3];
  pred_mv[0] = x->mbmi_ext->ref_mvs[ref][0].as_mv;
  pred_mv[1] = x->mbmi_ext->ref_mvs[ref][1].as_mv;
  pred_mv[2] = x->pred_mv[ref];

  if (scaled_ref_frame) {
    int i;
    // Swap out the reference frame for a version that's been scaled to
    // match the resolution of the current frame, allowing the existing
    // motion search code to be used without additional modifications.
    for (i = 0; i < MAX_MB_PLANE; i++)
      backup_yv12[i] = xd->plane[i].pre[ref_idx];

    av1_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL);
  }

  av1_set_mv_search_range(&x->mv_limits, &ref_mv);

  av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx);

  // Work out the size of the first step in the mv step search.
  // 0 here is maximum length first step. 1 is AOMMAX >> 1 etc.
  if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
    // Take the weighted average of the step_params based on the last frame's
    // max mv magnitude and that based on the best ref mvs of the current
    // block for the given reference.
    step_param =
        (av1_init_search_range(x->max_mv_context[ref]) + cpi->mv_step_param) /
        2;
  } else {
    step_param = cpi->mv_step_param;
  }

  // Larger blocks (relative to the superblock) get at least a minimum
  // first-step size derived from the size difference.
  if (cpi->sf.adaptive_motion_search && bsize < cm->sb_size) {
    int boffset =
        2 * (b_width_log2_lookup[cm->sb_size] -
             AOMMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
    step_param = AOMMAX(step_param, boffset);
  }

  if (cpi->sf.adaptive_motion_search) {
    int bwl = b_width_log2_lookup[bsize];
    int bhl = b_height_log2_lookup[bsize];
    // Normalize the predicted-MV SAD by block area (log2) to estimate how
    // well the predictor already fits; a small value means a coarser search
    // start is acceptable.
    int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);

    if (tlevel < 5) {
      step_param += 2;
      step_param = AOMMIN(step_param, MAX_MVSEARCH_STEPS - 1);
    }

    // prev_mv_sad is not setup for dynamically scaled frames.
    if (cpi->oxcf.resize_mode != RESIZE_RANDOM) {
      int i;
      // If another reference predicts this block much better (8x smaller
      // SAD), skip the search for this reference entirely.
      for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
        if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
          x->pred_mv[ref].row = 0;
          x->pred_mv[ref].col = 0;
          x->best_mv.as_int = INVALID_MV;

          // Restore the original (unscaled) planes before bailing out.
          if (scaled_ref_frame) {
            int j;
            for (j = 0; j < MAX_MB_PLANE; ++j)
              xd->plane[j].pre[ref_idx] = backup_yv12[j];
          }
          return;
        }
      }
    }
  }

  // NOTE(review): the search range was already set above with the same
  // arguments and x->mv_limits has not changed since — this second call
  // looks redundant; confirm before removing.
  av1_set_mv_search_range(&x->mv_limits, &ref_mv);

  if (mbmi->motion_mode != SIMPLE_TRANSLATION)
    mvp_full = mbmi->mv[0].as_mv;
  else
    mvp_full = pred_mv[x->mv_best_ref_index[ref]];

  // Convert the predictor from 1/8-pel units to full-pel for the
  // full-pixel search.
  mvp_full.col >>= 3;
  mvp_full.row >>= 3;

  x->best_mv.as_int = x->second_best_mv.as_int = INVALID_MV;

  switch (mbmi->motion_mode) {
    case SIMPLE_TRANSLATION:
#if CONFIG_HASH_ME
      bestsme = av1_full_pixel_search(cpi, x, bsize, &mvp_full, step_param,
                                      sadpb, cond_cost_list(cpi, cost_list),
                                      &ref_mv, INT_MAX, 1, (MI_SIZE * mi_col),
                                      (MI_SIZE * mi_row), 0);
#else
      bestsme = av1_full_pixel_search(cpi, x, bsize, &mvp_full, step_param,
                                      sadpb, cond_cost_list(cpi, cost_list),
                                      &ref_mv, INT_MAX, 1);
#endif
      break;
    case OBMC_CAUSAL:
      bestsme = av1_obmc_full_pixel_diamond(
          cpi, x, &mvp_full, step_param, sadpb,
          MAX_MVSEARCH_STEPS - 1 - step_param, 1, &cpi->fn_ptr[bsize], &ref_mv,
          &(x->best_mv.as_mv), 0);
      break;
    default: assert(0 && "Invalid motion mode!\n");
  }

  x->mv_limits = tmp_mv_limits;

// NOTE: with CONFIG_AMVR the brace of the following if-statement is opened
// in one preprocessor arm and closed after the shared body — keep the two
// arms in sync when editing.
#if CONFIG_AMVR
  if (cpi->common.cur_frame_force_integer_mv) {
    // Integer-MV frames skip sub-pel refinement; scale the full-pel result
    // back to 1/8-pel units.
    x->best_mv.as_mv.row *= 8;
    x->best_mv.as_mv.col *= 8;
  }
  if (bestsme < INT_MAX && cpi->common.cur_frame_force_integer_mv == 0) {
#else
  if (bestsme < INT_MAX) {
#endif
    int dis; /* TODO: use dis in distortion calculation later. */
    switch (mbmi->motion_mode) {
      case SIMPLE_TRANSLATION:
        if (cpi->sf.use_upsampled_references) {
          int best_mv_var;
          // Also try refining around the runner-up full-pel MV if it is
          // distinct and valid.
          const int try_second = x->second_best_mv.as_int != INVALID_MV &&
                                 x->second_best_mv.as_int != x->best_mv.as_int;
          const int pw = block_size_wide[bsize];
          const int ph = block_size_high[bsize];

          best_mv_var = cpi->find_fractional_mv_step(
              x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
              &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
              cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
              x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, NULL,
              0, 0, pw, ph, 1);

          if (try_second) {
            // Bounds in 1/8-pel units the candidate must satisfy.
            const int minc =
                AOMMAX(x->mv_limits.col_min * 8, ref_mv.col - MV_MAX);
            const int maxc =
                AOMMIN(x->mv_limits.col_max * 8, ref_mv.col + MV_MAX);
            const int minr =
                AOMMAX(x->mv_limits.row_min * 8, ref_mv.row - MV_MAX);
            const int maxr =
                AOMMIN(x->mv_limits.row_max * 8, ref_mv.row + MV_MAX);
            int this_var;
            MV best_mv = x->best_mv.as_mv;

            x->best_mv = x->second_best_mv;
            if (x->best_mv.as_mv.row * 8 <= maxr &&
                x->best_mv.as_mv.row * 8 >= minr &&
                x->best_mv.as_mv.col * 8 <= maxc &&
                x->best_mv.as_mv.col * 8 >= minc) {
              this_var = cpi->find_fractional_mv_step(
                  x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
                  &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
                  cpi->sf.mv.subpel_iters_per_step,
                  cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost,
                  &dis, &x->pred_sse[ref], NULL, NULL, 0, 0, pw, ph, 1);
              // Keep whichever start point produced the lower variance.
              if (this_var < best_mv_var) best_mv = x->best_mv.as_mv;
              x->best_mv.as_mv = best_mv;
            }
          }
        } else {
          cpi->find_fractional_mv_step(
              x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
              &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
              cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
              x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, NULL,
              0, 0, 0, 0, 0);
        }
        break;
      case OBMC_CAUSAL:
        av1_find_best_obmc_sub_pixel_tree_up(
            x, &x->best_mv.as_mv, &ref_mv, cm->allow_high_precision_mv,
            x->errorperbit, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
            cpi->sf.mv.subpel_iters_per_step, x->nmvjointcost, x->mvcost, &dis,
            &x->pred_sse[ref], 0, cpi->sf.use_upsampled_references);
        break;
      default: assert(0 && "Invalid motion mode!\n");
    }
  }
  *rate_mv = av1_mv_bit_cost(&x->best_mv.as_mv, &ref_mv, x->nmvjointcost,
                             x->mvcost, MV_COST_WEIGHT);

  // Remember the winning MV to seed future searches for this reference.
  if (cpi->sf.adaptive_motion_search && mbmi->motion_mode == SIMPLE_TRANSLATION)
    x->pred_mv[ref] = x->best_mv.as_mv;

  // Restore the original (unscaled) reference planes.
  if (scaled_ref_frame) {
    int i;
    for (i = 0; i < MAX_MB_PLANE; i++)
      xd->plane[i].pre[ref_idx] = backup_yv12[i];
  }
}
6500
David Barkerac37fa32016-12-02 12:30:21 +00006501static INLINE void restore_dst_buf(MACROBLOCKD *xd, BUFFER_SET dst) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07006502 int i;
6503 for (i = 0; i < MAX_MB_PLANE; i++) {
David Barkerac37fa32016-12-02 12:30:21 +00006504 xd->plane[i].dst.buf = dst.plane[i];
6505 xd->plane[i].dst.stride = dst.stride[i];
Yaowu Xuc27fc142016-08-22 16:08:15 -07006506 }
6507}
6508
// Build the prediction for the "other" reference of a compound block (the
// reference at index !ref_idx) into second_pred, so that a masked/compound
// motion search can then optimize the mv of reference ref_idx against it.
//
//   cpi/x       - encoder and macroblock context
//   bsize       - prediction block size
//   other_mv    - motion vector of the fixed (other) reference
//   mi_row/col  - block position in mode-info units
//   block       - sub-block index within an 8x8 (0 for whole blocks); its low
//                 bit / next bit give the 4x4 column/row offsets below
//   ref_idx     - index (0/1) of the reference being searched; the prediction
//                 built here is for reference !ref_idx
//   second_pred - output buffer of at least pw*ph samples (high-bitdepth
//                 callers pass a CONVERT_TO_BYTEPTR'd uint16_t buffer)
static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x,
                                    BLOCK_SIZE bsize, const MV *other_mv,
                                    int mi_row, int mi_col, const int block,
                                    int ref_idx, uint8_t *second_pred) {
  const AV1_COMMON *const cm = &cpi->common;
  const int pw = block_size_wide[bsize];
  const int ph = block_size_high[bsize];
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const int other_ref = mbmi->ref_frame[!ref_idx];
  struct scale_factors sf;
  struct macroblockd_plane *const pd = &xd->plane[0];
  // ic and ir are the 4x4 coordinates of the sub8x8 at index "block"
  const int ic = block & 1;
  const int ir = (block - ic) >> 1;
  // Pixel position of this (sub-)block in the plane, adjusted for chroma
  // subsampling (plane 0 here, so subsampling is normally 0).
  const int p_col = ((mi_col * MI_SIZE) >> pd->subsampling_x) + 4 * ic;
  const int p_row = ((mi_row * MI_SIZE) >> pd->subsampling_y) + 4 * ir;
  const WarpedMotionParams *const wm = &xd->global_motion[other_ref];
  int is_global = is_global_mv_block(xd->mi[0], block, wm->wmtype);

  // This function should only ever be called for compound modes
  assert(has_second_ref(mbmi));

  struct buf_2d backup_yv12[MAX_MB_PLANE];
  const YV12_BUFFER_CONFIG *const scaled_ref_frame =
      av1_get_scaled_ref_frame(cpi, other_ref);

  if (scaled_ref_frame) {
    int i;
    // Swap out the reference frame for a version that's been scaled to
    // match the resolution of the current frame, allowing the existing
    // motion search code to be used without additional modifications.
    for (i = 0; i < MAX_MB_PLANE; i++)
      backup_yv12[i] = xd->plane[i].pre[!ref_idx];
    av1_setup_pre_planes(xd, !ref_idx, scaled_ref_frame, mi_row, mi_col, NULL);
  }

// Since we have scaled the reference frames to match the size of the current
// frame we must use a unit scaling factor during mode selection.
#if CONFIG_HIGHBITDEPTH
  av1_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
                                    cm->height, cm->use_highbitdepth);
#else
  av1_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
                                    cm->height);
#endif  // CONFIG_HIGHBITDEPTH

  struct buf_2d ref_yv12;

  const int plane = 0;
  ConvolveParams conv_params = get_conv_params(!ref_idx, 0, plane);
  WarpTypesAllowed warp_types;
  warp_types.global_warp_allowed = is_global;
  warp_types.local_warp_allowed = mbmi->motion_mode == WARPED_CAUSAL;

  // Initialized here because of compiler problem in Visual Studio.
  ref_yv12 = xd->plane[plane].pre[!ref_idx];

// Get the prediction block from the 'other' reference frame.
#if CONFIG_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    av1_highbd_build_inter_predictor(
        ref_yv12.buf, ref_yv12.stride, second_pred, pw, other_mv, &sf, pw, ph,
        0, mbmi->interp_filters, &warp_types, p_col, p_row, plane,
        MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd);
  } else {
#endif  // CONFIG_HIGHBITDEPTH
    av1_build_inter_predictor(
        ref_yv12.buf, ref_yv12.stride, second_pred, pw, other_mv, &sf, pw, ph,
        &conv_params, mbmi->interp_filters, &warp_types, p_col, p_row, plane,
        !ref_idx, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd);
#if CONFIG_HIGHBITDEPTH
  }
#endif  // CONFIG_HIGHBITDEPTH

#if CONFIG_JNT_COMP
  // Set up joint-compound weighting parameters for this block.
  av1_jnt_comp_weight_assign(cm, mbmi, 0, &xd->jcp_param.fwd_offset,
                             &xd->jcp_param.bck_offset,
                             &xd->jcp_param.use_jnt_comp_avg, 1);
#endif  // CONFIG_JNT_COMP

  if (scaled_ref_frame) {
    // Restore the prediction frame pointers to their unscaled versions.
    int i;
    for (i = 0; i < MAX_MB_PLANE; i++)
      xd->plane[i].pre[!ref_idx] = backup_yv12[i];
  }
}
6597
// Search for the best mv for one component of a compound,
// given that the other component is fixed.
//
//   this_mv     - in/out: on entry, the mv predictor (from the single-ref
//                 mode); on exit, the refined mv if the search improved it
//   second_pred - prediction of the fixed component (see
//                 build_second_inter_pred), or the intra prediction for
//                 interintra blocks
//   mask        - optional compound mask (NULL for plain averaged compound)
//   rate_mv     - out: bit cost of signalling the resulting mv
//   ref_idx     - index (0/1) of the reference whose mv is being searched
static void compound_single_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
                                          BLOCK_SIZE bsize, MV *this_mv,
                                          int mi_row, int mi_col,
                                          const uint8_t *second_pred,
                                          const uint8_t *mask, int mask_stride,
                                          int *rate_mv, int ref_idx) {
  const int pw = block_size_wide[bsize];
  const int ph = block_size_high[bsize];
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const int ref = mbmi->ref_frame[ref_idx];
  int_mv ref_mv = x->mbmi_ext->ref_mvs[ref][0];
  struct macroblockd_plane *const pd = &xd->plane[0];

  struct buf_2d backup_yv12[MAX_MB_PLANE];
  const YV12_BUFFER_CONFIG *const scaled_ref_frame =
      av1_get_scaled_ref_frame(cpi, ref);

  // Check that this is either an interinter or an interintra block
  assert(has_second_ref(mbmi) ||
         (ref_idx == 0 && mbmi->ref_frame[1] == INTRA_FRAME));

  if (scaled_ref_frame) {
    int i;
    // Swap out the reference frame for a version that's been scaled to
    // match the resolution of the current frame, allowing the existing
    // motion search code to be used without additional modifications.
    for (i = 0; i < MAX_MB_PLANE; i++)
      backup_yv12[i] = xd->plane[i].pre[ref_idx];
    av1_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL);
  }

  struct buf_2d orig_yv12;
  int bestsme = INT_MAX;
  int sadpb = x->sadperbit16;
  MV *const best_mv = &x->best_mv.as_mv;
  int search_range = 3;

  MvLimits tmp_mv_limits = x->mv_limits;

  // Initialized here because of compiler problem in Visual Studio.
  // The search routines operate on pre[0], so when searching the second
  // reference temporarily swap its buffer into slot 0.
  if (ref_idx) {
    orig_yv12 = pd->pre[0];
    pd->pre[0] = pd->pre[ref_idx];
  }

  // Do compound motion search on the current reference frame.
  av1_set_mv_search_range(&x->mv_limits, &ref_mv.as_mv);

  // Use the mv result from the single mode as mv predictor.
  *best_mv = *this_mv;

  // Convert from 1/8-pel to full-pel units for the integer search.
  best_mv->col >>= 3;
  best_mv->row >>= 3;

  av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx);

  // Small-range full-pixel motion search.
  bestsme = av1_refining_search_8p_c(x, sadpb, search_range,
                                     &cpi->fn_ptr[bsize], mask, mask_stride,
                                     ref_idx, &ref_mv.as_mv, second_pred);
  if (bestsme < INT_MAX) {
    // Re-evaluate the winner with the (masked) compound variance function.
    if (mask)
      bestsme =
          av1_get_mvpred_mask_var(x, best_mv, &ref_mv.as_mv, second_pred, mask,
                                  mask_stride, ref_idx, &cpi->fn_ptr[bsize], 1);
    else
      bestsme = av1_get_mvpred_av_var(x, best_mv, &ref_mv.as_mv, second_pred,
                                      &cpi->fn_ptr[bsize], 1);
  }

  x->mv_limits = tmp_mv_limits;

#if CONFIG_AMVR
  // When integer-mv is forced, scale back to 1/8-pel units and skip the
  // subpel refinement below.
  if (cpi->common.cur_frame_force_integer_mv) {
    x->best_mv.as_mv.row *= 8;
    x->best_mv.as_mv.col *= 8;
  }
  if (bestsme < INT_MAX && cpi->common.cur_frame_force_integer_mv == 0) {
#else
  if (bestsme < INT_MAX) {
#endif
    int dis; /* TODO: use dis in distortion calculation later. */
    unsigned int sse;
    // Sub-pixel refinement around the full-pel winner.
    bestsme = cpi->find_fractional_mv_step(
        x, &ref_mv.as_mv, cpi->common.allow_high_precision_mv, x->errorperbit,
        &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL,
        x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, mask, mask_stride,
        ref_idx, pw, ph, cpi->sf.use_upsampled_references);
  }

  // Restore the pointer to the first (possibly scaled) prediction buffer.
  if (ref_idx) pd->pre[0] = orig_yv12;

  if (bestsme < INT_MAX) *this_mv = *best_mv;

  *rate_mv = 0;

  if (scaled_ref_frame) {
    // Restore the prediction frame pointers to their unscaled versions.
    int i;
    for (i = 0; i < MAX_MB_PLANE; i++)
      xd->plane[i].pre[ref_idx] = backup_yv12[i];
  }

  av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx);
  *rate_mv += av1_mv_bit_cost(this_mv, &ref_mv.as_mv, x->nmvjointcost,
                              x->mvcost, MV_COST_WEIGHT);
}
6709
// Wrapper for compound_single_motion_search, for the common case
// where the second prediction is also an inter mode.
//
// Builds the fixed ("other") reference's prediction into a local buffer and
// then refines frame_mv[ref_frame[ref_idx]] against it; the refined mv is
// written back into frame_mv.
static void compound_single_motion_search_interinter(
    const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int_mv *frame_mv,
    int mi_row, int mi_col, const uint8_t *mask, int mask_stride, int *rate_mv,
    const int block, int ref_idx) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;

  // This function should only ever be called for compound modes
  assert(has_second_ref(mbmi));

// Prediction buffer from second frame.
#if CONFIG_HIGHBITDEPTH
  // High-bitdepth frames need a 16-bit buffer; wrap it so the same uint8_t*
  // interface works for both paths.
  DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[MAX_SB_SQUARE]);
  uint8_t *second_pred;
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
    second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16);
  else
    second_pred = (uint8_t *)second_pred_alloc_16;
#else
  DECLARE_ALIGNED(16, uint8_t, second_pred[MAX_SB_SQUARE]);
#endif  // CONFIG_HIGHBITDEPTH

  MV *this_mv = &frame_mv[mbmi->ref_frame[ref_idx]].as_mv;
  const MV *other_mv = &frame_mv[mbmi->ref_frame[!ref_idx]].as_mv;

  build_second_inter_pred(cpi, x, bsize, other_mv, mi_row, mi_col, block,
                          ref_idx, second_pred);

  compound_single_motion_search(cpi, x, bsize, this_mv, mi_row, mi_col,
                                second_pred, mask, mask_stride, rate_mv,
                                ref_idx);
}
6744
// Run a masked (compound) motion search for one or both references of a
// compound block, using the compound mask described by comp_data.
//
//   cur_mv  - the current mvs for the two references (search start points)
//   tmp_mv  - out: the refined mvs for both references
//   rate_mv - out: bit cost of the refined mv(s)
//   which   - which component(s) to search: 0 - 0 only, 1 - 1 only, 2 - both
static void do_masked_motion_search_indexed(
    const AV1_COMP *const cpi, MACROBLOCK *x, const int_mv *const cur_mv,
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE bsize,
    int mi_row, int mi_col, int_mv *tmp_mv, int *rate_mv, int which) {
  // NOTE: which values: 0 - 0 only, 1 - 1 only, 2 - both
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  BLOCK_SIZE sb_type = mbmi->sb_type;
  const uint8_t *mask;
  const int mask_stride = block_size_wide[bsize];

  mask = av1_get_compound_type_mask(comp_data, sb_type);

  // Seed the per-reference-frame mv table with the current mvs.
  int_mv frame_mv[TOTAL_REFS_PER_FRAME];
  MV_REFERENCE_FRAME rf[2] = { mbmi->ref_frame[0], mbmi->ref_frame[1] };

  frame_mv[rf[0]].as_int = cur_mv[0].as_int;
  frame_mv[rf[1]].as_int = cur_mv[1].as_int;
  if (which == 0 || which == 1) {
    // Refine only the mv of reference "which", keeping the other fixed.
    compound_single_motion_search_interinter(cpi, x, bsize, frame_mv, mi_row,
                                             mi_col, mask, mask_stride, rate_mv,
                                             0, which);
  } else if (which == 2) {
    // Jointly refine both mvs.
    joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, mask,
                        mask_stride, rate_mv, 0);
  }
  tmp_mv[0].as_int = frame_mv[rf[0]].as_int;
  tmp_mv[1].as_int = frame_mv[rf[1]].as_int;
}
Yaowu Xuc27fc142016-08-22 16:08:15 -07006774
James Zern89a015b2017-08-08 12:39:00 -04006775// In some situations we want to discount the apparent cost of a new motion
Yaowu Xuc27fc142016-08-22 16:08:15 -07006776// vector. Where there is a subtle motion field and especially where there is
6777// low spatial complexity then it can be hard to cover the cost of a new motion
6778// vector in a single block, even if that motion vector reduces distortion.
6779// However, once established that vector may be usable through the nearest and
6780// near mv modes to reduce distortion in subsequent blocks and also improve
6781// visual quality.
Urvang Joshi52648442016-10-13 17:27:51 -07006782static int discount_newmv_test(const AV1_COMP *const cpi, int this_mode,
Yaowu Xuc27fc142016-08-22 16:08:15 -07006783 int_mv this_mv,
6784 int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME],
6785 int ref_frame) {
6786 return (!cpi->rc.is_src_frame_alt_ref && (this_mode == NEWMV) &&
6787 (this_mv.as_int != 0) &&
6788 ((mode_mv[NEARESTMV][ref_frame].as_int == 0) ||
6789 (mode_mv[NEARESTMV][ref_frame].as_int == INVALID_MV)) &&
6790 ((mode_mv[NEARMV][ref_frame].as_int == 0) ||
6791 (mode_mv[NEARMV][ref_frame].as_int == INVALID_MV)));
6792}
6793
// Margins (in 1/8-pel units, hence the << 3) that keep a motion vector's
// referenced block within the padded frame border.
#define LEFT_TOP_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
#define RIGHT_BOTTOM_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)

// Clamp an mv so its prediction stays inside the allowed border region of
// the frame, based on the current block's distance to each frame edge.
// TODO(jingning): this mv clamping function should be block size dependent.
static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
  clamp_mv(mv, xd->mb_to_left_edge - LEFT_TOP_MARGIN,
           xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
           xd->mb_to_top_edge - LEFT_TOP_MARGIN,
           xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
}
6804
Yaowu Xuf883b422016-08-30 14:01:10 -07006805static int estimate_wedge_sign(const AV1_COMP *cpi, const MACROBLOCK *x,
Yaowu Xuc27fc142016-08-22 16:08:15 -07006806 const BLOCK_SIZE bsize, const uint8_t *pred0,
6807 int stride0, const uint8_t *pred1, int stride1) {
6808 const struct macroblock_plane *const p = &x->plane[0];
6809 const uint8_t *src = p->src.buf;
6810 int src_stride = p->src.stride;
6811 const int f_index = bsize - BLOCK_8X8;
Jingning Han61418bb2017-01-23 17:12:48 -08006812 const int bw = block_size_wide[bsize];
6813 const int bh = block_size_high[bsize];
Yue Chenf03907a2017-05-31 12:04:04 -07006814 uint32_t esq[2][4];
Yaowu Xuc27fc142016-08-22 16:08:15 -07006815 int64_t tl, br;
6816
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02006817#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07006818 if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
6819 pred0 = CONVERT_TO_BYTEPTR(pred0);
6820 pred1 = CONVERT_TO_BYTEPTR(pred1);
6821 }
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02006822#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07006823
Yue Chenf03907a2017-05-31 12:04:04 -07006824 cpi->fn_ptr[f_index].vf(src, src_stride, pred0, stride0, &esq[0][0]);
6825 cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, pred0 + bw / 2, stride0,
6826 &esq[0][1]);
6827 cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride, src_stride,
6828 pred0 + bh / 2 * stride0, stride0, &esq[0][2]);
6829 cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride + bw / 2, src_stride,
6830 pred0 + bh / 2 * stride0 + bw / 2, stride0,
6831 &esq[0][3]);
6832 cpi->fn_ptr[f_index].vf(src, src_stride, pred1, stride1, &esq[1][0]);
6833 cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, pred1 + bw / 2, stride1,
6834 &esq[1][1]);
6835 cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride, src_stride,
6836 pred1 + bh / 2 * stride1, stride0, &esq[1][2]);
6837 cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride + bw / 2, src_stride,
6838 pred1 + bh / 2 * stride1 + bw / 2, stride0,
6839 &esq[1][3]);
Yaowu Xuc27fc142016-08-22 16:08:15 -07006840
6841 tl = (int64_t)(esq[0][0] + esq[0][1] + esq[0][2]) -
6842 (int64_t)(esq[1][0] + esq[1][1] + esq[1][2]);
6843 br = (int64_t)(esq[1][3] + esq[1][1] + esq[1][2]) -
6844 (int64_t)(esq[0][3] + esq[0][1] + esq[0][2]);
6845 return (tl + br > 0);
6846}
Yaowu Xuc27fc142016-08-22 16:08:15 -07006847
#if !CONFIG_DUAL_FILTER
// Predict a likely interpolation filter for the current block, to prune the
// filter search. Sources of evidence, in order:
//   1. (chessboard-gated speed feature) the filters of the above/left
//      neighbours, when they agree or the mode reuses neighbour mvs;
//   2. (adaptive mode search) for compound modes, the filter chosen by the
//      corresponding single-reference mode(s), when both references agree;
//   3. a low-variance source forces EIGHTTAP_REGULAR.
// Returns SWITCHABLE when no confident prediction can be made.
static InterpFilter predict_interp_filter(
    const AV1_COMP *cpi, const MACROBLOCK *x, const BLOCK_SIZE bsize,
    const int mi_row, const int mi_col,
    InterpFilter (*single_filter)[TOTAL_REFS_PER_FRAME]) {
  InterpFilter best_filter = SWITCHABLE;
  const AV1_COMMON *cm = &cpi->common;
  const MACROBLOCKD *xd = &x->e_mbd;
  int bsl = mi_width_log2_lookup[bsize];
  // Chessboard pattern: only apply the neighbour-based prediction on
  // alternating block positions, alternating per frame.
  int pred_filter_search =
      cpi->sf.cb_pred_filter_search
          ? (((mi_row + mi_col) >> bsl) +
             get_chessboard_index(cm->current_video_frame)) &
                0x1
          : 0;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  const int is_comp_pred = has_second_ref(mbmi);
  const int this_mode = mbmi->mode;
  int refs[2] = { mbmi->ref_frame[0],
                  (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
  if (pred_filter_search) {
    InterpFilter af = SWITCHABLE, lf = SWITCHABLE;
    // Filters used by the above and left neighbours, if present.
    if (xd->up_available)
      af = av1_extract_interp_filter(
          xd->mi[-xd->mi_stride]->mbmi.interp_filters, 0);
    if (xd->left_available)
      lf = av1_extract_interp_filter(xd->mi[-1]->mbmi.interp_filters, 0);

    // For non-NEWMV modes (which reuse neighbour mvs), or when both
    // neighbours agree, adopt the above neighbour's filter.
    if ((this_mode != NEWMV && this_mode != NEW_NEWMV) || (af == lf))
      best_filter = af;
  }
  if (is_comp_pred) {
    if (cpi->sf.adaptive_mode_search) {
      // For each compound mode, adopt the filter selected by the matching
      // single-reference mode(s) when both references chose the same one.
      switch (this_mode) {
        case NEAREST_NEARESTMV:
          if (single_filter[NEARESTMV][refs[0]] ==
              single_filter[NEARESTMV][refs[1]])
            best_filter = single_filter[NEARESTMV][refs[0]];
          break;
        case NEAR_NEARMV:
          if (single_filter[NEARMV][refs[0]] == single_filter[NEARMV][refs[1]])
            best_filter = single_filter[NEARMV][refs[0]];
          break;
        case GLOBAL_GLOBALMV:
          if (single_filter[GLOBALMV][refs[0]] ==
              single_filter[GLOBALMV][refs[1]])
            best_filter = single_filter[GLOBALMV][refs[0]];
          break;
        case NEW_NEWMV:
          if (single_filter[NEWMV][refs[0]] == single_filter[NEWMV][refs[1]])
            best_filter = single_filter[NEWMV][refs[0]];
          break;
        case NEAREST_NEWMV:
          if (single_filter[NEARESTMV][refs[0]] ==
              single_filter[NEWMV][refs[1]])
            best_filter = single_filter[NEARESTMV][refs[0]];
          break;
        case NEAR_NEWMV:
          if (single_filter[NEARMV][refs[0]] == single_filter[NEWMV][refs[1]])
            best_filter = single_filter[NEARMV][refs[0]];
          break;
        case NEW_NEARESTMV:
          if (single_filter[NEWMV][refs[0]] ==
              single_filter[NEARESTMV][refs[1]])
            best_filter = single_filter[NEWMV][refs[0]];
          break;
        case NEW_NEARMV:
          if (single_filter[NEWMV][refs[0]] == single_filter[NEARMV][refs[1]])
            best_filter = single_filter[NEWMV][refs[0]];
          break;
        default:
          if (single_filter[this_mode][refs[0]] ==
              single_filter[this_mode][refs[1]])
            best_filter = single_filter[this_mode][refs[0]];
          break;
      }
    }
  }
  if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
    best_filter = EIGHTTAP_REGULAR;
  }
  return best_filter;
}
#endif  // !CONFIG_DUAL_FILTER
Yaowu Xuc27fc142016-08-22 16:08:15 -07006932
// Choose the best wedge index and sign
//
// Exhaustively evaluates every wedge partition for bsize; for each wedge the
// sign is chosen analytically from the residuals, then the RD cost is
// modelled from the masked SSE. Works entirely in the residual domain:
//   r0/r1 = src - p0 / src - p1,  d10 = p1 - p0.
// Outputs the winning index/sign through the pointers; returns the best
// modelled RD cost.
static int64_t pick_wedge(const AV1_COMP *const cpi, const MACROBLOCK *const x,
                          const BLOCK_SIZE bsize, const uint8_t *const p0,
                          const uint8_t *const p1, int *const best_wedge_sign,
                          int *const best_wedge_index) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const struct buf_2d *const src = &x->plane[0].src;
  const int bw = block_size_wide[bsize];
  const int bh = block_size_high[bsize];
  const int N = bw * bh;  // number of pixels in the block
  int rate;
  int64_t dist;
  int64_t rd, best_rd = INT64_MAX;
  int wedge_index;
  int wedge_sign;
  int wedge_types = (1 << get_wedge_bits_lookup(bsize));
  const uint8_t *mask;
  uint64_t sse;
#if CONFIG_HIGHBITDEPTH
  const int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
  // SSE is computed at full bit depth; round back to an 8-bit scale.
  const int bd_round = hbd ? (xd->bd - 8) * 2 : 0;
#else
  const int bd_round = 0;
#endif  // CONFIG_HIGHBITDEPTH

  DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(32, int16_t, d10[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(32, int16_t, ds[MAX_SB_SQUARE]);

  int64_t sign_limit;

#if CONFIG_HIGHBITDEPTH
  if (hbd) {
    aom_highbd_subtract_block(bh, bw, r0, bw, src->buf, src->stride,
                              CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
    aom_highbd_subtract_block(bh, bw, r1, bw, src->buf, src->stride,
                              CONVERT_TO_BYTEPTR(p1), bw, xd->bd);
    aom_highbd_subtract_block(bh, bw, d10, bw, CONVERT_TO_BYTEPTR(p1), bw,
                              CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
  } else  // NOLINT
#endif  // CONFIG_HIGHBITDEPTH
  {
    aom_subtract_block(bh, bw, r0, bw, src->buf, src->stride, p0, bw);
    aom_subtract_block(bh, bw, r1, bw, src->buf, src->stride, p1, bw);
    aom_subtract_block(bh, bw, d10, bw, p1, bw, p0, bw);
  }

  // Threshold used by the analytic sign decision below.
  sign_limit = ((int64_t)aom_sum_squares_i16(r0, N) -
                (int64_t)aom_sum_squares_i16(r1, N)) *
               (1 << WEDGE_WEIGHT_BITS) / 2;

  if (N < 64)
    av1_wedge_compute_delta_squares_c(ds, r0, r1, N);
  else
    av1_wedge_compute_delta_squares(ds, r0, r1, N);

  for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
    mask = av1_get_contiguous_soft_mask(wedge_index, 0, bsize);

    // TODO(jingning): Make sse2 functions support N = 16 case
    if (N < 64)
      wedge_sign = av1_wedge_sign_from_residuals_c(ds, mask, N, sign_limit);
    else
      wedge_sign = av1_wedge_sign_from_residuals(ds, mask, N, sign_limit);

    // Re-fetch the mask with the chosen sign and model the RD cost.
    mask = av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
    if (N < 64)
      sse = av1_wedge_sse_from_residuals_c(r1, d10, mask, N);
    else
      sse = av1_wedge_sse_from_residuals(r1, d10, mask, N);
    sse = ROUND_POWER_OF_TWO(sse, bd_round);

    model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist);
    rd = RDCOST(x->rdmult, rate, dist);

    if (rd < best_rd) {
      *best_wedge_index = wedge_index;
      *best_wedge_sign = wedge_sign;
      best_rd = rd;
    }
  }

  return best_rd;
}
7018
// Choose the best wedge index the specified sign
//
// Like pick_wedge(), but the sign is given by the caller (used with the fast
// sign-estimation speed feature), so only r1 = src - p1 and d10 = p1 - p0
// are needed. Writes the winning index through best_wedge_index and returns
// the best modelled RD cost.
static int64_t pick_wedge_fixed_sign(
    const AV1_COMP *const cpi, const MACROBLOCK *const x,
    const BLOCK_SIZE bsize, const uint8_t *const p0, const uint8_t *const p1,
    const int wedge_sign, int *const best_wedge_index) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const struct buf_2d *const src = &x->plane[0].src;
  const int bw = block_size_wide[bsize];
  const int bh = block_size_high[bsize];
  const int N = bw * bh;  // number of pixels in the block
  int rate;
  int64_t dist;
  int64_t rd, best_rd = INT64_MAX;
  int wedge_index;
  int wedge_types = (1 << get_wedge_bits_lookup(bsize));
  const uint8_t *mask;
  uint64_t sse;
#if CONFIG_HIGHBITDEPTH
  const int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
  // SSE is computed at full bit depth; round back to an 8-bit scale.
  const int bd_round = hbd ? (xd->bd - 8) * 2 : 0;
#else
  const int bd_round = 0;
#endif  // CONFIG_HIGHBITDEPTH

  DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(32, int16_t, d10[MAX_SB_SQUARE]);

#if CONFIG_HIGHBITDEPTH
  if (hbd) {
    aom_highbd_subtract_block(bh, bw, r1, bw, src->buf, src->stride,
                              CONVERT_TO_BYTEPTR(p1), bw, xd->bd);
    aom_highbd_subtract_block(bh, bw, d10, bw, CONVERT_TO_BYTEPTR(p1), bw,
                              CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
  } else  // NOLINT
#endif  // CONFIG_HIGHBITDEPTH
  {
    aom_subtract_block(bh, bw, r1, bw, src->buf, src->stride, p1, bw);
    aom_subtract_block(bh, bw, d10, bw, p1, bw, p0, bw);
  }

  for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
    mask = av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
    // C fallback for small blocks (see TODO in pick_wedge).
    if (N < 64)
      sse = av1_wedge_sse_from_residuals_c(r1, d10, mask, N);
    else
      sse = av1_wedge_sse_from_residuals(r1, d10, mask, N);
    sse = ROUND_POWER_OF_TWO(sse, bd_round);

    model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist);
    rd = RDCOST(x->rdmult, rate, dist);

    if (rd < best_rd) {
      *best_wedge_index = wedge_index;
      best_rd = rd;
    }
  }

  return best_rd;
}
7078
Yaowu Xuf883b422016-08-30 14:01:10 -07007079static int64_t pick_interinter_wedge(const AV1_COMP *const cpi,
Sarah Parker2d0e9b72017-05-04 01:34:16 +00007080 MACROBLOCK *const x,
Yaowu Xuc27fc142016-08-22 16:08:15 -07007081 const BLOCK_SIZE bsize,
7082 const uint8_t *const p0,
7083 const uint8_t *const p1) {
Sarah Parker2d0e9b72017-05-04 01:34:16 +00007084 MACROBLOCKD *const xd = &x->e_mbd;
Yaowu Xuc27fc142016-08-22 16:08:15 -07007085 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
Jingning Hanae5cfde2016-11-30 12:01:44 -08007086 const int bw = block_size_wide[bsize];
Yaowu Xuc27fc142016-08-22 16:08:15 -07007087
7088 int64_t rd;
7089 int wedge_index = -1;
7090 int wedge_sign = 0;
7091
Sarah Parker42d96102017-01-31 21:05:27 -08007092 assert(is_interinter_compound_used(COMPOUND_WEDGE, bsize));
Debargha Mukherjee9e2c7a62017-05-23 21:18:42 -07007093 assert(cpi->common.allow_masked_compound);
Yaowu Xuc27fc142016-08-22 16:08:15 -07007094
7095 if (cpi->sf.fast_wedge_sign_estimate) {
7096 wedge_sign = estimate_wedge_sign(cpi, x, bsize, p0, bw, p1, bw);
7097 rd = pick_wedge_fixed_sign(cpi, x, bsize, p0, p1, wedge_sign, &wedge_index);
7098 } else {
7099 rd = pick_wedge(cpi, x, bsize, p0, p1, &wedge_sign, &wedge_index);
7100 }
7101
Sarah Parker2d0e9b72017-05-04 01:34:16 +00007102 mbmi->wedge_sign = wedge_sign;
7103 mbmi->wedge_index = wedge_index;
Yaowu Xuc27fc142016-08-22 16:08:15 -07007104 return rd;
7105}
7106
// Search all segmentation-mask types for an inter-inter COMPOUND_SEG
// prediction, pick the one with the lowest model RD cost, rebuild that mask
// into xd->seg_mask, record it in the block's mode info, and return the
// best RD cost found.
// p0/p1 are the two single-reference predictors at stride bw; in
// high-bitdepth builds they hold CONVERT_TO_BYTEPTR-compatible data.
static int64_t pick_interinter_seg(const AV1_COMP *const cpi,
                                   MACROBLOCK *const x, const BLOCK_SIZE bsize,
                                   const uint8_t *const p0,
                                   const uint8_t *const p1) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const struct buf_2d *const src = &x->plane[0].src;
  const int bw = block_size_wide[bsize];
  const int bh = block_size_high[bsize];
  const int N = bw * bh;
  int rate;
  uint64_t sse;
  int64_t dist;
  int64_t rd0;
  SEG_MASK_TYPE cur_mask_type;
  int64_t best_rd = INT64_MAX;
  SEG_MASK_TYPE best_mask_type = 0;
#if CONFIG_HIGHBITDEPTH
  const int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH;
  // Extra rounding so high-bitdepth SSE is comparable to the 8-bit scale.
  const int bd_round = hbd ? (xd->bd - 8) * 2 : 0;
#else
  const int bd_round = 0;
#endif  // CONFIG_HIGHBITDEPTH
  // Residuals of source vs each predictor, and the predictor difference.
  DECLARE_ALIGNED(32, int16_t, r0[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(32, int16_t, r1[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(32, int16_t, d10[MAX_SB_SQUARE]);

#if CONFIG_HIGHBITDEPTH
  if (hbd) {
    aom_highbd_subtract_block(bh, bw, r0, bw, src->buf, src->stride,
                              CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
    aom_highbd_subtract_block(bh, bw, r1, bw, src->buf, src->stride,
                              CONVERT_TO_BYTEPTR(p1), bw, xd->bd);
    aom_highbd_subtract_block(bh, bw, d10, bw, CONVERT_TO_BYTEPTR(p1), bw,
                              CONVERT_TO_BYTEPTR(p0), bw, xd->bd);
  } else  // NOLINT
#endif  // CONFIG_HIGHBITDEPTH
  {
    aom_subtract_block(bh, bw, r0, bw, src->buf, src->stride, p0, bw);
    aom_subtract_block(bh, bw, r1, bw, src->buf, src->stride, p1, bw);
    aom_subtract_block(bh, bw, d10, bw, p1, bw, p0, bw);
  }

  // try each mask type and its inverse
  for (cur_mask_type = 0; cur_mask_type < SEG_MASK_TYPES; cur_mask_type++) {
// build mask and inverse
#if CONFIG_HIGHBITDEPTH
    if (hbd)
      build_compound_seg_mask_highbd(
          xd->seg_mask, cur_mask_type, CONVERT_TO_BYTEPTR(p0), bw,
          CONVERT_TO_BYTEPTR(p1), bw, bsize, bh, bw, xd->bd);
    else
#endif  // CONFIG_HIGHBITDEPTH
      build_compound_seg_mask(xd->seg_mask, cur_mask_type, p0, bw, p1, bw,
                              bsize, bh, bw);

    // compute rd for mask: SSE of the masked blend from the residuals.
    sse = av1_wedge_sse_from_residuals(r1, d10, xd->seg_mask, N);
    sse = ROUND_POWER_OF_TWO(sse, bd_round);

    model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist);
    rd0 = RDCOST(x->rdmult, rate, dist);

    if (rd0 < best_rd) {
      best_mask_type = cur_mask_type;
      best_rd = rd0;
    }
  }

  // make final mask: the loop above left xd->seg_mask holding the last
  // candidate tried, so rebuild the winning mask before returning.
  mbmi->mask_type = best_mask_type;
#if CONFIG_HIGHBITDEPTH
  if (hbd)
    build_compound_seg_mask_highbd(
        xd->seg_mask, mbmi->mask_type, CONVERT_TO_BYTEPTR(p0), bw,
        CONVERT_TO_BYTEPTR(p1), bw, bsize, bh, bw, xd->bd);
  else
#endif  // CONFIG_HIGHBITDEPTH
    build_compound_seg_mask(xd->seg_mask, mbmi->mask_type, p0, bw, p1, bw,
                            bsize, bh, bw);

  return best_rd;
}
Sarah Parker569edda2016-12-14 14:57:38 -08007190
Yaowu Xuf883b422016-08-30 14:01:10 -07007191static int64_t pick_interintra_wedge(const AV1_COMP *const cpi,
Yaowu Xuc27fc142016-08-22 16:08:15 -07007192 const MACROBLOCK *const x,
7193 const BLOCK_SIZE bsize,
7194 const uint8_t *const p0,
7195 const uint8_t *const p1) {
7196 const MACROBLOCKD *const xd = &x->e_mbd;
7197 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
7198
7199 int64_t rd;
7200 int wedge_index = -1;
7201
7202 assert(is_interintra_wedge_used(bsize));
Debargha Mukherjee9e2c7a62017-05-23 21:18:42 -07007203 assert(cpi->common.allow_interintra_compound);
Yaowu Xuc27fc142016-08-22 16:08:15 -07007204
7205 rd = pick_wedge_fixed_sign(cpi, x, bsize, p0, p1, 0, &wedge_index);
7206
7207 mbmi->interintra_wedge_sign = 0;
7208 mbmi->interintra_wedge_index = wedge_index;
7209 return rd;
7210}
Sarah Parker6fdc8532016-11-16 17:47:13 -08007211
Sarah Parker2d0e9b72017-05-04 01:34:16 +00007212static int64_t pick_interinter_mask(const AV1_COMP *const cpi, MACROBLOCK *x,
Sarah Parkerddcea392017-04-25 15:57:22 -07007213 const BLOCK_SIZE bsize,
7214 const uint8_t *const p0,
7215 const uint8_t *const p1) {
7216 const COMPOUND_TYPE compound_type =
Sarah Parker2d0e9b72017-05-04 01:34:16 +00007217 x->e_mbd.mi[0]->mbmi.interinter_compound_type;
Sarah Parkerddcea392017-04-25 15:57:22 -07007218 switch (compound_type) {
Sarah Parkerddcea392017-04-25 15:57:22 -07007219 case COMPOUND_WEDGE: return pick_interinter_wedge(cpi, x, bsize, p0, p1);
Sarah Parkerddcea392017-04-25 15:57:22 -07007220 case COMPOUND_SEG: return pick_interinter_seg(cpi, x, bsize, p0, p1);
Sarah Parkerddcea392017-04-25 15:57:22 -07007221 default: assert(0); return 0;
7222 }
7223}
7224
David Barkerc155e012017-05-11 13:54:54 +01007225static int interinter_compound_motion_search(
7226 const AV1_COMP *const cpi, MACROBLOCK *x, const int_mv *const cur_mv,
7227 const BLOCK_SIZE bsize, const int this_mode, int mi_row, int mi_col) {
Sarah Parker2d0e9b72017-05-04 01:34:16 +00007228 MACROBLOCKD *const xd = &x->e_mbd;
Sarah Parker6fdc8532016-11-16 17:47:13 -08007229 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
7230 int_mv tmp_mv[2];
David Barkerc155e012017-05-11 13:54:54 +01007231 int tmp_rate_mv = 0;
Sarah Parker2d0e9b72017-05-04 01:34:16 +00007232 const INTERINTER_COMPOUND_DATA compound_data = {
Debargha Mukherjee371968c2017-10-29 12:30:04 -07007233 mbmi->wedge_index, mbmi->wedge_sign, mbmi->mask_type, xd->seg_mask,
Sarah Parker2d0e9b72017-05-04 01:34:16 +00007234 mbmi->interinter_compound_type
7235 };
Zoe Liu85b66462017-04-20 14:28:19 -07007236
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01007237 if (this_mode == NEW_NEWMV) {
David Barkerc155e012017-05-11 13:54:54 +01007238 do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize,
7239 mi_row, mi_col, tmp_mv, &tmp_rate_mv, 2);
Sarah Parker6fdc8532016-11-16 17:47:13 -08007240 mbmi->mv[0].as_int = tmp_mv[0].as_int;
7241 mbmi->mv[1].as_int = tmp_mv[1].as_int;
7242 } else if (this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV) {
David Barkerc155e012017-05-11 13:54:54 +01007243 do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize,
7244 mi_row, mi_col, tmp_mv, &tmp_rate_mv, 0);
Sarah Parker6fdc8532016-11-16 17:47:13 -08007245 mbmi->mv[0].as_int = tmp_mv[0].as_int;
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01007246 } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
David Barkerc155e012017-05-11 13:54:54 +01007247 do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize,
7248 mi_row, mi_col, tmp_mv, &tmp_rate_mv, 1);
Sarah Parker6fdc8532016-11-16 17:47:13 -08007249 mbmi->mv[1].as_int = tmp_mv[1].as_int;
7250 }
7251 return tmp_rate_mv;
7252}
7253
// Build the prediction for the block's current compound type (mask search,
// optional mask-aware MV refinement, blend) and return its RD cost.
// rs2 is the rate of signaling this compound type; rate_mv the rate of the
// unrefined motion vectors. *out_rate_mv receives the MV rate actually used.
// preds0/preds1 hold the two pre-computed single-reference predictors.
static int64_t build_and_cost_compound_type(
    const AV1_COMP *const cpi, MACROBLOCK *x, const int_mv *const cur_mv,
    const BLOCK_SIZE bsize, const int this_mode, int rs2, int rate_mv,
    BUFFER_SET *ctx, int *out_rate_mv, uint8_t **preds0, uint8_t **preds1,
    int *strides, int mi_row, int mi_col) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int rate_sum;
  int64_t dist_sum;
  int64_t best_rd_cur = INT64_MAX;
  int64_t rd = INT64_MAX;
  int tmp_skip_txfm_sb;
  int64_t tmp_skip_sse_sb;
  const COMPOUND_TYPE compound_type = mbmi->interinter_compound_type;

  // Baseline: best mask with the unrefined MVs, plus signaling + MV rate.
  best_rd_cur = pick_interinter_mask(cpi, x, bsize, *preds0, *preds1);
  best_rd_cur += RDCOST(x->rdmult, rs2 + rate_mv, 0);

  if (have_newmv_in_inter_mode(this_mode) &&
      use_masked_motion_search(compound_type)) {
    // Try refining the NEWMV component(s) with a mask-aware search, then
    // keep the refinement only if its model RD beats the baseline.
    *out_rate_mv = interinter_compound_motion_search(cpi, x, cur_mv, bsize,
                                                     this_mode, mi_row, mi_col);
    av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, ctx, bsize);
    model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
                    &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
    rd = RDCOST(x->rdmult, rs2 + *out_rate_mv + rate_sum, dist_sum);
    if (rd >= best_rd_cur) {
      // Refinement did not help: restore the original MVs and rebuild the
      // blended predictor from the cached single-reference predictions.
      mbmi->mv[0].as_int = cur_mv[0].as_int;
      mbmi->mv[1].as_int = cur_mv[1].as_int;
      *out_rate_mv = rate_mv;
      av1_build_wedge_inter_predictor_from_buf(xd, bsize, 0, 0, preds0, strides,
                                               preds1, strides);
    }
    // Re-estimate the cost with a real luma transform search.
    av1_subtract_plane(x, bsize, 0);
    rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
                             &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
    if (rd != INT64_MAX)
      rd = RDCOST(x->rdmult, rs2 + *out_rate_mv + rate_sum, dist_sum);
    best_rd_cur = rd;

  } else {
    // No MV refinement: blend the cached predictors and cost the result.
    av1_build_wedge_inter_predictor_from_buf(xd, bsize, 0, 0, preds0, strides,
                                             preds1, strides);
    av1_subtract_plane(x, bsize, 0);
    rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
                             &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
    if (rd != INT64_MAX)
      rd = RDCOST(x->rdmult, rs2 + rate_mv + rate_sum, dist_sum);
    best_rd_cur = rd;
  }
  return best_rd_cur;
}
Yaowu Xuc27fc142016-08-22 16:08:15 -07007307
// Auxiliary state threaded through the inter-mode RD search helpers, so
// per-reference results computed once can be reused across modes.
typedef struct {
  // Inter prediction buffers and respective strides
  uint8_t *above_pred_buf[MAX_MB_PLANE];
  int above_pred_stride[MAX_MB_PLANE];
  uint8_t *left_pred_buf[MAX_MB_PLANE];
  int left_pred_stride[MAX_MB_PLANE];
  // Cached single-reference NEWMV results, indexed by reference frame.
  int_mv *single_newmv;
  // Pointer to array of motion vectors to use for each ref and their rates
  // Should point to first of 2 arrays in 2D array
  int *single_newmv_rate;
  // Pointer to array of predicted rate-distortion
  // Should point to first of 2 arrays in 2D array
  int64_t (*modelled_rd)[TOTAL_REFS_PER_FRAME];
  // Interpolation filter chosen per (mode, reference) in single-reference
  // search; presumably consulted when predicting compound filters.
  InterpFilter single_filter[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
} HandleInterModeArgs;
7323
// Determine the motion vector(s) and their rate for a NEWMV-bearing mode.
// For compound modes the NEW side(s) come from the cached single-reference
// results and may be refined by a joint or single compound search; for
// single-reference modes a fresh motion search runs (and is cached).
// On success writes the MV(s) into mode_mv[this_mode] and *rate_mv and
// returns 0; returns INT64_MAX if the single-reference search failed.
static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
                            const BLOCK_SIZE bsize,
                            int_mv (*const mode_mv)[TOTAL_REFS_PER_FRAME],
                            const int mi_row, const int mi_col,
                            int *const rate_mv, int_mv *const single_newmv,
                            HandleInterModeArgs *const args) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;
  const int is_comp_interintra_pred = (mbmi->ref_frame[1] == INTRA_FRAME);
  int_mv *const frame_mv = mode_mv[this_mode];
  const int refs[2] = { mbmi->ref_frame[0],
                        mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
  int i;

  (void)args;

  if (is_comp_pred) {
    // Seed both sides from the cached single-reference NEWMV results.
    for (i = 0; i < 2; ++i) {
      single_newmv[refs[i]].as_int = args->single_newmv[refs[i]].as_int;
    }

    if (this_mode == NEW_NEWMV) {
      frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
      frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;

      if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
        // Refine both MVs jointly; rate_mv is filled by the search.
        joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, NULL,
                            0, rate_mv, 0);
      } else {
        // No refinement: just cost the two cached MVs.
        *rate_mv = 0;
        for (i = 0; i < 2; ++i) {
          av1_set_mvcost(x, refs[i], i, mbmi->ref_mv_idx);
          *rate_mv += av1_mv_bit_cost(
              &frame_mv[refs[i]].as_mv, &mbmi_ext->ref_mvs[refs[i]][0].as_mv,
              x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
        }
      }
    } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
      // Only ref1 carries a NEWMV; optionally refine it with ref0 fixed.
      frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
      if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
        frame_mv[refs[0]].as_int =
            mode_mv[compound_ref0_mode(this_mode)][refs[0]].as_int;
        compound_single_motion_search_interinter(
            cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, 0, rate_mv, 0, 1);
      } else {
        av1_set_mvcost(x, refs[1], 1, mbmi->ref_mv_idx);
        *rate_mv = av1_mv_bit_cost(&frame_mv[refs[1]].as_mv,
                                   &mbmi_ext->ref_mvs[refs[1]][0].as_mv,
                                   x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
      }
    } else {
      assert(this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV);
      // Only ref0 carries a NEWMV; optionally refine it with ref1 fixed.
      frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
      if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
        frame_mv[refs[1]].as_int =
            mode_mv[compound_ref1_mode(this_mode)][refs[1]].as_int;
        compound_single_motion_search_interinter(
            cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, 0, rate_mv, 0, 0);
      } else {
        av1_set_mvcost(x, refs[0], 0, mbmi->ref_mv_idx);
        *rate_mv = av1_mv_bit_cost(&frame_mv[refs[0]].as_mv,
                                   &mbmi_ext->ref_mvs[refs[0]][0].as_mv,
                                   x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
      }
    }
  } else {
    if (is_comp_interintra_pred) {
      // Inter-intra reuses the cached single-reference search result.
      x->best_mv = args->single_newmv[refs[0]];
      *rate_mv = args->single_newmv_rate[refs[0]];
    } else {
      // Run a fresh search and cache the result for later modes.
      single_motion_search(cpi, x, bsize, mi_row, mi_col, 0, rate_mv);
      args->single_newmv[refs[0]] = x->best_mv;
      args->single_newmv_rate[refs[0]] = *rate_mv;
    }

    if (x->best_mv.as_int == INVALID_MV) return INT64_MAX;

    frame_mv[refs[0]] = x->best_mv;
    xd->mi[0]->bmi[0].as_mv[0] = x->best_mv;

    // Estimate the rate implications of a new mv but discount this
    // under certain circumstances where we want to help initiate a weak
    // motion field, where the distortion gain for a single block may not
    // be enough to overcome the cost of a new mv.
    if (discount_newmv_test(cpi, this_mode, x->best_mv, mode_mv, refs[0])) {
      *rate_mv = AOMMAX(*rate_mv / NEW_MV_DISCOUNT_FACTOR, 1);
    }
  }

  return 0;
}
7418
// Search for the best interpolation filter(s) for the current block.
// Builds the prediction for each candidate filter, model-costs it, and
// leaves the winner in mbmi->interp_filters with the final prediction in
// whichever of orig_dst/tmp_dst was last restored as the destination.
// Outputs: *rd (best model RD), *switchable_rate (filter signaling rate),
// *skip_txfm_sb / *skip_sse_sb (model skip decisions). Always returns 0.
int64_t interpolation_filter_search(
    MACROBLOCK *const x, const AV1_COMP *const cpi, BLOCK_SIZE bsize,
    int mi_row, int mi_col, const BUFFER_SET *const tmp_dst,
    BUFFER_SET *const orig_dst,
    InterpFilter (*const single_filter)[TOTAL_REFS_PER_FRAME],
    int64_t *const rd, int *const switchable_rate, int *const skip_txfm_sb,
    int64_t *const skip_sse_sb) {
  const AV1_COMMON *cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int i;
  int tmp_rate;
  int64_t tmp_dist;

  (void)single_filter;

  InterpFilter assign_filter = SWITCHABLE;

  if (cm->interp_filter == SWITCHABLE) {
#if !CONFIG_DUAL_FILTER
    // Without dual filters, try to predict the filter when interpolation
    // is actually needed; otherwise keep the frame-level setting.
    assign_filter = av1_is_interp_needed(xd)
                        ? predict_interp_filter(cpi, x, bsize, mi_row, mi_col,
                                                single_filter)
                        : cm->interp_filter;
#endif  // !CONFIG_DUAL_FILTER
  } else {
    assign_filter = cm->interp_filter;
  }

  set_default_interp_filters(mbmi, assign_filter);

  // Baseline cost with the default filter assignment.
  *switchable_rate = av1_get_switchable_rate(cm, x, xd);
  av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
  model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate, &tmp_dist,
                  skip_txfm_sb, skip_sse_sb);
  *rd = RDCOST(x->rdmult, *switchable_rate + tmp_rate, tmp_dist);

  if (assign_filter == SWITCHABLE) {
    // do interp_filter search
    if (av1_is_interp_needed(xd) && av1_is_interp_search_needed(xd)) {
#if CONFIG_DUAL_FILTER
      const int filter_set_size = DUAL_FILTER_SET_SIZE;
#else
      const int filter_set_size = SWITCHABLE_FILTERS;
#endif  // CONFIG_DUAL_FILTER
      // best_in_temp tracks which buffer (tmp_dst vs orig_dst) currently
      // holds the best prediction; it toggles on every improvement.
      int best_in_temp = 0;
      InterpFilters best_filters = mbmi->interp_filters;
      restore_dst_buf(xd, *tmp_dst);

#if CONFIG_DUAL_FILTER  // Speed feature use_fast_interpolation_filter_search
      if (cpi->sf.use_fast_interpolation_filter_search) {
        int tmp_skip_sb = 0;
        int64_t tmp_skip_sse = INT64_MAX;
        int tmp_rs;
        int64_t tmp_rd;

        // default to (R,R): EIGHTTAP_REGULARxEIGHTTAP_REGULAR
        int best_dual_mode = 0;
        // Find best of {R}x{R,Sm,Sh}
        // EIGHTTAP_REGULAR mode is calculated beforehand
        for (i = 1; i < SWITCHABLE_FILTERS; ++i) {
          tmp_skip_sb = 0;
          tmp_skip_sse = INT64_MAX;

          mbmi->interp_filters =
              av1_make_interp_filters(filter_sets[i][0], filter_sets[i][1]);

          tmp_rs = av1_get_switchable_rate(cm, x, xd);
          av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst,
                                        bsize);
          model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
                          &tmp_dist, &tmp_skip_sb, &tmp_skip_sse);
          tmp_rd = RDCOST(x->rdmult, tmp_rs + tmp_rate, tmp_dist);

          if (tmp_rd < *rd) {
            best_dual_mode = i;

            *rd = tmp_rd;
            *switchable_rate = av1_get_switchable_rate(cm, x, xd);
            best_filters = mbmi->interp_filters;
            *skip_txfm_sb = tmp_skip_sb;
            *skip_sse_sb = tmp_skip_sse;
            best_in_temp = !best_in_temp;
            if (best_in_temp) {
              restore_dst_buf(xd, *orig_dst);
            } else {
              restore_dst_buf(xd, *tmp_dst);
            }
          }
        }

        // From best of horizontal EIGHTTAP_REGULAR modes, check vertical modes
        for (i = best_dual_mode + SWITCHABLE_FILTERS; i < filter_set_size;
             i += SWITCHABLE_FILTERS) {
          tmp_skip_sb = 0;
          tmp_skip_sse = INT64_MAX;

          mbmi->interp_filters =
              av1_make_interp_filters(filter_sets[i][0], filter_sets[i][1]);

          tmp_rs = av1_get_switchable_rate(cm, x, xd);
          av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst,
                                        bsize);
          model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
                          &tmp_dist, &tmp_skip_sb, &tmp_skip_sse);
          tmp_rd = RDCOST(x->rdmult, tmp_rs + tmp_rate, tmp_dist);

          if (tmp_rd < *rd) {
            *rd = tmp_rd;
            *switchable_rate = av1_get_switchable_rate(cm, x, xd);
            best_filters = mbmi->interp_filters;
            *skip_txfm_sb = tmp_skip_sb;
            *skip_sse_sb = tmp_skip_sse;
            best_in_temp = !best_in_temp;
            if (best_in_temp) {
              restore_dst_buf(xd, *orig_dst);
            } else {
              restore_dst_buf(xd, *tmp_dst);
            }
          }
        }
      } else {
#endif  // CONFIG_DUAL_FILTER Speed feature use_fast_interpolation_filter_search
        // Exhaustive search over the whole filter set.
        // EIGHTTAP_REGULAR mode is calculated beforehand
        for (i = 1; i < filter_set_size; ++i) {
          int tmp_skip_sb = 0;
          int64_t tmp_skip_sse = INT64_MAX;
          int tmp_rs;
          int64_t tmp_rd;
#if CONFIG_DUAL_FILTER
          mbmi->interp_filters =
              av1_make_interp_filters(filter_sets[i][0], filter_sets[i][1]);
#else
          mbmi->interp_filters = av1_broadcast_interp_filter((InterpFilter)i);
#endif  // CONFIG_DUAL_FILTER
          tmp_rs = av1_get_switchable_rate(cm, x, xd);
          av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst,
                                        bsize);
          model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
                          &tmp_dist, &tmp_skip_sb, &tmp_skip_sse);
          tmp_rd = RDCOST(x->rdmult, tmp_rs + tmp_rate, tmp_dist);

          if (tmp_rd < *rd) {
            *rd = tmp_rd;
            *switchable_rate = av1_get_switchable_rate(cm, x, xd);
            best_filters = mbmi->interp_filters;
            *skip_txfm_sb = tmp_skip_sb;
            *skip_sse_sb = tmp_skip_sse;
            best_in_temp = !best_in_temp;
            if (best_in_temp) {
              restore_dst_buf(xd, *orig_dst);
            } else {
              restore_dst_buf(xd, *tmp_dst);
            }
          }
        }
#if CONFIG_DUAL_FILTER  // Speed feature use_fast_interpolation_filter_search
      }
#endif  // CONFIG_DUAL_FILTER Speed feature use_fast_interpolation_filter_search

      // Point xd at the buffer holding the best prediction and commit the
      // winning filters.
      if (best_in_temp) {
        restore_dst_buf(xd, *tmp_dst);
      } else {
        restore_dst_buf(xd, *orig_dst);
      }
      mbmi->interp_filters = best_filters;
    } else {
      assert(mbmi->interp_filters ==
             av1_broadcast_interp_filter(EIGHTTAP_REGULAR));
    }
  }

  return 0;
}
7593
#if CONFIG_DUAL_FILTER
// For each of the two filter components, keep the chosen filter only when
// the corresponding MV component actually has a sub-pel part; otherwise
// fall back to EIGHTTAP_REGULAR, since no interpolation happens there.
static InterpFilters condition_interp_filters_on_mv(
    InterpFilters interp_filters, const MACROBLOCKD *xd) {
  InterpFilter f0 = EIGHTTAP_REGULAR;
  InterpFilter f1 = EIGHTTAP_REGULAR;
  if (has_subpel_mv_component(xd->mi[0], xd, 0))
    f0 = av1_extract_interp_filter(interp_filters, 0);
  if (has_subpel_mv_component(xd->mi[0], xd, 1))
    f1 = av1_extract_interp_filter(interp_filters, 1);
  return av1_make_interp_filters(f0, f1);
}
#endif
7606
#if CONFIG_EXT_WARPED_MOTION
// Decide whether this block can be skipped in the RD search because it is a
// single-reference inter block whose MV equals the (global-motion-derived)
// zero MV while the mode context says all neighbors were zero — but the
// mode itself is not GLOBALMV. Returns 1 to skip, 0 otherwise.
static int handle_zero_mv(const AV1_COMMON *const cm, MACROBLOCK *const x,
                          BLOCK_SIZE bsize, int mi_col, int mi_row) {
  MACROBLOCKD *xd = &x->e_mbd;
  MODE_INFO *mi = xd->mi[0];
  MB_MODE_INFO *mbmi = &mi->mbmi;
  int skip = 0;

  // Handle the special case of 0 MV.
  // Only single-reference inter blocks are considered.
  if (mbmi->ref_frame[0] > INTRA_FRAME && mbmi->ref_frame[1] <= INTRA_FRAME) {
    int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
    int16_t mode_ctx = x->mbmi_ext->mode_context[ref_frame_type];
    if (mode_ctx & (1 << ALL_ZERO_FLAG_OFFSET)) {
      int_mv zeromv;
      const MV_REFERENCE_FRAME ref = mbmi->ref_frame[0];
      // The "zero" MV is whatever the global motion model maps this block
      // position to, not necessarily the literal (0, 0) vector.
      zeromv.as_int = gm_get_motion_vector(&cm->global_motion[ref],
                                           cm->allow_high_precision_mv, bsize,
                                           mi_col, mi_row, 0
#if CONFIG_AMVR
                                           ,
                                           cm->cur_frame_force_integer_mv
#endif
                                           )
                          .as_int;
      if (mbmi->mv[0].as_int == zeromv.as_int && mbmi->mode != GLOBALMV) {
        skip = 1;
      }
    }
  }
  return skip;
}
#endif  // CONFIG_EXT_WARPED_MOTION
7639
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007640// TODO(afergs): Refactor the MBMI references in here - there's four
7641// TODO(afergs): Refactor optional args - add them to a struct or remove
7642static int64_t motion_mode_rd(
7643 const AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize,
7644 RD_STATS *rd_stats, RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv,
7645 int *disable_skip, int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME], int mi_row,
7646 int mi_col, HandleInterModeArgs *const args, const int64_t ref_best_rd,
7647 const int *refs, int rate_mv,
Wei-Ting Lin85a8f702017-06-22 13:55:15 -07007648 // only used when WARPED_MOTION is on?
Sebastien Alaiwan0bdea0d2017-10-02 15:15:05 +02007649 int_mv *const single_newmv, int rate2_bmc_nocoeff,
Sebastien Alaiwan1bc94fc2017-10-31 10:25:17 +01007650 MB_MODE_INFO *best_bmc_mbmi, int rate_mv_bmc, int rs, int *skip_txfm_sb,
7651 int64_t *skip_sse_sb, BUFFER_SET *orig_dst) {
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007652 const AV1_COMMON *const cm = &cpi->common;
7653 MACROBLOCKD *xd = &x->e_mbd;
7654 MODE_INFO *mi = xd->mi[0];
7655 MB_MODE_INFO *mbmi = &mi->mbmi;
7656 const int is_comp_pred = has_second_ref(mbmi);
7657 const PREDICTION_MODE this_mode = mbmi->mode;
7658
7659 (void)mode_mv;
7660 (void)mi_row;
7661 (void)mi_col;
7662 (void)args;
7663 (void)refs;
7664 (void)rate_mv;
7665 (void)is_comp_pred;
7666 (void)this_mode;
7667
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007668 MOTION_MODE motion_mode, last_motion_mode_allowed;
7669 int rate2_nocoeff = 0, best_xskip, best_disable_skip = 0;
7670 RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
7671 MB_MODE_INFO base_mbmi, best_mbmi;
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007672 uint8_t best_blk_skip[MAX_MB_PLANE][MAX_MIB_SIZE * MAX_MIB_SIZE * 4];
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007673
Yunqing Wang97d6a372017-10-09 14:15:15 -07007674#if CONFIG_EXT_WARPED_MOTION
Yunqing Wang1bc82862017-06-28 15:49:48 -07007675 int pts0[SAMPLES_ARRAY_SIZE], pts_inref0[SAMPLES_ARRAY_SIZE];
Yunqing Wang876a8b02017-11-13 17:13:27 -08007676 int pts_mv0[SAMPLES_ARRAY_SIZE], pts_wm[SAMPLES_ARRAY_SIZE];
Yunqing Wang1bc82862017-06-28 15:49:48 -07007677 int total_samples;
Yunqing Wang3afbf3f2017-11-21 20:16:18 -08007678 int best_cand = -1;
Yunqing Wang1bc82862017-06-28 15:49:48 -07007679#else
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007680 int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
Yunqing Wang97d6a372017-10-09 14:15:15 -07007681#endif // CONFIG_EXT_WARPED_MOTION
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007682
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007683 av1_invalid_rd_stats(&best_rd_stats);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007684
7685 if (cm->interp_filter == SWITCHABLE) rd_stats->rate += rs;
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007686 aom_clear_system_state();
Yunqing Wang97d6a372017-10-09 14:15:15 -07007687#if CONFIG_EXT_WARPED_MOTION
Yunqing Wang1bc82862017-06-28 15:49:48 -07007688 mbmi->num_proj_ref[0] =
Yunqing Wang876a8b02017-11-13 17:13:27 -08007689 findSamples(cm, xd, mi_row, mi_col, pts0, pts_inref0, pts_mv0, pts_wm);
Yunqing Wang1bc82862017-06-28 15:49:48 -07007690 total_samples = mbmi->num_proj_ref[0];
Yunqing Wang3afbf3f2017-11-21 20:16:18 -08007691
7692 // Find a warped neighbor.
7693 int cand;
7694 int best_weight = 0;
7695
7696 // if (this_mode == NEARESTMV)
7697 for (cand = 0; cand < mbmi->num_proj_ref[0]; cand++) {
7698 if (pts_wm[cand * 2 + 1] > best_weight) {
7699 best_weight = pts_wm[cand * 2 + 1];
7700 best_cand = cand;
7701 }
7702 }
7703 mbmi->wm_ctx = best_cand;
7704 best_bmc_mbmi->wm_ctx = mbmi->wm_ctx;
Yunqing Wang1bc82862017-06-28 15:49:48 -07007705#else
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007706 mbmi->num_proj_ref[0] = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref);
Yunqing Wang97d6a372017-10-09 14:15:15 -07007707#endif // CONFIG_EXT_WARPED_MOTION
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007708 best_bmc_mbmi->num_proj_ref[0] = mbmi->num_proj_ref[0];
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007709 rate2_nocoeff = rd_stats->rate;
Sebastien Alaiwan1f56b8e2017-10-31 17:37:16 +01007710 last_motion_mode_allowed = motion_mode_allowed(0, xd->global_motion, xd, mi);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007711 base_mbmi = *mbmi;
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007712
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007713 int64_t best_rd = INT64_MAX;
7714 for (motion_mode = SIMPLE_TRANSLATION;
7715 motion_mode <= last_motion_mode_allowed; motion_mode++) {
7716 int64_t tmp_rd = INT64_MAX;
7717 int tmp_rate;
7718 int64_t tmp_dist;
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007719 int tmp_rate2 =
7720 motion_mode != SIMPLE_TRANSLATION ? rate2_bmc_nocoeff : rate2_nocoeff;
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007721
7722 *mbmi = base_mbmi;
7723 mbmi->motion_mode = motion_mode;
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007724 if (mbmi->motion_mode == OBMC_CAUSAL) {
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007725 *mbmi = *best_bmc_mbmi;
7726 mbmi->motion_mode = OBMC_CAUSAL;
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01007727 if (!is_comp_pred && have_newmv_in_inter_mode(this_mode)) {
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007728 int tmp_rate_mv = 0;
7729
Sebastien Alaiwan0bdea0d2017-10-02 15:15:05 +02007730 single_motion_search(cpi, x, bsize, mi_row, mi_col, 0, &tmp_rate_mv);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007731 mbmi->mv[0].as_int = x->best_mv.as_int;
7732 if (discount_newmv_test(cpi, this_mode, mbmi->mv[0], mode_mv,
7733 refs[0])) {
7734 tmp_rate_mv = AOMMAX((tmp_rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
7735 }
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007736 tmp_rate2 = rate2_bmc_nocoeff - rate_mv_bmc + tmp_rate_mv;
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007737#if CONFIG_DUAL_FILTER
Rupert Swarbrick27e90292017-09-28 17:46:50 +01007738 mbmi->interp_filters =
7739 condition_interp_filters_on_mv(mbmi->interp_filters, xd);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007740#endif // CONFIG_DUAL_FILTER
Jingning Hanc44009c2017-05-06 11:36:49 -07007741 av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007742 } else {
Jingning Hanc44009c2017-05-06 11:36:49 -07007743 av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007744 }
7745 av1_build_obmc_inter_prediction(
7746 cm, xd, mi_row, mi_col, args->above_pred_buf, args->above_pred_stride,
7747 args->left_pred_buf, args->left_pred_stride);
7748 model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
7749 &tmp_dist, skip_txfm_sb, skip_sse_sb);
7750 }
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007751
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007752 if (mbmi->motion_mode == WARPED_CAUSAL) {
Yunqing Wang97d6a372017-10-09 14:15:15 -07007753#if CONFIG_EXT_WARPED_MOTION
Yunqing Wang1bc82862017-06-28 15:49:48 -07007754 int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
Yunqing Wang97d6a372017-10-09 14:15:15 -07007755#endif // CONFIG_EXT_WARPED_MOTION
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007756 *mbmi = *best_bmc_mbmi;
7757 mbmi->motion_mode = WARPED_CAUSAL;
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007758 mbmi->wm_params[0].wmtype = DEFAULT_WMTYPE;
Rupert Swarbrick27e90292017-09-28 17:46:50 +01007759 mbmi->interp_filters = av1_broadcast_interp_filter(
7760 av1_unswitchable_filter(cm->interp_filter));
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007761
Yunqing Wang97d6a372017-10-09 14:15:15 -07007762#if CONFIG_EXT_WARPED_MOTION
Yunqing Wang3afbf3f2017-11-21 20:16:18 -08007763 if (this_mode == NEARESTMV && best_cand != -1) {
Yunqing Wang876a8b02017-11-13 17:13:27 -08007764 MODE_INFO *best_mi = xd->mi[pts_wm[2 * best_cand]];
7765 assert(best_mi->mbmi.motion_mode == WARPED_CAUSAL);
7766 mbmi->wm_params[0] = best_mi->mbmi.wm_params[0];
7767
7768 // Handle the special case of 0 MV.
7769 if (handle_zero_mv(cm, x, bsize, mi_col, mi_row)) continue;
Yunqing Wang68f3ccd2017-05-23 14:43:54 -07007770
Jingning Hanc44009c2017-05-06 11:36:49 -07007771 av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007772 model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
7773 &tmp_dist, skip_txfm_sb, skip_sse_sb);
7774 } else {
Yunqing Wang876a8b02017-11-13 17:13:27 -08007775 memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
7776 memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
7777 // Rank the samples by motion vector difference
7778 if (mbmi->num_proj_ref[0] > 1) {
7779 mbmi->num_proj_ref[0] = sortSamples(pts_mv0, &mbmi->mv[0].as_mv, pts,
7780 pts_inref, mbmi->num_proj_ref[0]);
7781 best_bmc_mbmi->num_proj_ref[0] = mbmi->num_proj_ref[0];
7782 }
7783#endif // CONFIG_EXT_WARPED_MOTION
7784
7785 if (!find_projection(mbmi->num_proj_ref[0], pts, pts_inref, bsize,
7786 mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col,
7787 &mbmi->wm_params[0], mi_row, mi_col)) {
7788 // Refine MV for NEWMV mode
7789 if (!is_comp_pred && have_newmv_in_inter_mode(this_mode)) {
7790 int tmp_rate_mv = 0;
7791 const int_mv mv0 = mbmi->mv[0];
Luc Trudeauf3bf8b12017-12-08 14:38:41 -05007792 const WarpedMotionParams wm_params0 = mbmi->wm_params[0];
Yunqing Wang876a8b02017-11-13 17:13:27 -08007793#if CONFIG_EXT_WARPED_MOTION
7794 int num_proj_ref0 = mbmi->num_proj_ref[0];
7795
7796 // Refine MV in a small range.
7797 av1_refine_warped_mv(cpi, x, bsize, mi_row, mi_col, pts0,
7798 pts_inref0, pts_mv0, total_samples);
7799#else
7800 // Refine MV in a small range.
7801 av1_refine_warped_mv(cpi, x, bsize, mi_row, mi_col, pts, pts_inref);
7802#endif // CONFIG_EXT_WARPED_MOTION
7803
7804 // Keep the refined MV and WM parameters.
7805 if (mv0.as_int != mbmi->mv[0].as_int) {
7806 const int ref = refs[0];
7807 const MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
7808
7809 tmp_rate_mv =
7810 av1_mv_bit_cost(&mbmi->mv[0].as_mv, &ref_mv, x->nmvjointcost,
7811 x->mvcost, MV_COST_WEIGHT);
7812
7813 if (cpi->sf.adaptive_motion_search)
7814 x->pred_mv[ref] = mbmi->mv[0].as_mv;
7815
7816 single_newmv[ref] = mbmi->mv[0];
7817
7818 if (discount_newmv_test(cpi, this_mode, mbmi->mv[0], mode_mv,
7819 refs[0])) {
7820 tmp_rate_mv = AOMMAX((tmp_rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
7821 }
7822#if CONFIG_EXT_WARPED_MOTION
7823 best_bmc_mbmi->num_proj_ref[0] = mbmi->num_proj_ref[0];
7824#endif // CONFIG_EXT_WARPED_MOTION
7825 tmp_rate2 = rate2_bmc_nocoeff - rate_mv_bmc + tmp_rate_mv;
7826#if CONFIG_DUAL_FILTER
7827 mbmi->interp_filters =
7828 condition_interp_filters_on_mv(mbmi->interp_filters, xd);
7829#endif // CONFIG_DUAL_FILTER
7830 } else {
7831 // Restore the old MV and WM parameters.
7832 mbmi->mv[0] = mv0;
7833 mbmi->wm_params[0] = wm_params0;
7834#if CONFIG_EXT_WARPED_MOTION
7835 mbmi->num_proj_ref[0] = num_proj_ref0;
7836#endif // CONFIG_EXT_WARPED_MOTION
7837 }
7838 }
7839
7840#if CONFIG_EXT_WARPED_MOTION
7841 // Handle the special case of 0 MV.
7842 if (handle_zero_mv(cm, x, bsize, mi_col, mi_row)) continue;
7843#endif // CONFIG_EXT_WARPED_MOTION
7844
7845 av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
7846 model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
7847 &tmp_dist, skip_txfm_sb, skip_sse_sb);
7848 } else {
7849 continue;
7850 }
7851#if CONFIG_EXT_WARPED_MOTION
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007852 }
Yunqing Wang876a8b02017-11-13 17:13:27 -08007853#endif // CONFIG_EXT_WARPED_MOTION
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007854 }
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007855 x->skip = 0;
7856
7857 rd_stats->dist = 0;
7858 rd_stats->sse = 0;
7859 rd_stats->skip = 1;
7860 rd_stats->rate = tmp_rate2;
7861 if (last_motion_mode_allowed > SIMPLE_TRANSLATION) {
Yunqing Wang3afbf3f2017-11-21 20:16:18 -08007862 if (last_motion_mode_allowed == WARPED_CAUSAL) {
7863#if CONFIG_EXT_WARPED_MOTION
7864 int wm_ctx = 0;
7865 if (mbmi->wm_ctx != -1) {
7866 wm_ctx = 1;
7867 if (mbmi->mode == NEARESTMV) wm_ctx = 2;
7868 }
7869
7870 rd_stats->rate += x->motion_mode_cost[wm_ctx][bsize][mbmi->motion_mode];
7871#else
Yue Chenbdc8dab2017-07-26 12:05:47 -07007872 rd_stats->rate += x->motion_mode_cost[bsize][mbmi->motion_mode];
Yunqing Wang3afbf3f2017-11-21 20:16:18 -08007873#endif // CONFIG_EXT_WARPED_MOTION
7874 } else {
Yue Chenb23d00a2017-07-28 17:01:21 -07007875 rd_stats->rate += x->motion_mode_cost1[bsize][mbmi->motion_mode];
Yunqing Wang3afbf3f2017-11-21 20:16:18 -08007876 }
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007877 }
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007878 if (mbmi->motion_mode == WARPED_CAUSAL) {
7879 rd_stats->rate -= rs;
7880 }
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007881 if (!*skip_txfm_sb) {
7882 int64_t rdcosty = INT64_MAX;
7883 int is_cost_valid_uv = 0;
7884
7885 // cost and distortion
7886 av1_subtract_plane(x, bsize, 0);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007887 if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) {
Debargha Mukherjee51666862017-10-24 14:29:13 -07007888 // Motion mode
Yue Chen25dc0702017-10-18 23:36:06 -07007889 select_tx_type_yrd(cpi, x, rd_stats_y, bsize, mi_row, mi_col,
7890 ref_best_rd);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007891 } else {
7892 int idx, idy;
7893 super_block_yrd(cpi, x, rd_stats_y, bsize, ref_best_rd);
7894 for (idy = 0; idy < xd->n8_h; ++idy)
7895 for (idx = 0; idx < xd->n8_w; ++idx)
7896 mbmi->inter_tx_size[idy][idx] = mbmi->tx_size;
7897 memset(x->blk_skip[0], rd_stats_y->skip,
7898 sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
7899 }
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007900
7901 if (rd_stats_y->rate == INT_MAX) {
7902 av1_invalid_rd_stats(rd_stats);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007903 if (mbmi->motion_mode != SIMPLE_TRANSLATION) {
7904 continue;
7905 } else {
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007906 restore_dst_buf(xd, *orig_dst);
7907 return INT64_MAX;
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007908 }
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007909 }
7910
7911 av1_merge_rd_stats(rd_stats, rd_stats_y);
7912
Urvang Joshi70006e42017-06-14 16:08:55 -07007913 rdcosty = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
7914 rdcosty = AOMMIN(rdcosty, RDCOST(x->rdmult, 0, rd_stats->sse));
Sebastien Alaiwanfb838772017-10-24 12:02:54 +02007915 /* clang-format off */
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007916 is_cost_valid_uv =
Debargha Mukherjee51666862017-10-24 14:29:13 -07007917 inter_block_uvrd(cpi, x, rd_stats_uv, bsize, ref_best_rd - rdcosty,
7918 0);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007919 if (!is_cost_valid_uv) {
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007920 continue;
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007921 }
7922 /* clang-format on */
7923 av1_merge_rd_stats(rd_stats, rd_stats_uv);
7924#if CONFIG_RD_DEBUG
7925 // record transform block coefficient cost
7926 // TODO(angiebird): So far rd_debug tool only detects discrepancy of
7927 // coefficient cost. Therefore, it is fine to copy rd_stats into mbmi
7928 // here because we already collect the coefficient cost. Move this part to
7929 // other place when we need to compare non-coefficient cost.
7930 mbmi->rd_stats = *rd_stats;
7931#endif // CONFIG_RD_DEBUG
Zoe Liu1eed2df2017-10-16 17:13:15 -07007932 const int skip_ctx = av1_get_skip_context(xd);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007933 if (rd_stats->skip) {
7934 rd_stats->rate -= rd_stats_uv->rate + rd_stats_y->rate;
7935 rd_stats_y->rate = 0;
7936 rd_stats_uv->rate = 0;
Zoe Liu1eed2df2017-10-16 17:13:15 -07007937 rd_stats->rate += x->skip_cost[skip_ctx][1];
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007938 mbmi->skip = 0;
7939 // here mbmi->skip temporarily plays a role as what this_skip2 does
7940 } else if (!xd->lossless[mbmi->segment_id] &&
Urvang Joshi70006e42017-06-14 16:08:55 -07007941 (RDCOST(x->rdmult,
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007942 rd_stats_y->rate + rd_stats_uv->rate +
Zoe Liu1eed2df2017-10-16 17:13:15 -07007943 x->skip_cost[skip_ctx][0],
7944 rd_stats->dist) >= RDCOST(x->rdmult,
7945 x->skip_cost[skip_ctx][1],
7946 rd_stats->sse))) {
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007947 rd_stats->rate -= rd_stats_uv->rate + rd_stats_y->rate;
Zoe Liu1eed2df2017-10-16 17:13:15 -07007948 rd_stats->rate += x->skip_cost[skip_ctx][1];
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007949 rd_stats->dist = rd_stats->sse;
7950 rd_stats_y->rate = 0;
7951 rd_stats_uv->rate = 0;
7952 mbmi->skip = 1;
7953 } else {
Zoe Liu1eed2df2017-10-16 17:13:15 -07007954 rd_stats->rate += x->skip_cost[skip_ctx][0];
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007955 mbmi->skip = 0;
7956 }
7957 *disable_skip = 0;
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007958 } else {
7959 x->skip = 1;
7960 *disable_skip = 1;
7961 mbmi->tx_size = tx_size_from_tx_mode(bsize, cm->tx_mode, 1);
7962
Sebastien Alaiwan1bc94fc2017-10-31 10:25:17 +01007963 // The cost of skip bit needs to be added.
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007964 mbmi->skip = 0;
Zoe Liu1eed2df2017-10-16 17:13:15 -07007965 rd_stats->rate += x->skip_cost[av1_get_skip_context(xd)][1];
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007966
7967 rd_stats->dist = *skip_sse_sb;
7968 rd_stats->sse = *skip_sse_sb;
7969 rd_stats_y->rate = 0;
7970 rd_stats_uv->rate = 0;
7971 rd_stats->skip = 1;
7972 }
7973
Sarah Parker2b9ec2e2017-10-30 17:34:08 -07007974 if (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV) {
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007975 if (is_nontrans_global_motion(xd)) {
7976 rd_stats->rate -= rs;
Rupert Swarbrick27e90292017-09-28 17:46:50 +01007977 mbmi->interp_filters = av1_broadcast_interp_filter(
7978 av1_unswitchable_filter(cm->interp_filter));
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007979 }
7980 }
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007981
Urvang Joshi70006e42017-06-14 16:08:55 -07007982 tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007983 if (mbmi->motion_mode == SIMPLE_TRANSLATION || (tmp_rd < best_rd)) {
7984 best_mbmi = *mbmi;
7985 best_rd = tmp_rd;
7986 best_rd_stats = *rd_stats;
7987 best_rd_stats_y = *rd_stats_y;
7988 best_rd_stats_uv = *rd_stats_uv;
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007989 for (int i = 0; i < MAX_MB_PLANE; ++i)
7990 memcpy(best_blk_skip[i], x->blk_skip[i],
7991 sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08007992 best_xskip = x->skip;
7993 best_disable_skip = *disable_skip;
7994 }
7995 }
7996
7997 if (best_rd == INT64_MAX) {
7998 av1_invalid_rd_stats(rd_stats);
7999 restore_dst_buf(xd, *orig_dst);
8000 return INT64_MAX;
8001 }
8002 *mbmi = best_mbmi;
8003 *rd_stats = best_rd_stats;
8004 *rd_stats_y = best_rd_stats_y;
8005 *rd_stats_uv = best_rd_stats_uv;
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08008006 for (int i = 0; i < MAX_MB_PLANE; ++i)
8007 memcpy(x->blk_skip[i], best_blk_skip[i],
8008 sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08008009 x->skip = best_xskip;
8010 *disable_skip = best_disable_skip;
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08008011
8012 restore_dst_buf(xd, *orig_dst);
8013 return 0;
8014}
8015
#if CONFIG_EXT_SKIP
// Evaluate the RD cost of coding the current block in skip mode.
// Builds the inter prediction for the whole superblock, accumulates the
// prediction SSE across all planes, and records the resulting distortion,
// rate (always 0 for skip mode), ref frames, MVs and candidate index on
// the MACROBLOCK so the mode search can later compare against regular
// inter modes. Always returns 0; the dst buffers are restored on exit.
static int64_t skip_mode_rd(const AV1_COMP *const cpi, MACROBLOCK *const x,
                            BLOCK_SIZE bsize, int mi_row, int mi_col,
                            BUFFER_SET *const orig_dst) {
  const AV1_COMMON *cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;

  // Generate the skip-mode prediction into the original destination buffers.
  av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize);

  int64_t sse_total = 0;
  int plane_idx;
  for (plane_idx = 0; plane_idx < MAX_MB_PLANE; ++plane_idx) {
    const struct macroblock_plane *const p = &x->plane[plane_idx];
    const struct macroblockd_plane *const pd = &xd->plane[plane_idx];
    const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
    const int blk_w = block_size_wide[plane_bsize];
    const int blk_h = block_size_high[plane_bsize];

    av1_subtract_plane(x, bsize, plane_idx);
    // src_diff is laid out with stride == block width for this plane.
    const int64_t plane_sse =
        aom_sum_squares_2d_i16(p->src_diff, blk_w, blk_w, blk_h);
    // Scale by 16 (same as << 4 in the pixel-domain distortion convention).
    sse_total += plane_sse * 16;
  }
  x->skip_mode_sse = sse_total;
  x->skip_mode_dist = sse_total;
  x->skip_mode_rate = 0;  // Skip mode signals no residual rate here.
  x->skip_mode_rdcost = RDCOST(x->rdmult, x->skip_mode_rate, x->skip_mode_dist);

  // Save the ref frames / motion vectors
  x->skip_mode_ref_frame[0] = mbmi->ref_frame[0];
  x->skip_mode_ref_frame[1] = mbmi->ref_frame[1];
  x->skip_mode_mv[0].as_int = mbmi->mv[0].as_int;
  x->skip_mode_mv[1].as_int = mbmi->mv[1].as_int;

  // Save the mode index
  x->skip_mode_index = x->skip_mode_index_candidate;

  restore_dst_buf(xd, *orig_dst);
  return 0;
}
#endif  // CONFIG_EXT_SKIP
8056
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01008057static int64_t handle_inter_mode(
8058 const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
8059 RD_STATS *rd_stats, RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv,
8060 int *disable_skip, int_mv (*mode_mv)[TOTAL_REFS_PER_FRAME], int mi_row,
8061 int mi_col, HandleInterModeArgs *args, const int64_t ref_best_rd) {
Urvang Joshi52648442016-10-13 17:27:51 -07008062 const AV1_COMMON *cm = &cpi->common;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008063 MACROBLOCKD *xd = &x->e_mbd;
Sarah Parker19234cc2017-03-10 16:43:25 -08008064 MODE_INFO *mi = xd->mi[0];
8065 MB_MODE_INFO *mbmi = &mi->mbmi;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008066 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
8067 const int is_comp_pred = has_second_ref(mbmi);
8068 const int this_mode = mbmi->mode;
8069 int_mv *frame_mv = mode_mv[this_mode];
8070 int i;
8071 int refs[2] = { mbmi->ref_frame[0],
8072 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
8073 int_mv cur_mv[2];
8074 int rate_mv = 0;
Angie Chiang75c22092016-10-25 12:19:16 -07008075 int pred_exists = 1;
Jingning Hanae5cfde2016-11-30 12:01:44 -08008076 const int bw = block_size_wide[bsize];
Yaowu Xuc27fc142016-08-22 16:08:15 -07008077 int_mv single_newmv[TOTAL_REFS_PER_FRAME];
Yue Chenb23d00a2017-07-28 17:01:21 -07008078 const int *const interintra_mode_cost =
8079 x->interintra_mode_cost[size_group_lookup[bsize]];
Yaowu Xuc27fc142016-08-22 16:08:15 -07008080 const int is_comp_interintra_pred = (mbmi->ref_frame[1] == INTRA_FRAME);
Yaowu Xuf883b422016-08-30 14:01:10 -07008081 uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02008082#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07008083 DECLARE_ALIGNED(16, uint8_t, tmp_buf_[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
8084#else
8085 DECLARE_ALIGNED(16, uint8_t, tmp_buf_[MAX_MB_PLANE * MAX_SB_SQUARE]);
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02008086#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07008087 uint8_t *tmp_buf;
8088
Yaowu Xuc27fc142016-08-22 16:08:15 -07008089 int rate2_bmc_nocoeff;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008090 MB_MODE_INFO best_bmc_mbmi;
Yue Chen69f18e12016-09-08 14:48:15 -07008091 int rate_mv_bmc;
Angie Chiang75c22092016-10-25 12:19:16 -07008092 int64_t rd = INT64_MAX;
David Barkerac37fa32016-12-02 12:30:21 +00008093 BUFFER_SET orig_dst, tmp_dst;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008094 int rs = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008095
8096 int skip_txfm_sb = 0;
8097 int64_t skip_sse_sb = INT64_MAX;
Yaowu Xub0d0d002016-11-22 09:26:43 -08008098 int16_t mode_ctx;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008099
Yue Chen5e606542017-05-24 17:03:17 -07008100 int compmode_interintra_cost = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008101 mbmi->use_wedge_interintra = 0;
Yue Chen5e606542017-05-24 17:03:17 -07008102 int compmode_interinter_cost = 0;
Sarah Parker2d0e9b72017-05-04 01:34:16 +00008103 mbmi->interinter_compound_type = COMPOUND_AVERAGE;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008104
Debargha Mukherjee9e2c7a62017-05-23 21:18:42 -07008105 if (!cm->allow_interintra_compound && is_comp_interintra_pred)
8106 return INT64_MAX;
Debargha Mukherjee9e2c7a62017-05-23 21:18:42 -07008107
Yaowu Xuc27fc142016-08-22 16:08:15 -07008108 // is_comp_interintra_pred implies !is_comp_pred
8109 assert(!is_comp_interintra_pred || (!is_comp_pred));
8110 // is_comp_interintra_pred implies is_interintra_allowed(mbmi->sb_type)
8111 assert(!is_comp_interintra_pred || is_interintra_allowed(mbmi));
Yaowu Xuc27fc142016-08-22 16:08:15 -07008112
Yaowu Xuc27fc142016-08-22 16:08:15 -07008113 if (is_comp_pred)
8114 mode_ctx = mbmi_ext->compound_mode_context[refs[0]];
8115 else
Yaowu Xuf883b422016-08-30 14:01:10 -07008116 mode_ctx = av1_mode_context_analyzer(mbmi_ext->mode_context,
8117 mbmi->ref_frame, bsize, -1);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008118
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02008119#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07008120 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
8121 tmp_buf = CONVERT_TO_BYTEPTR(tmp_buf_);
8122 else
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02008123#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07008124 tmp_buf = tmp_buf_;
David Barkerb8069f92016-11-18 14:49:56 +00008125 // Make sure that we didn't leave the plane destination buffers set
8126 // to tmp_buf at the end of the last iteration
8127 assert(xd->plane[0].dst.buf != tmp_buf);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008128
Yue Chen69f18e12016-09-08 14:48:15 -07008129 mbmi->num_proj_ref[0] = 0;
8130 mbmi->num_proj_ref[1] = 0;
Yue Chen69f18e12016-09-08 14:48:15 -07008131
Yaowu Xuc27fc142016-08-22 16:08:15 -07008132 if (is_comp_pred) {
8133 if (frame_mv[refs[0]].as_int == INVALID_MV ||
8134 frame_mv[refs[1]].as_int == INVALID_MV)
8135 return INT64_MAX;
8136 }
8137
Yue Chene9638cc2016-10-10 12:37:54 -07008138 mbmi->motion_mode = SIMPLE_TRANSLATION;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008139 if (have_newmv_in_inter_mode(this_mode)) {
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01008140 const int64_t ret_val = handle_newmv(cpi, x, bsize, mode_mv, mi_row, mi_col,
8141 &rate_mv, single_newmv, args);
Fergus Simpson45509632017-02-22 15:30:50 -08008142 if (ret_val != 0)
8143 return ret_val;
8144 else
8145 rd_stats->rate += rate_mv;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008146 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07008147 for (i = 0; i < is_comp_pred + 1; ++i) {
8148 cur_mv[i] = frame_mv[refs[i]];
Zoe Liued29ea22017-04-20 16:48:15 -07008149 // Clip "next_nearest" so that it does not extend to far out of image
8150 if (this_mode != NEWMV) clamp_mv2(&cur_mv[i].as_mv, xd);
Alex Converse0fa0f422017-04-24 12:51:14 -07008151 if (mv_check_bounds(&x->mv_limits, &cur_mv[i].as_mv)) return INT64_MAX;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008152 mbmi->mv[i].as_int = cur_mv[i].as_int;
8153 }
8154
Cheng Chen0a7f2f52017-10-10 15:16:09 -07008155#if CONFIG_JNT_COMP
Cheng Chenb09e55c2017-11-10 12:09:19 -08008156 if (is_comp_pred) {
Cheng Chen2ef24ea2017-11-29 12:22:24 -08008157 if (mbmi->compound_idx == 0) {
Cheng Chen5a881722017-11-30 17:05:10 -08008158 int masked_compound_used = is_any_masked_compound_used(bsize);
8159 masked_compound_used = masked_compound_used && cm->allow_masked_compound;
8160
8161 if (masked_compound_used) {
8162 const int comp_group_idx_ctx = get_comp_group_idx_context(xd);
8163 rd_stats->rate += x->comp_group_idx_cost[comp_group_idx_ctx][0];
8164 }
Cheng Chen2ef24ea2017-11-29 12:22:24 -08008165
8166 const int comp_index_ctx = get_comp_index_context(cm, xd);
Cheng Chen5a881722017-11-30 17:05:10 -08008167 rd_stats->rate += x->comp_idx_cost[comp_index_ctx][mbmi->compound_idx];
Cheng Chen2ef24ea2017-11-29 12:22:24 -08008168 }
Cheng Chen0a7f2f52017-10-10 15:16:09 -07008169 }
8170#endif // CONFIG_JNT_COMP
8171
Sebastien Alaiwan0bdea0d2017-10-02 15:15:05 +02008172 if (this_mode == NEAREST_NEARESTMV) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07008173 if (mbmi_ext->ref_mv_count[ref_frame_type] > 0) {
8174 cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv;
8175 cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
8176
8177 for (i = 0; i < 2; ++i) {
8178 clamp_mv2(&cur_mv[i].as_mv, xd);
Alex Converse0fa0f422017-04-24 12:51:14 -07008179 if (mv_check_bounds(&x->mv_limits, &cur_mv[i].as_mv)) return INT64_MAX;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008180 mbmi->mv[i].as_int = cur_mv[i].as_int;
8181 }
8182 }
8183 }
8184
Yaowu Xuc27fc142016-08-22 16:08:15 -07008185 if (mbmi_ext->ref_mv_count[ref_frame_type] > 0) {
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01008186 if (this_mode == NEAREST_NEWMV) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07008187 cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv;
8188
RogerZhou3b635242017-09-19 10:06:46 -07008189#if CONFIG_AMVR
8190 lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv,
RogerZhou10a03802017-10-26 11:49:48 -07008191 cm->cur_frame_force_integer_mv);
RogerZhou3b635242017-09-19 10:06:46 -07008192#else
Yaowu Xuc27fc142016-08-22 16:08:15 -07008193 lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv);
RogerZhou3b635242017-09-19 10:06:46 -07008194#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -07008195 clamp_mv2(&cur_mv[0].as_mv, xd);
Alex Converse0fa0f422017-04-24 12:51:14 -07008196 if (mv_check_bounds(&x->mv_limits, &cur_mv[0].as_mv)) return INT64_MAX;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008197 mbmi->mv[0].as_int = cur_mv[0].as_int;
8198 }
8199
Debargha Mukherjeebb6e1342017-04-17 16:05:04 -07008200 if (this_mode == NEW_NEARESTMV) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07008201 cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
8202
RogerZhou3b635242017-09-19 10:06:46 -07008203#if CONFIG_AMVR
8204 lower_mv_precision(&cur_mv[1].as_mv, cm->allow_high_precision_mv,
RogerZhou10a03802017-10-26 11:49:48 -07008205 cm->cur_frame_force_integer_mv);
RogerZhou3b635242017-09-19 10:06:46 -07008206#else
Yaowu Xuc27fc142016-08-22 16:08:15 -07008207 lower_mv_precision(&cur_mv[1].as_mv, cm->allow_high_precision_mv);
RogerZhou3b635242017-09-19 10:06:46 -07008208#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -07008209 clamp_mv2(&cur_mv[1].as_mv, xd);
Alex Converse0fa0f422017-04-24 12:51:14 -07008210 if (mv_check_bounds(&x->mv_limits, &cur_mv[1].as_mv)) return INT64_MAX;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008211 mbmi->mv[1].as_int = cur_mv[1].as_int;
8212 }
8213 }
8214
8215 if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
David Barker404b2e82017-03-27 13:07:47 +01008216 int ref_mv_idx = mbmi->ref_mv_idx + 1;
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01008217 if (this_mode == NEAR_NEWMV || this_mode == NEAR_NEARMV) {
David Barker404b2e82017-03-27 13:07:47 +01008218 cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008219
RogerZhou3b635242017-09-19 10:06:46 -07008220#if CONFIG_AMVR
8221 lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv,
RogerZhou10a03802017-10-26 11:49:48 -07008222 cm->cur_frame_force_integer_mv);
RogerZhou3b635242017-09-19 10:06:46 -07008223#else
Yaowu Xuc27fc142016-08-22 16:08:15 -07008224 lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv);
RogerZhou3b635242017-09-19 10:06:46 -07008225#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -07008226 clamp_mv2(&cur_mv[0].as_mv, xd);
Alex Converse0fa0f422017-04-24 12:51:14 -07008227 if (mv_check_bounds(&x->mv_limits, &cur_mv[0].as_mv)) return INT64_MAX;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008228 mbmi->mv[0].as_int = cur_mv[0].as_int;
8229 }
8230
Sebastien Alaiwan34d55662017-11-15 09:36:03 +01008231 if (this_mode == NEW_NEARMV || this_mode == NEAR_NEARMV) {
8232 cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008233
RogerZhou3b635242017-09-19 10:06:46 -07008234#if CONFIG_AMVR
8235 lower_mv_precision(&cur_mv[1].as_mv, cm->allow_high_precision_mv,
RogerZhou10a03802017-10-26 11:49:48 -07008236 cm->cur_frame_force_integer_mv);
RogerZhou3b635242017-09-19 10:06:46 -07008237#else
Yaowu Xuc27fc142016-08-22 16:08:15 -07008238 lower_mv_precision(&cur_mv[1].as_mv, cm->allow_high_precision_mv);
RogerZhou3b635242017-09-19 10:06:46 -07008239#endif
Yaowu Xuc27fc142016-08-22 16:08:15 -07008240 clamp_mv2(&cur_mv[1].as_mv, xd);
Alex Converse0fa0f422017-04-24 12:51:14 -07008241 if (mv_check_bounds(&x->mv_limits, &cur_mv[1].as_mv)) return INT64_MAX;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008242 mbmi->mv[1].as_int = cur_mv[1].as_int;
8243 }
8244 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07008245
8246 // do first prediction into the destination buffer. Do the next
8247 // prediction into a temporary buffer. Then keep track of which one
8248 // of these currently holds the best predictor, and use the other
8249 // one for future predictions. In the end, copy from tmp_buf to
8250 // dst if necessary.
8251 for (i = 0; i < MAX_MB_PLANE; i++) {
David Barkerac37fa32016-12-02 12:30:21 +00008252 tmp_dst.plane[i] = tmp_buf + i * MAX_SB_SQUARE;
8253 tmp_dst.stride[i] = MAX_SB_SIZE;
Angie Chiang75c22092016-10-25 12:19:16 -07008254 }
8255 for (i = 0; i < MAX_MB_PLANE; i++) {
David Barkerac37fa32016-12-02 12:30:21 +00008256 orig_dst.plane[i] = xd->plane[i].dst.buf;
8257 orig_dst.stride[i] = xd->plane[i].dst.stride;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008258 }
8259
8260 // We don't include the cost of the second reference here, because there
8261 // are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
8262 // words if you present them in that order, the second one is always known
8263 // if the first is known.
8264 //
8265 // Under some circumstances we discount the cost of new mv mode to encourage
8266 // initiation of a motion field.
8267 if (discount_newmv_test(cpi, this_mode, frame_mv[refs[0]], mode_mv,
8268 refs[0])) {
Yue Chenb23d00a2017-07-28 17:01:21 -07008269 rd_stats->rate += AOMMIN(
8270 cost_mv_ref(x, this_mode, mode_ctx),
8271 cost_mv_ref(x, is_comp_pred ? NEAREST_NEARESTMV : NEARESTMV, mode_ctx));
Yaowu Xuc27fc142016-08-22 16:08:15 -07008272 } else {
Yue Chenb23d00a2017-07-28 17:01:21 -07008273 rd_stats->rate += cost_mv_ref(x, this_mode, mode_ctx);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008274 }
8275
Urvang Joshi70006e42017-06-14 16:08:55 -07008276 if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd &&
Sebastien Alaiwan0bdea0d2017-10-02 15:15:05 +02008277 mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV)
Yaowu Xuc27fc142016-08-22 16:08:15 -07008278 return INT64_MAX;
8279
Fergus Simpsonde18e2b2017-03-01 20:12:34 -08008280 int64_t ret_val = interpolation_filter_search(
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -08008281 x, cpi, bsize, mi_row, mi_col, &tmp_dst, &orig_dst, args->single_filter,
8282 &rd, &rs, &skip_txfm_sb, &skip_sse_sb);
Fergus Simpsonde18e2b2017-03-01 20:12:34 -08008283 if (ret_val != 0) return ret_val;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008284
Yaowu Xuc27fc142016-08-22 16:08:15 -07008285 best_bmc_mbmi = *mbmi;
Angie Chiang76159122016-11-09 12:13:22 -08008286 rate2_bmc_nocoeff = rd_stats->rate;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008287 if (cm->interp_filter == SWITCHABLE) rate2_bmc_nocoeff += rs;
Yue Chen69f18e12016-09-08 14:48:15 -07008288 rate_mv_bmc = rate_mv;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008289
Cheng Chen3392c432017-10-23 15:58:23 -07008290#if CONFIG_JNT_COMP
8291 if (is_comp_pred && mbmi->compound_idx)
8292#else
Yushin Cho67dda512017-07-25 14:58:22 -07008293 if (is_comp_pred)
Cheng Chen3392c432017-10-23 15:58:23 -07008294#endif // CONFIG_JNT_COMP
Yushin Cho67dda512017-07-25 14:58:22 -07008295 {
Urvang Joshi368fbc92016-10-17 16:31:34 -07008296 int rate_sum, rs2;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008297 int64_t dist_sum;
Sarah Parker6fdc8532016-11-16 17:47:13 -08008298 int64_t best_rd_compound = INT64_MAX, best_rd_cur = INT64_MAX;
8299 INTERINTER_COMPOUND_DATA best_compound_data;
8300 int_mv best_mv[2];
8301 int best_tmp_rate_mv = rate_mv;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008302 int tmp_skip_txfm_sb;
8303 int64_t tmp_skip_sse_sb;
Yaowu Xu5e8007f2017-06-28 12:39:18 -07008304 DECLARE_ALIGNED(16, uint8_t, pred0[2 * MAX_SB_SQUARE]);
8305 DECLARE_ALIGNED(16, uint8_t, pred1[2 * MAX_SB_SQUARE]);
Sarah Parker6fdc8532016-11-16 17:47:13 -08008306 uint8_t *preds0[1] = { pred0 };
8307 uint8_t *preds1[1] = { pred1 };
8308 int strides[1] = { bw };
Sarah Parker2e604882017-01-17 17:31:25 -08008309 int tmp_rate_mv;
Sarah Parker42d96102017-01-31 21:05:27 -08008310 int masked_compound_used = is_any_masked_compound_used(bsize);
Debargha Mukherjee9e2c7a62017-05-23 21:18:42 -07008311 masked_compound_used = masked_compound_used && cm->allow_masked_compound;
Sarah Parker6fdc8532016-11-16 17:47:13 -08008312 COMPOUND_TYPE cur_type;
Yue Chena4245512017-08-31 11:58:08 -07008313 int best_compmode_interinter_cost = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008314
Sarah Parker6fdc8532016-11-16 17:47:13 -08008315 best_mv[0].as_int = cur_mv[0].as_int;
8316 best_mv[1].as_int = cur_mv[1].as_int;
Sarah Parker2d0e9b72017-05-04 01:34:16 +00008317 memset(&best_compound_data, 0, sizeof(best_compound_data));
Sarah Parker2d0e9b72017-05-04 01:34:16 +00008318 uint8_t tmp_mask_buf[2 * MAX_SB_SQUARE];
8319 best_compound_data.seg_mask = tmp_mask_buf;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008320
Sarah Parker42d96102017-01-31 21:05:27 -08008321 if (masked_compound_used) {
Sarah Parker6fdc8532016-11-16 17:47:13 -08008322 // get inter predictors to use for masked compound modes
Yaowu Xuf883b422016-08-30 14:01:10 -07008323 av1_build_inter_predictors_for_planes_single_buf(
Yaowu Xuc27fc142016-08-22 16:08:15 -07008324 xd, bsize, 0, 0, mi_row, mi_col, 0, preds0, strides);
Yaowu Xuf883b422016-08-30 14:01:10 -07008325 av1_build_inter_predictors_for_planes_single_buf(
Yaowu Xuc27fc142016-08-22 16:08:15 -07008326 xd, bsize, 0, 0, mi_row, mi_col, 1, preds1, strides);
Sarah Parker6fdc8532016-11-16 17:47:13 -08008327 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07008328
Sarah Parker6fdc8532016-11-16 17:47:13 -08008329 for (cur_type = COMPOUND_AVERAGE; cur_type < COMPOUND_TYPES; cur_type++) {
Debargha Mukherjee9e2c7a62017-05-23 21:18:42 -07008330 if (cur_type != COMPOUND_AVERAGE && !masked_compound_used) break;
Yue Chene2518252017-06-05 12:36:46 -07008331 if (!is_interinter_compound_used(cur_type, bsize)) continue;
Sarah Parker2e604882017-01-17 17:31:25 -08008332 tmp_rate_mv = rate_mv;
Sarah Parker6fdc8532016-11-16 17:47:13 -08008333 best_rd_cur = INT64_MAX;
Sarah Parker2d0e9b72017-05-04 01:34:16 +00008334 mbmi->interinter_compound_type = cur_type;
Cheng Chen2ef24ea2017-11-29 12:22:24 -08008335#if CONFIG_JNT_COMP
Cheng Chen5a881722017-11-30 17:05:10 -08008336 const int comp_group_idx_ctx = get_comp_group_idx_context(xd);
8337 int masked_type_cost = 0;
8338 if (masked_compound_used) {
8339 if (cur_type == COMPOUND_AVERAGE) {
8340 masked_type_cost += x->comp_group_idx_cost[comp_group_idx_ctx][0];
Cheng Chen2ef24ea2017-11-29 12:22:24 -08008341
Cheng Chen5a881722017-11-30 17:05:10 -08008342 const int comp_index_ctx = get_comp_index_context(cm, xd);
8343 masked_type_cost += x->comp_idx_cost[comp_index_ctx][1];
8344 } else {
8345 masked_type_cost += x->comp_group_idx_cost[comp_group_idx_ctx][1];
Cheng Chen2ef24ea2017-11-29 12:22:24 -08008346
Cheng Chen5a881722017-11-30 17:05:10 -08008347 masked_type_cost +=
8348 x->compound_type_cost[bsize][mbmi->interinter_compound_type - 1];
Cheng Chen2ef24ea2017-11-29 12:22:24 -08008349 }
Cheng Chen5a881722017-11-30 17:05:10 -08008350 } else {
8351 const int comp_index_ctx = get_comp_index_context(cm, xd);
8352 masked_type_cost += x->comp_idx_cost[comp_index_ctx][1];
Cheng Chen2ef24ea2017-11-29 12:22:24 -08008353 }
Cheng Chen5a881722017-11-30 17:05:10 -08008354 rs2 = av1_cost_literal(get_interinter_compound_type_bits(
8355 bsize, mbmi->interinter_compound_type)) +
8356 masked_type_cost;
Cheng Chen2ef24ea2017-11-29 12:22:24 -08008357#else
Sarah Parker680b9b12017-08-16 18:55:34 -07008358 int masked_type_cost = 0;
8359 if (masked_compound_used) {
Sarah Parker680b9b12017-08-16 18:55:34 -07008360 if (!is_interinter_compound_used(COMPOUND_WEDGE, bsize))
8361 masked_type_cost += av1_cost_literal(1);
8362 else
Sarah Parker680b9b12017-08-16 18:55:34 -07008363 masked_type_cost +=
Yue Chena4245512017-08-31 11:58:08 -07008364 x->compound_type_cost[bsize][mbmi->interinter_compound_type];
Sarah Parker680b9b12017-08-16 18:55:34 -07008365 }
Sarah Parker6fdc8532016-11-16 17:47:13 -08008366 rs2 = av1_cost_literal(get_interinter_compound_type_bits(
Sarah Parker2d0e9b72017-05-04 01:34:16 +00008367 bsize, mbmi->interinter_compound_type)) +
Sarah Parker680b9b12017-08-16 18:55:34 -07008368 masked_type_cost;
Cheng Chen2ef24ea2017-11-29 12:22:24 -08008369#endif // CONFIG_JNT_COMP
Yaowu Xuc27fc142016-08-22 16:08:15 -07008370
Sarah Parker6fdc8532016-11-16 17:47:13 -08008371 switch (cur_type) {
8372 case COMPOUND_AVERAGE:
Jingning Hanc44009c2017-05-06 11:36:49 -07008373 av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, &orig_dst,
8374 bsize);
Sarah Parker6fdc8532016-11-16 17:47:13 -08008375 av1_subtract_plane(x, bsize, 0);
8376 rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
8377 &tmp_skip_txfm_sb, &tmp_skip_sse_sb,
8378 INT64_MAX);
8379 if (rd != INT64_MAX)
Urvang Joshi70006e42017-06-14 16:08:55 -07008380 best_rd_cur = RDCOST(x->rdmult, rs2 + rate_mv + rate_sum, dist_sum);
Sarah Parker2e604882017-01-17 17:31:25 -08008381 best_rd_compound = best_rd_cur;
Sarah Parker6fdc8532016-11-16 17:47:13 -08008382 break;
8383 case COMPOUND_WEDGE:
Sarah Parker6fdc8532016-11-16 17:47:13 -08008384 if (x->source_variance > cpi->sf.disable_wedge_search_var_thresh &&
8385 best_rd_compound / 3 < ref_best_rd) {
Sarah Parkerddcea392017-04-25 15:57:22 -07008386 best_rd_cur = build_and_cost_compound_type(
David Barkerac37fa32016-12-02 12:30:21 +00008387 cpi, x, cur_mv, bsize, this_mode, rs2, rate_mv, &orig_dst,
8388 &tmp_rate_mv, preds0, preds1, strides, mi_row, mi_col);
Sarah Parker6fdc8532016-11-16 17:47:13 -08008389 }
8390 break;
Sarah Parker569edda2016-12-14 14:57:38 -08008391 case COMPOUND_SEG:
Sarah Parker569edda2016-12-14 14:57:38 -08008392 if (x->source_variance > cpi->sf.disable_wedge_search_var_thresh &&
8393 best_rd_compound / 3 < ref_best_rd) {
Sarah Parkerddcea392017-04-25 15:57:22 -07008394 best_rd_cur = build_and_cost_compound_type(
Sarah Parker569edda2016-12-14 14:57:38 -08008395 cpi, x, cur_mv, bsize, this_mode, rs2, rate_mv, &orig_dst,
8396 &tmp_rate_mv, preds0, preds1, strides, mi_row, mi_col);
Sarah Parker569edda2016-12-14 14:57:38 -08008397 }
8398 break;
Sarah Parker6fdc8532016-11-16 17:47:13 -08008399 default: assert(0); return 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008400 }
Sarah Parker2e604882017-01-17 17:31:25 -08008401
8402 if (best_rd_cur < best_rd_compound) {
8403 best_rd_compound = best_rd_cur;
Sarah Parker2d0e9b72017-05-04 01:34:16 +00008404 best_compound_data.wedge_index = mbmi->wedge_index;
8405 best_compound_data.wedge_sign = mbmi->wedge_sign;
Sarah Parker2d0e9b72017-05-04 01:34:16 +00008406 best_compound_data.mask_type = mbmi->mask_type;
8407 memcpy(best_compound_data.seg_mask, xd->seg_mask,
8408 2 * MAX_SB_SQUARE * sizeof(uint8_t));
Sarah Parker2d0e9b72017-05-04 01:34:16 +00008409 best_compound_data.interinter_compound_type =
8410 mbmi->interinter_compound_type;
Yue Chena4245512017-08-31 11:58:08 -07008411 best_compmode_interinter_cost = rs2;
Sarah Parker2e604882017-01-17 17:31:25 -08008412 if (have_newmv_in_inter_mode(this_mode)) {
8413 if (use_masked_motion_search(cur_type)) {
8414 best_tmp_rate_mv = tmp_rate_mv;
8415 best_mv[0].as_int = mbmi->mv[0].as_int;
8416 best_mv[1].as_int = mbmi->mv[1].as_int;
8417 } else {
8418 best_mv[0].as_int = cur_mv[0].as_int;
8419 best_mv[1].as_int = cur_mv[1].as_int;
8420 }
8421 }
8422 }
8423 // reset to original mvs for next iteration
8424 mbmi->mv[0].as_int = cur_mv[0].as_int;
8425 mbmi->mv[1].as_int = cur_mv[1].as_int;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008426 }
Sarah Parker2d0e9b72017-05-04 01:34:16 +00008427 mbmi->wedge_index = best_compound_data.wedge_index;
8428 mbmi->wedge_sign = best_compound_data.wedge_sign;
Sarah Parker2d0e9b72017-05-04 01:34:16 +00008429 mbmi->mask_type = best_compound_data.mask_type;
8430 memcpy(xd->seg_mask, best_compound_data.seg_mask,
8431 2 * MAX_SB_SQUARE * sizeof(uint8_t));
Sarah Parker2d0e9b72017-05-04 01:34:16 +00008432 mbmi->interinter_compound_type =
8433 best_compound_data.interinter_compound_type;
Sarah Parker6fdc8532016-11-16 17:47:13 -08008434 if (have_newmv_in_inter_mode(this_mode)) {
8435 mbmi->mv[0].as_int = best_mv[0].as_int;
8436 mbmi->mv[1].as_int = best_mv[1].as_int;
8437 xd->mi[0]->bmi[0].as_mv[0].as_int = mbmi->mv[0].as_int;
8438 xd->mi[0]->bmi[0].as_mv[1].as_int = mbmi->mv[1].as_int;
Sarah Parker2d0e9b72017-05-04 01:34:16 +00008439 if (use_masked_motion_search(mbmi->interinter_compound_type)) {
Sarah Parker6fdc8532016-11-16 17:47:13 -08008440 rd_stats->rate += best_tmp_rate_mv - rate_mv;
8441 rate_mv = best_tmp_rate_mv;
8442 }
8443 }
8444
8445 if (ref_best_rd < INT64_MAX && best_rd_compound / 3 > ref_best_rd) {
David Barkerac37fa32016-12-02 12:30:21 +00008446 restore_dst_buf(xd, orig_dst);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008447 return INT64_MAX;
David Barkerb8069f92016-11-18 14:49:56 +00008448 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07008449
8450 pred_exists = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008451
Yue Chena4245512017-08-31 11:58:08 -07008452 compmode_interinter_cost = best_compmode_interinter_cost;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008453 }
8454
8455 if (is_comp_interintra_pred) {
8456 INTERINTRA_MODE best_interintra_mode = II_DC_PRED;
8457 int64_t best_interintra_rd = INT64_MAX;
8458 int rmode, rate_sum;
8459 int64_t dist_sum;
8460 int j;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008461 int tmp_rate_mv = 0;
8462 int tmp_skip_txfm_sb;
8463 int64_t tmp_skip_sse_sb;
8464 DECLARE_ALIGNED(16, uint8_t, intrapred_[2 * MAX_SB_SQUARE]);
8465 uint8_t *intrapred;
8466
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02008467#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07008468 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
8469 intrapred = CONVERT_TO_BYTEPTR(intrapred_);
8470 else
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02008471#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07008472 intrapred = intrapred_;
8473
Emil Keyder01770b32017-01-20 18:03:11 -05008474 mbmi->ref_frame[1] = NONE_FRAME;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008475 for (j = 0; j < MAX_MB_PLANE; j++) {
8476 xd->plane[j].dst.buf = tmp_buf + j * MAX_SB_SQUARE;
8477 xd->plane[j].dst.stride = bw;
8478 }
Debargha Mukherjeead8be032017-05-09 15:28:45 -07008479 av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, &orig_dst, bsize);
David Barkerac37fa32016-12-02 12:30:21 +00008480 restore_dst_buf(xd, orig_dst);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008481 mbmi->ref_frame[1] = INTRA_FRAME;
8482 mbmi->use_wedge_interintra = 0;
8483
8484 for (j = 0; j < INTERINTRA_MODES; ++j) {
8485 mbmi->interintra_mode = (INTERINTRA_MODE)j;
8486 rmode = interintra_mode_cost[mbmi->interintra_mode];
David Barker761b1ac2017-09-25 11:23:03 +01008487 av1_build_intra_predictors_for_interintra(cm, xd, bsize, 0, &orig_dst,
David Barkerac37fa32016-12-02 12:30:21 +00008488 intrapred, bw);
Yaowu Xuf883b422016-08-30 14:01:10 -07008489 av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008490 model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
8491 &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
Urvang Joshi70006e42017-06-14 16:08:55 -07008492 rd = RDCOST(x->rdmult, tmp_rate_mv + rate_sum + rmode, dist_sum);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008493 if (rd < best_interintra_rd) {
8494 best_interintra_rd = rd;
8495 best_interintra_mode = mbmi->interintra_mode;
8496 }
8497 }
8498 mbmi->interintra_mode = best_interintra_mode;
8499 rmode = interintra_mode_cost[mbmi->interintra_mode];
David Barker761b1ac2017-09-25 11:23:03 +01008500 av1_build_intra_predictors_for_interintra(cm, xd, bsize, 0, &orig_dst,
David Barkerac37fa32016-12-02 12:30:21 +00008501 intrapred, bw);
Yaowu Xuf883b422016-08-30 14:01:10 -07008502 av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
8503 av1_subtract_plane(x, bsize, 0);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008504 rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
8505 &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
8506 if (rd != INT64_MAX)
Urvang Joshi70006e42017-06-14 16:08:55 -07008507 rd = RDCOST(x->rdmult, rate_mv + rmode + rate_sum, dist_sum);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008508 best_interintra_rd = rd;
8509
8510 if (ref_best_rd < INT64_MAX && best_interintra_rd > 2 * ref_best_rd) {
David Barkerb8069f92016-11-18 14:49:56 +00008511 // Don't need to call restore_dst_buf here
Yaowu Xuc27fc142016-08-22 16:08:15 -07008512 return INT64_MAX;
8513 }
8514 if (is_interintra_wedge_used(bsize)) {
Debargha Mukherjeec5f735f2017-04-26 03:25:28 +00008515 int64_t best_interintra_rd_nowedge = INT64_MAX;
8516 int64_t best_interintra_rd_wedge = INT64_MAX;
8517 int_mv tmp_mv;
Yue Cheneaf128a2017-10-16 17:01:36 -07008518 int rwedge = x->wedge_interintra_cost[bsize][0];
Yaowu Xuc27fc142016-08-22 16:08:15 -07008519 if (rd != INT64_MAX)
Urvang Joshi70006e42017-06-14 16:08:55 -07008520 rd = RDCOST(x->rdmult, rmode + rate_mv + rwedge + rate_sum, dist_sum);
Yue Chenf03907a2017-05-31 12:04:04 -07008521 best_interintra_rd_nowedge = best_interintra_rd;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008522
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08008523 // Disable wedge search if source variance is small
Yaowu Xuc27fc142016-08-22 16:08:15 -07008524 if (x->source_variance > cpi->sf.disable_wedge_search_var_thresh) {
8525 mbmi->use_wedge_interintra = 1;
8526
Yaowu Xuf883b422016-08-30 14:01:10 -07008527 rwedge = av1_cost_literal(get_interintra_wedge_bits(bsize)) +
Yue Cheneaf128a2017-10-16 17:01:36 -07008528 x->wedge_interintra_cost[bsize][1];
Yaowu Xuc27fc142016-08-22 16:08:15 -07008529
8530 best_interintra_rd_wedge =
8531 pick_interintra_wedge(cpi, x, bsize, intrapred_, tmp_buf_);
8532
8533 best_interintra_rd_wedge +=
Urvang Joshi70006e42017-06-14 16:08:55 -07008534 RDCOST(x->rdmult, rmode + rate_mv + rwedge, 0);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008535 // Refine motion vector.
8536 if (have_newmv_in_inter_mode(this_mode)) {
8537 // get negative of mask
Yaowu Xuf883b422016-08-30 14:01:10 -07008538 const uint8_t *mask = av1_get_contiguous_soft_mask(
Yaowu Xuc27fc142016-08-22 16:08:15 -07008539 mbmi->interintra_wedge_index, 1, bsize);
David Barkerf19f35f2017-05-22 16:33:22 +01008540 tmp_mv.as_int = x->mbmi_ext->ref_mvs[refs[0]][0].as_int;
8541 compound_single_motion_search(cpi, x, bsize, &tmp_mv.as_mv, mi_row,
8542 mi_col, intrapred, mask, bw,
Timothy B. Terriberry5d24b6f2017-06-15 13:39:35 -07008543 &tmp_rate_mv, 0);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008544 mbmi->mv[0].as_int = tmp_mv.as_int;
Debargha Mukherjeead8be032017-05-09 15:28:45 -07008545 av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, &orig_dst,
8546 bsize);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008547 model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum,
8548 &tmp_skip_txfm_sb, &tmp_skip_sse_sb);
Urvang Joshi70006e42017-06-14 16:08:55 -07008549 rd = RDCOST(x->rdmult, rmode + tmp_rate_mv + rwedge + rate_sum,
8550 dist_sum);
Yue Chenf03907a2017-05-31 12:04:04 -07008551 if (rd >= best_interintra_rd_wedge) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07008552 tmp_mv.as_int = cur_mv[0].as_int;
8553 tmp_rate_mv = rate_mv;
8554 }
8555 } else {
8556 tmp_mv.as_int = cur_mv[0].as_int;
8557 tmp_rate_mv = rate_mv;
Yaowu Xuf883b422016-08-30 14:01:10 -07008558 av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008559 }
8560 // Evaluate closer to true rd
Yaowu Xuf883b422016-08-30 14:01:10 -07008561 av1_subtract_plane(x, bsize, 0);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008562 rd =
8563 estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum,
8564 &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX);
8565 if (rd != INT64_MAX)
Urvang Joshi70006e42017-06-14 16:08:55 -07008566 rd = RDCOST(x->rdmult, rmode + tmp_rate_mv + rwedge + rate_sum,
8567 dist_sum);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008568 best_interintra_rd_wedge = rd;
8569 if (best_interintra_rd_wedge < best_interintra_rd_nowedge) {
8570 mbmi->use_wedge_interintra = 1;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008571 mbmi->mv[0].as_int = tmp_mv.as_int;
Angie Chiang76159122016-11-09 12:13:22 -08008572 rd_stats->rate += tmp_rate_mv - rate_mv;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008573 rate_mv = tmp_rate_mv;
8574 } else {
8575 mbmi->use_wedge_interintra = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008576 mbmi->mv[0].as_int = cur_mv[0].as_int;
8577 }
8578 } else {
8579 mbmi->use_wedge_interintra = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008580 }
8581 }
8582
8583 pred_exists = 0;
Yue Cheneaf128a2017-10-16 17:01:36 -07008584 compmode_interintra_cost = x->interintra_cost[size_group_lookup[bsize]][1] +
8585 interintra_mode_cost[mbmi->interintra_mode];
Yaowu Xuc27fc142016-08-22 16:08:15 -07008586 if (is_interintra_wedge_used(bsize)) {
Yue Cheneaf128a2017-10-16 17:01:36 -07008587 compmode_interintra_cost +=
8588 x->wedge_interintra_cost[bsize][mbmi->use_wedge_interintra];
Yaowu Xuc27fc142016-08-22 16:08:15 -07008589 if (mbmi->use_wedge_interintra) {
Yue Chen5e606542017-05-24 17:03:17 -07008590 compmode_interintra_cost +=
Yaowu Xuf883b422016-08-30 14:01:10 -07008591 av1_cost_literal(get_interintra_wedge_bits(bsize));
Yaowu Xuc27fc142016-08-22 16:08:15 -07008592 }
8593 }
8594 } else if (is_interintra_allowed(mbmi)) {
Yue Cheneaf128a2017-10-16 17:01:36 -07008595 compmode_interintra_cost = x->interintra_cost[size_group_lookup[bsize]][0];
Yaowu Xuc27fc142016-08-22 16:08:15 -07008596 }
8597
Angie Chiang75c22092016-10-25 12:19:16 -07008598 if (pred_exists == 0) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07008599 int tmp_rate;
8600 int64_t tmp_dist;
Jingning Hanc44009c2017-05-06 11:36:49 -07008601 av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, &orig_dst, bsize);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008602 model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate,
8603 &tmp_dist, &skip_txfm_sb, &skip_sse_sb);
Urvang Joshi70006e42017-06-14 16:08:55 -07008604 rd = RDCOST(x->rdmult, rs + tmp_rate, tmp_dist);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008605 }
8606
Fergus Simpson3424c2d2017-03-09 11:48:15 -08008607 if (!is_comp_pred)
Rupert Swarbrick27e90292017-09-28 17:46:50 +01008608 args->single_filter[this_mode][refs[0]] =
8609 av1_extract_interp_filter(mbmi->interp_filters, 0);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008610
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -08008611 if (args->modelled_rd != NULL) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07008612 if (is_comp_pred) {
8613 const int mode0 = compound_ref0_mode(this_mode);
8614 const int mode1 = compound_ref1_mode(this_mode);
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -08008615 const int64_t mrd = AOMMIN(args->modelled_rd[mode0][refs[0]],
8616 args->modelled_rd[mode1][refs[1]]);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008617 if (rd / 4 * 3 > mrd && ref_best_rd < INT64_MAX) {
David Barkerac37fa32016-12-02 12:30:21 +00008618 restore_dst_buf(xd, orig_dst);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008619 return INT64_MAX;
8620 }
8621 } else if (!is_comp_interintra_pred) {
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -08008622 args->modelled_rd[this_mode][refs[0]] = rd;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008623 }
8624 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07008625
8626 if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
8627 // if current pred_error modeled rd is substantially more than the best
8628 // so far, do not bother doing full rd
8629 if (rd / 2 > ref_best_rd) {
David Barkerac37fa32016-12-02 12:30:21 +00008630 restore_dst_buf(xd, orig_dst);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008631 return INT64_MAX;
8632 }
8633 }
8634
Yue Chen5e606542017-05-24 17:03:17 -07008635 rd_stats->rate += compmode_interintra_cost;
Yue Chen5e606542017-05-24 17:03:17 -07008636 rate2_bmc_nocoeff += compmode_interintra_cost;
Yue Chen5e606542017-05-24 17:03:17 -07008637 rd_stats->rate += compmode_interinter_cost;
Yue Chen5e606542017-05-24 17:03:17 -07008638
Sebastien Alaiwan1bc94fc2017-10-31 10:25:17 +01008639 ret_val = motion_mode_rd(cpi, x, bsize, rd_stats, rd_stats_y, rd_stats_uv,
8640 disable_skip, mode_mv, mi_row, mi_col, args,
8641 ref_best_rd, refs, rate_mv, single_newmv,
8642 rate2_bmc_nocoeff, &best_bmc_mbmi, rate_mv_bmc, rs,
8643 &skip_txfm_sb, &skip_sse_sb, &orig_dst);
Fergus Simpson10fb9fb2017-03-09 16:48:02 -08008644 if (ret_val != 0) return ret_val;
Angie Chiang76159122016-11-09 12:13:22 -08008645
Yaowu Xuc27fc142016-08-22 16:08:15 -07008646 return 0; // The rate-distortion cost will be re-calculated by caller.
8647}
8648
#if CONFIG_INTRABC
// RD-searches the IntraBC (intra block copy) mode for the current block.
//
// Tries to find a displacement vector (DV) pointing into the already-coded
// area of the current tile/superblock, evaluates the resulting prediction at
// full RD (luma + chroma, skip and no-skip), and commits the best candidate
// found into *mbmi / *rd_cost / x->skip.
//
// Parameters:
//   cpi     - encoder instance (read-only here: search params, DV costs).
//   x       - macroblock encoder state; mutated (mv limits are saved and
//             restored per direction; mbmi and skip flag are overwritten
//             with the best candidate on return).
//   rd_cost - in: current best RD stats; out: best of input and IntraBC.
//   bsize   - block size being searched.
//   best_rd - in: RD cost to beat.
// Returns the best RD cost found (best_rd unchanged if IntraBC never wins,
// or INT64_MAX if IntraBC is not allowed for this block/frame).
static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
                                       RD_STATS *rd_cost, BLOCK_SIZE bsize,
                                       int64_t best_rd) {
  const AV1_COMMON *const cm = &cpi->common;
  // Bail out early when IntraBC is disallowed (e.g. not an intra-only frame
  // or unsupported block size).
  if (!av1_allow_intrabc(bsize, cm)) return INT64_MAX;

  MACROBLOCKD *const xd = &x->e_mbd;
  const TileInfo *tile = &xd->tile;
  MODE_INFO *const mi = xd->mi[0];
  // Recover mi_row/mi_col from the distance of this block to the frame edges.
  const int mi_row = -xd->mb_to_top_edge / (8 * MI_SIZE);
  const int mi_col = -xd->mb_to_left_edge / (8 * MI_SIZE);
  const int w = block_size_wide[bsize];
  const int h = block_size_high[bsize];
  // Superblock coordinates, used below to restrict the DV search to
  // already-reconstructed rows/columns.
  const int sb_row = mi_row >> cm->mib_size_log2;
  const int sb_col = mi_col >> cm->mib_size_log2;

  // Build an MV reference list for INTRA_FRAME so the DV can be predicted
  // and cost-estimated like a regular motion vector.
  MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  MV_REFERENCE_FRAME ref_frame = INTRA_FRAME;
  int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
  av1_find_mv_refs(cm, xd, mi, ref_frame, &mbmi_ext->ref_mv_count[ref_frame],
                   mbmi_ext->ref_mv_stack[ref_frame],
                   mbmi_ext->compound_mode_context, candidates, mi_row, mi_col,
                   NULL, NULL, mbmi_ext->mode_context);

  int_mv nearestmv, nearmv;
#if CONFIG_AMVR
  av1_find_best_ref_mvs(0, candidates, &nearestmv, &nearmv, 0);
#else
  av1_find_best_ref_mvs(0, candidates, &nearestmv, &nearmv);
#endif

  // Prefer the nearest candidate as the DV predictor; fall back to a
  // position-derived default when both candidates are zero.
  int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
  if (dv_ref.as_int == 0) av1_find_ref_dv(&dv_ref, mi_row, mi_col);
  // Ref DV should not have sub-pel.
  assert((dv_ref.as_mv.col & 7) == 0);
  assert((dv_ref.as_mv.row & 7) == 0);
  mbmi_ext->ref_mvs[INTRA_FRAME][0] = dv_ref;

  // IntraBC predicts from the current (partially reconstructed) frame, so
  // point the prediction buffers at cur_buf instead of a reference frame.
  struct buf_2d yv12_mb[MAX_MB_PLANE];
  av1_setup_pred_block(xd, yv12_mb, xd->cur_buf, mi_row, mi_col, NULL, NULL);
  for (int i = 0; i < MAX_MB_PLANE; ++i) {
    xd->plane[i].pre[0] = yv12_mb[i];
  }

  // The valid source area is split into two search regions: rows strictly
  // above the current superblock row, and columns to the left within it.
  enum IntrabcMotionDirection {
    IBC_MOTION_ABOVE,
    IBC_MOTION_LEFT,
    IBC_MOTION_DIRECTIONS
  };

  MB_MODE_INFO *mbmi = &mi->mbmi;
  MB_MODE_INFO best_mbmi = *mbmi;
  RD_STATS best_rdcost = *rd_cost;
  int best_skip = x->skip;

  for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE;
       dir < IBC_MOTION_DIRECTIONS; ++dir) {
    // Save the caller's MV limits; they are narrowed per direction below and
    // restored before leaving each iteration.
    const MvLimits tmp_mv_limits = x->mv_limits;
    switch (dir) {
      case IBC_MOTION_ABOVE:
        // Full tile width, but only rows above the current superblock row.
        x->mv_limits.col_min = (tile->mi_col_start - mi_col) * MI_SIZE;
        x->mv_limits.col_max = (tile->mi_col_end - mi_col) * MI_SIZE - w;
        x->mv_limits.row_min = (tile->mi_row_start - mi_row) * MI_SIZE;
        x->mv_limits.row_max = (sb_row * cm->mib_size - mi_row) * MI_SIZE - h;
        break;
      case IBC_MOTION_LEFT:
        // Columns left of the current superblock column, down to the bottom
        // of the current superblock row (already-coded area only).
        x->mv_limits.col_min = (tile->mi_col_start - mi_col) * MI_SIZE;
        x->mv_limits.col_max = (sb_col * cm->mib_size - mi_col) * MI_SIZE - w;
        // TODO(aconverse@google.com): Minimize the overlap between above and
        // left areas.
        x->mv_limits.row_min = (tile->mi_row_start - mi_row) * MI_SIZE;
        int bottom_coded_mi_edge =
            AOMMIN((sb_row + 1) * cm->mib_size, tile->mi_row_end);
        x->mv_limits.row_max = (bottom_coded_mi_edge - mi_row) * MI_SIZE - h;
        break;
      default: assert(0);
    }
    // The narrowed range must be contained in the caller's range.
    assert(x->mv_limits.col_min >= tmp_mv_limits.col_min);
    assert(x->mv_limits.col_max <= tmp_mv_limits.col_max);
    assert(x->mv_limits.row_min >= tmp_mv_limits.row_min);
    assert(x->mv_limits.row_max <= tmp_mv_limits.row_max);
    av1_set_mv_search_range(&x->mv_limits, &dv_ref.as_mv);

    // Empty search region for this direction (e.g. first superblock row):
    // restore limits and try the next direction.
    if (x->mv_limits.col_max < x->mv_limits.col_min ||
        x->mv_limits.row_max < x->mv_limits.row_min) {
      x->mv_limits = tmp_mv_limits;
      continue;
    }

    // Full-pel DV search seeded with the (1/8-pel) predictor scaled down.
    int step_param = cpi->mv_step_param;
    MV mvp_full = dv_ref.as_mv;
    mvp_full.col >>= 3;
    mvp_full.row >>= 3;
    int sadpb = x->sadperbit16;
    int cost_list[5];
#if CONFIG_HASH_ME
    int bestsme = av1_full_pixel_search(
        cpi, x, bsize, &mvp_full, step_param, sadpb,
        cond_cost_list(cpi, cost_list), &dv_ref.as_mv, INT_MAX, 1,
        (MI_SIZE * mi_col), (MI_SIZE * mi_row), 1);
#else
    int bestsme = av1_full_pixel_search(cpi, x, bsize, &mvp_full, step_param,
                                        sadpb, cond_cost_list(cpi, cost_list),
                                        &dv_ref.as_mv, INT_MAX, 1);
#endif

    x->mv_limits = tmp_mv_limits;
    if (bestsme == INT_MAX) continue;
    mvp_full = x->best_mv.as_mv;
    // Convert the full-pel search result to the 1/8-pel units of mbmi->mv.
    MV dv = {.row = mvp_full.row * 8, .col = mvp_full.col * 8 };
    if (mv_check_bounds(&x->mv_limits, &dv)) continue;
    if (!av1_is_dv_valid(dv, tile, mi_row, mi_col, bsize, cm->mib_size_log2))
      continue;

    // DV should not have sub-pel.
    assert((dv.col & 7) == 0);
    assert((dv.row & 7) == 0);
    // Configure mbmi as an IntraBC block: DC_PRED with a DV, predicted with
    // the BILINEAR filter, no palette.
    memset(&mbmi->palette_mode_info, 0, sizeof(mbmi->palette_mode_info));
    mbmi->use_intrabc = 1;
    mbmi->mode = DC_PRED;
    mbmi->uv_mode = UV_DC_PRED;
    mbmi->mv[0].as_mv = dv;
    mbmi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
    mbmi->skip = 0;
    x->skip = 0;
    av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);

    // Rate of signaling the DV (relative to its predictor) and the mode flag.
    int *dvcost[2] = { (int *)&cpi->dv_cost[0][MV_MAX],
                       (int *)&cpi->dv_cost[1][MV_MAX] };
    // TODO(aconverse@google.com): The full motion field defining discount
    // in MV_COST_WEIGHT is too large. Explore other values.
    int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, cpi->dv_joint_cost,
                                  dvcost, MV_COST_WEIGHT_SUB);
    const int rate_mode = x->intrabc_cost[1];
    RD_STATS rd_stats, rd_stats_uv;
    av1_subtract_plane(x, bsize, 0);
    if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) {
      // Intrabc
      select_tx_type_yrd(cpi, x, &rd_stats, bsize, mi_row, mi_col, INT64_MAX);
    } else {
      // Fixed transform size: evaluate luma, then replicate the chosen
      // tx_size / skip decision across the block's tx units.
      int idx, idy;
      super_block_yrd(cpi, x, &rd_stats, bsize, INT64_MAX);
      for (idy = 0; idy < xd->n8_h; ++idy)
        for (idx = 0; idx < xd->n8_w; ++idx)
          mbmi->inter_tx_size[idy][idx] = mbmi->tx_size;
      memset(x->blk_skip[0], rd_stats.skip,
             sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
    }
    super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
    av1_merge_rd_stats(&rd_stats, &rd_stats_uv);
#if CONFIG_RD_DEBUG
    mbmi->rd_stats = rd_stats;
#endif

    const int skip_ctx = av1_get_skip_context(xd);

    // Candidate 1: code the residual (no skip).
    RD_STATS rdc_noskip;
    av1_init_rd_stats(&rdc_noskip);
    rdc_noskip.rate =
        rate_mode + rate_mv + rd_stats.rate + x->skip_cost[skip_ctx][0];
    rdc_noskip.dist = rd_stats.dist;
    rdc_noskip.rdcost = RDCOST(x->rdmult, rdc_noskip.rate, rdc_noskip.dist);
    if (rdc_noskip.rdcost < best_rd) {
      best_rd = rdc_noskip.rdcost;
      best_mbmi = *mbmi;
      best_skip = x->skip;
      best_rdcost = rdc_noskip;
    }

    // Candidate 2: skip the residual entirely (distortion = SSE of the
    // prediction).
    x->skip = 1;
    mbmi->skip = 1;
    RD_STATS rdc_skip;
    av1_init_rd_stats(&rdc_skip);
    rdc_skip.rate = rate_mode + rate_mv + x->skip_cost[skip_ctx][1];
    rdc_skip.dist = rd_stats.sse;
    rdc_skip.rdcost = RDCOST(x->rdmult, rdc_skip.rate, rdc_skip.dist);
    if (rdc_skip.rdcost < best_rd) {
      best_rd = rdc_skip.rdcost;
      best_mbmi = *mbmi;
      best_skip = x->skip;
      best_rdcost = rdc_skip;
    }
  }
  // Commit the overall winner (which may be the caller's original mode info
  // if no IntraBC candidate improved best_rd).
  *mbmi = best_mbmi;
  *rd_cost = best_rdcost;
  x->skip = best_skip;
  return best_rd;
}
#endif  // CONFIG_INTRABC
8839
// Performs intra-mode rate-distortion search for a whole superblock:
// first picks the best luma intra mode, then (if luma beat best_rd) the best
// chroma intra mode, and writes the combined rate/distortion/rdcost into
// rd_cost. When CONFIG_INTRABC is on, an intra block-copy search may further
// replace the result. On success the chosen mode info is saved into ctx;
// on failure rd_cost->rate is left at INT_MAX.
void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
                               RD_STATS *rd_cost, BLOCK_SIZE bsize,
                               PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  struct macroblockd_plane *const pd = xd->plane;
  int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
  int y_skip = 0, uv_skip = 0;
  int64_t dist_y = 0, dist_uv = 0;
  TX_SIZE max_uv_tx_size;

  // cm is only dereferenced under some config combinations.
  (void)cm;

  ctx->skip = 0;
  // Mark the block as intra (no second reference) before any RD search.
  mbmi->ref_frame[0] = INTRA_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
#if CONFIG_INTRABC
  mbmi->use_intrabc = 0;
  mbmi->mv[0].as_int = 0;
#endif  // CONFIG_INTRABC

  // Luma search; returns INT64_MAX-like cost >= best_rd when nothing beats it.
  const int64_t intra_yrd = rd_pick_intra_sby_mode(
      cpi, x, &rate_y, &rate_y_tokenonly, &dist_y, &y_skip, bsize, best_rd);

  if (intra_yrd < best_rd) {
#if CONFIG_CFL
    // Only store reconstructed luma when there's chroma RDO. When there's no
    // chroma RDO, the reconstructed luma will be stored in encode_superblock().
    xd->cfl.store_y = !x->skip_chroma_rd;
    if (xd->cfl.store_y) {
      // Perform one extra call to txfm_rd_in_plane(), with the values chosen
      // during luma RDO, so we can store reconstructed luma values
      RD_STATS this_rd_stats;
      txfm_rd_in_plane(x, cpi, &this_rd_stats, INT64_MAX, AOM_PLANE_Y,
                       mbmi->sb_type, mbmi->tx_size,
                       cpi->sf.use_fast_coef_costing);
      xd->cfl.store_y = 0;
    }
#endif  // CONFIG_CFL
    // Chroma tx size is derived from the luma tx size and the subsampling.
    max_uv_tx_size = uv_txsize_lookup[bsize][mbmi->tx_size][pd[1].subsampling_x]
                                     [pd[1].subsampling_y];
    init_sbuv_mode(mbmi);
    if (!x->skip_chroma_rd)
      rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, &dist_uv,
                              &uv_skip, bsize, max_uv_tx_size);

    if (y_skip && (uv_skip || x->skip_chroma_rd)) {
      // Whole block is skipped: drop the token-only rates and pay the
      // skip-flag cost instead.
      rd_cost->rate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
                      x->skip_cost[av1_get_skip_context(xd)][1];
      rd_cost->dist = dist_y + dist_uv;
    } else {
      rd_cost->rate =
          rate_y + rate_uv + x->skip_cost[av1_get_skip_context(xd)][0];
      rd_cost->dist = dist_y + dist_uv;
    }
    rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
  } else {
    // Signal "no usable intra mode found" to the caller.
    rd_cost->rate = INT_MAX;
  }

#if CONFIG_INTRABC
  // Let intra block copy try to beat the best intra result found above.
  if (rd_cost->rate != INT_MAX && rd_cost->rdcost < best_rd)
    best_rd = rd_cost->rdcost;
  if (rd_pick_intrabc_mode_sb(cpi, x, rd_cost, bsize, best_rd) < best_rd) {
    ctx->skip = x->skip;  // FIXME where is the proper place to set this?!
    assert(rd_cost->rate != INT_MAX);
  }
#endif
  if (rd_cost->rate == INT_MAX) return;

  // Persist the winning mode decision for later reuse by the caller.
  ctx->mic = *xd->mi[0];
  ctx->mbmi_ext = *x->mbmi_ext;
}
8914
Yaowu Xuc27fc142016-08-22 16:08:15 -07008915// Do we have an internal image edge (e.g. formatting bars).
Urvang Joshi52648442016-10-13 17:27:51 -07008916int av1_internal_image_edge(const AV1_COMP *cpi) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07008917 return (cpi->oxcf.pass == 2) &&
8918 ((cpi->twopass.this_frame_stats.inactive_zone_rows > 0) ||
8919 (cpi->twopass.this_frame_stats.inactive_zone_cols > 0));
8920}
8921
8922// Checks to see if a super block is on a horizontal image edge.
8923// In most cases this is the "real" edge unless there are formatting
8924// bars embedded in the stream.
Urvang Joshi52648442016-10-13 17:27:51 -07008925int av1_active_h_edge(const AV1_COMP *cpi, int mi_row, int mi_step) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07008926 int top_edge = 0;
8927 int bottom_edge = cpi->common.mi_rows;
8928 int is_active_h_edge = 0;
8929
8930 // For two pass account for any formatting bars detected.
8931 if (cpi->oxcf.pass == 2) {
Urvang Joshi52648442016-10-13 17:27:51 -07008932 const TWO_PASS *const twopass = &cpi->twopass;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008933
8934 // The inactive region is specified in MBs not mi units.
8935 // The image edge is in the following MB row.
8936 top_edge += (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
8937
8938 bottom_edge -= (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
Yaowu Xuf883b422016-08-30 14:01:10 -07008939 bottom_edge = AOMMAX(top_edge, bottom_edge);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008940 }
8941
8942 if (((top_edge >= mi_row) && (top_edge < (mi_row + mi_step))) ||
8943 ((bottom_edge >= mi_row) && (bottom_edge < (mi_row + mi_step)))) {
8944 is_active_h_edge = 1;
8945 }
8946 return is_active_h_edge;
8947}
8948
8949// Checks to see if a super block is on a vertical image edge.
8950// In most cases this is the "real" edge unless there are formatting
8951// bars embedded in the stream.
Urvang Joshi52648442016-10-13 17:27:51 -07008952int av1_active_v_edge(const AV1_COMP *cpi, int mi_col, int mi_step) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07008953 int left_edge = 0;
8954 int right_edge = cpi->common.mi_cols;
8955 int is_active_v_edge = 0;
8956
8957 // For two pass account for any formatting bars detected.
8958 if (cpi->oxcf.pass == 2) {
Urvang Joshi52648442016-10-13 17:27:51 -07008959 const TWO_PASS *const twopass = &cpi->twopass;
Yaowu Xuc27fc142016-08-22 16:08:15 -07008960
8961 // The inactive region is specified in MBs not mi units.
8962 // The image edge is in the following MB row.
8963 left_edge += (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
8964
8965 right_edge -= (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
Yaowu Xuf883b422016-08-30 14:01:10 -07008966 right_edge = AOMMAX(left_edge, right_edge);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008967 }
8968
8969 if (((left_edge >= mi_col) && (left_edge < (mi_col + mi_step))) ||
8970 ((right_edge >= mi_col) && (right_edge < (mi_col + mi_step)))) {
8971 is_active_v_edge = 1;
8972 }
8973 return is_active_v_edge;
8974}
8975
8976// Checks to see if a super block is at the edge of the active image.
8977// In most cases this is the "real" edge unless there are formatting
8978// bars embedded in the stream.
Urvang Joshi52648442016-10-13 17:27:51 -07008979int av1_active_edge_sb(const AV1_COMP *cpi, int mi_row, int mi_col) {
Yaowu Xuf883b422016-08-30 14:01:10 -07008980 return av1_active_h_edge(cpi, mi_row, cpi->common.mib_size) ||
8981 av1_active_v_edge(cpi, mi_col, cpi->common.mib_size);
Yaowu Xuc27fc142016-08-22 16:08:15 -07008982}
8983
// Rebuilds the chroma palette color-index map for the current block from the
// source U/V pixels and the already-chosen chroma palette: each pixel is
// re-assigned to its nearest (U,V) palette centroid via av1_calc_indices(),
// and the map is then extended to the full plane block dimensions.
static void restore_uv_color_map(const AV1_COMP *const cpi, MACROBLOCK *x) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
  const BLOCK_SIZE bsize = mbmi->sb_type;
  assert(bsize >= BLOCK_8X8);
  int src_stride = x->plane[1].src.stride;
  const uint8_t *const src_u = x->plane[1].src.buf;
  const uint8_t *const src_v = x->plane[2].src.buf;
  // Scratch buffer holding interleaved (U, V) pairs, one pair per pixel.
  float *const data = x->palette_buffer->kmeans_data_buf;
  // Interleaved (U, V) centroids, one pair per palette entry.
  float centroids[2 * PALETTE_MAX_SIZE];
  uint8_t *const color_map = xd->plane[1].color_index_map;
  int r, c;
#if CONFIG_HIGHBITDEPTH
  const uint16_t *const src_u16 = CONVERT_TO_SHORTPTR(src_u);
  const uint16_t *const src_v16 = CONVERT_TO_SHORTPTR(src_v);
#endif  // CONFIG_HIGHBITDEPTH
  int plane_block_width, plane_block_height, rows, cols;
  av1_get_block_dimensions(bsize, 1, xd, &plane_block_width,
                           &plane_block_height, &rows, &cols);
  (void)cpi;

  // Gather the source chroma samples into the interleaved float buffer.
  // Note the high-bitdepth branch shares the closing brace across the
  // #if/#endif blocks; both paths fill the same layout.
  for (r = 0; r < rows; ++r) {
    for (c = 0; c < cols; ++c) {
#if CONFIG_HIGHBITDEPTH
      if (cpi->common.use_highbitdepth) {
        data[(r * cols + c) * 2] = src_u16[r * src_stride + c];
        data[(r * cols + c) * 2 + 1] = src_v16[r * src_stride + c];
      } else {
#endif  // CONFIG_HIGHBITDEPTH
        data[(r * cols + c) * 2] = src_u[r * src_stride + c];
        data[(r * cols + c) * 2 + 1] = src_v[r * src_stride + c];
#if CONFIG_HIGHBITDEPTH
      }
#endif  // CONFIG_HIGHBITDEPTH
    }
  }

  // Copy the stored palette colors (rows 1 and 2 of palette_colors hold the
  // U and V components respectively) into interleaved centroid form.
  for (r = 1; r < 3; ++r) {
    for (c = 0; c < pmi->palette_size[1]; ++c) {
      centroids[c * 2 + r - 1] = pmi->palette_colors[r * PALETTE_MAX_SIZE + c];
    }
  }

  // Assign each pixel to its nearest 2-D (U,V) centroid, then pad the index
  // map out to the full plane block size.
  av1_calc_indices(data, centroids, color_map, rows * cols,
                   pmi->palette_size[1], 2);
  extend_palette_color_map(color_map, cols, rows, plane_block_width,
                           plane_block_height);
}
9033
Yaowu Xuf883b422016-08-30 14:01:10 -07009034static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x,
9035 const MACROBLOCKD *xd, int mi_row,
9036 int mi_col, const uint8_t *above,
9037 int above_stride, const uint8_t *left,
Yue Chene9638cc2016-10-10 12:37:54 -07009038 int left_stride);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009039
#if CONFIG_EXT_SKIP
// Estimates the rate-distortion cost of coding the current block in
// skip_mode: scans the mode map for the single NEAREST_NEARESTMV compound
// mode whose reference pair matches the frame-level skip-mode references
// (cm->ref_frame_idx_0/1), sets up mbmi for that mode, and calls
// skip_mode_rd() to compute the cost. At most one candidate is evaluated;
// the loop always terminates via break once a matching mode is found (or
// its MVs are invalid / out of bounds).
// NOTE(review): the early-exit checks read x->skip_mode_rdcost == INT64_MAX
// without resetting it first; this presumably relies on the caller
// initializing x->skip_mode_rdcost before the call — verify at call sites.
static void estimate_skip_mode_rdcost(
    const AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *const x,
    BLOCK_SIZE bsize, int mi_row, int mi_col,
    int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME],
    struct buf_2d yv12_mb[TOTAL_REFS_PER_FRAME][MAX_MB_PLANE]) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;

  int *mode_map = tile_data->mode_map[bsize];
  // Maps each reference frame to its availability flag bit.
  static const int flag_list[TOTAL_REFS_PER_FRAME] = { 0,
                                                      AOM_LAST_FLAG,
                                                      AOM_LAST2_FLAG,
                                                      AOM_LAST3_FLAG,
                                                      AOM_GOLD_FLAG,
                                                      AOM_BWD_FLAG,
                                                      AOM_ALT2_FLAG,
                                                      AOM_ALT_FLAG };
  int i;

  for (int midx = 0; midx < MAX_MODES; ++midx) {
    const int mode_index = mode_map[midx];
    x->skip_mode_index_candidate = mode_index;

    const MV_REFERENCE_FRAME ref_frame =
        av1_mode_order[mode_index].ref_frame[0];
    const MV_REFERENCE_FRAME second_ref_frame =
        av1_mode_order[mode_index].ref_frame[1];
    const int comp_pred = second_ref_frame > INTRA_FRAME;

    // skip_mode is only defined for compound prediction.
    if (!comp_pred) continue;

    const PREDICTION_MODE this_mode = av1_mode_order[mode_index].mode;

    if (!(cpi->ref_frame_flags & flag_list[ref_frame])) continue;
    if (comp_pred && !(cpi->ref_frame_flags & flag_list[second_ref_frame]))
      continue;
    // Check whether current refs/mode align with skip_mode
    if (!(ref_frame == (LAST_FRAME + cm->ref_frame_idx_0) &&
          second_ref_frame == (LAST_FRAME + cm->ref_frame_idx_1) &&
          this_mode == NEAREST_NEARESTMV)) {
      continue;
    }

    // Derive the compound MVs from the corresponding single-reference
    // NEAREST MVs.
    frame_mv[this_mode][ref_frame].as_int =
        frame_mv[compound_ref0_mode(this_mode)][ref_frame].as_int;
    frame_mv[this_mode][second_ref_frame].as_int =
        frame_mv[compound_ref1_mode(this_mode)][second_ref_frame].as_int;

    // Give up entirely if either derived MV is invalid.
    if (frame_mv[this_mode][ref_frame].as_int == INVALID_MV ||
        frame_mv[this_mode][second_ref_frame].as_int == INVALID_MV)
      break;

    mbmi->mode = this_mode;
    mbmi->uv_mode = UV_DC_PRED;
    mbmi->ref_frame[0] = ref_frame;
    mbmi->ref_frame[1] = second_ref_frame;

    // Obtain NEAREST_NEARESTMV.
    {
      // First pass: clamp the frame-level MVs and bail out if either falls
      // outside the search limits.
      for (i = 0; i < 2; ++i) {
        int_mv cur_mv = frame_mv[mbmi->mode][mbmi->ref_frame[i]];
        clamp_mv2(&cur_mv.as_mv, xd);
        if (mv_check_bounds(&x->mv_limits, &cur_mv.as_mv)) {
          x->skip_mode_rdcost = INT64_MAX;
          break;
        }
        mbmi->mv[i].as_int = cur_mv.as_int;
      }
      if (x->skip_mode_rdcost == INT64_MAX) break;

      // Second pass: if a reference-MV stack exists, prefer its top entry
      // (this_mv / comp_mv) over the frame-level MVs.
      const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
      if (mbmi_ext->ref_mv_count[ref_frame_type] > 0) {
        for (i = 0; i < 2; ++i) {
          int_mv cur_mv =
              (i == 0) ? mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv
                       : mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
          clamp_mv2(&cur_mv.as_mv, xd);
          if (mv_check_bounds(&x->mv_limits, &cur_mv.as_mv)) {
            x->skip_mode_rdcost = INT64_MAX;
            break;
          }
          mbmi->mv[i].as_int = cur_mv.as_int;
        }
        if (x->skip_mode_rdcost == INT64_MAX) break;
      }
    }

    // skip_mode implies default settings for all the auxiliary tools.
#if CONFIG_FILTER_INTRA
    mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
    mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
#endif  // CONFIG_FILTER_INTRA
    mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
#if CONFIG_JNT_COMP
    mbmi->comp_group_idx = 0;
    mbmi->compound_idx = 1;
#endif  // CONFIG_JNT_COMP
    mbmi->interinter_compound_type = COMPOUND_AVERAGE;
    mbmi->motion_mode = SIMPLE_TRANSLATION;
    mbmi->ref_mv_idx = 0;
    mbmi->skip_mode = mbmi->skip = 1;

    set_default_interp_filters(mbmi, cm->interp_filter);

    // Point the prediction planes at the two reference buffers.
    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
    for (i = 0; i < MAX_MB_PLANE; i++) {
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
      xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
    }

    // Snapshot the destination buffers so skip_mode_rd() can restore them.
    BUFFER_SET orig_dst;
    for (i = 0; i < MAX_MB_PLANE; i++) {
      orig_dst.plane[i] = xd->plane[i].dst.buf;
      orig_dst.stride[i] = xd->plane[i].dst.stride;
    }

    // Obtain the rdcost for skip_mode.
    skip_mode_rd(cpi, x, bsize, mi_row, mi_col, &orig_dst);
    break;
  }
}
#endif  // CONFIG_EXT_SKIP
9164
Urvang Joshi52648442016-10-13 17:27:51 -07009165void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
Yaowu Xuf883b422016-08-30 14:01:10 -07009166 MACROBLOCK *x, int mi_row, int mi_col,
Sebastien Alaiwan0cf54d42017-10-16 16:10:04 +02009167 RD_STATS *rd_cost, BLOCK_SIZE bsize,
9168 PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far) {
Urvang Joshi52648442016-10-13 17:27:51 -07009169 const AV1_COMMON *const cm = &cpi->common;
9170 const RD_OPT *const rd_opt = &cpi->rd;
9171 const SPEED_FEATURES *const sf = &cpi->sf;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009172 MACROBLOCKD *const xd = &x->e_mbd;
9173 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
Hui Sue87fb232017-10-05 15:00:15 -07009174 const int try_palette =
9175 av1_allow_palette(cm->allow_screen_content_tools, mbmi->sb_type);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009176 PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
9177 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
9178 const struct segmentation *const seg = &cm->seg;
9179 PREDICTION_MODE this_mode;
9180 MV_REFERENCE_FRAME ref_frame, second_ref_frame;
9181 unsigned char segment_id = mbmi->segment_id;
9182 int comp_pred, i, k;
9183 int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
9184 struct buf_2d yv12_mb[TOTAL_REFS_PER_FRAME][MAX_MB_PLANE];
Yaowu Xuc27fc142016-08-22 16:08:15 -07009185 int_mv single_newmv[TOTAL_REFS_PER_FRAME] = { { 0 } };
Zoe Liu7f24e1b2017-03-17 17:42:05 -07009186 int single_newmv_rate[TOTAL_REFS_PER_FRAME] = { 0 };
9187 int64_t modelled_rd[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
Sebastien Alaiwan365e6442017-10-16 11:35:00 +02009188 static const int flag_list[TOTAL_REFS_PER_FRAME] = { 0,
9189 AOM_LAST_FLAG,
9190 AOM_LAST2_FLAG,
9191 AOM_LAST3_FLAG,
9192 AOM_GOLD_FLAG,
9193 AOM_BWD_FLAG,
9194 AOM_ALT2_FLAG,
9195 AOM_ALT_FLAG };
Yaowu Xuc27fc142016-08-22 16:08:15 -07009196 int64_t best_rd = best_rd_so_far;
9197 int best_rate_y = INT_MAX, best_rate_uv = INT_MAX;
9198 int64_t best_pred_diff[REFERENCE_MODES];
9199 int64_t best_pred_rd[REFERENCE_MODES];
9200 MB_MODE_INFO best_mbmode;
Zoe Liu1eed2df2017-10-16 17:13:15 -07009201 const int skip_ctx = av1_get_skip_context(xd);
9202 int rate_skip0 = x->skip_cost[skip_ctx][0];
9203 int rate_skip1 = x->skip_cost[skip_ctx][1];
Yaowu Xuc27fc142016-08-22 16:08:15 -07009204 int best_mode_skippable = 0;
9205 int midx, best_mode_index = -1;
9206 unsigned int ref_costs_single[TOTAL_REFS_PER_FRAME];
Zoe Liuc082bbc2017-05-17 13:31:37 -07009207#if CONFIG_EXT_COMP_REFS
9208 unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME][TOTAL_REFS_PER_FRAME];
9209#else
Yaowu Xuc27fc142016-08-22 16:08:15 -07009210 unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME];
Zoe Liuc082bbc2017-05-17 13:31:37 -07009211#endif // CONFIG_EXT_COMP_REFS
Yaowu Xuf883b422016-08-30 14:01:10 -07009212 aom_prob comp_mode_p;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009213 int64_t best_intra_rd = INT64_MAX;
9214 unsigned int best_pred_sse = UINT_MAX;
9215 PREDICTION_MODE best_intra_mode = DC_PRED;
Urvang Joshifeb925f2016-12-05 10:37:29 -08009216 int rate_uv_intra[TX_SIZES_ALL], rate_uv_tokenonly[TX_SIZES_ALL];
9217 int64_t dist_uvs[TX_SIZES_ALL];
9218 int skip_uvs[TX_SIZES_ALL];
Luc Trudeaud6d9eee2017-07-12 12:36:50 -04009219 UV_PREDICTION_MODE mode_uv[TX_SIZES_ALL];
Urvang Joshifeb925f2016-12-05 10:37:29 -08009220 PALETTE_MODE_INFO pmi_uv[TX_SIZES_ALL];
Yaowu Xuc27fc142016-08-22 16:08:15 -07009221#if CONFIG_EXT_INTRA
Urvang Joshifeb925f2016-12-05 10:37:29 -08009222 int8_t uv_angle_delta[TX_SIZES_ALL];
Yaowu Xuc27fc142016-08-22 16:08:15 -07009223 int is_directional_mode, angle_stats_ready = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009224 uint8_t directional_mode_skip_mask[INTRA_MODES];
9225#endif // CONFIG_EXT_INTRA
Yaowu Xuf883b422016-08-30 14:01:10 -07009226 const int intra_cost_penalty = av1_get_intra_cost_penalty(
Yaowu Xuc27fc142016-08-22 16:08:15 -07009227 cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
Yue Chenb23d00a2017-07-28 17:01:21 -07009228 const int *const intra_mode_cost = x->mbmode_cost[size_group_lookup[bsize]];
Yaowu Xuc27fc142016-08-22 16:08:15 -07009229 int best_skip2 = 0;
Zoe Liu97ad0582017-02-09 10:51:00 -08009230 uint16_t ref_frame_skip_mask[2] = { 0 };
Yaowu Xuc27fc142016-08-22 16:08:15 -07009231 uint32_t mode_skip_mask[TOTAL_REFS_PER_FRAME] = { 0 };
9232 MV_REFERENCE_FRAME best_single_inter_ref = LAST_FRAME;
9233 int64_t best_single_inter_rd = INT64_MAX;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009234 int mode_skip_start = sf->mode_skip_start + 1;
9235 const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];
9236 const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
9237 int64_t mode_threshold[MAX_MODES];
9238 int *mode_map = tile_data->mode_map[bsize];
9239 const int mode_search_skip_flags = sf->mode_search_skip_flags;
Yushin Cho77bba8d2016-11-04 16:36:56 -07009240
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -08009241 HandleInterModeArgs args = {
Sebastien Alaiwan1bc94fc2017-10-31 10:25:17 +01009242 { NULL }, { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
9243 { NULL }, { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
9244 NULL, NULL,
9245 NULL, { { 0 } },
Fergus Simpson073c6f32017-02-17 12:13:48 -08009246 };
9247
Jingning Hanae5cfde2016-11-30 12:01:44 -08009248 const int rows = block_size_high[bsize];
9249 const int cols = block_size_wide[bsize];
Urvang Joshib100db72016-10-12 16:28:56 -07009250 int palette_ctx = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009251 const MODE_INFO *above_mi = xd->above_mi;
9252 const MODE_INFO *left_mi = xd->left_mi;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009253 int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
9254 int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
9255 int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
9256 int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
Yaowu Xuc27fc142016-08-22 16:08:15 -07009257
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02009258#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07009259 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
9260 int len = sizeof(uint16_t);
Jingning Hand064cf02017-06-01 10:00:39 -07009261 args.above_pred_buf[0] = CONVERT_TO_BYTEPTR(x->above_pred_buf);
9262 args.above_pred_buf[1] =
9263 CONVERT_TO_BYTEPTR(x->above_pred_buf + MAX_SB_SQUARE * len);
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -08009264 args.above_pred_buf[2] =
Jingning Hand064cf02017-06-01 10:00:39 -07009265 CONVERT_TO_BYTEPTR(x->above_pred_buf + 2 * MAX_SB_SQUARE * len);
9266 args.left_pred_buf[0] = CONVERT_TO_BYTEPTR(x->left_pred_buf);
9267 args.left_pred_buf[1] =
9268 CONVERT_TO_BYTEPTR(x->left_pred_buf + MAX_SB_SQUARE * len);
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -08009269 args.left_pred_buf[2] =
Jingning Hand064cf02017-06-01 10:00:39 -07009270 CONVERT_TO_BYTEPTR(x->left_pred_buf + 2 * MAX_SB_SQUARE * len);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009271 } else {
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02009272#endif // CONFIG_HIGHBITDEPTH
Jingning Hand064cf02017-06-01 10:00:39 -07009273 args.above_pred_buf[0] = x->above_pred_buf;
9274 args.above_pred_buf[1] = x->above_pred_buf + MAX_SB_SQUARE;
9275 args.above_pred_buf[2] = x->above_pred_buf + 2 * MAX_SB_SQUARE;
9276 args.left_pred_buf[0] = x->left_pred_buf;
9277 args.left_pred_buf[1] = x->left_pred_buf + MAX_SB_SQUARE;
9278 args.left_pred_buf[2] = x->left_pred_buf + 2 * MAX_SB_SQUARE;
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02009279#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07009280 }
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02009281#endif // CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07009282
Yaowu Xuf883b422016-08-30 14:01:10 -07009283 av1_zero(best_mbmode);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009284
Urvang Joshib100db72016-10-12 16:28:56 -07009285 av1_zero(pmi_uv);
hui su9bc1d8d2017-03-24 12:36:03 -07009286 if (try_palette) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07009287 if (above_mi)
9288 palette_ctx += (above_mi->mbmi.palette_mode_info.palette_size[0] > 0);
9289 if (left_mi)
9290 palette_ctx += (left_mi->mbmi.palette_mode_info.palette_size[0] > 0);
9291 }
9292
Yue Chen170678a2017-10-17 13:43:10 -07009293 estimate_ref_frame_costs(cm, xd, x, segment_id, ref_costs_single,
9294 ref_costs_comp, &comp_mode_p);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009295
9296 for (i = 0; i < REFERENCE_MODES; ++i) best_pred_rd[i] = INT64_MAX;
Urvang Joshifeb925f2016-12-05 10:37:29 -08009297 for (i = 0; i < TX_SIZES_ALL; i++) rate_uv_intra[i] = INT_MAX;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009298 for (i = 0; i < TOTAL_REFS_PER_FRAME; ++i) x->pred_sse[i] = INT_MAX;
9299 for (i = 0; i < MB_MODE_COUNT; ++i) {
9300 for (k = 0; k < TOTAL_REFS_PER_FRAME; ++k) {
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -08009301 args.single_filter[i][k] = SWITCHABLE;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009302 }
9303 }
9304
9305 rd_cost->rate = INT_MAX;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009306
9307 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
9308 x->pred_mv_sad[ref_frame] = INT_MAX;
9309 x->mbmi_ext->mode_context[ref_frame] = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009310 x->mbmi_ext->compound_mode_context[ref_frame] = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009311 if (cpi->ref_frame_flags & flag_list[ref_frame]) {
9312 assert(get_ref_frame_buffer(cpi, ref_frame) != NULL);
9313 setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
9314 frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
9315 }
9316 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
Sarah Parker2b9ec2e2017-10-30 17:34:08 -07009317 frame_mv[GLOBALMV][ref_frame].as_int =
Sarah Parkerae7c4582017-02-28 16:30:30 -08009318 gm_get_motion_vector(&cm->global_motion[ref_frame],
Debargha Mukherjeefebb59c2017-03-02 12:23:45 -08009319 cm->allow_high_precision_mv, bsize, mi_col, mi_row,
RogerZhou3b635242017-09-19 10:06:46 -07009320 0
9321#if CONFIG_AMVR
9322 ,
RogerZhou10a03802017-10-26 11:49:48 -07009323 cm->cur_frame_force_integer_mv
RogerZhou3b635242017-09-19 10:06:46 -07009324#endif
9325 )
David Barkercdcac6d2016-12-01 17:04:16 +00009326 .as_int;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009327 frame_mv[NEW_NEWMV][ref_frame].as_int = INVALID_MV;
Sarah Parker2b9ec2e2017-10-30 17:34:08 -07009328 frame_mv[GLOBAL_GLOBALMV][ref_frame].as_int =
Sarah Parkerae7c4582017-02-28 16:30:30 -08009329 gm_get_motion_vector(&cm->global_motion[ref_frame],
Debargha Mukherjeefebb59c2017-03-02 12:23:45 -08009330 cm->allow_high_precision_mv, bsize, mi_col, mi_row,
RogerZhou3b635242017-09-19 10:06:46 -07009331 0
9332#if CONFIG_AMVR
9333 ,
RogerZhou10a03802017-10-26 11:49:48 -07009334 cm->cur_frame_force_integer_mv
RogerZhou3b635242017-09-19 10:06:46 -07009335#endif
9336 )
Sarah Parkerc2d38712017-01-24 15:15:41 -08009337 .as_int;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009338 }
9339
Yaowu Xuc27fc142016-08-22 16:08:15 -07009340 for (; ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
9341 MODE_INFO *const mi = xd->mi[0];
9342 int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame];
9343 x->mbmi_ext->mode_context[ref_frame] = 0;
Yaowu Xuf883b422016-08-30 14:01:10 -07009344 av1_find_mv_refs(cm, xd, mi, ref_frame, &mbmi_ext->ref_mv_count[ref_frame],
9345 mbmi_ext->ref_mv_stack[ref_frame],
Sebastien Alaiwan0bdea0d2017-10-02 15:15:05 +02009346 mbmi_ext->compound_mode_context, candidates, mi_row,
9347 mi_col, NULL, NULL, mbmi_ext->mode_context);
Jingning Han731af492016-11-17 11:53:23 -08009348 if (mbmi_ext->ref_mv_count[ref_frame] < 2) {
9349 MV_REFERENCE_FRAME rf[2];
9350 av1_set_ref_frame(rf, ref_frame);
David Barkercdcac6d2016-12-01 17:04:16 +00009351 if (mbmi_ext->ref_mvs[rf[0]][0].as_int !=
Sarah Parker2b9ec2e2017-10-30 17:34:08 -07009352 frame_mv[GLOBALMV][rf[0]].as_int ||
David Barkercdcac6d2016-12-01 17:04:16 +00009353 mbmi_ext->ref_mvs[rf[0]][1].as_int !=
Sarah Parker2b9ec2e2017-10-30 17:34:08 -07009354 frame_mv[GLOBALMV][rf[0]].as_int ||
David Barkercdcac6d2016-12-01 17:04:16 +00009355 mbmi_ext->ref_mvs[rf[1]][0].as_int !=
Sarah Parker2b9ec2e2017-10-30 17:34:08 -07009356 frame_mv[GLOBALMV][rf[1]].as_int ||
9357 mbmi_ext->ref_mvs[rf[1]][1].as_int !=
9358 frame_mv[GLOBALMV][rf[1]].as_int)
Jingning Han731af492016-11-17 11:53:23 -08009359 mbmi_ext->mode_context[ref_frame] &= ~(1 << ALL_ZERO_FLAG_OFFSET);
9360 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07009361 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07009362
Yue Chen5329a2b2017-02-28 17:33:00 +08009363 av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col);
Jingning Hanad586b92017-05-23 10:24:57 -07009364
Yue Chenf7ba6472017-04-19 11:08:58 -07009365 if (check_num_overlappable_neighbors(mbmi) &&
9366 is_motion_variation_allowed_bsize(bsize)) {
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -08009367 av1_build_prediction_by_above_preds(cm, xd, mi_row, mi_col,
9368 args.above_pred_buf, dst_width1,
9369 dst_height1, args.above_pred_stride);
Yue Chen5329a2b2017-02-28 17:33:00 +08009370 av1_build_prediction_by_left_preds(cm, xd, mi_row, mi_col,
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -08009371 args.left_pred_buf, dst_width2,
9372 dst_height2, args.left_pred_stride);
Jingning Han91d9a792017-04-18 12:01:52 -07009373 av1_setup_dst_planes(xd->plane, bsize, get_frame_new_buffer(cm), mi_row,
9374 mi_col);
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -08009375 calc_target_weighted_pred(cm, x, xd, mi_row, mi_col, args.above_pred_buf[0],
9376 args.above_pred_stride[0], args.left_pred_buf[0],
9377 args.left_pred_stride[0]);
Yue Chen5329a2b2017-02-28 17:33:00 +08009378 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07009379
9380 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
9381 if (!(cpi->ref_frame_flags & flag_list[ref_frame])) {
Zoe Liue9b15e22017-07-19 15:53:01 -07009382 // Skip checking missing references in both single and compound reference
9383 // modes. Note that a mode will be skipped iff both reference frames
9384 // are masked out.
Zoe Liuc082bbc2017-05-17 13:31:37 -07009385 ref_frame_skip_mask[0] |= (1 << ref_frame);
9386 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009387 } else {
9388 for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
9389 // Skip fixed mv modes for poor references
9390 if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) {
9391 mode_skip_mask[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
9392 break;
9393 }
9394 }
9395 }
9396 // If the segment reference frame feature is enabled....
9397 // then do nothing if the current ref frame is not allowed..
9398 if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
9399 get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
9400 ref_frame_skip_mask[0] |= (1 << ref_frame);
9401 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
9402 }
9403 }
9404
9405 // Disable this drop out case if the ref frame
9406 // segment level feature is enabled for this segment. This is to
9407 // prevent the possibility that we end up unable to pick any mode.
9408 if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
Sarah Parker2b9ec2e2017-10-30 17:34:08 -07009409 // Only consider GLOBALMV/ALTREF_FRAME for alt ref frame,
Yaowu Xuc27fc142016-08-22 16:08:15 -07009410 // unless ARNR filtering is enabled in which case we want
9411 // an unfiltered alternative. We allow near/nearest as well
9412 // because they may result in zero-zero MVs but be cheaper.
9413 if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
Sarah Parkere5299862016-08-16 14:57:37 -07009414 int_mv zeromv;
Sebastien Alaiwan365e6442017-10-16 11:35:00 +02009415 ref_frame_skip_mask[0] = (1 << LAST_FRAME) | (1 << LAST2_FRAME) |
9416 (1 << LAST3_FRAME) | (1 << BWDREF_FRAME) |
9417 (1 << ALTREF2_FRAME) | (1 << GOLDEN_FRAME);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009418 ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
9419 // TODO(zoeliu): To further explore whether following needs to be done for
9420 // BWDREF_FRAME as well.
9421 mode_skip_mask[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
David Barkercdcac6d2016-12-01 17:04:16 +00009422 zeromv.as_int = gm_get_motion_vector(&cm->global_motion[ALTREF_FRAME],
Sarah Parkerae7c4582017-02-28 16:30:30 -08009423 cm->allow_high_precision_mv, bsize,
RogerZhou3b635242017-09-19 10:06:46 -07009424 mi_col, mi_row, 0
9425#if CONFIG_AMVR
9426 ,
RogerZhou10a03802017-10-26 11:49:48 -07009427 cm->cur_frame_force_integer_mv
RogerZhou3b635242017-09-19 10:06:46 -07009428#endif
9429 )
David Barkercdcac6d2016-12-01 17:04:16 +00009430 .as_int;
Sarah Parkere5299862016-08-16 14:57:37 -07009431 if (frame_mv[NEARMV][ALTREF_FRAME].as_int != zeromv.as_int)
Yaowu Xuc27fc142016-08-22 16:08:15 -07009432 mode_skip_mask[ALTREF_FRAME] |= (1 << NEARMV);
Sarah Parkere5299862016-08-16 14:57:37 -07009433 if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != zeromv.as_int)
Yaowu Xuc27fc142016-08-22 16:08:15 -07009434 mode_skip_mask[ALTREF_FRAME] |= (1 << NEARESTMV);
Sarah Parkere5299862016-08-16 14:57:37 -07009435 if (frame_mv[NEAREST_NEARESTMV][ALTREF_FRAME].as_int != zeromv.as_int)
Yaowu Xuc27fc142016-08-22 16:08:15 -07009436 mode_skip_mask[ALTREF_FRAME] |= (1 << NEAREST_NEARESTMV);
Sarah Parkere5299862016-08-16 14:57:37 -07009437 if (frame_mv[NEAR_NEARMV][ALTREF_FRAME].as_int != zeromv.as_int)
Yaowu Xuc27fc142016-08-22 16:08:15 -07009438 mode_skip_mask[ALTREF_FRAME] |= (1 << NEAR_NEARMV);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009439 }
9440 }
9441
9442 if (cpi->rc.is_src_frame_alt_ref) {
9443 if (sf->alt_ref_search_fp) {
9444 assert(cpi->ref_frame_flags & flag_list[ALTREF_FRAME]);
9445 mode_skip_mask[ALTREF_FRAME] = 0;
9446 ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME);
9447 ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
9448 }
9449 }
9450
9451 if (sf->alt_ref_search_fp)
9452 if (!cm->show_frame && x->pred_mv_sad[GOLDEN_FRAME] < INT_MAX)
9453 if (x->pred_mv_sad[ALTREF_FRAME] > (x->pred_mv_sad[GOLDEN_FRAME] << 1))
9454 mode_skip_mask[ALTREF_FRAME] |= INTER_ALL;
9455
9456 if (sf->adaptive_mode_search) {
9457 if (cm->show_frame && !cpi->rc.is_src_frame_alt_ref &&
9458 cpi->rc.frames_since_golden >= 3)
Yaowu Xu36bad472017-05-16 18:29:53 -07009459 if ((x->pred_mv_sad[GOLDEN_FRAME] >> 1) > x->pred_mv_sad[LAST_FRAME])
Yaowu Xuc27fc142016-08-22 16:08:15 -07009460 mode_skip_mask[GOLDEN_FRAME] |= INTER_ALL;
9461 }
9462
9463 if (bsize > sf->max_intra_bsize) {
9464 ref_frame_skip_mask[0] |= (1 << INTRA_FRAME);
9465 ref_frame_skip_mask[1] |= (1 << INTRA_FRAME);
9466 }
9467
9468 mode_skip_mask[INTRA_FRAME] |=
9469 ~(sf->intra_y_mode_mask[max_txsize_lookup[bsize]]);
9470
9471 for (i = 0; i <= LAST_NEW_MV_INDEX; ++i) mode_threshold[i] = 0;
9472 for (i = LAST_NEW_MV_INDEX + 1; i < MAX_MODES; ++i)
9473 mode_threshold[i] = ((int64_t)rd_threshes[i] * rd_thresh_freq_fact[i]) >> 5;
9474
9475 midx = sf->schedule_mode_search ? mode_skip_start : 0;
9476 while (midx > 4) {
9477 uint8_t end_pos = 0;
9478 for (i = 5; i < midx; ++i) {
9479 if (mode_threshold[mode_map[i - 1]] > mode_threshold[mode_map[i]]) {
9480 uint8_t tmp = mode_map[i];
9481 mode_map[i] = mode_map[i - 1];
9482 mode_map[i - 1] = tmp;
9483 end_pos = i;
9484 }
9485 }
9486 midx = end_pos;
9487 }
9488
9489 if (cpi->sf.tx_type_search.fast_intra_tx_type_search)
9490 x->use_default_intra_tx_type = 1;
9491 else
9492 x->use_default_intra_tx_type = 0;
9493
9494 if (cpi->sf.tx_type_search.fast_inter_tx_type_search)
9495 x->use_default_inter_tx_type = 1;
9496 else
9497 x->use_default_inter_tx_type = 0;
Yushin Chod0b77ac2017-10-20 17:33:16 -07009498
Yaowu Xuc27fc142016-08-22 16:08:15 -07009499 for (i = 0; i < MB_MODE_COUNT; ++i)
9500 for (ref_frame = 0; ref_frame < TOTAL_REFS_PER_FRAME; ++ref_frame)
9501 modelled_rd[i][ref_frame] = INT64_MAX;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009502
Zoe Liuf40a9572017-10-13 12:37:19 -07009503#if CONFIG_EXT_SKIP
9504 x->skip_mode_rdcost = -1;
9505 x->skip_mode_index = -1;
9506#endif // CONFIG_EXT_SKIP
9507
Yaowu Xuc27fc142016-08-22 16:08:15 -07009508 for (midx = 0; midx < MAX_MODES; ++midx) {
9509 int mode_index;
9510 int mode_excluded = 0;
9511 int64_t this_rd = INT64_MAX;
9512 int disable_skip = 0;
9513 int compmode_cost = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009514 int rate2 = 0, rate_y = 0, rate_uv = 0;
9515 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
9516 int skippable = 0;
9517 int this_skip2 = 0;
9518 int64_t total_sse = INT64_MAX;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009519 uint8_t ref_frame_type;
Yushin Chod0b77ac2017-10-20 17:33:16 -07009520
Yaowu Xuc27fc142016-08-22 16:08:15 -07009521 mode_index = mode_map[midx];
Zoe Liuf40a9572017-10-13 12:37:19 -07009522#if CONFIG_EXT_SKIP
9523 x->skip_mode_index_candidate = mode_index;
9524#endif // CONFIG_EXT_SKIP
Yaowu Xuf883b422016-08-30 14:01:10 -07009525 this_mode = av1_mode_order[mode_index].mode;
9526 ref_frame = av1_mode_order[mode_index].ref_frame[0];
9527 second_ref_frame = av1_mode_order[mode_index].ref_frame[1];
Yaowu Xu4306b6e2016-09-27 12:55:32 -07009528 mbmi->ref_mv_idx = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009529
Yaowu Xuc27fc142016-08-22 16:08:15 -07009530 if (ref_frame > INTRA_FRAME && second_ref_frame == INTRA_FRAME) {
9531      // Mode must be compatible
Debargha Mukherjee37f6fe62017-02-10 21:44:13 -08009532 if (!is_interintra_allowed_mode(this_mode)) continue;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009533 if (!is_interintra_allowed_bsize(bsize)) continue;
9534 }
9535
9536 if (is_inter_compound_mode(this_mode)) {
9537 frame_mv[this_mode][ref_frame].as_int =
9538 frame_mv[compound_ref0_mode(this_mode)][ref_frame].as_int;
9539 frame_mv[this_mode][second_ref_frame].as_int =
9540 frame_mv[compound_ref1_mode(this_mode)][second_ref_frame].as_int;
9541 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07009542
9543 // Look at the reference frame of the best mode so far and set the
9544 // skip mask to look at a subset of the remaining modes.
9545 if (midx == mode_skip_start && best_mode_index >= 0) {
9546 switch (best_mbmode.ref_frame[0]) {
9547 case INTRA_FRAME: break;
9548 case LAST_FRAME:
9549 ref_frame_skip_mask[0] |= LAST_FRAME_MODE_MASK;
9550 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
9551 break;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009552 case LAST2_FRAME:
9553 ref_frame_skip_mask[0] |= LAST2_FRAME_MODE_MASK;
9554 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
9555 break;
9556 case LAST3_FRAME:
9557 ref_frame_skip_mask[0] |= LAST3_FRAME_MODE_MASK;
9558 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
9559 break;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009560 case GOLDEN_FRAME:
9561 ref_frame_skip_mask[0] |= GOLDEN_FRAME_MODE_MASK;
9562 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
9563 break;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009564 case BWDREF_FRAME:
9565 ref_frame_skip_mask[0] |= BWDREF_FRAME_MODE_MASK;
9566 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
9567 break;
Zoe Liue9b15e22017-07-19 15:53:01 -07009568 case ALTREF2_FRAME:
9569 ref_frame_skip_mask[0] |= ALTREF2_FRAME_MODE_MASK;
9570 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
9571 break;
Sebastien Alaiwan365e6442017-10-16 11:35:00 +02009572 case ALTREF_FRAME:
9573 ref_frame_skip_mask[0] |= ALTREF_FRAME_MODE_MASK;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009574 ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009575 break;
Emil Keyder01770b32017-01-20 18:03:11 -05009576 case NONE_FRAME:
Yaowu Xuc27fc142016-08-22 16:08:15 -07009577 case TOTAL_REFS_PER_FRAME:
9578 assert(0 && "Invalid Reference frame");
9579 break;
9580 }
9581 }
9582
9583 if ((ref_frame_skip_mask[0] & (1 << ref_frame)) &&
Yaowu Xuf883b422016-08-30 14:01:10 -07009584 (ref_frame_skip_mask[1] & (1 << AOMMAX(0, second_ref_frame))))
Yaowu Xuc27fc142016-08-22 16:08:15 -07009585 continue;
9586
9587 if (mode_skip_mask[ref_frame] & (1 << this_mode)) continue;
9588
9589 // Test best rd so far against threshold for trying this mode.
9590 if (best_mode_skippable && sf->schedule_mode_search)
9591 mode_threshold[mode_index] <<= 1;
9592
9593 if (best_rd < mode_threshold[mode_index]) continue;
9594
Yunqing Wangff4fa062017-04-21 10:56:08 -07009595 // This is only used in motion vector unit test.
9596 if (cpi->oxcf.motion_vector_unit_test && ref_frame == INTRA_FRAME) continue;
9597
Zoe Liuc01dddb2017-11-07 08:44:06 -08009598#if !CONFIG_EXT_COMP_REFS // Changes LL bitstream
Arild Fuldseth (arilfuld)3f429082017-04-28 15:54:28 +02009599 if (cpi->oxcf.pass == 0) {
9600 // Complexity-compression trade-offs
9601 // if (ref_frame == ALTREF_FRAME) continue;
9602 // if (ref_frame == BWDREF_FRAME) continue;
9603 if (second_ref_frame == ALTREF_FRAME) continue;
9604 // if (second_ref_frame == BWDREF_FRAME) continue;
9605 }
Zoe Liuc01dddb2017-11-07 08:44:06 -08009606#endif // !CONFIG_EXT_COMP_REFS
Yaowu Xuc27fc142016-08-22 16:08:15 -07009607 comp_pred = second_ref_frame > INTRA_FRAME;
9608 if (comp_pred) {
9609 if (!cpi->allow_comp_inter_inter) continue;
9610
9611 // Skip compound inter modes if ARF is not available.
9612 if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) continue;
9613
9614 // Do not allow compound prediction if the segment level reference frame
9615 // feature is in use as in this case there can only be one reference.
9616 if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) continue;
9617
9618 if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
9619 best_mode_index >= 0 && best_mbmode.ref_frame[0] == INTRA_FRAME)
9620 continue;
9621
9622 mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
9623 } else {
Zoe Liud4d8b862017-12-06 10:56:01 -08009624#if CONFIG_REF_ADAPT
9625 if (ref_frame != INTRA_FRAME) mode_excluded = 0;
9626#else
Yaowu Xuc27fc142016-08-22 16:08:15 -07009627 if (ref_frame != INTRA_FRAME)
9628 mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
Zoe Liud4d8b862017-12-06 10:56:01 -08009629#endif // CONFIG_REF_ADAPT
Yaowu Xuc27fc142016-08-22 16:08:15 -07009630 }
9631
9632 if (ref_frame == INTRA_FRAME) {
9633 if (sf->adaptive_mode_search)
9634 if ((x->source_variance << num_pels_log2_lookup[bsize]) > best_pred_sse)
9635 continue;
9636
9637 if (this_mode != DC_PRED) {
9638 // Disable intra modes other than DC_PRED for blocks with low variance
9639 // Threshold for intra skipping based on source variance
9640 // TODO(debargha): Specialize the threshold for super block sizes
9641 const unsigned int skip_intra_var_thresh = 64;
9642 if ((mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
9643 x->source_variance < skip_intra_var_thresh)
9644 continue;
9645 // Only search the oblique modes if the best so far is
9646 // one of the neighboring directional modes
9647 if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
Urvang Joshi96d1c0a2017-10-10 13:15:32 -07009648 (this_mode >= D45_PRED && this_mode <= PAETH_PRED)) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07009649 if (best_mode_index >= 0 && best_mbmode.ref_frame[0] > INTRA_FRAME)
9650 continue;
9651 }
9652 if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
9653 if (conditional_skipintra(this_mode, best_intra_mode)) continue;
9654 }
9655 }
David Barkercf3d0b02016-11-10 10:14:49 +00009656 } else if (cm->global_motion[ref_frame].wmtype == IDENTITY &&
Sarah Parkere5299862016-08-16 14:57:37 -07009657 (!comp_pred ||
David Barkercf3d0b02016-11-10 10:14:49 +00009658 cm->global_motion[second_ref_frame].wmtype == IDENTITY)) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07009659 const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, second_ref_frame };
Yue Chenb23d00a2017-07-28 17:01:21 -07009660 if (!check_best_zero_mv(cpi, x, mbmi_ext->mode_context,
Sebastien Alaiwan0bdea0d2017-10-02 15:15:05 +02009661 mbmi_ext->compound_mode_context, frame_mv,
9662 this_mode, ref_frames, bsize, -1, mi_row, mi_col))
Yaowu Xuc27fc142016-08-22 16:08:15 -07009663 continue;
9664 }
9665
9666 mbmi->mode = this_mode;
Luc Trudeaud6d9eee2017-07-12 12:36:50 -04009667 mbmi->uv_mode = UV_DC_PRED;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009668 mbmi->ref_frame[0] = ref_frame;
9669 mbmi->ref_frame[1] = second_ref_frame;
9670 pmi->palette_size[0] = 0;
9671 pmi->palette_size[1] = 0;
hui su5db97432016-10-14 16:10:14 -07009672#if CONFIG_FILTER_INTRA
9673 mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
9674 mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
9675#endif // CONFIG_FILTER_INTRA
Yaowu Xuc27fc142016-08-22 16:08:15 -07009676 // Evaluate all sub-pel filters irrespective of whether we can use
9677 // them for this frame.
Debargha Mukherjee0df711f2017-05-02 16:00:20 -07009678
9679 set_default_interp_filters(mbmi, cm->interp_filter);
9680
Yaowu Xuc27fc142016-08-22 16:08:15 -07009681 mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
Yue Chencb60b182016-10-13 15:18:22 -07009682 mbmi->motion_mode = SIMPLE_TRANSLATION;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009683
9684 x->skip = 0;
9685 set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
9686
9687 // Select prediction reference frames.
9688 for (i = 0; i < MAX_MB_PLANE; i++) {
9689 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
9690 if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
9691 }
9692
Debargha Mukherjeecb603792016-10-04 13:10:23 -07009693 mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009694
Jingning Hancf842ad2017-10-25 12:02:30 -07009695#if CONFIG_FRAME_MARKER
9696 if (sf->selective_ref_frame) {
Debargha Mukherjee06b40cc2017-11-02 13:39:39 -07009697 if (sf->selective_ref_frame == 2) {
9698 if (mbmi->ref_frame[0] == ALTREF2_FRAME ||
9699 mbmi->ref_frame[1] == ALTREF2_FRAME)
9700 if (cm->cur_frame->alt2_frame_offset < cm->frame_offset) continue;
9701 if (mbmi->ref_frame[0] == BWDREF_FRAME ||
9702 mbmi->ref_frame[1] == BWDREF_FRAME)
9703 if (cm->cur_frame->bwd_frame_offset < cm->frame_offset) continue;
9704 }
Jingning Hancf842ad2017-10-25 12:02:30 -07009705 if (mbmi->ref_frame[0] == LAST3_FRAME ||
9706 mbmi->ref_frame[1] == LAST3_FRAME)
9707 if (cm->cur_frame->lst3_frame_offset <= cm->cur_frame->gld_frame_offset)
9708 continue;
9709 if (mbmi->ref_frame[0] == LAST2_FRAME ||
9710 mbmi->ref_frame[1] == LAST2_FRAME)
9711 if (cm->cur_frame->lst2_frame_offset <= cm->cur_frame->gld_frame_offset)
9712 continue;
9713 }
Zoe Liu77fb5be2017-11-02 14:36:19 -07009714
9715 // One-sided compound is used only when all reference frames are one-sided.
9716 if (sf->selective_ref_frame && comp_pred && !cpi->all_one_sided_refs) {
9717 unsigned int ref_offsets[2];
9718 for (i = 0; i < 2; ++i) {
9719 const int buf_idx = cm->frame_refs[mbmi->ref_frame[i] - LAST_FRAME].idx;
9720 assert(buf_idx >= 0);
9721 ref_offsets[i] = cm->buffer_pool->frame_bufs[buf_idx].cur_frame_offset;
9722 }
9723 if ((ref_offsets[0] <= cm->frame_offset &&
9724 ref_offsets[1] <= cm->frame_offset) ||
9725 (ref_offsets[0] > cm->frame_offset &&
9726 ref_offsets[1] > cm->frame_offset))
9727 continue;
9728 }
9729#endif // CONFIG_FRAME_MARKER
Jingning Hancf842ad2017-10-25 12:02:30 -07009730
Yaowu Xuc27fc142016-08-22 16:08:15 -07009731 if (ref_frame == INTRA_FRAME) {
Angie Chiang0e9a2e92016-11-08 09:45:40 -08009732 RD_STATS rd_stats_y;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009733 TX_SIZE uv_tx;
9734 struct macroblockd_plane *const pd = &xd->plane[1];
9735#if CONFIG_EXT_INTRA
hui su45dc5972016-12-08 17:42:50 -08009736 is_directional_mode = av1_is_directional_mode(mbmi->mode, bsize);
Joe Young830d4ce2017-05-30 17:48:13 -07009737 if (is_directional_mode && av1_use_angle_delta(bsize)) {
hui su45dc5972016-12-08 17:42:50 -08009738 int rate_dummy;
hui su9a416f52017-01-13 11:37:53 -08009739 int64_t model_rd = INT64_MAX;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009740 if (!angle_stats_ready) {
9741 const int src_stride = x->plane[0].src.stride;
9742 const uint8_t *src = x->plane[0].src.buf;
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02009743#if CONFIG_HIGHBITDEPTH
Yaowu Xuc27fc142016-08-22 16:08:15 -07009744 if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
hui su9cc10652017-04-27 17:22:07 -07009745 highbd_angle_estimation(src, src_stride, rows, cols, bsize,
Yaowu Xuc27fc142016-08-22 16:08:15 -07009746 directional_mode_skip_mask);
9747 else
Sebastien Alaiwan71e87842017-04-12 16:03:28 +02009748#endif // CONFIG_HIGHBITDEPTH
hui su9cc10652017-04-27 17:22:07 -07009749 angle_estimation(src, src_stride, rows, cols, bsize,
Yaowu Xuc27fc142016-08-22 16:08:15 -07009750 directional_mode_skip_mask);
9751 angle_stats_ready = 1;
9752 }
9753 if (directional_mode_skip_mask[mbmi->mode]) continue;
hui su45dc5972016-12-08 17:42:50 -08009754 rd_stats_y.rate = INT_MAX;
Yue Chenb0f808b2017-04-26 11:55:14 -07009755 rd_pick_intra_angle_sby(cpi, x, &rate_dummy, &rd_stats_y, bsize,
9756 intra_mode_cost[mbmi->mode], best_rd,
9757 &model_rd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009758 } else {
9759 mbmi->angle_delta[0] = 0;
Angie Chiang0e9a2e92016-11-08 09:45:40 -08009760 super_block_yrd(cpi, x, &rd_stats_y, bsize, best_rd);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009761 }
9762#else
Angie Chiang0e9a2e92016-11-08 09:45:40 -08009763 super_block_yrd(cpi, x, &rd_stats_y, bsize, best_rd);
hui su45dc5972016-12-08 17:42:50 -08009764#endif // CONFIG_EXT_INTRA
Angie Chiang0e9a2e92016-11-08 09:45:40 -08009765 rate_y = rd_stats_y.rate;
9766 distortion_y = rd_stats_y.dist;
9767 skippable = rd_stats_y.skip;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009768
hui su5db97432016-10-14 16:10:14 -07009769#if CONFIG_FILTER_INTRA
Yue Chen18f6c152017-11-06 11:23:47 -08009770 if (mbmi->mode == DC_PRED && !xd->lossless[mbmi->segment_id] &&
9771 av1_filter_intra_allowed_bsize(mbmi->sb_type)) {
Yue Chen57b8ff62017-10-10 23:37:31 -07009772 RD_STATS rd_stats_y_fi;
9773 int filter_intra_selected_flag = 0;
9774 TX_SIZE best_tx_size = mbmi->tx_size;
9775 TX_TYPE best_tx_type = mbmi->tx_type;
9776 FILTER_INTRA_MODE best_fi_mode = FILTER_DC_PRED;
Yue Chen95e13e22017-11-01 23:56:35 -07009777 int64_t best_rd_tmp = INT64_MAX;
9778 if (rate_y != INT_MAX &&
9779 av1_filter_intra_allowed_txsize(best_tx_size)) {
Yue Chen4eba69b2017-11-09 22:37:35 -08009780 best_rd_tmp = RDCOST(x->rdmult,
9781 rate_y + x->filter_intra_cost[mbmi->tx_size][0] +
9782 intra_mode_cost[mbmi->mode],
9783 distortion_y);
Yue Chen95e13e22017-11-01 23:56:35 -07009784 }
Yue Chen57b8ff62017-10-10 23:37:31 -07009785
9786 mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 1;
9787 for (FILTER_INTRA_MODE fi_mode = FILTER_DC_PRED;
9788 fi_mode < FILTER_INTRA_MODES; ++fi_mode) {
9789 int this_rate_tmp;
9790 int64_t this_rd_tmp;
9791 mbmi->filter_intra_mode_info.filter_intra_mode[0] = fi_mode;
9792
9793 super_block_yrd(cpi, x, &rd_stats_y_fi, bsize, best_rd);
9794 if (rd_stats_y_fi.rate == INT_MAX) continue;
9795
Yue Chen4eba69b2017-11-09 22:37:35 -08009796 this_rate_tmp = rd_stats_y_fi.rate +
9797 x->filter_intra_cost[mbmi->tx_size][1] +
9798 x->filter_intra_mode_cost[0][fi_mode] +
9799 intra_mode_cost[mbmi->mode];
Yue Chen57b8ff62017-10-10 23:37:31 -07009800 this_rd_tmp = RDCOST(x->rdmult, this_rate_tmp, rd_stats_y_fi.dist);
9801
9802 if (this_rd_tmp < best_rd_tmp) {
9803 best_tx_size = mbmi->tx_size;
9804 best_tx_type = mbmi->tx_type;
9805 best_fi_mode = fi_mode;
9806 rd_stats_y = rd_stats_y_fi;
9807 rate_y = rd_stats_y_fi.rate;
9808 distortion_y = rd_stats_y_fi.dist;
9809 skippable = rd_stats_y_fi.skip;
9810 filter_intra_selected_flag = 1;
9811 best_rd_tmp = this_rd_tmp;
9812 }
9813 }
9814
9815 mbmi->tx_size = best_tx_size;
9816 mbmi->tx_type = best_tx_type;
9817 if (filter_intra_selected_flag) {
9818 mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 1;
9819 mbmi->filter_intra_mode_info.filter_intra_mode[0] = best_fi_mode;
9820 } else {
9821 mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
9822 }
9823 }
9824#endif
9825
9826 if (rate_y == INT_MAX) continue;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009827
Debargha Mukherjee2f123402016-08-30 17:43:38 -07009828 uv_tx = uv_txsize_lookup[bsize][mbmi->tx_size][pd->subsampling_x]
9829 [pd->subsampling_y];
Yaowu Xuc27fc142016-08-22 16:08:15 -07009830 if (rate_uv_intra[uv_tx] == INT_MAX) {
Luc Trudeau9d4cbb82017-07-27 17:01:32 -04009831 choose_intra_uv_mode(cpi, x, bsize, uv_tx, &rate_uv_intra[uv_tx],
Urvang Joshi368fbc92016-10-17 16:31:34 -07009832 &rate_uv_tokenonly[uv_tx], &dist_uvs[uv_tx],
9833 &skip_uvs[uv_tx], &mode_uv[uv_tx]);
hui su9bc1d8d2017-03-24 12:36:03 -07009834 if (try_palette) pmi_uv[uv_tx] = *pmi;
Urvang Joshib100db72016-10-12 16:28:56 -07009835
Yaowu Xuc27fc142016-08-22 16:08:15 -07009836#if CONFIG_EXT_INTRA
Yaowu Xuc27fc142016-08-22 16:08:15 -07009837 uv_angle_delta[uv_tx] = mbmi->angle_delta[1];
9838#endif // CONFIG_EXT_INTRA
9839 }
9840
9841 rate_uv = rate_uv_tokenonly[uv_tx];
Urvang Joshi368fbc92016-10-17 16:31:34 -07009842 distortion_uv = dist_uvs[uv_tx];
9843 skippable = skippable && skip_uvs[uv_tx];
Yaowu Xuc27fc142016-08-22 16:08:15 -07009844 mbmi->uv_mode = mode_uv[uv_tx];
hui su9bc1d8d2017-03-24 12:36:03 -07009845 if (try_palette) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07009846 pmi->palette_size[1] = pmi_uv[uv_tx].palette_size[1];
9847 memcpy(pmi->palette_colors + PALETTE_MAX_SIZE,
9848 pmi_uv[uv_tx].palette_colors + PALETTE_MAX_SIZE,
9849 2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0]));
9850 }
Urvang Joshib100db72016-10-12 16:28:56 -07009851
Yaowu Xuc27fc142016-08-22 16:08:15 -07009852#if CONFIG_EXT_INTRA
9853 mbmi->angle_delta[1] = uv_angle_delta[uv_tx];
Yaowu Xuc27fc142016-08-22 16:08:15 -07009854#endif // CONFIG_EXT_INTRA
9855
Jingning Han36fe3202017-02-20 22:31:49 -08009856 rate2 = rate_y + intra_mode_cost[mbmi->mode];
9857 if (!x->skip_chroma_rd)
Yue Chenb23d00a2017-07-28 17:01:21 -07009858 rate2 += rate_uv + x->intra_uv_mode_cost[mbmi->mode][mbmi->uv_mode];
Jingning Han36fe3202017-02-20 22:31:49 -08009859
hui su9bc1d8d2017-03-24 12:36:03 -07009860 if (try_palette && mbmi->mode == DC_PRED) {
Yue Chendab2ca92017-10-16 17:48:48 -07009861 rate2 += x->palette_y_mode_cost[bsize - BLOCK_8X8][palette_ctx][0];
hui su9bc1d8d2017-03-24 12:36:03 -07009862 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07009863
Rupert Swarbrickfcff0b22017-10-05 09:26:04 +01009864 if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(bsize)) {
Yaowu Xuc27fc142016-08-22 16:08:15 -07009865 // super_block_yrd above includes the cost of the tx_size in the
9866 // tokenonly rate, but for intra blocks, tx_size is always coded
9867 // (prediction granularity), so we account for it in the full rate,
9868 // not the tokenonly rate.
Yue Chen3dd03e32017-10-17 15:39:52 -07009869 rate_y -= tx_size_cost(cm, x, bsize, mbmi->tx_size);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009870 }
9871#if CONFIG_EXT_INTRA
9872 if (is_directional_mode) {
Joe Young830d4ce2017-05-30 17:48:13 -07009873 if (av1_use_angle_delta(bsize)) {
Joe Young3ca43bf2017-10-06 15:12:46 -07009874#if CONFIG_EXT_INTRA_MOD
9875 rate2 += x->angle_delta_cost[mbmi->mode - V_PRED]
9876 [mbmi->angle_delta[0] + MAX_ANGLE_DELTA];
9877#else
Joe Young830d4ce2017-05-30 17:48:13 -07009878 rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
9879 MAX_ANGLE_DELTA + mbmi->angle_delta[0]);
Joe Young3ca43bf2017-10-06 15:12:46 -07009880#endif // CONFIG_EXT_INTRA_MOD
Joe Young830d4ce2017-05-30 17:48:13 -07009881 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07009882 }
Luc Trudeauf24a35f2017-09-11 11:56:33 -04009883 if (av1_is_directional_mode(get_uv_mode(mbmi->uv_mode), bsize) &&
Joe Young830d4ce2017-05-30 17:48:13 -07009884 av1_use_angle_delta(bsize)) {
Joe Young3ca43bf2017-10-06 15:12:46 -07009885#if CONFIG_EXT_INTRA_MOD
9886 rate2 += x->angle_delta_cost[mbmi->uv_mode - V_PRED]
9887 [mbmi->angle_delta[1] + MAX_ANGLE_DELTA];
9888#else
hui su0a6731f2017-04-26 15:23:47 -07009889 rate2 += write_uniform_cost(2 * MAX_ANGLE_DELTA + 1,
9890 MAX_ANGLE_DELTA + mbmi->angle_delta[1]);
Joe Young3ca43bf2017-10-06 15:12:46 -07009891#endif // CONFIG_EXT_INTRA_MOD
Yaowu Xuc27fc142016-08-22 16:08:15 -07009892 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07009893#endif // CONFIG_EXT_INTRA
hui su5db97432016-10-14 16:10:14 -07009894#if CONFIG_FILTER_INTRA
Yue Chen4eba69b2017-11-09 22:37:35 -08009895 if (mbmi->mode == DC_PRED &&
9896 av1_filter_intra_allowed_txsize(mbmi->tx_size)) {
hui su5db97432016-10-14 16:10:14 -07009897 rate2 +=
Yue Chen4eba69b2017-11-09 22:37:35 -08009898 x->filter_intra_cost[mbmi->tx_size][mbmi->filter_intra_mode_info
9899 .use_filter_intra_mode[0]];
hui su5db97432016-10-14 16:10:14 -07009900 if (mbmi->filter_intra_mode_info.use_filter_intra_mode[0]) {
Yue Chen57b8ff62017-10-10 23:37:31 -07009901 rate2 += x->filter_intra_mode_cost[0][mbmi->filter_intra_mode_info
9902 .filter_intra_mode[0]];
hui su5db97432016-10-14 16:10:14 -07009903 }
9904 }
hui su5db97432016-10-14 16:10:14 -07009905#endif // CONFIG_FILTER_INTRA
Urvang Joshi96d1c0a2017-10-10 13:15:32 -07009906 if (mbmi->mode != DC_PRED && mbmi->mode != PAETH_PRED)
Yaowu Xuc27fc142016-08-22 16:08:15 -07009907 rate2 += intra_cost_penalty;
9908 distortion2 = distortion_y + distortion_uv;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009909 } else {
Yaowu Xuc27fc142016-08-22 16:08:15 -07009910 int_mv backup_ref_mv[2];
9911
Debargha Mukherjee0f248c42017-09-07 12:40:18 -07009912 if (!is_comp_ref_allowed(bsize) && mbmi->ref_frame[1] > INTRA_FRAME)
9913 continue;
Jingning Hanc41a5492017-02-24 11:18:52 -08009914
Yaowu Xuc27fc142016-08-22 16:08:15 -07009915 backup_ref_mv[0] = mbmi_ext->ref_mvs[ref_frame][0];
9916 if (comp_pred) backup_ref_mv[1] = mbmi_ext->ref_mvs[second_ref_frame][0];
Yaowu Xuc27fc142016-08-22 16:08:15 -07009917 if (second_ref_frame == INTRA_FRAME) {
9918 if (best_single_inter_ref != ref_frame) continue;
Debargha Mukherjeecb603792016-10-04 13:10:23 -07009919 mbmi->interintra_mode = intra_to_interintra_mode[best_intra_mode];
hui su5db97432016-10-14 16:10:14 -07009920// TODO(debargha|geza.lore):
9921// Should we use ext_intra modes for interintra?
Yaowu Xuc27fc142016-08-22 16:08:15 -07009922#if CONFIG_EXT_INTRA
Yaowu Xuc27fc142016-08-22 16:08:15 -07009923 mbmi->angle_delta[0] = 0;
9924 mbmi->angle_delta[1] = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -07009925#endif // CONFIG_EXT_INTRA
hui su5db97432016-10-14 16:10:14 -07009926#if CONFIG_FILTER_INTRA
9927 mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
9928 mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
9929#endif // CONFIG_FILTER_INTRA
Yaowu Xuc27fc142016-08-22 16:08:15 -07009930 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07009931 mbmi->ref_mv_idx = 0;
Yaowu Xuf883b422016-08-30 14:01:10 -07009932 ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
Yaowu Xuc27fc142016-08-22 16:08:15 -07009933
David Barker404b2e82017-03-27 13:07:47 +01009934 if (comp_pred) {
9935 if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
David Barker3dfba992017-04-03 16:10:09 +01009936 int ref_mv_idx = 0;
9937 // Special case: NEAR_NEWMV and NEW_NEARMV modes use
9938 // 1 + mbmi->ref_mv_idx (like NEARMV) instead of
9939 // mbmi->ref_mv_idx (like NEWMV)
9940 if (mbmi->mode == NEAR_NEWMV || mbmi->mode == NEW_NEARMV)
9941 ref_mv_idx = 1;
9942
9943 if (compound_ref0_mode(mbmi->mode) == NEWMV) {
9944 int_mv this_mv =
9945 mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
David Barker404b2e82017-03-27 13:07:47 +01009946 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
9947 xd->n8_h << MI_SIZE_LOG2, xd);
9948 mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv;
9949 }
David Barker3dfba992017-04-03 16:10:09 +01009950 if (compound_ref1_mode(mbmi->mode) == NEWMV) {
9951 int_mv this_mv =
9952 mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
David Barker404b2e82017-03-27 13:07:47 +01009953 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
9954 xd->n8_h << MI_SIZE_LOG2, xd);
9955 mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0] = this_mv;
9956 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07009957 }
David Barker404b2e82017-03-27 13:07:47 +01009958 } else {
Zoe Liu1157d502017-04-30 07:57:14 -07009959 if (mbmi->mode == NEWMV && mbmi_ext->ref_mv_count[ref_frame_type] > 1) {
David Barker404b2e82017-03-27 13:07:47 +01009960 int ref;
9961 for (ref = 0; ref < 1 + comp_pred; ++ref) {
9962 int_mv this_mv =
9963 (ref == 0) ? mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv
9964 : mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
9965 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
9966 xd->n8_h << MI_SIZE_LOG2, xd);
9967 mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0] = this_mv;
9968 }
9969 }
Yaowu Xuc27fc142016-08-22 16:08:15 -07009970 }
Cheng Chenca6958c2017-10-10 14:00:50 -07009971#if CONFIG_JNT_COMP
9972 {
9973 int cum_rate = rate2;
9974 MB_MODE_INFO backup_mbmi = *mbmi;
9975
9976 int_mv backup_frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
9977 int_mv backup_single_newmv[TOTAL_REFS_PER_FRAME];
9978 int backup_single_newmv_rate[TOTAL_REFS_PER_FRAME];
9979 int64_t backup_modelled_rd[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME];
9980
9981 memcpy(backup_frame_mv, frame_mv, sizeof(frame_mv));
9982 memcpy(backup_single_newmv, single_newmv, sizeof(single_newmv));
9983 memcpy(backup_single_newmv_rate, single_newmv_rate,
9984 sizeof(single_newmv_rate));
9985 memcpy(backup_modelled_rd, modelled_rd, sizeof(modelled_rd));
9986
9987 InterpFilters backup_interp_filters = mbmi->interp_filters;
9988
9989 for (int comp_idx = 0; comp_idx < 1 + has_second_ref(mbmi);
9990 ++comp_idx) {
9991 RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
9992 av1_init_rd_stats(&rd_stats);
9993 av1_init_rd_stats(&rd_stats_y);
9994 av1_init_rd_stats(&rd_stats_uv);
9995 rd_stats.rate = cum_rate;
9996
9997 memcpy(frame_mv, backup_frame_mv, sizeof(frame_mv));
9998 memcpy(single_newmv, backup_single_newmv, sizeof(single_newmv));
9999 memcpy(single_newmv_rate, backup_single_newmv_rate,
10000 sizeof(single_newmv_rate));
10001 memcpy(modelled_rd, backup_modelled_rd, sizeof(modelled_rd));
10002
10003 mbmi->interp_filters = backup_interp_filters;
10004
10005 int dummy_disable_skip = 0;
10006
10007 // Point to variables that are maintained between loop iterations
10008 args.single_newmv = single_newmv;
10009 args.single_newmv_rate = single_newmv_rate;
10010 args.modelled_rd = modelled_rd;
10011 mbmi->compound_idx = comp_idx;
10012
10013 int64_t tmp_rd = handle_inter_mode(
10014 cpi, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv,
10015 &dummy_disable_skip, frame_mv, mi_row, mi_col, &args, best_rd);
10016
10017 if (tmp_rd < INT64_MAX) {
10018 if (RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist) <
10019 RDCOST(x->rdmult, 0, rd_stats.sse))
10020 tmp_rd =
10021 RDCOST(x->rdmult, rd_stats.rate + x->skip_cost[skip_ctx][0],
10022 rd_stats.dist);
10023 else
10024 tmp_rd = RDCOST(x->rdmult,
10025 rd_stats.rate + x->skip_cost[skip_ctx][1] -
10026 rd_stats_y.rate - rd_stats_uv.rate,
10027 rd_stats.sse);
10028 }
10029
10030 if (tmp_rd < this_rd) {
10031 this_rd = tmp_rd;
10032 rate2 = rd_stats.rate;
10033 skippable = rd_stats.skip;
10034 distortion2 = rd_stats.dist;
10035 total_sse = rd_stats.sse;
10036 rate_y = rd_stats_y.rate;
10037 rate_uv = rd_stats_uv.rate;
10038 disable_skip = dummy_disable_skip;
10039 backup_mbmi = *mbmi;
Jingning Han5d0320f2017-11-07 08:49:16 -080010040 for (i = 0; i < MAX_MB_PLANE; ++i)
10041 memcpy(x->blk_skip_drl[i], x->blk_skip[i],
10042 sizeof(uint8_t) * ctx->num_4x4_blk);
Cheng Chenca6958c2017-10-10 14:00:50 -070010043 }
10044 }
10045 *mbmi = backup_mbmi;
Jingning Han5d0320f2017-11-07 08:49:16 -080010046 for (i = 0; i < MAX_MB_PLANE; ++i)
10047 memcpy(x->blk_skip[i], x->blk_skip_drl[i],
10048 sizeof(uint8_t) * ctx->num_4x4_blk);
Cheng Chenca6958c2017-10-10 14:00:50 -070010049 }
Sebastien Alaiwan34d55662017-11-15 09:36:03 +010010050#else // CONFIG_JNT_COMP
Angie Chiang76159122016-11-09 12:13:22 -080010051 {
10052 RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
10053 av1_init_rd_stats(&rd_stats);
10054 rd_stats.rate = rate2;
Fergus Simpson073c6f32017-02-17 12:13:48 -080010055
Zoe Liu7f24e1b2017-03-17 17:42:05 -070010056 // Point to variables that are maintained between loop iterations
10057 args.single_newmv = single_newmv;
Zoe Liu7f24e1b2017-03-17 17:42:05 -070010058 args.single_newmv_rate = single_newmv_rate;
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -080010059 args.modelled_rd = modelled_rd;
Fergus Simpson3424c2d2017-03-09 11:48:15 -080010060 this_rd = handle_inter_mode(cpi, x, bsize, &rd_stats, &rd_stats_y,
10061 &rd_stats_uv, &disable_skip, frame_mv,
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -080010062 mi_row, mi_col, &args, best_rd);
Angie Chiang76159122016-11-09 12:13:22 -080010063 rate2 = rd_stats.rate;
10064 skippable = rd_stats.skip;
10065 distortion2 = rd_stats.dist;
10066 total_sse = rd_stats.sse;
10067 rate_y = rd_stats_y.rate;
10068 rate_uv = rd_stats_uv.rate;
10069 }
Cheng Chenca6958c2017-10-10 14:00:50 -070010070#endif // CONFIG_JNT_COMP
Yaowu Xuc27fc142016-08-22 16:08:15 -070010071
Sebastien Alaiwan34d55662017-11-15 09:36:03 +010010072 // TODO(jingning): This needs some refactoring to improve code quality
10073 // and reduce redundant steps.
David Barker3dfba992017-04-03 16:10:09 +010010074 if ((have_nearmv_in_inter_mode(mbmi->mode) &&
David Barker404b2e82017-03-27 13:07:47 +010010075 mbmi_ext->ref_mv_count[ref_frame_type] > 2) ||
10076 ((mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) &&
Sebastien Alaiwan34d55662017-11-15 09:36:03 +010010077 mbmi_ext->ref_mv_count[ref_frame_type] > 1)) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070010078 int_mv backup_mv = frame_mv[NEARMV][ref_frame];
10079 MB_MODE_INFO backup_mbmi = *mbmi;
10080 int backup_skip = x->skip;
10081 int64_t tmp_ref_rd = this_rd;
10082 int ref_idx;
10083
Sebastien Alaiwan0bdea0d2017-10-02 15:15:05 +020010084 // TODO(jingning): This should be deprecated shortly.
David Barker3dfba992017-04-03 16:10:09 +010010085 int idx_offset = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010086 int ref_set =
Yaowu Xuf883b422016-08-30 14:01:10 -070010087 AOMMIN(2, mbmi_ext->ref_mv_count[ref_frame_type] - 1 - idx_offset);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010088
10089 uint8_t drl_ctx =
Yaowu Xuf883b422016-08-30 14:01:10 -070010090 av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx_offset);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010091      // Back up the NEWMV motion vectors; restored after the ref_mv_idx loop.
10092 int_mv backup_fmv[2];
10093 backup_fmv[0] = frame_mv[NEWMV][ref_frame];
10094 if (comp_pred) backup_fmv[1] = frame_mv[NEWMV][second_ref_frame];
10095
Yue Chenb23d00a2017-07-28 17:01:21 -070010096 rate2 += (rate2 < INT_MAX ? x->drl_mode_cost0[drl_ctx][0] : 0);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010097
10098 if (this_rd < INT64_MAX) {
Urvang Joshi70006e42017-06-14 16:08:55 -070010099 if (RDCOST(x->rdmult, rate_y + rate_uv, distortion2) <
10100 RDCOST(x->rdmult, 0, total_sse))
10101 tmp_ref_rd = RDCOST(
Zoe Liu1eed2df2017-10-16 17:13:15 -070010102 x->rdmult, rate2 + x->skip_cost[av1_get_skip_context(xd)][0],
Urvang Joshi70006e42017-06-14 16:08:55 -070010103 distortion2);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010104 else
clang-format4eafefe2017-09-04 12:51:20 -070010105 tmp_ref_rd =
10106 RDCOST(x->rdmult,
Zoe Liu1eed2df2017-10-16 17:13:15 -070010107 rate2 + x->skip_cost[av1_get_skip_context(xd)][1] -
clang-format4eafefe2017-09-04 12:51:20 -070010108 rate_y - rate_uv,
10109 total_sse);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010110 }
Yaowu Xuc27fc142016-08-22 16:08:15 -070010111 for (i = 0; i < MAX_MB_PLANE; ++i)
10112 memcpy(x->blk_skip_drl[i], x->blk_skip[i],
10113 sizeof(uint8_t) * ctx->num_4x4_blk);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010114
Cheng Chenca6958c2017-10-10 14:00:50 -070010115#if CONFIG_JNT_COMP
10116 for (int sidx = 0; sidx < ref_set * (1 + has_second_ref(mbmi)); ++sidx)
10117#else
10118 for (ref_idx = 0; ref_idx < ref_set; ++ref_idx)
10119#endif // CONFIG_JNT_COMP
10120 {
Yaowu Xuc27fc142016-08-22 16:08:15 -070010121 int64_t tmp_alt_rd = INT64_MAX;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010122 int dummy_disable_skip = 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010123 int_mv cur_mv;
Angie Chiang76159122016-11-09 12:13:22 -080010124 RD_STATS tmp_rd_stats, tmp_rd_stats_y, tmp_rd_stats_uv;
Cheng Chenca6958c2017-10-10 14:00:50 -070010125#if CONFIG_JNT_COMP
10126 ref_idx = sidx;
10127 if (has_second_ref(mbmi)) ref_idx /= 2;
10128 mbmi->compound_idx = sidx % 2;
10129#endif // CONFIG_JNT_COMP
Yaowu Xuc27fc142016-08-22 16:08:15 -070010130
Yaowu Xu5bfbfdf2016-11-22 16:43:34 -080010131 av1_invalid_rd_stats(&tmp_rd_stats);
Yushin Choc0f6bf22017-06-09 16:08:02 -070010132
Jingning Han52617b22017-04-11 12:50:08 -070010133 x->skip = 0;
Yaowu Xu5bfbfdf2016-11-22 16:43:34 -080010134
Yaowu Xuc27fc142016-08-22 16:08:15 -070010135 mbmi->ref_mv_idx = 1 + ref_idx;
10136
David Barker3dfba992017-04-03 16:10:09 +010010137 if (comp_pred) {
10138 int ref_mv_idx = mbmi->ref_mv_idx;
10139 // Special case: NEAR_NEWMV and NEW_NEARMV modes use
10140 // 1 + mbmi->ref_mv_idx (like NEARMV) instead of
10141 // mbmi->ref_mv_idx (like NEWMV)
10142 if (mbmi->mode == NEAR_NEWMV || mbmi->mode == NEW_NEARMV)
10143 ref_mv_idx = 1 + mbmi->ref_mv_idx;
10144
10145 if (compound_ref0_mode(mbmi->mode) == NEWMV) {
10146 int_mv this_mv =
10147 mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
10148 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
10149 xd->n8_h << MI_SIZE_LOG2, xd);
10150 mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv;
10151 } else if (compound_ref0_mode(mbmi->mode) == NEARESTMV) {
10152 int_mv this_mv =
10153 mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv;
10154 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
10155 xd->n8_h << MI_SIZE_LOG2, xd);
10156 mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv;
10157 }
10158
10159 if (compound_ref1_mode(mbmi->mode) == NEWMV) {
10160 int_mv this_mv =
10161 mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
10162 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
10163 xd->n8_h << MI_SIZE_LOG2, xd);
10164 mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0] = this_mv;
10165 } else if (compound_ref1_mode(mbmi->mode) == NEARESTMV) {
10166 int_mv this_mv =
10167 mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv;
10168 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
10169 xd->n8_h << MI_SIZE_LOG2, xd);
10170 mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0] = this_mv;
10171 }
10172 } else {
Jingning Hanc3ef32a2017-12-04 09:56:53 -080010173 int_mv this_mv =
10174 mbmi_ext->ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
10175 .this_mv;
10176 clamp_mv_ref(&this_mv.as_mv, xd->n8_w << MI_SIZE_LOG2,
10177 xd->n8_h << MI_SIZE_LOG2, xd);
10178 mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010179 }
10180
10181 cur_mv =
10182 mbmi_ext->ref_mv_stack[ref_frame][mbmi->ref_mv_idx + idx_offset]
10183 .this_mv;
10184 clamp_mv2(&cur_mv.as_mv, xd);
10185
Alex Converse0fa0f422017-04-24 12:51:14 -070010186 if (!mv_check_bounds(&x->mv_limits, &cur_mv.as_mv)) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070010187 int_mv dummy_single_newmv[TOTAL_REFS_PER_FRAME] = { { 0 } };
Zoe Liu7f24e1b2017-03-17 17:42:05 -070010188 int dummy_single_newmv_rate[TOTAL_REFS_PER_FRAME] = { 0 };
Yaowu Xuc27fc142016-08-22 16:08:15 -070010189
10190 frame_mv[NEARMV][ref_frame] = cur_mv;
Angie Chiang76159122016-11-09 12:13:22 -080010191 av1_init_rd_stats(&tmp_rd_stats);
Yushin Cho8e75e8b2017-09-12 16:33:28 -070010192
Zoe Liu7f24e1b2017-03-17 17:42:05 -070010193 // Point to variables that are not maintained between iterations
10194 args.single_newmv = dummy_single_newmv;
Zoe Liu7f24e1b2017-03-17 17:42:05 -070010195 args.single_newmv_rate = dummy_single_newmv_rate;
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -080010196 args.modelled_rd = NULL;
Sebastien Alaiwan34d55662017-11-15 09:36:03 +010010197 tmp_alt_rd = handle_inter_mode(
10198 cpi, x, bsize, &tmp_rd_stats, &tmp_rd_stats_y, &tmp_rd_stats_uv,
10199 &dummy_disable_skip, frame_mv, mi_row, mi_col, &args, best_rd);
Zoe Liu7f24e1b2017-03-17 17:42:05 -070010200 // Prevent pointers from escaping local scope
10201 args.single_newmv = NULL;
Zoe Liu7f24e1b2017-03-17 17:42:05 -070010202 args.single_newmv_rate = NULL;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010203 }
10204
10205 for (i = 0; i < mbmi->ref_mv_idx; ++i) {
10206 uint8_t drl1_ctx = 0;
Yaowu Xuf883b422016-08-30 14:01:10 -070010207 drl1_ctx = av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type],
10208 i + idx_offset);
Angie Chiang76159122016-11-09 12:13:22 -080010209 tmp_rd_stats.rate +=
Yue Chenb23d00a2017-07-28 17:01:21 -070010210 (tmp_rd_stats.rate < INT_MAX ? x->drl_mode_cost0[drl1_ctx][1]
Angie Chiang76159122016-11-09 12:13:22 -080010211 : 0);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010212 }
10213
10214 if (mbmi_ext->ref_mv_count[ref_frame_type] >
10215 mbmi->ref_mv_idx + idx_offset + 1 &&
10216 ref_idx < ref_set - 1) {
10217 uint8_t drl1_ctx =
Yaowu Xuf883b422016-08-30 14:01:10 -070010218 av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type],
10219 mbmi->ref_mv_idx + idx_offset);
Yaowu Xu83ed6fe2016-11-22 11:15:29 -080010220 tmp_rd_stats.rate +=
Yue Chenb23d00a2017-07-28 17:01:21 -070010221 (tmp_rd_stats.rate < INT_MAX ? x->drl_mode_cost0[drl1_ctx][0]
Yaowu Xu83ed6fe2016-11-22 11:15:29 -080010222 : 0);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010223 }
10224
10225 if (tmp_alt_rd < INT64_MAX) {
Urvang Joshi70006e42017-06-14 16:08:55 -070010226 tmp_alt_rd =
10227 RDCOST(x->rdmult, tmp_rd_stats.rate, tmp_rd_stats.dist);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010228 }
10229
10230 if (tmp_ref_rd > tmp_alt_rd) {
Angie Chiang76159122016-11-09 12:13:22 -080010231 rate2 = tmp_rd_stats.rate;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010232 disable_skip = dummy_disable_skip;
Angie Chiang76159122016-11-09 12:13:22 -080010233 distortion2 = tmp_rd_stats.dist;
10234 skippable = tmp_rd_stats.skip;
10235 rate_y = tmp_rd_stats_y.rate;
10236 rate_uv = tmp_rd_stats_uv.rate;
10237 total_sse = tmp_rd_stats.sse;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010238 this_rd = tmp_alt_rd;
10239 tmp_ref_rd = tmp_alt_rd;
10240 backup_mbmi = *mbmi;
10241 backup_skip = x->skip;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010242 for (i = 0; i < MAX_MB_PLANE; ++i)
10243 memcpy(x->blk_skip_drl[i], x->blk_skip[i],
10244 sizeof(uint8_t) * ctx->num_4x4_blk);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010245 } else {
10246 *mbmi = backup_mbmi;
10247 x->skip = backup_skip;
10248 }
10249 }
10250
10251 frame_mv[NEARMV][ref_frame] = backup_mv;
10252 frame_mv[NEWMV][ref_frame] = backup_fmv[0];
10253 if (comp_pred) frame_mv[NEWMV][second_ref_frame] = backup_fmv[1];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010254 for (i = 0; i < MAX_MB_PLANE; ++i)
10255 memcpy(x->blk_skip[i], x->blk_skip_drl[i],
10256 sizeof(uint8_t) * ctx->num_4x4_blk);
Cheng Chenca6958c2017-10-10 14:00:50 -070010257#if CONFIG_JNT_COMP
10258 *mbmi = backup_mbmi;
10259#endif // CONFIG_JNT_COMP
Yaowu Xuc27fc142016-08-22 16:08:15 -070010260 }
10261 mbmi_ext->ref_mvs[ref_frame][0] = backup_ref_mv[0];
10262 if (comp_pred) mbmi_ext->ref_mvs[second_ref_frame][0] = backup_ref_mv[1];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010263
10264 if (this_rd == INT64_MAX) continue;
10265
Debargha Mukherjee0f248c42017-09-07 12:40:18 -070010266 if (is_comp_ref_allowed(mbmi->sb_type))
Jingning Hanc41a5492017-02-24 11:18:52 -080010267 compmode_cost = av1_cost_bit(comp_mode_p, comp_pred);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010268
10269 if (cm->reference_mode == REFERENCE_MODE_SELECT) rate2 += compmode_cost;
10270 }
10271
Yaowu Xuc27fc142016-08-22 16:08:15 -070010272 // Estimate the reference frame signaling cost and add it
10273 // to the rolling cost variable.
10274 if (comp_pred) {
Zoe Liuc082bbc2017-05-17 13:31:37 -070010275#if CONFIG_EXT_COMP_REFS
10276 rate2 += ref_costs_comp[ref_frame][second_ref_frame];
Sebastien Alaiwan365e6442017-10-16 11:35:00 +020010277#else // !CONFIG_EXT_COMP_REFS
Yaowu Xuc27fc142016-08-22 16:08:15 -070010278 rate2 += ref_costs_comp[ref_frame];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010279 rate2 += ref_costs_comp[second_ref_frame];
Zoe Liuc082bbc2017-05-17 13:31:37 -070010280#endif // CONFIG_EXT_COMP_REFS
Yaowu Xuc27fc142016-08-22 16:08:15 -070010281 } else {
10282 rate2 += ref_costs_single[ref_frame];
10283 }
10284
Sebastien Alaiwan1bc94fc2017-10-31 10:25:17 +010010285 if (ref_frame == INTRA_FRAME) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070010286 if (skippable) {
10287 // Back out the coefficient coding costs
10288 rate2 -= (rate_y + rate_uv);
10289 rate_y = 0;
10290 rate_uv = 0;
10291 // Cost the skip mb case
Zoe Liu1eed2df2017-10-16 17:13:15 -070010292 rate2 += x->skip_cost[av1_get_skip_context(xd)][1];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010293 } else if (ref_frame != INTRA_FRAME && !xd->lossless[mbmi->segment_id]) {
Urvang Joshi70006e42017-06-14 16:08:55 -070010294 if (RDCOST(x->rdmult, rate_y + rate_uv + rate_skip0, distortion2) <
10295 RDCOST(x->rdmult, rate_skip1, total_sse)) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070010296 // Add in the cost of the no skip flag.
Zoe Liu1eed2df2017-10-16 17:13:15 -070010297 rate2 += x->skip_cost[av1_get_skip_context(xd)][0];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010298 } else {
10299 // FIXME(rbultje) make this work for splitmv also
Zoe Liu1eed2df2017-10-16 17:13:15 -070010300 rate2 += x->skip_cost[av1_get_skip_context(xd)][1];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010301 distortion2 = total_sse;
10302 assert(total_sse >= 0);
10303 rate2 -= (rate_y + rate_uv);
10304 this_skip2 = 1;
10305 rate_y = 0;
10306 rate_uv = 0;
10307 }
10308 } else {
10309 // Add in the cost of the no skip flag.
Zoe Liu1eed2df2017-10-16 17:13:15 -070010310 rate2 += x->skip_cost[av1_get_skip_context(xd)][0];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010311 }
10312
10313 // Calculate the final RD estimate for this mode.
Urvang Joshi70006e42017-06-14 16:08:55 -070010314 this_rd = RDCOST(x->rdmult, rate2, distortion2);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010315 } else {
10316 this_skip2 = mbmi->skip;
Urvang Joshi70006e42017-06-14 16:08:55 -070010317 this_rd = RDCOST(x->rdmult, rate2, distortion2);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010318 if (this_skip2) {
10319 rate_y = 0;
10320 rate_uv = 0;
10321 }
Yaowu Xuc27fc142016-08-22 16:08:15 -070010322 }
10323
Yaowu Xuc27fc142016-08-22 16:08:15 -070010324 if (ref_frame == INTRA_FRAME) {
10325 // Keep record of best intra rd
10326 if (this_rd < best_intra_rd) {
10327 best_intra_rd = this_rd;
10328 best_intra_mode = mbmi->mode;
10329 }
Emil Keyder01770b32017-01-20 18:03:11 -050010330 } else if (second_ref_frame == NONE_FRAME) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070010331 if (this_rd < best_single_inter_rd) {
10332 best_single_inter_rd = this_rd;
10333 best_single_inter_ref = mbmi->ref_frame[0];
10334 }
Yaowu Xuc27fc142016-08-22 16:08:15 -070010335 }
10336
10337 if (!disable_skip && ref_frame == INTRA_FRAME) {
10338 for (i = 0; i < REFERENCE_MODES; ++i)
Yaowu Xuf883b422016-08-30 14:01:10 -070010339 best_pred_rd[i] = AOMMIN(best_pred_rd[i], this_rd);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010340 }
10341
10342      // Did this mode help, i.e. is it the new best mode so far?
10343 if (this_rd < best_rd || x->skip) {
10344 if (!mode_excluded) {
10345 // Note index of best mode so far
10346 best_mode_index = mode_index;
10347
10348 if (ref_frame == INTRA_FRAME) {
10349 /* required for left and above block mv */
10350 mbmi->mv[0].as_int = 0;
10351 } else {
10352 best_pred_sse = x->pred_sse[ref_frame];
10353 }
10354
10355 rd_cost->rate = rate2;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010356 rd_cost->dist = distortion2;
10357 rd_cost->rdcost = this_rd;
10358 best_rd = this_rd;
10359 best_mbmode = *mbmi;
10360 best_skip2 = this_skip2;
10361 best_mode_skippable = skippable;
Zoe Liu1eed2df2017-10-16 17:13:15 -070010362 best_rate_y =
10363 rate_y +
10364 x->skip_cost[av1_get_skip_context(xd)][this_skip2 || skippable];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010365 best_rate_uv = rate_uv;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010366 for (i = 0; i < MAX_MB_PLANE; ++i)
10367 memcpy(ctx->blk_skip[i], x->blk_skip[i],
10368 sizeof(uint8_t) * ctx->num_4x4_blk);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010369 }
10370 }
Yushin Cho8e75e8b2017-09-12 16:33:28 -070010371
Yaowu Xuc27fc142016-08-22 16:08:15 -070010372 /* keep record of best compound/single-only prediction */
10373 if (!disable_skip && ref_frame != INTRA_FRAME) {
10374 int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
10375
10376 if (cm->reference_mode == REFERENCE_MODE_SELECT) {
10377 single_rate = rate2 - compmode_cost;
10378 hybrid_rate = rate2;
10379 } else {
10380 single_rate = rate2;
10381 hybrid_rate = rate2 + compmode_cost;
10382 }
10383
Urvang Joshi70006e42017-06-14 16:08:55 -070010384 single_rd = RDCOST(x->rdmult, single_rate, distortion2);
10385 hybrid_rd = RDCOST(x->rdmult, hybrid_rate, distortion2);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010386
10387 if (!comp_pred) {
10388 if (single_rd < best_pred_rd[SINGLE_REFERENCE])
10389 best_pred_rd[SINGLE_REFERENCE] = single_rd;
10390 } else {
10391 if (single_rd < best_pred_rd[COMPOUND_REFERENCE])
10392 best_pred_rd[COMPOUND_REFERENCE] = single_rd;
10393 }
10394 if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
10395 best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
10396 }
10397
Yaowu Xuc27fc142016-08-22 16:08:15 -070010398 if (x->skip && !comp_pred) break;
10399 }
10400
10401 if (xd->lossless[mbmi->segment_id] == 0 && best_mode_index >= 0 &&
10402 ((sf->tx_type_search.fast_inter_tx_type_search == 1 &&
10403 is_inter_mode(best_mbmode.mode)) ||
10404 (sf->tx_type_search.fast_intra_tx_type_search == 1 &&
10405 !is_inter_mode(best_mbmode.mode)))) {
Angie Chiang0e9a2e92016-11-08 09:45:40 -080010406 int skip_blk = 0;
10407 RD_STATS rd_stats_y, rd_stats_uv;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010408
10409 x->use_default_inter_tx_type = 0;
10410 x->use_default_intra_tx_type = 0;
10411
10412 *mbmi = best_mbmode;
10413
10414 set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
10415
10416 // Select prediction reference frames.
10417 for (i = 0; i < MAX_MB_PLANE; i++) {
10418 xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
10419 if (has_second_ref(mbmi))
10420 xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
10421 }
10422
10423 if (is_inter_mode(mbmi->mode)) {
Jingning Hanc44009c2017-05-06 11:36:49 -070010424 av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize);
Sarah Parker19234cc2017-03-10 16:43:25 -080010425 if (mbmi->motion_mode == OBMC_CAUSAL) {
Fergus Simpson073c6f32017-02-17 12:13:48 -080010426 av1_build_obmc_inter_prediction(
Fergus Simpson9f7ca0b2017-03-10 10:46:46 -080010427 cm, xd, mi_row, mi_col, args.above_pred_buf, args.above_pred_stride,
10428 args.left_pred_buf, args.left_pred_stride);
Sarah Parker19234cc2017-03-10 16:43:25 -080010429 }
Yaowu Xuf883b422016-08-30 14:01:10 -070010430 av1_subtract_plane(x, bsize, 0);
Rupert Swarbrick6f149692017-12-11 15:52:05 +000010431 if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) {
Debargha Mukherjee51666862017-10-24 14:29:13 -070010432 // av1_rd_pick_inter_mode_sb
Yue Chen25dc0702017-10-18 23:36:06 -070010433 select_tx_type_yrd(cpi, x, &rd_stats_y, bsize, mi_row, mi_col,
10434 INT64_MAX);
Hui Su1ddf2312017-08-19 15:21:34 -070010435 assert(rd_stats_y.rate != INT_MAX);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010436 } else {
10437 int idx, idy;
Angie Chiang0e9a2e92016-11-08 09:45:40 -080010438 super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010439 for (idy = 0; idy < xd->n8_h; ++idy)
10440 for (idx = 0; idx < xd->n8_w; ++idx)
10441 mbmi->inter_tx_size[idy][idx] = mbmi->tx_size;
Angie Chiang0e9a2e92016-11-08 09:45:40 -080010442 memset(x->blk_skip[0], rd_stats_y.skip,
Yaowu Xuc27fc142016-08-22 16:08:15 -070010443 sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
10444 }
10445
Debargha Mukherjee51666862017-10-24 14:29:13 -070010446 inter_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX, 0);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010447 } else {
Angie Chiang0e9a2e92016-11-08 09:45:40 -080010448 super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX);
Angie Chiang284d7772016-11-08 11:06:45 -080010449 super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010450 }
10451
Urvang Joshi70006e42017-06-14 16:08:55 -070010452 if (RDCOST(x->rdmult, rd_stats_y.rate + rd_stats_uv.rate,
Angie Chiang0e9a2e92016-11-08 09:45:40 -080010453 (rd_stats_y.dist + rd_stats_uv.dist)) >
Urvang Joshi70006e42017-06-14 16:08:55 -070010454 RDCOST(x->rdmult, 0, (rd_stats_y.sse + rd_stats_uv.sse))) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070010455 skip_blk = 1;
Zoe Liu1eed2df2017-10-16 17:13:15 -070010456 rd_stats_y.rate = x->skip_cost[av1_get_skip_context(xd)][1];
Angie Chiang0e9a2e92016-11-08 09:45:40 -080010457 rd_stats_uv.rate = 0;
10458 rd_stats_y.dist = rd_stats_y.sse;
10459 rd_stats_uv.dist = rd_stats_uv.sse;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010460 } else {
10461 skip_blk = 0;
Zoe Liu1eed2df2017-10-16 17:13:15 -070010462 rd_stats_y.rate += x->skip_cost[av1_get_skip_context(xd)][0];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010463 }
10464
Urvang Joshi70006e42017-06-14 16:08:55 -070010465 if (RDCOST(x->rdmult, best_rate_y + best_rate_uv, rd_cost->dist) >
10466 RDCOST(x->rdmult, rd_stats_y.rate + rd_stats_uv.rate,
Angie Chiang0e9a2e92016-11-08 09:45:40 -080010467 (rd_stats_y.dist + rd_stats_uv.dist))) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070010468 int idx, idy;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010469 best_mbmode.tx_type = mbmi->tx_type;
10470 best_mbmode.tx_size = mbmi->tx_size;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010471 for (idy = 0; idy < xd->n8_h; ++idy)
10472 for (idx = 0; idx < xd->n8_w; ++idx)
10473 best_mbmode.inter_tx_size[idy][idx] = mbmi->inter_tx_size[idy][idx];
10474
10475 for (i = 0; i < MAX_MB_PLANE; ++i)
10476 memcpy(ctx->blk_skip[i], x->blk_skip[i],
10477 sizeof(uint8_t) * ctx->num_4x4_blk);
Jingning Hane67b38a2016-11-04 10:30:00 -070010478
10479 best_mbmode.min_tx_size = mbmi->min_tx_size;
Angie Chiang0e9a2e92016-11-08 09:45:40 -080010480 rd_cost->rate +=
10481 (rd_stats_y.rate + rd_stats_uv.rate - best_rate_y - best_rate_uv);
10482 rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
Urvang Joshi70006e42017-06-14 16:08:55 -070010483 rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010484 best_skip2 = skip_blk;
10485 }
10486 }
10487
10488 // Only try palette mode when the best mode so far is an intra mode.
hui su9bc1d8d2017-03-24 12:36:03 -070010489 if (try_palette && !is_inter_mode(best_mbmode.mode)) {
Angie Chiang0e9a2e92016-11-08 09:45:40 -080010490 int rate2 = 0;
Urvang Joshi451e0f22017-01-31 11:18:31 -080010491 int64_t distortion2 = 0, best_rd_palette = best_rd, this_rd,
10492 best_model_rd_palette = INT64_MAX;
Urvang Joshi626591d2016-10-24 14:13:55 -070010493 int skippable = 0, rate_overhead_palette = 0;
Angie Chiang0e9a2e92016-11-08 09:45:40 -080010494 RD_STATS rd_stats_y;
hui sude0c70a2017-01-09 17:12:17 -080010495 TX_SIZE uv_tx;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010496 uint8_t *const best_palette_color_map =
10497 x->palette_buffer->best_palette_color_map;
10498 uint8_t *const color_map = xd->plane[0].color_index_map;
Hui Suefb755c2017-10-26 16:09:05 -070010499 MB_MODE_INFO best_mbmi_palette = *mbmi;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010500
10501 mbmi->mode = DC_PRED;
Luc Trudeaud6d9eee2017-07-12 12:36:50 -040010502 mbmi->uv_mode = UV_DC_PRED;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010503 mbmi->ref_frame[0] = INTRA_FRAME;
Emil Keyder01770b32017-01-20 18:03:11 -050010504 mbmi->ref_frame[1] = NONE_FRAME;
Urvang Joshi626591d2016-10-24 14:13:55 -070010505 rate_overhead_palette = rd_pick_palette_intra_sby(
Urvang Joshi451e0f22017-01-31 11:18:31 -080010506 cpi, x, bsize, palette_ctx, intra_mode_cost[DC_PRED],
10507 &best_mbmi_palette, best_palette_color_map, &best_rd_palette,
10508 &best_model_rd_palette, NULL, NULL, NULL, NULL);
hui sude0c70a2017-01-09 17:12:17 -080010509 if (pmi->palette_size[0] == 0) goto PALETTE_EXIT;
10510 memcpy(color_map, best_palette_color_map,
10511 rows * cols * sizeof(best_palette_color_map[0]));
Angie Chiang0e9a2e92016-11-08 09:45:40 -080010512 super_block_yrd(cpi, x, &rd_stats_y, bsize, best_rd);
10513 if (rd_stats_y.rate == INT_MAX) goto PALETTE_EXIT;
Debargha Mukherjee2f123402016-08-30 17:43:38 -070010514 uv_tx = uv_txsize_lookup[bsize][mbmi->tx_size][xd->plane[1].subsampling_x]
10515 [xd->plane[1].subsampling_y];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010516 if (rate_uv_intra[uv_tx] == INT_MAX) {
Luc Trudeau9d4cbb82017-07-27 17:01:32 -040010517 choose_intra_uv_mode(cpi, x, bsize, uv_tx, &rate_uv_intra[uv_tx],
Urvang Joshi368fbc92016-10-17 16:31:34 -070010518 &rate_uv_tokenonly[uv_tx], &dist_uvs[uv_tx],
10519 &skip_uvs[uv_tx], &mode_uv[uv_tx]);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010520 pmi_uv[uv_tx] = *pmi;
10521#if CONFIG_EXT_INTRA
Yaowu Xuc27fc142016-08-22 16:08:15 -070010522 uv_angle_delta[uv_tx] = mbmi->angle_delta[1];
10523#endif // CONFIG_EXT_INTRA
10524 }
10525 mbmi->uv_mode = mode_uv[uv_tx];
10526 pmi->palette_size[1] = pmi_uv[uv_tx].palette_size[1];
hui sude0c70a2017-01-09 17:12:17 -080010527 if (pmi->palette_size[1] > 0) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070010528 memcpy(pmi->palette_colors + PALETTE_MAX_SIZE,
10529 pmi_uv[uv_tx].palette_colors + PALETTE_MAX_SIZE,
10530 2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0]));
hui sude0c70a2017-01-09 17:12:17 -080010531 }
Yaowu Xuc27fc142016-08-22 16:08:15 -070010532#if CONFIG_EXT_INTRA
10533 mbmi->angle_delta[1] = uv_angle_delta[uv_tx];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010534#endif // CONFIG_EXT_INTRA
Angie Chiang0e9a2e92016-11-08 09:45:40 -080010535 skippable = rd_stats_y.skip && skip_uvs[uv_tx];
10536 distortion2 = rd_stats_y.dist + dist_uvs[uv_tx];
10537 rate2 = rd_stats_y.rate + rate_overhead_palette + rate_uv_intra[uv_tx];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010538 rate2 += ref_costs_single[INTRA_FRAME];
10539
10540 if (skippable) {
Angie Chiang0e9a2e92016-11-08 09:45:40 -080010541 rate2 -= (rd_stats_y.rate + rate_uv_tokenonly[uv_tx]);
Zoe Liu1eed2df2017-10-16 17:13:15 -070010542 rate2 += x->skip_cost[av1_get_skip_context(xd)][1];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010543 } else {
Zoe Liu1eed2df2017-10-16 17:13:15 -070010544 rate2 += x->skip_cost[av1_get_skip_context(xd)][0];
Yaowu Xuc27fc142016-08-22 16:08:15 -070010545 }
Urvang Joshi70006e42017-06-14 16:08:55 -070010546 this_rd = RDCOST(x->rdmult, rate2, distortion2);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010547 if (this_rd < best_rd) {
10548 best_mode_index = 3;
10549 mbmi->mv[0].as_int = 0;
10550 rd_cost->rate = rate2;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010551 rd_cost->dist = distortion2;
10552 rd_cost->rdcost = this_rd;
10553 best_rd = this_rd;
10554 best_mbmode = *mbmi;
10555 best_skip2 = 0;
10556 best_mode_skippable = skippable;
10557 }
10558 }
10559PALETTE_EXIT:
Zoe Liuf40a9572017-10-13 12:37:19 -070010560
10561#if CONFIG_EXT_SKIP
Zoe Liu8a5d3432017-11-30 16:33:44 -080010562 best_mbmode.skip_mode = 0;
10563 if (cm->skip_mode_flag &&
Zoe Liuf40a9572017-10-13 12:37:19 -070010564 !segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
10565 is_comp_ref_allowed(bsize)) {
Zoe Liu8a5d3432017-11-30 16:33:44 -080010566 // Obtain the rdcost for skip_mode.
Zoe Liuf40a9572017-10-13 12:37:19 -070010567 estimate_skip_mode_rdcost(cpi, tile_data, x, bsize, mi_row, mi_col,
10568 frame_mv, yv12_mb);
Zoe Liuf40a9572017-10-13 12:37:19 -070010569
Zoe Liu8a5d3432017-11-30 16:33:44 -080010570 if (x->skip_mode_rdcost >= 0 && x->skip_mode_rdcost < INT64_MAX) {
10571 // Update skip mode rdcost.
10572 const int skip_mode_ctx = av1_get_skip_mode_context(xd);
10573 x->skip_mode_rate += x->skip_mode_cost[skip_mode_ctx][1];
10574 x->skip_mode_rdcost =
10575 RDCOST(x->rdmult, x->skip_mode_rate, x->skip_mode_dist);
Zoe Liuf40a9572017-10-13 12:37:19 -070010576
Zoe Liu8a5d3432017-11-30 16:33:44 -080010577 // Compare the use of skip_mode with the best intra/inter mode obtained.
10578 const int64_t best_intra_inter_mode_cost =
10579 RDCOST(x->rdmult, rd_cost->rate + x->skip_mode_cost[skip_mode_ctx][0],
10580 rd_cost->dist);
10581
Zoe Liu50c810d2017-12-05 16:09:37 -080010582 if (x->skip_mode_rdcost <= best_intra_inter_mode_cost)
Zoe Liu8a5d3432017-11-30 16:33:44 -080010583 best_mbmode.skip_mode = 1;
10584 }
10585
10586 if (best_mbmode.skip_mode) {
Zoe Liuf40a9572017-10-13 12:37:19 -070010587 best_mbmode = *mbmi;
10588
10589 best_mbmode.skip_mode = best_mbmode.skip = 1;
10590 best_mbmode.mode = NEAREST_NEARESTMV;
10591 best_mbmode.ref_frame[0] = x->skip_mode_ref_frame[0];
10592 best_mbmode.ref_frame[1] = x->skip_mode_ref_frame[1];
10593 best_mbmode.mv[0].as_int = x->skip_mode_mv[0].as_int;
10594 best_mbmode.mv[1].as_int = x->skip_mode_mv[1].as_int;
Zoe Liu8a5d3432017-11-30 16:33:44 -080010595 best_mbmode.ref_mv_idx = 0;
Zoe Liuf40a9572017-10-13 12:37:19 -070010596
10597 // Set up tx_size related variables for skip-specific loop filtering.
10598 best_mbmode.tx_size = block_signals_txsize(bsize)
10599 ? tx_size_from_tx_mode(bsize, cm->tx_mode, 1)
Debargha Mukherjeee4e18fc2017-12-06 23:43:24 -080010600 : max_txsize_rect_lookup[1][bsize];
Zoe Liuf40a9572017-10-13 12:37:19 -070010601 {
10602 const int width = block_size_wide[bsize] >> tx_size_wide_log2[0];
10603 const int height = block_size_high[bsize] >> tx_size_high_log2[0];
10604 for (int idy = 0; idy < height; ++idy)
10605 for (int idx = 0; idx < width; ++idx)
10606 best_mbmode.inter_tx_size[idy >> 1][idx >> 1] = best_mbmode.tx_size;
10607 }
10608 best_mbmode.min_tx_size = get_min_tx_size(best_mbmode.tx_size);
10609 set_txfm_ctxs(best_mbmode.tx_size, xd->n8_w, xd->n8_h, best_mbmode.skip,
10610 xd);
10611
10612 // Set up color-related variables for skip mode.
10613 best_mbmode.uv_mode = UV_DC_PRED;
10614 best_mbmode.palette_mode_info.palette_size[0] = 0;
10615 best_mbmode.palette_mode_info.palette_size[1] = 0;
Zoe Liuf40a9572017-10-13 12:37:19 -070010616 best_mbmode.interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
10617 best_mbmode.interinter_compound_type = COMPOUND_AVERAGE;
10618 best_mbmode.motion_mode = SIMPLE_TRANSLATION;
Zoe Liuf40a9572017-10-13 12:37:19 -070010619#if CONFIG_FILTER_INTRA
10620 best_mbmode.filter_intra_mode_info.use_filter_intra_mode[0] = 0;
10621 best_mbmode.filter_intra_mode_info.use_filter_intra_mode[1] = 0;
10622#endif // CONFIG_FILTER_INTRA
10623
Zoe Liuf40a9572017-10-13 12:37:19 -070010624 set_default_interp_filters(&best_mbmode, cm->interp_filter);
10625
Zoe Liu8a5d3432017-11-30 16:33:44 -080010626 best_mode_index = x->skip_mode_index;
10627
Zoe Liuf40a9572017-10-13 12:37:19 -070010628 // Update rd_cost
Zoe Liu8a5d3432017-11-30 16:33:44 -080010629 rd_cost->rate = x->skip_mode_rate;
Zoe Liuf40a9572017-10-13 12:37:19 -070010630 rd_cost->dist = rd_cost->sse = x->skip_mode_dist;
10631 rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
Zoe Liu8a5d3432017-11-30 16:33:44 -080010632
Zoe Liuf40a9572017-10-13 12:37:19 -070010633 best_rd = rd_cost->rdcost;
Zoe Liu8a5d3432017-11-30 16:33:44 -080010634 best_skip2 = 1;
10635 best_mode_skippable = (x->skip_mode_sse == 0);
Zoe Liuf40a9572017-10-13 12:37:19 -070010636
10637 x->skip = 1;
10638#if 0
Zoe Liu8a5d3432017-11-30 16:33:44 -080010639 // TODO(zoeliu): To investigate why following cause performance drop.
10640 for (i = 0; i < MAX_MB_PLANE; ++i) {
10641 memset(x->blk_skip[i], x->skip, sizeof(uint8_t) * ctx->num_4x4_blk);
10642 memcpy(ctx->blk_skip[i], x->blk_skip[i],
Zoe Liuf40a9572017-10-13 12:37:19 -070010643 sizeof(uint8_t) * ctx->num_4x4_blk);
Zoe Liu8a5d3432017-11-30 16:33:44 -080010644 }
Zoe Liuf40a9572017-10-13 12:37:19 -070010645#endif // 0
10646 }
10647 }
10648#endif // CONFIG_EXT_SKIP
10649
Sebastien Alaiwan34d55662017-11-15 09:36:03 +010010650 // The inter modes' rate costs are not calculated precisely in some cases.
10651 // Therefore, sometimes, NEWMV is chosen instead of NEARESTMV, NEARMV, and
10652 // GLOBALMV. Here, checks are added for those cases, and the mode decisions
10653 // are corrected.
Yunqing Wang876a8b02017-11-13 17:13:27 -080010654 if ((best_mbmode.mode == NEWMV || best_mbmode.mode == NEW_NEWMV)
10655#if CONFIG_EXT_WARPED_MOTION
10656 && best_mbmode.motion_mode != WARPED_CAUSAL
10657#endif // CONFIG_EXT_WARPED_MOTION
10658 ) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070010659 const MV_REFERENCE_FRAME refs[2] = { best_mbmode.ref_frame[0],
10660 best_mbmode.ref_frame[1] };
10661 int comp_pred_mode = refs[1] > INTRA_FRAME;
Sarah Parkere5299862016-08-16 14:57:37 -070010662 int_mv zeromv[2];
Yaowu Xuf883b422016-08-30 14:01:10 -070010663 const uint8_t rf_type = av1_ref_frame_type(best_mbmode.ref_frame);
Debargha Mukherjeefebb59c2017-03-02 12:23:45 -080010664 zeromv[0].as_int = gm_get_motion_vector(&cm->global_motion[refs[0]],
10665 cm->allow_high_precision_mv, bsize,
RogerZhou3b635242017-09-19 10:06:46 -070010666 mi_col, mi_row, 0
10667#if CONFIG_AMVR
10668 ,
RogerZhou10a03802017-10-26 11:49:48 -070010669 cm->cur_frame_force_integer_mv
RogerZhou3b635242017-09-19 10:06:46 -070010670#endif
10671 )
Debargha Mukherjeefebb59c2017-03-02 12:23:45 -080010672 .as_int;
RogerZhou10a03802017-10-26 11:49:48 -070010673 zeromv[1].as_int = comp_pred_mode
10674 ? gm_get_motion_vector(&cm->global_motion[refs[1]],
10675 cm->allow_high_precision_mv,
10676 bsize, mi_col, mi_row, 0
RogerZhou3b635242017-09-19 10:06:46 -070010677#if CONFIG_AMVR
RogerZhou10a03802017-10-26 11:49:48 -070010678 ,
10679 cm->cur_frame_force_integer_mv
RogerZhou3b635242017-09-19 10:06:46 -070010680#endif
RogerZhou10a03802017-10-26 11:49:48 -070010681 )
10682 .as_int
10683 : 0;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010684 if (!comp_pred_mode) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070010685 int ref_set = (mbmi_ext->ref_mv_count[rf_type] >= 2)
Yaowu Xuf883b422016-08-30 14:01:10 -070010686 ? AOMMIN(2, mbmi_ext->ref_mv_count[rf_type] - 2)
Yaowu Xuc27fc142016-08-22 16:08:15 -070010687 : INT_MAX;
10688
10689 for (i = 0; i <= ref_set && ref_set != INT_MAX; ++i) {
10690 int_mv cur_mv = mbmi_ext->ref_mv_stack[rf_type][i + 1].this_mv;
10691 if (cur_mv.as_int == best_mbmode.mv[0].as_int) {
10692 best_mbmode.mode = NEARMV;
10693 best_mbmode.ref_mv_idx = i;
10694 }
10695 }
10696
10697 if (frame_mv[NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int)
10698 best_mbmode.mode = NEARESTMV;
Sarah Parkere5299862016-08-16 14:57:37 -070010699 else if (best_mbmode.mv[0].as_int == zeromv[0].as_int)
Sarah Parker2b9ec2e2017-10-30 17:34:08 -070010700 best_mbmode.mode = GLOBALMV;
Yaowu Xuc27fc142016-08-22 16:08:15 -070010701 } else {
10702 int_mv nearestmv[2];
10703 int_mv nearmv[2];
10704
Yaowu Xuc27fc142016-08-22 16:08:15 -070010705 if (mbmi_ext->ref_mv_count[rf_type] > 1) {
10706 nearmv[0] = mbmi_ext->ref_mv_stack[rf_type][1].this_mv;
10707 nearmv[1] = mbmi_ext->ref_mv_stack[rf_type][1].comp_mv;
10708 } else {
10709 nearmv[0] = frame_mv[NEARMV][refs[0]];
10710 nearmv[1] = frame_mv[NEARMV][refs[1]];
10711 }
Yaowu Xuc27fc142016-08-22 16:08:15 -070010712 if (mbmi_ext->ref_mv_count[rf_type] >= 1) {
10713 nearestmv[0] = mbmi_ext->ref_mv_stack[rf_type][0].this_mv;
10714 nearestmv[1] = mbmi_ext->ref_mv_stack[rf_type][0].comp_mv;
10715 } else {
10716 nearestmv[0] = frame_mv[NEARESTMV][refs[0]];
10717 nearestmv[1] = frame_mv[NEARESTMV][refs[1]];
10718 }
10719
10720 if (nearestmv[0].as_int == best_mbmode.mv[0].as_int &&
Sebastien Alaiwan0bdea0d2017-10-02 15:15:05 +020010721 nearestmv[1].as_int == best_mbmode.mv[1].as_int) {
Yaowu Xuc27fc142016-08-22 16:08:15 -070010722 best_mbmode.mode = NEAREST_NEARESTMV;
David Barker404b2e82017-03-27 13:07:47 +010010723 } else {
10724 int ref_set = (mbmi_ext->ref_mv_count[rf_type] >= 2)
10725 ? AOMMIN(2, mbmi_ext->ref_mv_count[rf_type] - 2)
10726 : INT_MAX;
10727
10728 for (i = 0; i <= ref_set && ref_set != INT_MAX; ++i) {
10729 nearmv[0] = mbmi_ext->ref_mv_stack[rf_type][i + 1].this_mv;
10730 nearmv[1] = mbmi_ext->ref_mv_stack[rf_type][i + 1].comp_mv;
10731
Debargha Mukherjeebb6e1342017-04-17 16:05:04 -070010732 // Try switching to the NEAR_NEARMV mode
10733 if (nearmv[0].as_int == best_mbmode.mv[0].as_int &&
David Barker404b2e82017-03-27 13:07:47 +010010734 nearmv[1].as_int == best_mbmode.mv[1].as_int) {
10735 best_mbmode.mode = NEAR_NEARMV;
10736 best_mbmode.ref_mv_idx = i;
10737 }
10738 }
10739
David Barker3dfba992017-04-03 16:10:09 +010010740 if (best_mbmode.mode == NEW_NEWMV &&
David Barker404b2e82017-03-27 13:07:47 +010010741 best_mbmode.mv[0].as_int == zeromv[0].as_int &&
10742 best_mbmode.mv[1].as_int == zeromv[1].as_int)
Sarah Parker2b9ec2e2017-10-30 17:34:08 -070010743 best_mbmode.mode = GLOBAL_GLOBALMV;
David Barker404b2e82017-03-27 13:07:47 +010010744 }
Yaowu Xuc27fc142016-08-22 16:08:15 -070010745 }
Yaowu Xuc27fc142016-08-22 16:08:15 -070010746 }
10747
David Barker9620bcd2017-03-22 14:46:42 +000010748 // Make sure that the ref_mv_idx is only nonzero when we're
10749 // using a mode which can support ref_mv_idx
10750 if (best_mbmode.ref_mv_idx != 0 &&
David Barker3dfba992017-04-03 16:10:09 +010010751 !(best_mbmode.mode == NEWMV || best_mbmode.mode == NEW_NEWMV ||
Sebastien Alaiwan34d55662017-11-15 09:36:03 +010010752 have_nearmv_in_inter_mode(best_mbmode.mode))) {
David Barker9620bcd2017-03-22 14:46:42 +000010753 best_mbmode.ref_mv_idx = 0;
10754 }
10755
Rupert Swarbrick799ff702017-10-04 17:37:52 +010010756 if (best_mbmode.ref_frame[0] > INTRA_FRAME &&
Yunqing Wang876a8b02017-11-13 17:13:27 -080010757 best_mbmode.ref_frame[1] <= INTRA_FRAME
10758#if CONFIG_EXT_WARPED_MOTION
10759 && best_mbmode.motion_mode != WARPED_CAUSAL
10760#endif // CONFIG_EXT_WARPED_MOTION
Zoe Liuf40a9572017-10-13 12:37:19 -070010761#if CONFIG_EXT_SKIP
10762 && !best_mbmode.skip_mode
10763#endif // CONFIG_EXT_SKIP
Yunqing Wang876a8b02017-11-13 17:13:27 -080010764 ) {
Jingning Han731af492016-11-17 11:53:23 -080010765 int8_t ref_frame_type = av1_ref_frame_type(best_mbmode.ref_frame);
10766 int16_t mode_ctx = mbmi_ext->mode_context[ref_frame_type];
David Barker68e6e862016-11-24 15:10:15 +000010767 if (mode_ctx & (1 << ALL_ZERO_FLAG_OFFSET)) {
Rupert Swarbrick799ff702017-10-04 17:37:52 +010010768 int_mv zeromv;
Rupert Swarbrick799ff702017-10-04 17:37:52 +010010769 const MV_REFERENCE_FRAME ref = best_mbmode.ref_frame[0];
10770 zeromv.as_int = gm_get_motion_vector(&cm->global_motion[ref],
10771 cm->allow_high_precision_mv, bsize,
10772 mi_col, mi_row, 0
RogerZhou3b635242017-09-19 10:06:46 -070010773#if CONFIG_AMVR
Rupert Swarbrick799ff702017-10-04 17:37:52 +010010774 ,
RogerZhou10a03802017-10-26 11:49:48 -070010775 cm->cur_frame_force_integer_mv
RogerZhou3b635242017-09-19 10:06:46 -070010776#endif
Rupert Swarbrick799ff702017-10-04 17:37:52 +010010777 )
10778 .as_int;
Rupert Swarbrick799ff702017-10-04 17:37:52 +010010779 if (best_mbmode.mv[0].as_int == zeromv.as_int) {
Sarah Parker2b9ec2e2017-10-30 17:34:08 -070010780 best_mbmode.mode = GLOBALMV;
David Barkercdcac6d2016-12-01 17:04:16 +000010781 }
David Barker68e6e862016-11-24 15:10:15 +000010782 }
Yaowu Xuc27fc142016-08-22 16:08:15 -070010783 }
Yaowu Xuc27fc142016-08-22 16:08:15 -070010784
10785 if (best_mode_index < 0 || best_rd >= best_rd_so_far) {
10786 rd_cost->rate = INT_MAX;
10787 rd_cost->rdcost = INT64_MAX;
10788 return;
10789 }
10790
Rupert Swarbrick27e90292017-09-28 17:46:50 +010010791 assert((cm->interp_filter == SWITCHABLE) ||
10792 (cm->interp_filter ==
10793 av1_extract_interp_filter(best_mbmode.interp_filters, 0)) ||
10794 !is_inter_block(&best_mbmode));
Yaowu Xuc27fc142016-08-22 16:08:15 -070010795#if CONFIG_DUAL_FILTER
10796 assert((cm->interp_filter == SWITCHABLE) ||
Rupert Swarbrick27e90292017-09-28 17:46:50 +010010797 (cm->interp_filter ==
10798 av1_extract_interp_filter(best_mbmode.interp_filters, 1)) ||
Yaowu Xuc27fc142016-08-22 16:08:15 -070010799 !is_inter_block(&best_mbmode));
Fergus Simpson4063a682017-02-28 16:52:22 -080010800#endif // CONFIG_DUAL_FILTER
Yaowu Xuc27fc142016-08-22 16:08:15 -070010801
10802 if (!cpi->rc.is_src_frame_alt_ref)
Yaowu Xuf883b422016-08-30 14:01:10 -070010803 av1_update_rd_thresh_fact(cm, tile_data->thresh_freq_fact,
10804 sf->adaptive_rd_thresh, bsize, best_mode_index);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010805
10806 // macroblock modes
10807 *mbmi = best_mbmode;
10808 x->skip |= best_skip2;
10809
Sebastien Alaiwan48795802017-10-30 12:07:13 +010010810 // Note: this section is needed since the mode may have been forced to
Sarah Parker2b9ec2e2017-10-30 17:34:08 -070010811 // GLOBALMV by the all-zero mode handling of ref-mv.
10812 if (mbmi->mode == GLOBALMV || mbmi->mode == GLOBAL_GLOBALMV) {
10813 // Correct the motion mode for GLOBALMV
Sarah Parker0eea89f2017-07-11 11:56:36 -070010814 const MOTION_MODE last_motion_mode_allowed =
Sebastien Alaiwan1f56b8e2017-10-31 17:37:16 +010010815 motion_mode_allowed(0, xd->global_motion, xd, xd->mi[0]);
Sarah Parker19234cc2017-03-10 16:43:25 -080010816 if (mbmi->motion_mode > last_motion_mode_allowed)
10817 mbmi->motion_mode = last_motion_mode_allowed;
Sarah Parker19234cc2017-03-10 16:43:25 -080010818
Sarah Parker2b9ec2e2017-10-30 17:34:08 -070010819 // Correct the interpolation filter for GLOBALMV
Yue Chen19e7aa82016-11-30 14:05:39 -080010820 if (is_nontrans_global_motion(xd)) {
Rupert Swarbrick27e90292017-09-28 17:46:50 +010010821 mbmi->interp_filters = av1_broadcast_interp_filter(
10822 av1_unswitchable_filter(cm->interp_filter));
Yue Chen19e7aa82016-11-30 14:05:39 -080010823 }
10824 }
Yue Chen19e7aa82016-11-30 14:05:39 -080010825
Yaowu Xuc27fc142016-08-22 16:08:15 -070010826 for (i = 0; i < 1 + has_second_ref(mbmi); ++i) {
10827 if (mbmi->mode != NEWMV)
10828 mbmi->pred_mv[i].as_int = mbmi->mv[i].as_int;
10829 else
10830 mbmi->pred_mv[i].as_int = mbmi_ext->ref_mvs[mbmi->ref_frame[i]][0].as_int;
10831 }
Yaowu Xuc27fc142016-08-22 16:08:15 -070010832
10833 for (i = 0; i < REFERENCE_MODES; ++i) {
10834 if (best_pred_rd[i] == INT64_MAX)
10835 best_pred_diff[i] = INT_MIN;
10836 else
10837 best_pred_diff[i] = best_rd - best_pred_rd[i];
10838 }
10839
10840 x->skip |= best_mode_skippable;
10841
10842 assert(best_mode_index >= 0);
10843
10844 store_coding_context(x, ctx, best_mode_index, best_pred_diff,
10845 best_mode_skippable);
10846
Urvang Joshic9e71d42017-08-09 18:58:33 -070010847 if (pmi->palette_size[1] > 0) {
10848 assert(try_palette);
Yaowu Xuc27fc142016-08-22 16:08:15 -070010849 restore_uv_color_map(cpi, x);
10850 }
10851}
10852
// Mode decision for a block whose segment has the SEG_LVL_SKIP feature
// active (asserted below): no inter/intra mode search is run. The block is
// forced to single-reference GLOBALMV (global-motion) prediction with the
// residual skipped (x->skip = 1), so the only searched quantity is the
// interpolation filter. The resulting rate/distortion is written to
// rd_cost; if it is not better than best_rd_so_far, rd_cost is invalidated
// (rate = INT_MAX) and the function returns early.
void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
                                        TileDataEnc *tile_data, MACROBLOCK *x,
                                        int mi_row, int mi_col,
                                        RD_STATS *rd_cost, BLOCK_SIZE bsize,
                                        PICK_MODE_CONTEXT *ctx,
                                        int64_t best_rd_so_far) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  unsigned char segment_id = mbmi->segment_id;
  const int comp_pred = 0;  // Compound prediction is never used here.
  int i;
  int64_t best_pred_diff[REFERENCE_MODES];
  unsigned int ref_costs_single[TOTAL_REFS_PER_FRAME];
#if CONFIG_EXT_COMP_REFS
  unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME][TOTAL_REFS_PER_FRAME];
#else
  unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME];
#endif  // CONFIG_EXT_COMP_REFS
  aom_prob comp_mode_p;
  InterpFilter best_filter = SWITCHABLE;
  int64_t this_rd = INT64_MAX;
  int rate2 = 0;
  // The residual is skipped, so distortion is fixed at zero.
  const int64_t distortion2 = 0;
  (void)mi_row;
  (void)mi_col;

  estimate_ref_frame_costs(cm, xd, x, segment_id, ref_costs_single,
                           ref_costs_comp, &comp_mode_p);

  for (i = 0; i < TOTAL_REFS_PER_FRAME; ++i) x->pred_sse[i] = INT_MAX;
  for (i = LAST_FRAME; i < TOTAL_REFS_PER_FRAME; ++i)
    x->pred_mv_sad[i] = INT_MAX;

  rd_cost->rate = INT_MAX;

  // This path is only valid for segments with the skip feature enabled.
  assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));

  // No palette or filter-intra tools in this forced inter mode.
  mbmi->palette_mode_info.palette_size[0] = 0;
  mbmi->palette_mode_info.palette_size[1] = 0;

#if CONFIG_FILTER_INTRA
  mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0;
  mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0;
#endif  // CONFIG_FILTER_INTRA
  mbmi->mode = GLOBALMV;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->uv_mode = UV_DC_PRED;
  // Reference frame comes from the segment when SEG_LVL_REF_FRAME is
  // active; otherwise default to LAST_FRAME. Always single reference.
  if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME))
    mbmi->ref_frame[0] = get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
  else
    mbmi->ref_frame[0] = LAST_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
  // The motion vector is the frame's global motion evaluated at this block.
  mbmi->mv[0].as_int =
      gm_get_motion_vector(&cm->global_motion[mbmi->ref_frame[0]],
                           cm->allow_high_precision_mv, bsize, mi_col, mi_row, 0
#if CONFIG_AMVR
                           ,
                           cm->cur_frame_force_integer_mv
#endif
                           )
          .as_int;
  mbmi->tx_size = max_txsize_lookup[bsize];
  x->skip = 1;  // Skip the residual: no coefficients are coded.

  mbmi->ref_mv_idx = 0;
  mbmi->pred_mv[0].as_int = 0;

  mbmi->motion_mode = SIMPLE_TRANSLATION;
  av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col);
  // Collect warped-motion projection samples so downstream code sees a
  // consistent num_proj_ref, even though the mode stays SIMPLE_TRANSLATION.
  if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) {
    int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
#if CONFIG_EXT_WARPED_MOTION
    int pts_mv[SAMPLES_ARRAY_SIZE], pts_wm[SAMPLES_ARRAY_SIZE];
    mbmi->num_proj_ref[0] =
        findSamples(cm, xd, mi_row, mi_col, pts, pts_inref, pts_mv, pts_wm);
    // Rank the samples by motion vector difference
    if (mbmi->num_proj_ref[0] > 1)
      mbmi->num_proj_ref[0] = sortSamples(pts_mv, &mbmi->mv[0].as_mv, pts,
                                          pts_inref, mbmi->num_proj_ref[0]);
#else
    mbmi->num_proj_ref[0] = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref);
#endif  // CONFIG_EXT_WARPED_MOTION
  }

  set_default_interp_filters(mbmi, cm->interp_filter);

  if (cm->interp_filter != SWITCHABLE) {
    best_filter = cm->interp_filter;
  } else {
    best_filter = EIGHTTAP_REGULAR;
    // With switchable filters, pick the one that is cheapest to signal
    // (distortion is zero, so only the rate matters).
    if (av1_is_interp_needed(xd) && av1_is_interp_search_needed(xd) &&
        x->source_variance >= cpi->sf.disable_filter_search_var_thresh) {
      int rs;
      int best_rs = INT_MAX;
      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
        mbmi->interp_filters = av1_broadcast_interp_filter(i);
        rs = av1_get_switchable_rate(cm, x, xd);
        if (rs < best_rs) {
          best_rs = rs;
          best_filter = av1_extract_interp_filter(mbmi->interp_filters, 0);
        }
      }
    }
  }
  // Set the appropriate filter
  mbmi->interp_filters = av1_broadcast_interp_filter(best_filter);
  rate2 += av1_get_switchable_rate(cm, x, xd);

  if (cm->reference_mode == REFERENCE_MODE_SELECT)
    rate2 += av1_cost_bit(comp_mode_p, comp_pred);

  // Estimate the reference frame signaling cost and add it
  // to the rolling cost variable.
  rate2 += ref_costs_single[LAST_FRAME];
  this_rd = RDCOST(x->rdmult, rate2, distortion2);

  rd_cost->rate = rate2;
  rd_cost->dist = distortion2;
  rd_cost->rdcost = this_rd;

  if (this_rd >= best_rd_so_far) {
    rd_cost->rate = INT_MAX;
    rd_cost->rdcost = INT64_MAX;
    return;
  }

  assert((cm->interp_filter == SWITCHABLE) ||
         (cm->interp_filter ==
          av1_extract_interp_filter(mbmi->interp_filters, 0)));

  av1_update_rd_thresh_fact(cm, tile_data->thresh_freq_fact,
                            cpi->sf.adaptive_rd_thresh, bsize, THR_GLOBALMV);

  av1_zero(best_pred_diff);

  store_coding_context(x, ctx, THR_GLOBALMV, best_pred_diff, 0);
}
10991
// Shared state handed (through the void *fun_ctxt parameter) to the
// calc_target_weighted_pred_{above,left} neighbour visitors.
struct calc_target_weighted_pred_ctxt {
  const MACROBLOCK *x;   // provides the wsrc_buf/mask_buf being accumulated
  const uint8_t *tmp;    // neighbour prediction buffer (above or left)
  int tmp_stride;        // row stride of 'tmp'
  int overlap;           // OBMC overlap extent, in pixels
};
10998
10999static INLINE void calc_target_weighted_pred_above(MACROBLOCKD *xd,
11000 int rel_mi_col,
11001 uint8_t nb_mi_width,
11002 MODE_INFO *nb_mi,
11003 void *fun_ctxt) {
11004 (void)nb_mi;
11005
11006 struct calc_target_weighted_pred_ctxt *ctxt =
11007 (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
11008
11009#if CONFIG_HIGHBITDEPTH
11010 const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
11011#else
11012 const int is_hbd = 0;
11013#endif // CONFIG_HIGHBITDEPTH
11014
11015 const int bw = xd->n8_w << MI_SIZE_LOG2;
11016 const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
11017
11018 int32_t *wsrc = ctxt->x->wsrc_buf + (rel_mi_col * MI_SIZE);
11019 int32_t *mask = ctxt->x->mask_buf + (rel_mi_col * MI_SIZE);
11020 const uint8_t *tmp = ctxt->tmp + rel_mi_col * MI_SIZE;
11021
11022 if (!is_hbd) {
11023 for (int row = 0; row < ctxt->overlap; ++row) {
11024 const uint8_t m0 = mask1d[row];
11025 const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
11026 for (int col = 0; col < nb_mi_width * MI_SIZE; ++col) {
11027 wsrc[col] = m1 * tmp[col];
11028 mask[col] = m0;
11029 }
11030 wsrc += bw;
11031 mask += bw;
11032 tmp += ctxt->tmp_stride;
11033 }
11034#if CONFIG_HIGHBITDEPTH
11035 } else {
11036 const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
11037
11038 for (int row = 0; row < ctxt->overlap; ++row) {
11039 const uint8_t m0 = mask1d[row];
11040 const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
11041 for (int col = 0; col < nb_mi_width * MI_SIZE; ++col) {
11042 wsrc[col] = m1 * tmp16[col];
11043 mask[col] = m0;
11044 }
11045 wsrc += bw;
11046 mask += bw;
11047 tmp16 += ctxt->tmp_stride;
11048 }
11049#endif // CONFIG_HIGHBITDEPTH
11050 }
11051}
11052
11053static INLINE void calc_target_weighted_pred_left(MACROBLOCKD *xd,
11054 int rel_mi_row,
11055 uint8_t nb_mi_height,
11056 MODE_INFO *nb_mi,
11057 void *fun_ctxt) {
11058 (void)nb_mi;
11059
11060 struct calc_target_weighted_pred_ctxt *ctxt =
11061 (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
11062
11063#if CONFIG_HIGHBITDEPTH
11064 const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
11065#else
11066 const int is_hbd = 0;
11067#endif // CONFIG_HIGHBITDEPTH
11068
11069 const int bw = xd->n8_w << MI_SIZE_LOG2;
11070 const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
11071
11072 int32_t *wsrc = ctxt->x->wsrc_buf + (rel_mi_row * MI_SIZE * bw);
11073 int32_t *mask = ctxt->x->mask_buf + (rel_mi_row * MI_SIZE * bw);
11074 const uint8_t *tmp = ctxt->tmp + (rel_mi_row * MI_SIZE * ctxt->tmp_stride);
11075
11076 if (!is_hbd) {
11077 for (int row = 0; row < nb_mi_height * MI_SIZE; ++row) {
11078 for (int col = 0; col < ctxt->overlap; ++col) {
11079 const uint8_t m0 = mask1d[col];
11080 const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
11081 wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
11082 (tmp[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
11083 mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
11084 }
11085 wsrc += bw;
11086 mask += bw;
11087 tmp += ctxt->tmp_stride;
11088 }
11089#if CONFIG_HIGHBITDEPTH
11090 } else {
11091 const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
11092
11093 for (int row = 0; row < nb_mi_height * MI_SIZE; ++row) {
11094 for (int col = 0; col < ctxt->overlap; ++col) {
11095 const uint8_t m0 = mask1d[col];
11096 const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
11097 wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
11098 (tmp16[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
11099 mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
11100 }
11101 wsrc += bw;
11102 mask += bw;
11103 tmp16 += ctxt->tmp_stride;
11104 }
11105#endif // CONFIG_HIGHBITDEPTH
11106 }
11107}
11108
Yaowu Xuf883b422016-08-30 14:01:10 -070011109// This function has a structure similar to av1_build_obmc_inter_prediction
Yaowu Xuc27fc142016-08-22 16:08:15 -070011110//
11111// The OBMC predictor is computed as:
11112//
11113// PObmc(x,y) =
Yaowu Xuf883b422016-08-30 14:01:10 -070011114// AOM_BLEND_A64(Mh(x),
11115// AOM_BLEND_A64(Mv(y), P(x,y), PAbove(x,y)),
Yaowu Xuc27fc142016-08-22 16:08:15 -070011116// PLeft(x, y))
11117//
Yaowu Xuf883b422016-08-30 14:01:10 -070011118// Scaling up by AOM_BLEND_A64_MAX_ALPHA ** 2 and omitting the intermediate
Yaowu Xuc27fc142016-08-22 16:08:15 -070011119// rounding, this can be written as:
11120//
Yaowu Xuf883b422016-08-30 14:01:10 -070011121// AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * Pobmc(x,y) =
Yaowu Xuc27fc142016-08-22 16:08:15 -070011122// Mh(x) * Mv(y) * P(x,y) +
11123// Mh(x) * Cv(y) * Pabove(x,y) +
Yaowu Xuf883b422016-08-30 14:01:10 -070011124// AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
Yaowu Xuc27fc142016-08-22 16:08:15 -070011125//
11126// Where :
11127//
Yaowu Xuf883b422016-08-30 14:01:10 -070011128// Cv(y) = AOM_BLEND_A64_MAX_ALPHA - Mv(y)
11129// Ch(y) = AOM_BLEND_A64_MAX_ALPHA - Mh(y)
Yaowu Xuc27fc142016-08-22 16:08:15 -070011130//
11131// This function computes 'wsrc' and 'mask' as:
11132//
11133// wsrc(x, y) =
Yaowu Xuf883b422016-08-30 14:01:10 -070011134// AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * src(x, y) -
//    Mh(x) * Cv(y) * Pabove(x,y) -
Yaowu Xuf883b422016-08-30 14:01:10 -070011136// AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
Yaowu Xuc27fc142016-08-22 16:08:15 -070011137//
11138// mask(x, y) = Mh(x) * Mv(y)
11139//
11140// These can then be used to efficiently approximate the error for any
11141// predictor P in the context of the provided neighbouring predictors by
11142// computing:
11143//
11144// error(x, y) =
Yaowu Xuf883b422016-08-30 14:01:10 -070011145// wsrc(x, y) - mask(x, y) * P(x, y) / (AOM_BLEND_A64_MAX_ALPHA ** 2)
Yaowu Xuc27fc142016-08-22 16:08:15 -070011146//
// Builds the weighted-source ('wsrc') and mask buffers described in the
// derivation above, so that candidate OBMC predictors can be scored against
// the source without re-blending the neighbours each time.
//
// 'above'/'left' are the neighbours' luma predictions with row strides
// 'above_stride'/'left_stride'. Results go to x->wsrc_buf and x->mask_buf
// (bw*bh entries each, at AOM_BLEND_A64_MAX_ALPHA^2 scale).
static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x,
                                      const MACROBLOCKD *xd, int mi_row,
                                      int mi_col, const uint8_t *above,
                                      int above_stride, const uint8_t *left,
                                      int left_stride) {
  const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
  const int bw = xd->n8_w << MI_SIZE_LOG2;
  const int bh = xd->n8_h << MI_SIZE_LOG2;
  int32_t *mask_buf = x->mask_buf;
  int32_t *wsrc_buf = x->wsrc_buf;

  const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA;
#if CONFIG_HIGHBITDEPTH
  const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
#else
  const int is_hbd = 0;
#endif  // CONFIG_HIGHBITDEPTH

  // plane 0 should not be subsampled
  assert(xd->plane[0].subsampling_x == 0);
  assert(xd->plane[0].subsampling_y == 0);

  // Start from zero weighted source and full-weight mask; pixels not covered
  // by any neighbour keep these values.
  av1_zero_array(wsrc_buf, bw * bh);
  for (int i = 0; i < bw * bh; ++i) mask_buf[i] = AOM_BLEND_A64_MAX_ALPHA;

  // handle above row
  if (xd->up_available) {
    // Overlap is half the block height, capped at the 64x64 size.
    const int overlap =
        AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
    struct calc_target_weighted_pred_ctxt ctxt = { x, above, above_stride,
                                                   overlap };
    foreach_overlappable_nb_above(cm, (MACROBLOCKD *)xd, mi_col,
                                  max_neighbor_obmc[b_width_log2_lookup[bsize]],
                                  calc_target_weighted_pred_above, &ctxt);
  }

  // Promote the above-pass contribution (and the mask) by one MAX_ALPHA
  // factor before the left pass folds in its own weighting.
  for (int i = 0; i < bw * bh; ++i) {
    wsrc_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
    mask_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
  }

  // handle left column
  if (xd->left_available) {
    // Overlap is half the block width, capped at the 64x64 size.
    const int overlap =
        AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
    struct calc_target_weighted_pred_ctxt ctxt = { x, left, left_stride,
                                                   overlap };
    foreach_overlappable_nb_left(cm, (MACROBLOCKD *)xd, mi_row,
                                 max_neighbor_obmc[b_height_log2_lookup[bsize]],
                                 calc_target_weighted_pred_left, &ctxt);
  }

  // Finally flip the accumulated neighbour term into
  // wsrc = src * MAX_ALPHA^2 - (neighbour contributions).
  if (!is_hbd) {
    const uint8_t *src = x->plane[0].src.buf;

    for (int row = 0; row < bh; ++row) {
      for (int col = 0; col < bw; ++col) {
        wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
      }
      wsrc_buf += bw;
      src += x->plane[0].src.stride;
    }
#if CONFIG_HIGHBITDEPTH
  } else {
    const uint16_t *src = CONVERT_TO_SHORTPTR(x->plane[0].src.buf);

    for (int row = 0; row < bh; ++row) {
      for (int col = 0; col < bw; ++col) {
        wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
      }
      wsrc_buf += bw;
      src += x->plane[0].src.stride;
    }
#endif  // CONFIG_HIGHBITDEPTH
  }
}