/*
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 *
 */
12
13#include <assert.h>
14#include <limits.h>
15#include <math.h>
16#include <stdio.h>
17
18#include "config/aom_dsp_rtcd.h"
19#include "config/av1_rtcd.h"
20
21#include "aom_dsp/aom_dsp_common.h"
22#include "aom_dsp/blend.h"
23#include "aom_mem/aom_mem.h"
24#include "aom_ports/aom_timer.h"
25#include "aom_ports/mem.h"
26#include "aom_ports/system_state.h"
27
28#include "av1/common/mvref_common.h"
29#include "av1/common/pred_common.h"
30#include "av1/common/reconinter.h"
kyslov82449d12019-05-02 13:36:50 -070031#include "av1/common/reconintra.h"
kyslove3c05a82019-03-28 11:06:09 -070032
33#include "av1/encoder/encodemv.h"
34#include "av1/encoder/rdopt.h"
35#include "av1/encoder/reconinter_enc.h"
36
// Compile-time switch. When non-zero, model_rd_with_curvfit() is compiled in
// and model_rd_for_sb_y() uses it to estimate rate and distortion; when zero,
// model_rd_for_sb_y() reports INT_MAX placeholders instead.
#define _TMP_USE_CURVFIT_ 0

// Defined in another translation unit.
// NOTE(review): not referenced in the visible part of this file; presumably a
// debug counter of mode-picking calls — confirm against its definition.
extern int g_pick_inter_mode_cnt;
// Descriptor for one prediction sample buffer.
typedef struct {
  uint8_t *data;  // first sample of the buffer
  int stride;     // stride that goes with `data`
  int in_use;     // presumably non-zero while the buffer is claimed — TODO
                  // confirm against the code that hands buffers out
} PRED_BUFFER;
45
// Record of the best mode decision found so far during mode picking.
// init_best_pickmode() assigns the starting value of every field.
typedef struct {
  PRED_BUFFER *best_pred;                    // NULL until a buffer is recorded
  PREDICTION_MODE best_mode;                 // starts as NEARESTMV
  TX_SIZE best_tx_size;                      // starts as TX_8X8
  TX_SIZE best_intra_tx_size;                // starts as TX_8X8
  MV_REFERENCE_FRAME best_ref_frame;         // starts as LAST_FRAME
  MV_REFERENCE_FRAME best_second_ref_frame;  // starts as NONE_FRAME
  uint8_t best_mode_skip_txfm;               // starts as 0
  InterpFilters best_pred_filter;            // starts as EIGHTTAP_REGULAR
                                             // broadcast to both directions
} BEST_PICKMODE;
56
// One (reference frame, inter prediction mode) candidate pair; see
// ref_mode_set[] for the list of pairs built from it.
typedef struct {
  MV_REFERENCE_FRAME ref_frame;
  PREDICTION_MODE pred_mode;
} REF_MODE;
61
// Number of entries in ref_mode_set[].
#define RT_INTER_MODES 9
// Candidate single-reference inter modes: NEARESTMV, NEARMV and NEWMV for
// each of LAST_FRAME, GOLDEN_FRAME and ALTREF_FRAME, grouped by reference.
static const REF_MODE ref_mode_set[RT_INTER_MODES] = {
  { LAST_FRAME, NEARESTMV },   { LAST_FRAME, NEARMV },
  { LAST_FRAME, NEWMV },       { GOLDEN_FRAME, NEARESTMV },
  { GOLDEN_FRAME, NEARMV },    { GOLDEN_FRAME, NEWMV },
  { ALTREF_FRAME, NEARESTMV }, { ALTREF_FRAME, NEARMV },
  { ALTREF_FRAME, NEWMV }
};
70
71static const THR_MODES mode_idx[REF_FRAMES][4] = {
72 { THR_DC, THR_V_PRED, THR_H_PRED, THR_SMOOTH },
73 { THR_NEARESTMV, THR_NEARMV, THR_GLOBALMV, THR_NEWMV },
74 { THR_NEARESTG, THR_NEARG, THR_GLOBALMV, THR_NEWG },
75 { THR_NEARESTA, THR_NEARA, THR_GLOBALMV, THR_NEWA },
76};
77
// Intra prediction modes listed in the same order as the intra offsets
// returned by mode_offset() (DC_PRED = 0 ... SMOOTH_PRED = 3).
static const PREDICTION_MODE intra_mode_list[] = { DC_PRED, V_PRED, H_PRED,
                                                   SMOOTH_PRED };
80
81static INLINE int mode_offset(const PREDICTION_MODE mode) {
82 if (mode >= NEARESTMV) {
83 return INTER_OFFSET(mode);
84 } else {
85 switch (mode) {
86 case DC_PRED: return 0;
87 case V_PRED: return 1;
88 case H_PRED: return 2;
89 case SMOOTH_PRED: return 3;
90 default: assert(0); return -1;
91 }
92 }
93}
94
// A prediction mode together with the (up to two) reference frames it uses.
typedef struct {
  PREDICTION_MODE mode;
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;
99
// Bit masks over inter prediction modes; each mode contributes the bit at the
// position given by its PREDICTION_MODE enum value.
enum {
  // INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << NEWMV),
  INTER_NEAREST = (1 << NEARESTMV),
  INTER_NEAREST_NEW = (1 << NEARESTMV) | (1 << NEWMV),
  INTER_NEAREST_NEAR = (1 << NEARESTMV) | (1 << NEARMV),
  INTER_NEAR_NEW = (1 << NEARMV) | (1 << NEWMV),
};
107
108static INLINE void init_best_pickmode(BEST_PICKMODE *bp) {
109 bp->best_mode = NEARESTMV;
110 bp->best_ref_frame = LAST_FRAME;
kyslovbab94fd2019-05-03 11:25:17 -0700111 bp->best_tx_size = TX_8X8;
112 bp->best_intra_tx_size = TX_8X8;
Sachin Kumar Garg22258532019-06-18 16:45:06 +0530113 bp->best_pred_filter = av1_broadcast_interp_filter(EIGHTTAP_REGULAR);
kyslove3c05a82019-03-28 11:06:09 -0700114 bp->best_mode_skip_txfm = 0;
115 bp->best_second_ref_frame = NONE_FRAME;
116 bp->best_pred = NULL;
117}
118
119static int combined_motion_search(AV1_COMP *cpi, MACROBLOCK *x,
120 BLOCK_SIZE bsize, int mi_row, int mi_col,
121 int_mv *tmp_mv, int *rate_mv,
122 int64_t best_rd_sofar, int use_base_mv) {
123 MACROBLOCKD *xd = &x->e_mbd;
124 const AV1_COMMON *cm = &cpi->common;
125 const int num_planes = av1_num_planes(cm);
126 MB_MODE_INFO *mi = xd->mi[0];
127 struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0, 0, 0, 0 } };
kyslov82449d12019-05-02 13:36:50 -0700128 int step_param = cpi->mv_step_param;
kyslove3c05a82019-03-28 11:06:09 -0700129 const int sadpb = x->sadperbit16;
130 MV mvp_full;
131 const int ref = mi->ref_frame[0];
132 const MV ref_mv = av1_get_ref_mv(x, mi->ref_mv_idx).as_mv;
133 MV center_mv;
134 int dis;
135 const MvLimits tmp_mv_limits = x->mv_limits;
136 int rv = 0;
137 int cost_list[5];
138 int search_subpel = 1;
139 const YV12_BUFFER_CONFIG *scaled_ref_frame =
140 av1_get_scaled_ref_frame(cpi, ref);
141
kyslove3c05a82019-03-28 11:06:09 -0700142 if (scaled_ref_frame) {
143 int i;
144 // Swap out the reference frame for a version that's been scaled to
145 // match the resolution of the current frame, allowing the existing
146 // motion search code to be used without additional modifications.
147 for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0];
148 av1_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL,
149 num_planes);
150 }
151 av1_set_mv_search_range(&x->mv_limits, &ref_mv);
152
153 mvp_full = ref_mv;
154
155 mvp_full.col >>= 3;
156 mvp_full.row >>= 3;
157
158 if (!use_base_mv)
159 center_mv = ref_mv;
160 else
161 center_mv = tmp_mv->as_mv;
162
163 av1_full_pixel_search(
164 cpi, x, bsize, &mvp_full, step_param, cpi->sf.mv.search_method, 0, sadpb,
165 cond_cost_list(cpi, cost_list), &center_mv, INT_MAX, 0,
166 (MI_SIZE * mi_col), (MI_SIZE * mi_row), 0, &cpi->ss_cfg[SS_CFG_SRC]);
167
168 x->mv_limits = tmp_mv_limits;
169 *tmp_mv = x->best_mv;
170 // calculate the bit cost on motion vector
171 mvp_full.row = tmp_mv->as_mv.row * 8;
172 mvp_full.col = tmp_mv->as_mv.col * 8;
173
174 *rate_mv = av1_mv_bit_cost(&mvp_full, &ref_mv, x->nmv_vec_cost,
175 x->mv_cost_stack, MV_COST_WEIGHT);
176
177 // TODO(kyslov) Account for Rate Mode!
178 rv = !(RDCOST(x->rdmult, (*rate_mv), 0) > best_rd_sofar);
179
180 if (rv && search_subpel) {
181 SUBPEL_FORCE_STOP subpel_force_stop = cpi->sf.mv.subpel_force_stop;
182 cpi->find_fractional_mv_step(
183 x, cm, mi_row, mi_col, &ref_mv, cpi->common.allow_high_precision_mv,
184 x->errorperbit, &cpi->fn_ptr[bsize], subpel_force_stop,
185 cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
186 x->nmv_vec_cost, x->mv_cost_stack, &dis, &x->pred_sse[ref], NULL, NULL,
187 0, 0, 0, 0, 0, 1);
188 *tmp_mv = x->best_mv;
189 *rate_mv = av1_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->nmv_vec_cost,
190 x->mv_cost_stack, MV_COST_WEIGHT);
191 }
192
193 if (scaled_ref_frame) {
194 int i;
195 for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i];
196 }
197 return rv;
198}
199
200static int search_new_mv(AV1_COMP *cpi, MACROBLOCK *x,
201 int_mv frame_mv[][REF_FRAMES],
202 MV_REFERENCE_FRAME ref_frame, int gf_temporal_ref,
203 BLOCK_SIZE bsize, int mi_row, int mi_col,
204 int best_pred_sad, int *rate_mv,
205 int64_t best_sse_sofar, RD_STATS *best_rdc) {
206 MACROBLOCKD *const xd = &x->e_mbd;
207 MB_MODE_INFO *const mi = xd->mi[0];
208 AV1_COMMON *cm = &cpi->common;
209 (void)best_sse_sofar;
210 if (ref_frame > LAST_FRAME && gf_temporal_ref &&
211 cpi->oxcf.rc_mode == AOM_CBR) {
212 int tmp_sad;
213 int dis;
214 int cost_list[5] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX };
215
216 if (bsize < BLOCK_16X16) return -1;
217
218 tmp_sad = av1_int_pro_motion_estimation(
219 cpi, x, bsize, mi_row, mi_col,
220 &x->mbmi_ext->ref_mv_stack[ref_frame][0].this_mv.as_mv);
221
222 if (tmp_sad > x->pred_mv_sad[LAST_FRAME]) return -1;
223 if (tmp_sad + (num_pels_log2_lookup[bsize] << 4) > best_pred_sad) return -1;
224
225 frame_mv[NEWMV][ref_frame].as_int = mi->mv[0].as_int;
226 MV ref_mv = av1_get_ref_mv(x, 0).as_mv;
227
228 *rate_mv =
229 av1_mv_bit_cost(&frame_mv[NEWMV][ref_frame].as_mv, &ref_mv,
230 x->nmv_vec_cost, x->mv_cost_stack, MV_COST_WEIGHT);
231 frame_mv[NEWMV][ref_frame].as_mv.row >>= 3;
232 frame_mv[NEWMV][ref_frame].as_mv.col >>= 3;
233
234 cpi->find_fractional_mv_step(
235 x, cm, mi_row, mi_col, &ref_mv, cm->allow_high_precision_mv,
236 x->errorperbit, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
237 cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
238 x->nmv_vec_cost, x->mv_cost_stack, &dis, &x->pred_sse[ref_frame], NULL,
239 NULL, 0, 0, 0, 0, 0, 1);
240 } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
241 &frame_mv[NEWMV][ref_frame], rate_mv,
242 best_rdc->rdcost, 0)) {
243 return -1;
244 }
245
246 return 0;
247}
248
249static INLINE void find_predictors(
250 AV1_COMP *cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
251 int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES], int const_motion[REF_FRAMES],
252 int *ref_frame_skip_mask, const int flag_list[4], TileDataEnc *tile_data,
253 int mi_row, int mi_col, struct buf_2d yv12_mb[4][MAX_MB_PLANE],
254 BLOCK_SIZE bsize, int force_skip_low_temp_var, int comp_pred_allowed) {
255 AV1_COMMON *const cm = &cpi->common;
256 MACROBLOCKD *const xd = &x->e_mbd;
257 MB_MODE_INFO *const mbmi = xd->mi[0];
258 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
259 const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, ref_frame);
260 const int num_planes = av1_num_planes(cm);
261 (void)tile_data;
262 (void)const_motion;
263 (void)comp_pred_allowed;
264
265 x->pred_mv_sad[ref_frame] = INT_MAX;
266 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
267 // TODO(kyslov) this needs various further optimizations. to be continued..
268 if ((cpi->ref_frame_flags & flag_list[ref_frame]) && (yv12 != NULL)) {
269 const struct scale_factors *const sf =
270 get_ref_scale_factors_const(cm, ref_frame);
271 av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf,
272 num_planes);
273 av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
Remyab998d072019-04-11 16:44:25 +0530274 mbmi_ext->ref_mv_stack, mbmi_ext->weight, NULL,
275 mbmi_ext->global_mvs, mi_row, mi_col,
276 mbmi_ext->mode_context);
kyslove3c05a82019-03-28 11:06:09 -0700277 av1_find_best_ref_mvs_from_stack(cm->allow_high_precision_mv, mbmi_ext,
278 ref_frame, &frame_mv[NEARESTMV][ref_frame],
279 &frame_mv[NEARMV][ref_frame], 0);
280 // Early exit for golden frame if force_skip_low_temp_var is set.
281 if (!av1_is_scaled(sf) && bsize >= BLOCK_8X8 &&
282 !(force_skip_low_temp_var && ref_frame == GOLDEN_FRAME)) {
283 av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, ref_frame,
284 bsize);
285 }
286 } else {
287 *ref_frame_skip_mask |= (1 << ref_frame);
288 }
289 av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col);
290 mbmi->num_proj_ref = 1;
291}
292
Jerome Jiang906a94f2019-05-01 19:02:58 -0700293static void estimate_single_ref_frame_costs(const AV1_COMMON *cm,
294 const MACROBLOCKD *xd,
295 const MACROBLOCK *x, int segment_id,
296 unsigned int *ref_costs_single) {
kyslove3c05a82019-03-28 11:06:09 -0700297 int seg_ref_active =
298 segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
299 if (seg_ref_active) {
300 memset(ref_costs_single, 0, REF_FRAMES * sizeof(*ref_costs_single));
kyslove3c05a82019-03-28 11:06:09 -0700301 } else {
302 int intra_inter_ctx = av1_get_intra_inter_context(xd);
303 ref_costs_single[INTRA_FRAME] = x->intra_inter_cost[intra_inter_ctx][0];
304 unsigned int base_cost = x->intra_inter_cost[intra_inter_ctx][1];
305
306 for (int i = LAST_FRAME; i <= ALTREF_FRAME; ++i)
307 ref_costs_single[i] = base_cost;
308
309 const int ctx_p1 = av1_get_pred_context_single_ref_p1(xd);
310 const int ctx_p2 = av1_get_pred_context_single_ref_p2(xd);
311 const int ctx_p3 = av1_get_pred_context_single_ref_p3(xd);
312 const int ctx_p4 = av1_get_pred_context_single_ref_p4(xd);
313 const int ctx_p5 = av1_get_pred_context_single_ref_p5(xd);
314 const int ctx_p6 = av1_get_pred_context_single_ref_p6(xd);
315
316 // Determine cost of a single ref frame, where frame types are represented
317 // by a tree:
318 // Level 0: add cost whether this ref is a forward or backward ref
319 ref_costs_single[LAST_FRAME] += x->single_ref_cost[ctx_p1][0][0];
320 ref_costs_single[LAST2_FRAME] += x->single_ref_cost[ctx_p1][0][0];
321 ref_costs_single[LAST3_FRAME] += x->single_ref_cost[ctx_p1][0][0];
322 ref_costs_single[GOLDEN_FRAME] += x->single_ref_cost[ctx_p1][0][0];
323 ref_costs_single[BWDREF_FRAME] += x->single_ref_cost[ctx_p1][0][1];
324 ref_costs_single[ALTREF2_FRAME] += x->single_ref_cost[ctx_p1][0][1];
325 ref_costs_single[ALTREF_FRAME] += x->single_ref_cost[ctx_p1][0][1];
326
327 // Level 1: if this ref is forward ref,
328 // add cost whether it is last/last2 or last3/golden
329 ref_costs_single[LAST_FRAME] += x->single_ref_cost[ctx_p3][2][0];
330 ref_costs_single[LAST2_FRAME] += x->single_ref_cost[ctx_p3][2][0];
331 ref_costs_single[LAST3_FRAME] += x->single_ref_cost[ctx_p3][2][1];
332 ref_costs_single[GOLDEN_FRAME] += x->single_ref_cost[ctx_p3][2][1];
333
334 // Level 1: if this ref is backward ref
335 // then add cost whether this ref is altref or backward ref
336 ref_costs_single[BWDREF_FRAME] += x->single_ref_cost[ctx_p2][1][0];
337 ref_costs_single[ALTREF2_FRAME] += x->single_ref_cost[ctx_p2][1][0];
338 ref_costs_single[ALTREF_FRAME] += x->single_ref_cost[ctx_p2][1][1];
339
340 // Level 2: further add cost whether this ref is last or last2
341 ref_costs_single[LAST_FRAME] += x->single_ref_cost[ctx_p4][3][0];
342 ref_costs_single[LAST2_FRAME] += x->single_ref_cost[ctx_p4][3][1];
343
344 // Level 2: last3 or golden
345 ref_costs_single[LAST3_FRAME] += x->single_ref_cost[ctx_p5][4][0];
346 ref_costs_single[GOLDEN_FRAME] += x->single_ref_cost[ctx_p5][4][1];
347
348 // Level 2: bwdref or altref2
349 ref_costs_single[BWDREF_FRAME] += x->single_ref_cost[ctx_p6][5][0];
350 ref_costs_single[ALTREF2_FRAME] += x->single_ref_cost[ctx_p6][5][1];
kyslove3c05a82019-03-28 11:06:09 -0700351 }
352}
353
Jerome Jiang037f5d22019-05-02 19:32:42 -0700354static void estimate_comp_ref_frame_costs(
355 const AV1_COMMON *cm, const MACROBLOCKD *xd, const MACROBLOCK *x,
356 int segment_id, unsigned int (*ref_costs_comp)[REF_FRAMES]) {
357 if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
358 for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame)
359 memset(ref_costs_comp[ref_frame], 0,
360 REF_FRAMES * sizeof((*ref_costs_comp)[0]));
361 } else {
362 int intra_inter_ctx = av1_get_intra_inter_context(xd);
363 unsigned int base_cost = x->intra_inter_cost[intra_inter_ctx][1];
364
365 if (cm->current_frame.reference_mode != SINGLE_REFERENCE) {
366 // Similar to single ref, determine cost of compound ref frames.
367 // cost_compound_refs = cost_first_ref + cost_second_ref
368 const int bwdref_comp_ctx_p = av1_get_pred_context_comp_bwdref_p(xd);
369 const int bwdref_comp_ctx_p1 = av1_get_pred_context_comp_bwdref_p1(xd);
370 const int ref_comp_ctx_p = av1_get_pred_context_comp_ref_p(xd);
371 const int ref_comp_ctx_p1 = av1_get_pred_context_comp_ref_p1(xd);
372 const int ref_comp_ctx_p2 = av1_get_pred_context_comp_ref_p2(xd);
373
374 const int comp_ref_type_ctx = av1_get_comp_reference_type_context(xd);
375 unsigned int ref_bicomp_costs[REF_FRAMES] = { 0 };
376
377 ref_bicomp_costs[LAST_FRAME] = ref_bicomp_costs[LAST2_FRAME] =
378 ref_bicomp_costs[LAST3_FRAME] = ref_bicomp_costs[GOLDEN_FRAME] =
379 base_cost + x->comp_ref_type_cost[comp_ref_type_ctx][1];
380 ref_bicomp_costs[BWDREF_FRAME] = ref_bicomp_costs[ALTREF2_FRAME] = 0;
381 ref_bicomp_costs[ALTREF_FRAME] = 0;
382
383 // cost of first ref frame
384 ref_bicomp_costs[LAST_FRAME] += x->comp_ref_cost[ref_comp_ctx_p][0][0];
385 ref_bicomp_costs[LAST2_FRAME] += x->comp_ref_cost[ref_comp_ctx_p][0][0];
386 ref_bicomp_costs[LAST3_FRAME] += x->comp_ref_cost[ref_comp_ctx_p][0][1];
387 ref_bicomp_costs[GOLDEN_FRAME] += x->comp_ref_cost[ref_comp_ctx_p][0][1];
388
389 ref_bicomp_costs[LAST_FRAME] += x->comp_ref_cost[ref_comp_ctx_p1][1][0];
390 ref_bicomp_costs[LAST2_FRAME] += x->comp_ref_cost[ref_comp_ctx_p1][1][1];
391
392 ref_bicomp_costs[LAST3_FRAME] += x->comp_ref_cost[ref_comp_ctx_p2][2][0];
393 ref_bicomp_costs[GOLDEN_FRAME] += x->comp_ref_cost[ref_comp_ctx_p2][2][1];
394
395 // cost of second ref frame
396 ref_bicomp_costs[BWDREF_FRAME] +=
397 x->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
398 ref_bicomp_costs[ALTREF2_FRAME] +=
399 x->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
400 ref_bicomp_costs[ALTREF_FRAME] +=
401 x->comp_bwdref_cost[bwdref_comp_ctx_p][0][1];
402
403 ref_bicomp_costs[BWDREF_FRAME] +=
404 x->comp_bwdref_cost[bwdref_comp_ctx_p1][1][0];
405 ref_bicomp_costs[ALTREF2_FRAME] +=
406 x->comp_bwdref_cost[bwdref_comp_ctx_p1][1][1];
407
408 // cost: if one ref frame is forward ref, the other ref is backward ref
409 for (int ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
410 for (int ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1) {
411 ref_costs_comp[ref0][ref1] =
412 ref_bicomp_costs[ref0] + ref_bicomp_costs[ref1];
413 }
414 }
415
416 // cost: if both ref frames are the same side.
417 const int uni_comp_ref_ctx_p = av1_get_pred_context_uni_comp_ref_p(xd);
418 const int uni_comp_ref_ctx_p1 = av1_get_pred_context_uni_comp_ref_p1(xd);
419 const int uni_comp_ref_ctx_p2 = av1_get_pred_context_uni_comp_ref_p2(xd);
420 ref_costs_comp[LAST_FRAME][LAST2_FRAME] =
421 base_cost + x->comp_ref_type_cost[comp_ref_type_ctx][0] +
422 x->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
423 x->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][0];
424 ref_costs_comp[LAST_FRAME][LAST3_FRAME] =
425 base_cost + x->comp_ref_type_cost[comp_ref_type_ctx][0] +
426 x->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
427 x->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
428 x->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][0];
429 ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] =
430 base_cost + x->comp_ref_type_cost[comp_ref_type_ctx][0] +
431 x->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
432 x->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
433 x->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][1];
434 ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] =
435 base_cost + x->comp_ref_type_cost[comp_ref_type_ctx][0] +
436 x->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][1];
437 } else {
438 for (int ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
439 for (int ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1)
440 ref_costs_comp[ref0][ref1] = 512;
441 }
442 ref_costs_comp[LAST_FRAME][LAST2_FRAME] = 512;
443 ref_costs_comp[LAST_FRAME][LAST3_FRAME] = 512;
444 ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = 512;
445 ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = 512;
446 }
447 }
448}
449
#if _TMP_USE_CURVFIT_
// Estimates rate and distortion for one plane from its SSE using the
// curve-fit model (av1_model_rd_curvfit).
//
// `sse` is the prediction error over `num_samples` samples. Either output
// pointer may be NULL. A zero SSE yields zero rate and distortion. If the
// modelled rate is zero, or coding is not cheaper in RD terms than skipping,
// the result is rate 0 with distortion `sse << 4`.
static void model_rd_with_curvfit(const AV1_COMP *const cpi,
                                  const MACROBLOCK *const x,
                                  BLOCK_SIZE plane_bsize, int plane,
                                  int64_t sse, int num_samples, int *rate,
                                  int64_t *dist) {
  (void)cpi;
  (void)plane_bsize;

  if (sse == 0) {
    if (rate) *rate = 0;
    if (dist) *dist = 0;
    return;
  }

  const MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const int dequant_shift = (is_cur_buf_hbd(xd)) ? xd->bd - 5 : 3;
  const int qstep = AOMMAX(p->dequant_QTX[1] >> dequant_shift, 1);

  aom_clear_system_state();
  const double sse_norm = (double)sse / num_samples;
  const double qstepsqr = (double)qstep * qstep;
  const double xqr = log2(sse_norm / qstepsqr);

  double rate_f, dist_by_sse_norm_f;
  av1_model_rd_curvfit(plane_bsize, sse_norm, xqr, &rate_f,
                       &dist_by_sse_norm_f);

  // Scale the per-sample model outputs to the whole block and round.
  const double dist_f = dist_by_sse_norm_f * sse_norm;
  int rate_i = (int)(AOMMAX(0.0, rate_f * num_samples) + 0.5);
  int64_t dist_i = (int64_t)(AOMMAX(0.0, dist_f * num_samples) + 0.5);
  aom_clear_system_state();

  // Check if skip is better
  const int64_t skip_dist = sse << 4;
  if (rate_i == 0 || RDCOST(x->rdmult, rate_i, dist_i) >=
                         RDCOST(x->rdmult, 0, skip_dist)) {
    rate_i = 0;
    dist_i = skip_dist;
  }

  if (rate) *rate = rate_i;
  if (dist) *dist = dist_i;
}
#endif
kyslove3c05a82019-03-28 11:06:09 -0700495
Fyodor Kyslov4dfdb5c2019-05-24 11:06:56 -0700496static TX_SIZE calculate_tx_size(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
497 MACROBLOCKD *const xd, unsigned int var,
498 unsigned int sse) {
499 TX_SIZE tx_size;
500 if (cpi->common.tx_mode == TX_MODE_SELECT) {
501 if (sse > (var << 2))
502 tx_size = AOMMIN(max_txsize_lookup[bsize],
503 tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
504 else
505 tx_size = TX_8X8;
506
507 if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
508 cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id))
509 tx_size = TX_8X8;
510 else if (tx_size > TX_16X16)
511 tx_size = TX_16X16;
512 } else {
513 tx_size = AOMMIN(max_txsize_lookup[bsize],
514 tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
515 }
516 if (bsize > BLOCK_32X32) tx_size = TX_16X16;
517 return AOMMIN(tx_size, TX_16X16);
518}
519
// log2 of the block width, in units of 4 samples, indexed by BLOCK_SIZE
// (callers in this file compute the sample width as 4 << value).
static const uint8_t b_width_log2_lookup[BLOCK_SIZES] = { 0, 0, 1, 1, 1, 2,
                                                          2, 2, 3, 3, 3, 4,
                                                          4, 4, 5, 5 };
// log2 of the block height, in units of 4 samples, indexed by BLOCK_SIZE.
static const uint8_t b_height_log2_lookup[BLOCK_SIZES] = { 0, 1, 0, 1, 2, 1,
                                                           2, 3, 2, 3, 4, 3,
                                                           4, 5, 4, 5 };
526
// Computes SSE and sum of differences between `src` and `ref` over a w x h
// area by visiting it in steps of `block_size` and calling aom_get8x8var() at
// each step.
//
// Per-step results are stored, in raster order, in sse8x8[], sum8x8[] and
// var8x8[] (variance = sse - sum^2 / 64); *sse and *sum receive the totals.
static void block_variance(const uint8_t *src, int src_stride,
                           const uint8_t *ref, int ref_stride, int w, int h,
                           unsigned int *sse, int *sum, int block_size,
                           uint32_t *sse8x8, int *sum8x8, uint32_t *var8x8) {
  int idx = 0;

  *sse = 0;
  *sum = 0;

  for (int row = 0; row < h; row += block_size) {
    const uint8_t *const src_row = src + src_stride * row;
    const uint8_t *const ref_row = ref + ref_stride * row;
    for (int col = 0; col < w; col += block_size, ++idx) {
      aom_get8x8var(src_row + col, src_stride, ref_row + col, ref_stride,
                    &sse8x8[idx], &sum8x8[idx]);
      *sse += sse8x8[idx];
      *sum += sum8x8[idx];

      // 64 samples per 8x8 block, hence the shift by 6.
      const int64_t sum_sq = (int64_t)sum8x8[idx] * sum8x8[idx];
      var8x8[idx] = sse8x8[idx] - (uint32_t)(sum_sq >> 6);
    }
  }
}
548
549static void calculate_variance(int bw, int bh, TX_SIZE tx_size,
550 unsigned int *sse_i, int *sum_i,
551 unsigned int *var_o, unsigned int *sse_o,
552 int *sum_o) {
553 const BLOCK_SIZE unit_size = txsize_to_bsize[tx_size];
554 const int nw = 1 << (bw - b_width_log2_lookup[unit_size]);
555 const int nh = 1 << (bh - b_height_log2_lookup[unit_size]);
556 int i, j, k = 0;
557
558 for (i = 0; i < nh; i += 2) {
559 for (j = 0; j < nw; j += 2) {
560 sse_o[k] = sse_i[i * nw + j] + sse_i[i * nw + j + 1] +
561 sse_i[(i + 1) * nw + j] + sse_i[(i + 1) * nw + j + 1];
562 sum_o[k] = sum_i[i * nw + j] + sum_i[i * nw + j + 1] +
563 sum_i[(i + 1) * nw + j] + sum_i[(i + 1) * nw + j + 1];
564 var_o[k] = sse_o[k] - (uint32_t)(((int64_t)sum_o[k] * sum_o[k]) >>
565 (b_width_log2_lookup[unit_size] +
566 b_height_log2_lookup[unit_size] + 6));
567 k++;
568 }
569 }
570}
571
// Returns the multiplier applied to the AC threshold. It is 1 unless the
// speed setting is at least 8 and the normalized sum is below 5; in that case
// it is 4 for frames up to 640x480 and 2 for larger frames.
static int ac_thr_factor(const int speed, const int width, const int height,
                         const int norm_sum) {
  if (speed < 8 || norm_sum >= 5) return 1;

  const int small_frame = width <= 640 && height <= 480;
  return small_frame ? 4 : 2;
}
583
584static void model_skip_for_sb_y_large(AV1_COMP *cpi, BLOCK_SIZE bsize,
585 MACROBLOCK *x, MACROBLOCKD *xd,
586 unsigned int *var_y, unsigned int *sse_y,
587 int *early_term) {
588 // Note our transform coeffs are 8 times an orthogonal transform.
589 // Hence quantizer step is also 8 times. To get effective quantizer
590 // we need to divide by 8 before sending to modeling function.
591 unsigned int sse;
592 struct macroblock_plane *const p = &x->plane[0];
593 struct macroblockd_plane *const pd = &xd->plane[0];
Ravi Chaudharyfb237132019-06-06 15:50:36 +0530594 const uint32_t dc_quant = p->dequant_QTX[0];
595 const uint32_t ac_quant = p->dequant_QTX[1];
Fyodor Kyslov7cfbaa72019-05-31 13:37:29 -0700596 const int64_t dc_thr = dc_quant * dc_quant >> 6;
597 int64_t ac_thr = ac_quant * ac_quant >> 6;
598 unsigned int var;
599 int sum;
600
601 const int bw = b_width_log2_lookup[bsize];
602 const int bh = b_height_log2_lookup[bsize];
603 const int num8x8 = 1 << (bw + bh - 2);
604 unsigned int sse8x8[256] = { 0 };
605 int sum8x8[256] = { 0 };
606 unsigned int var8x8[256] = { 0 };
607 TX_SIZE tx_size;
608 int k;
609 // Calculate variance for whole partition, and also save 8x8 blocks' variance
610 // to be used in following transform skipping test.
611 block_variance(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
612 4 << bw, 4 << bh, &sse, &sum, 8, sse8x8, sum8x8, var8x8);
613 var = sse - (unsigned int)(((int64_t)sum * sum) >> (bw + bh + 4));
614
615 *var_y = var;
616 *sse_y = sse;
617
618 ac_thr *= ac_thr_factor(cpi->oxcf.speed, cpi->common.width,
619 cpi->common.height, abs(sum) >> (bw + bh));
620
621 tx_size = calculate_tx_size(cpi, bsize, xd, var, sse);
622 // The code below for setting skip flag assumes tranform size of at least 8x8,
623 // so force this lower limit on transform.
624 if (tx_size < TX_8X8) tx_size = TX_8X8;
625 xd->mi[0]->tx_size = tx_size;
626
627 // Evaluate if the partition block is a skippable block in Y plane.
628 {
629 unsigned int sse16x16[64] = { 0 };
630 int sum16x16[64] = { 0 };
631 unsigned int var16x16[64] = { 0 };
632 const int num16x16 = num8x8 >> 2;
633
634 unsigned int sse32x32[16] = { 0 };
635 int sum32x32[16] = { 0 };
636 unsigned int var32x32[16] = { 0 };
637 const int num32x32 = num8x8 >> 4;
638
639 int ac_test = 1;
640 int dc_test = 1;
641 const int num = (tx_size == TX_8X8)
642 ? num8x8
643 : ((tx_size == TX_16X16) ? num16x16 : num32x32);
644 const unsigned int *sse_tx =
645 (tx_size == TX_8X8) ? sse8x8
646 : ((tx_size == TX_16X16) ? sse16x16 : sse32x32);
647 const unsigned int *var_tx =
648 (tx_size == TX_8X8) ? var8x8
649 : ((tx_size == TX_16X16) ? var16x16 : var32x32);
650
651 // Calculate variance if tx_size > TX_8X8
652 if (tx_size >= TX_16X16)
653 calculate_variance(bw, bh, TX_8X8, sse8x8, sum8x8, var16x16, sse16x16,
654 sum16x16);
655 if (tx_size == TX_32X32)
656 calculate_variance(bw, bh, TX_16X16, sse16x16, sum16x16, var32x32,
657 sse32x32, sum32x32);
658
659 // Skipping test
660 *early_term = 0;
661 for (k = 0; k < num; k++)
662 // Check if all ac coefficients can be quantized to zero.
663 if (!(var_tx[k] < ac_thr || var == 0)) {
664 ac_test = 0;
665 break;
666 }
667
668 for (k = 0; k < num; k++)
669 // Check if dc coefficient can be quantized to zero.
670 if (!(sse_tx[k] - var_tx[k] < dc_thr || sse == var)) {
671 dc_test = 0;
672 break;
673 }
674
675 if (ac_test && dc_test) {
676 *early_term = 1;
677 }
678 }
679}
680
Fyodor Kyslov4dfdb5c2019-05-24 11:06:56 -0700681static void model_rd_for_sb_y(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
682 MACROBLOCK *x, MACROBLOCKD *xd, int *out_rate_sum,
683 int64_t *out_dist_sum, int *skip_txfm_sb,
684 int64_t *skip_sse_sb, unsigned int *var_y,
685 unsigned int *sse_y) {
kyslove3c05a82019-03-28 11:06:09 -0700686 // Note our transform coeffs are 8 times an orthogonal transform.
687 // Hence quantizer step is also 8 times. To get effective quantizer
688 // we need to divide by 8 before sending to modeling function.
kyslove3c05a82019-03-28 11:06:09 -0700689 const int ref = xd->mi[0]->ref_frame[0];
690
kyslove7ff3b62019-04-05 14:15:03 -0700691 assert(bsize < BLOCK_SIZES_ALL);
kyslove3c05a82019-03-28 11:06:09 -0700692
Fyodor Kyslov4dfdb5c2019-05-24 11:06:56 -0700693 struct macroblock_plane *const p = &x->plane[0];
694 struct macroblockd_plane *const pd = &xd->plane[0];
695 unsigned int sse;
696 int rate;
697 int64_t dist;
kyslove3c05a82019-03-28 11:06:09 -0700698
Fyodor Kyslov4dfdb5c2019-05-24 11:06:56 -0700699 unsigned int var = cpi->fn_ptr[bsize].vf(p->src.buf, p->src.stride,
700 pd->dst.buf, pd->dst.stride, &sse);
701 xd->mi[0]->tx_size = calculate_tx_size(cpi, bsize, xd, var, sse);
kyslove3c05a82019-03-28 11:06:09 -0700702
Fyodor Kyslov1951ed52019-05-21 16:14:19 -0700703#if _TMP_USE_CURVFIT_
Fyodor Kyslov4dfdb5c2019-05-24 11:06:56 -0700704 model_rd_with_curvfit(cpi, x, plane_bsize, plane, sse, bw * bh, &rate, &dist);
Fyodor Kyslov1951ed52019-05-21 16:14:19 -0700705#else
Fyodor Kyslov4dfdb5c2019-05-24 11:06:56 -0700706 (void)cpi;
707 rate = INT_MAX; // this will be overwritten later with block_yrd
708 dist = INT_MAX;
Fyodor Kyslov1951ed52019-05-21 16:14:19 -0700709#endif
Fyodor Kyslov4dfdb5c2019-05-24 11:06:56 -0700710 *var_y = var;
711 *sse_y = sse;
712 x->pred_sse[ref] = (unsigned int)AOMMIN(sse, UINT_MAX);
kyslove3c05a82019-03-28 11:06:09 -0700713
Fyodor Kyslov4dfdb5c2019-05-24 11:06:56 -0700714 assert(rate >= 0);
kyslove3c05a82019-03-28 11:06:09 -0700715
Fyodor Kyslov4dfdb5c2019-05-24 11:06:56 -0700716 if (skip_txfm_sb) *skip_txfm_sb = rate == 0;
717 if (skip_sse_sb) *skip_sse_sb = sse << 4;
718 rate = AOMMIN(rate, INT_MAX);
719 *out_rate_sum = (int)rate;
720 *out_dist_sum = dist;
kyslove3c05a82019-03-28 11:06:09 -0700721}
722
723static void block_yrd(AV1_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col,
724 RD_STATS *this_rdc, int *skippable, int64_t *sse,
Fyodor Kyslov1951ed52019-05-21 16:14:19 -0700725 BLOCK_SIZE bsize, TX_SIZE tx_size) {
kyslove3c05a82019-03-28 11:06:09 -0700726 MACROBLOCKD *xd = &x->e_mbd;
727 const struct macroblockd_plane *pd = &xd->plane[0];
728 struct macroblock_plane *const p = &x->plane[0];
729 const int num_4x4_w = mi_size_wide[bsize];
730 const int num_4x4_h = mi_size_high[bsize];
731 const int step = 1 << (tx_size << 1);
732 const int block_step = (1 << tx_size);
Jerome Jiangc480d822019-05-01 18:30:37 -0700733 int block = 0;
kyslove3c05a82019-03-28 11:06:09 -0700734 const int max_blocks_wide =
735 num_4x4_w + (xd->mb_to_right_edge >= 0 ? 0 : xd->mb_to_right_edge >> 5);
736 const int max_blocks_high =
737 num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? 0 : xd->mb_to_bottom_edge >> 5);
738 int eob_cost = 0;
739 const int bw = 4 * num_4x4_w;
740 const int bh = 4 * num_4x4_h;
741
742 assert(tx_size > 0 && tx_size <= 4);
743
744 (void)mi_row;
745 (void)mi_col;
kyslove3c05a82019-03-28 11:06:09 -0700746 (void)cpi;
747
748 aom_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride,
749 pd->dst.buf, pd->dst.stride);
750 *skippable = 1;
751 // Keep track of the row and column of the blocks we use so that we know
752 // if we are in the unrestricted motion border.
Jerome Jiangc480d822019-05-01 18:30:37 -0700753 for (int r = 0; r < max_blocks_high; r += block_step) {
754 for (int c = 0; c < num_4x4_w; c += block_step) {
kyslove3c05a82019-03-28 11:06:09 -0700755 if (c < max_blocks_wide) {
756 const SCAN_ORDER *const scan_order = &av1_default_scan_orders[tx_size];
757 tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
758 tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
759 tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
760 uint16_t *const eob = &p->eobs[block];
761 const int diff_stride = bw;
762 const int16_t *src_diff;
763 src_diff = &p->src_diff[(r * diff_stride + c) << 2];
764
765 switch (tx_size) {
766 case TX_64X64:
767 assert(0); // Not implemented
768 break;
769 case TX_32X32:
770 aom_hadamard_32x32(src_diff, diff_stride, coeff);
771 av1_quantize_fp(coeff, 32 * 32, p->zbin_QTX, p->round_fp_QTX,
772 p->quant_fp_QTX, p->quant_shift_QTX, qcoeff,
773 dqcoeff, p->dequant_QTX, eob, scan_order->scan,
774 scan_order->iscan);
775 break;
776 case TX_16X16:
777 aom_hadamard_16x16(src_diff, diff_stride, coeff);
778 av1_quantize_fp(coeff, 16 * 16, p->zbin_QTX, p->round_fp_QTX,
779 p->quant_fp_QTX, p->quant_shift_QTX, qcoeff,
780 dqcoeff, p->dequant_QTX, eob, scan_order->scan,
781 scan_order->iscan);
782 break;
783 case TX_8X8:
784 aom_hadamard_8x8(src_diff, diff_stride, coeff);
785 av1_quantize_fp(coeff, 8 * 8, p->zbin_QTX, p->round_fp_QTX,
786 p->quant_fp_QTX, p->quant_shift_QTX, qcoeff,
787 dqcoeff, p->dequant_QTX, eob, scan_order->scan,
788 scan_order->iscan);
789 break;
790 default: assert(0); break;
791 }
792 *skippable &= (*eob == 0);
793 eob_cost += 1;
794 }
795 block += step;
796 }
797 }
Fyodor Kyslov1951ed52019-05-21 16:14:19 -0700798 this_rdc->skip = *skippable;
kyslove3c05a82019-03-28 11:06:09 -0700799 this_rdc->rate = 0;
800 if (*sse < INT64_MAX) {
801 *sse = (*sse << 6) >> 2;
802 if (*skippable) {
803 this_rdc->dist = *sse;
804 return;
805 }
806 }
807
808 block = 0;
809 this_rdc->dist = 0;
Jerome Jiangc480d822019-05-01 18:30:37 -0700810 for (int r = 0; r < max_blocks_high; r += block_step) {
811 for (int c = 0; c < num_4x4_w; c += block_step) {
kyslove3c05a82019-03-28 11:06:09 -0700812 if (c < max_blocks_wide) {
813 int64_t dummy;
814 tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
815 tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
816 tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
817 uint16_t *const eob = &p->eobs[block];
818
819 if (*eob == 1)
820 this_rdc->rate += (int)abs(qcoeff[0]);
821 else if (*eob > 1)
822 this_rdc->rate += aom_satd(qcoeff, step << 4);
823
824 this_rdc->dist +=
825 av1_block_error(coeff, dqcoeff, step << 4, &dummy) >> 2;
826 }
827 block += step;
828 }
829 }
830
831 // If skippable is set, rate gets clobbered later.
832 this_rdc->rate <<= (2 + AV1_PROB_COST_SHIFT);
833 this_rdc->rate += (eob_cost << AV1_PROB_COST_SHIFT);
834}
835
kyslov82449d12019-05-02 13:36:50 -0700836static INLINE void init_mbmi(MB_MODE_INFO *mbmi, PREDICTION_MODE pred_mode,
837 MV_REFERENCE_FRAME ref_frame0,
838 MV_REFERENCE_FRAME ref_frame1,
kyslove3c05a82019-03-28 11:06:09 -0700839 const AV1_COMMON *cm) {
840 PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
kyslove3c05a82019-03-28 11:06:09 -0700841 mbmi->ref_mv_idx = 0;
kyslov82449d12019-05-02 13:36:50 -0700842 mbmi->mode = pred_mode;
kyslove3c05a82019-03-28 11:06:09 -0700843 mbmi->uv_mode = UV_DC_PRED;
kyslov82449d12019-05-02 13:36:50 -0700844 mbmi->ref_frame[0] = ref_frame0;
845 mbmi->ref_frame[1] = ref_frame1;
kyslove3c05a82019-03-28 11:06:09 -0700846 pmi->palette_size[0] = 0;
847 pmi->palette_size[1] = 0;
848 mbmi->filter_intra_mode_info.use_filter_intra = 0;
849 mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
850 mbmi->motion_mode = SIMPLE_TRANSLATION;
851 mbmi->num_proj_ref = 1;
852 mbmi->interintra_mode = 0;
853 set_default_interp_filters(mbmi, cm->interp_filter);
854}
855
856static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
857 int mode_index) {
858 MACROBLOCKD *const xd = &x->e_mbd;
859
860 // Take a snapshot of the coding context so it can be
861 // restored if we decide to encode this way
Hui Sucff74442019-04-08 11:54:47 -0700862 ctx->rd_stats.skip = x->skip;
kyslove3c05a82019-03-28 11:06:09 -0700863 memcpy(ctx->blk_skip, x->blk_skip, sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);
864 ctx->skippable = x->skip;
865 ctx->best_mode_index = mode_index;
866 ctx->mic = *xd->mi[0];
867 ctx->mbmi_ext = *x->mbmi_ext;
868 ctx->comp_pred_diff = 0;
869 ctx->hybrid_pred_diff = 0;
870 ctx->single_pred_diff = 0;
871}
872
873static int get_pred_buffer(PRED_BUFFER *p, int len) {
Jerome Jiangc480d822019-05-01 18:30:37 -0700874 for (int i = 0; i < len; i++) {
kyslove3c05a82019-03-28 11:06:09 -0700875 if (!p[i].in_use) {
876 p[i].in_use = 1;
877 return i;
878 }
879 }
880 return -1;
881}
882
883static void free_pred_buffer(PRED_BUFFER *p) {
884 if (p != NULL) p->in_use = 0;
885}
886
887static int cost_mv_ref(const MACROBLOCK *const x, PREDICTION_MODE mode,
888 int16_t mode_context) {
889 if (is_inter_compound_mode(mode)) {
890 return x
891 ->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)];
892 }
893
894 int mode_cost = 0;
895 int16_t mode_ctx = mode_context & NEWMV_CTX_MASK;
896
897 assert(is_inter_mode(mode));
898
899 if (mode == NEWMV) {
900 mode_cost = x->newmv_mode_cost[mode_ctx][0];
901 return mode_cost;
902 } else {
903 mode_cost = x->newmv_mode_cost[mode_ctx][1];
904 mode_ctx = (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
905
906 if (mode == GLOBALMV) {
907 mode_cost += x->zeromv_mode_cost[mode_ctx][0];
908 return mode_cost;
909 } else {
910 mode_cost += x->zeromv_mode_cost[mode_ctx][1];
911 mode_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
912 mode_cost += x->refmv_mode_cost[mode_ctx][mode != NEARESTMV];
913 return mode_cost;
914 }
915 }
916}
917
918static void newmv_diff_bias(MACROBLOCKD *xd, PREDICTION_MODE this_mode,
919 RD_STATS *this_rdc, BLOCK_SIZE bsize, int mv_row,
920 int mv_col, int is_last_frame) {
921 // Bias against MVs associated with NEWMV mode that are very different from
922 // top/left neighbors.
923 if (this_mode == NEWMV) {
924 int al_mv_average_row;
925 int al_mv_average_col;
926 int left_row, left_col;
927 int row_diff, col_diff;
928 int above_mv_valid = 0;
929 int left_mv_valid = 0;
930 int above_row = 0;
931 int above_col = 0;
932
933 if (xd->above_mbmi) {
934 above_mv_valid = xd->above_mbmi->mv[0].as_int != INVALID_MV;
935 above_row = xd->above_mbmi->mv[0].as_mv.row;
936 above_col = xd->above_mbmi->mv[0].as_mv.col;
937 }
938 if (xd->left_mbmi) {
939 left_mv_valid = xd->left_mbmi->mv[0].as_int != INVALID_MV;
940 left_row = xd->left_mbmi->mv[0].as_mv.row;
941 left_col = xd->left_mbmi->mv[0].as_mv.col;
942 }
943 if (above_mv_valid && left_mv_valid) {
944 al_mv_average_row = (above_row + left_row + 1) >> 1;
945 al_mv_average_col = (above_col + left_col + 1) >> 1;
946 } else if (above_mv_valid) {
947 al_mv_average_row = above_row;
948 al_mv_average_col = above_col;
949 } else if (left_mv_valid) {
950 al_mv_average_row = left_row;
951 al_mv_average_col = left_col;
952 } else {
953 al_mv_average_row = al_mv_average_col = 0;
954 }
Jerome Jiangc480d822019-05-01 18:30:37 -0700955 row_diff = al_mv_average_row - mv_row;
956 col_diff = al_mv_average_col - mv_col;
kyslove3c05a82019-03-28 11:06:09 -0700957 if (row_diff > 48 || row_diff < -48 || col_diff > 48 || col_diff < -48) {
958 if (bsize > BLOCK_32X32)
959 this_rdc->rdcost = this_rdc->rdcost << 1;
960 else
961 this_rdc->rdcost = 5 * this_rdc->rdcost >> 2;
962 }
963 }
964 if (bsize >= BLOCK_16X16 && is_last_frame && mv_row < 16 && mv_row > -16 &&
965 mv_col < 16 && mv_col > -16)
966 this_rdc->rdcost = 7 * (this_rdc->rdcost >> 3);
967}
968
kyslov82449d12019-05-02 13:36:50 -0700969struct estimate_block_intra_args {
970 AV1_COMP *cpi;
971 MACROBLOCK *x;
972 PREDICTION_MODE mode;
973 int skippable;
974 RD_STATS *rdc;
975};
976
977static void estimate_block_intra(int plane, int block, int row, int col,
978 BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
979 void *arg) {
980 struct estimate_block_intra_args *const args = arg;
981 AV1_COMP *const cpi = args->cpi;
982 AV1_COMMON *const cm = &cpi->common;
983 MACROBLOCK *const x = args->x;
984 MACROBLOCKD *const xd = &x->e_mbd;
985 struct macroblock_plane *const p = &x->plane[plane];
986 struct macroblockd_plane *const pd = &xd->plane[plane];
987 uint8_t *const src_buf_base = p->src.buf;
988 uint8_t *const dst_buf_base = pd->dst.buf;
989 const int64_t src_stride = p->src.stride;
990 const int64_t dst_stride = pd->dst.stride;
991 RD_STATS this_rdc;
992
993 p->src.buf = &src_buf_base[4 * (row * src_stride + col)];
994 pd->dst.buf = &dst_buf_base[4 * (row * dst_stride + col)];
995
996 const int stepr = tx_size_high_unit[tx_size];
997 const int stepc = tx_size_wide_unit[tx_size];
998 const int max_blocks_wide = max_block_wide(xd, block, 0);
999 const int max_blocks_high = max_block_high(xd, block, 0);
1000 // Prediction.
1001 for (int trow = 0; trow < max_blocks_high; trow += stepr) {
1002 for (int tcol = 0; tcol < max_blocks_wide; tcol += stepc) {
1003 av1_predict_intra_block_facade(cm, xd, 0, tcol, trow, tx_size);
1004 }
1005 }
1006
1007 assert(plane == 0);
1008
1009 if (plane == 0) {
1010 int64_t this_sse = INT64_MAX;
1011 block_yrd(cpi, x, 0, 0, &this_rdc, &args->skippable, &this_sse, plane_bsize,
Fyodor Kyslov1951ed52019-05-21 16:14:19 -07001012 AOMMIN(tx_size, TX_16X16));
1013 } else {
1014 return;
kyslov82449d12019-05-02 13:36:50 -07001015 }
1016
1017 p->src.buf = src_buf_base;
1018 pd->dst.buf = dst_buf_base;
1019 args->rdc->rate += this_rdc.rate;
1020 args->rdc->dist += this_rdc.dist;
1021}
1022
kyslove3c05a82019-03-28 11:06:09 -07001023void av1_fast_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
1024 MACROBLOCK *x, int mi_row, int mi_col,
1025 RD_STATS *rd_cost, BLOCK_SIZE bsize,
1026 PICK_MODE_CONTEXT *ctx,
1027 int64_t best_rd_so_far) {
1028 AV1_COMMON *const cm = &cpi->common;
1029 MACROBLOCKD *const xd = &x->e_mbd;
1030 MB_MODE_INFO *const mi = xd->mi[0];
1031 struct macroblockd_plane *const pd = &xd->plane[0];
1032
1033 BEST_PICKMODE best_pickmode;
1034 int inter_mode_mask[BLOCK_SIZES];
1035
1036 MV_REFERENCE_FRAME ref_frame;
1037 MV_REFERENCE_FRAME usable_ref_frame, second_ref_frame;
1038 int_mv frame_mv[MB_MODE_COUNT][REF_FRAMES];
1039 uint8_t mode_checked[MB_MODE_COUNT][REF_FRAMES];
1040 struct buf_2d yv12_mb[6][MAX_MB_PLANE];
1041 static const int flag_list[5] = { 0, AOM_LAST_FLAG, AOM_LAST2_FLAG,
1042 AOM_LAST3_FLAG, AOM_GOLD_FLAG };
1043 RD_STATS this_rdc, best_rdc;
1044 // var_y and sse_y are saved to be used in skipping checking
Fyodor Kyslov4dfdb5c2019-05-24 11:06:56 -07001045 unsigned int sse_y = UINT_MAX;
1046 unsigned int var_y = UINT_MAX;
kyslove3c05a82019-03-28 11:06:09 -07001047 const int *const rd_threshes = cpi->rd.threshes[mi->segment_id][bsize];
1048 const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
1049
1050 InterpFilter filter_ref;
1051 int const_motion[REF_FRAMES] = { 0 };
1052 int ref_frame_skip_mask = 0;
kyslove3c05a82019-03-28 11:06:09 -07001053 int best_pred_sad = INT_MAX;
1054 int best_early_term = 0;
Jerome Jiang037f5d22019-05-02 19:32:42 -07001055 unsigned int ref_costs_single[REF_FRAMES],
1056 ref_costs_comp[REF_FRAMES][REF_FRAMES];
kyslove3c05a82019-03-28 11:06:09 -07001057 int use_golden_nonzeromv = 1;
1058 int force_skip_low_temp_var = 0;
1059 int skip_ref_find_pred[5] = { 0 };
1060 int64_t best_sse_sofar = INT64_MAX;
1061 int gf_temporal_ref = 0;
1062 const struct segmentation *const seg = &cm->seg;
1063 int comp_modes = 0;
1064 int num_inter_modes = RT_INTER_MODES;
1065 unsigned char segment_id = mi->segment_id;
1066 InterpFilter best_filter = EIGHTTAP_REGULAR;
1067 PRED_BUFFER tmp[4];
1068 DECLARE_ALIGNED(16, uint8_t, pred_buf[3 * 64 * 64]);
1069 PRED_BUFFER *this_mode_pred = NULL;
1070 const int reuse_inter_pred = cpi->sf.reuse_inter_pred_nonrd;
1071 const int bh = block_size_high[bsize];
1072 const int bw = block_size_wide[bsize];
1073 const int pixels_in_block = bh * bw;
1074 struct buf_2d orig_dst = pd->dst;
1075
kyslov82449d12019-05-02 13:36:50 -07001076 const int intra_cost_penalty = av1_get_intra_cost_penalty(
1077 cm->base_qindex, cm->y_dc_delta_q, cm->seq_params.bit_depth);
1078 const int64_t inter_mode_thresh = RDCOST(x->rdmult, intra_cost_penalty, 0);
1079 const int perform_intra_pred = cpi->sf.check_intra_pred_nonrd;
1080
kyslove3c05a82019-03-28 11:06:09 -07001081 (void)best_rd_so_far;
1082
1083 init_best_pickmode(&best_pickmode);
1084
1085 for (int i = 0; i < BLOCK_SIZES; ++i) inter_mode_mask[i] = INTER_ALL;
1086
1087 // TODO(kyslov) Move this to Speed Features
1088 inter_mode_mask[BLOCK_128X128] = INTER_NEAREST_NEAR;
1089
1090 x->source_variance = UINT_MAX;
1091
1092 struct scale_factors *const sf_last = get_ref_scale_factors(cm, LAST_FRAME);
1093 struct scale_factors *const sf_golden =
1094 get_ref_scale_factors(cm, GOLDEN_FRAME);
1095 gf_temporal_ref = 1;
1096 // For temporal long term prediction, check that the golden reference
1097 // is same scale as last reference, otherwise disable.
1098 if ((sf_last->x_scale_fp != sf_golden->x_scale_fp) ||
1099 (sf_last->y_scale_fp != sf_golden->y_scale_fp)) {
1100 gf_temporal_ref = 0;
1101 }
1102
1103 av1_collect_neighbors_ref_counts(xd);
1104 av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col);
1105
Jerome Jiang906a94f2019-05-01 19:02:58 -07001106 estimate_single_ref_frame_costs(cm, xd, x, segment_id, ref_costs_single);
Jerome Jiang037f5d22019-05-02 19:32:42 -07001107 if (cpi->sf.use_comp_ref_nonrd)
1108 estimate_comp_ref_frame_costs(cm, xd, x, segment_id, ref_costs_comp);
kyslove3c05a82019-03-28 11:06:09 -07001109
1110 memset(&mode_checked[0][0], 0, MB_MODE_COUNT * REF_FRAMES);
1111 if (reuse_inter_pred) {
Jerome Jiangc480d822019-05-01 18:30:37 -07001112 for (int i = 0; i < 3; i++) {
kyslove3c05a82019-03-28 11:06:09 -07001113 tmp[i].data = &pred_buf[pixels_in_block * i];
1114 tmp[i].stride = bw;
1115 tmp[i].in_use = 0;
1116 }
1117 tmp[3].data = pd->dst.buf;
1118 tmp[3].stride = pd->dst.stride;
1119 tmp[3].in_use = 0;
1120 }
1121
1122 x->skip = 0;
1123
1124 // Instead of using av1_get_pred_context_switchable_interp(xd) to assign
1125 // filter_ref, we use a less strict condition on assigning filter_ref.
1126 // This is to reduce the probabily of entering the flow of not assigning
1127 // filter_ref and then skip filter search.
1128 filter_ref = cm->interp_filter;
1129
1130 // initialize mode decisions
1131 av1_invalid_rd_stats(&best_rdc);
kyslovad34be72019-05-03 14:04:02 -07001132 av1_invalid_rd_stats(&this_rdc);
kyslove3c05a82019-03-28 11:06:09 -07001133 av1_invalid_rd_stats(rd_cost);
1134 mi->sb_type = bsize;
1135 mi->ref_frame[0] = NONE_FRAME;
1136 mi->ref_frame[1] = NONE_FRAME;
1137
kyslove3c05a82019-03-28 11:06:09 -07001138// TODO(kyslov) Refine logic of selecting REF FRAME SET.
1139// For now only LAST_FRAME is used
1140#if 0
1141 if (cpi->rc.frames_since_golden == 0 && gf_temporal_ref) {
1142 usable_ref_frame = LAST_FRAME;
1143 } else {
1144 usable_ref_frame = GOLDEN_FRAME;
1145 }
1146
1147 force_skip_low_temp_var = get_force_skip_low_temp_var(&x->variance_low[0],
1148 mi_row, mi_col, bsize);
1149 // If force_skip_low_temp_var is set, and for short circuit mode = 1 and 3,
1150 // skip golden reference.
1151 if (force_skip_low_temp_var) {
1152 usable_ref_frame = LAST_FRAME;
1153 }
1154
1155 if (!((cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) &&
1156 !force_skip_low_temp_var))
1157 use_golden_nonzeromv = 0;
1158
1159 // If the segment reference frame feature is enabled and it's set to GOLDEN
1160 // reference, then make sure we don't skip checking GOLDEN, this is to
1161 // prevent possibility of not picking any mode.
1162 if (segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME) &&
1163 get_segdata(seg, mi->segment_id, SEG_LVL_REF_FRAME) == GOLDEN_FRAME) {
1164 usable_ref_frame = GOLDEN_FRAME;
1165 skip_ref_find_pred[GOLDEN_FRAME] = 0;
1166 }
1167#endif
1168 usable_ref_frame = LAST_FRAME;
1169
Jerome Jiangc480d822019-05-01 18:30:37 -07001170 for (MV_REFERENCE_FRAME ref_frame_iter = LAST_FRAME;
1171 ref_frame_iter <= usable_ref_frame; ++ref_frame_iter) {
kyslove3c05a82019-03-28 11:06:09 -07001172 // Skip find_predictor if the reference frame is not in the
1173 // ref_frame_flags (i.e., not used as a reference for this frame).
Jerome Jiangc480d822019-05-01 18:30:37 -07001174 skip_ref_find_pred[ref_frame_iter] =
1175 !(cpi->ref_frame_flags & flag_list[ref_frame_iter]);
1176 if (!skip_ref_find_pred[ref_frame_iter]) {
1177 find_predictors(cpi, x, ref_frame_iter, frame_mv, const_motion,
kyslove3c05a82019-03-28 11:06:09 -07001178 &ref_frame_skip_mask, flag_list, tile_data, mi_row,
1179 mi_col, yv12_mb, bsize, force_skip_low_temp_var,
1180 comp_modes > 0);
1181 }
1182 }
Fyodor Kyslov7cfbaa72019-05-31 13:37:29 -07001183 const int large_block = bsize >= BLOCK_32X32;
1184 const int use_model_yrd_large =
1185 cpi->oxcf.rc_mode == AOM_CBR && large_block &&
1186 !cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id) &&
1187 cm->base_qindex;
kyslove3c05a82019-03-28 11:06:09 -07001188
Jerome Jiangc480d822019-05-01 18:30:37 -07001189 for (int idx = 0; idx < num_inter_modes; ++idx) {
kyslove3c05a82019-03-28 11:06:09 -07001190 int rate_mv = 0;
1191 int mode_rd_thresh;
1192 int mode_index;
Fyodor Kyslov1951ed52019-05-21 16:14:19 -07001193#if !_TMP_USE_CURVFIT_
kyslove3c05a82019-03-28 11:06:09 -07001194 int64_t this_sse;
1195 int is_skippable;
Fyodor Kyslov1951ed52019-05-21 16:14:19 -07001196#endif
kyslove3c05a82019-03-28 11:06:09 -07001197 int this_early_term = 0;
kyslove3c05a82019-03-28 11:06:09 -07001198 int skip_this_mv = 0;
1199 int comp_pred = 0;
1200 int force_mv_inter_layer = 0;
1201 PREDICTION_MODE this_mode;
1202 MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
1203 second_ref_frame = NONE_FRAME;
1204
1205 this_mode = ref_mode_set[idx].pred_mode;
1206 ref_frame = ref_mode_set[idx].ref_frame;
kyslov82449d12019-05-02 13:36:50 -07001207 init_mbmi(mi, this_mode, ref_frame, NONE_FRAME, cm);
kyslove3c05a82019-03-28 11:06:09 -07001208
1209 mi->tx_size = AOMMIN(AOMMIN(max_txsize_lookup[bsize],
1210 tx_mode_to_biggest_tx_size[cm->tx_mode]),
1211 TX_16X16);
1212 memset(mi->inter_tx_size, mi->tx_size, sizeof(mi->inter_tx_size));
1213 memset(mi->txk_type, DCT_DCT, sizeof(mi->txk_type[0]) * TXK_TYPE_BUF_LEN);
1214 av1_zero(x->blk_skip);
1215
1216 if (ref_frame > usable_ref_frame) continue;
1217 if (skip_ref_find_pred[ref_frame]) continue;
1218
1219 // If the segment reference frame feature is enabled then do nothing if the
1220 // current ref frame is not allowed.
1221 if (segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME) &&
1222 get_segdata(seg, mi->segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame)
1223 continue;
1224
1225 if (!(cpi->ref_frame_flags & flag_list[ref_frame])) continue;
1226
kyslove3c05a82019-03-28 11:06:09 -07001227 if (!(inter_mode_mask[bsize] & (1 << this_mode))) continue;
1228
1229 if (const_motion[ref_frame] && this_mode == NEARMV) continue;
1230
1231 // Skip non-zeromv mode search for golden frame if force_skip_low_temp_var
1232 // is set. If nearestmv for golden frame is 0, zeromv mode will be skipped
1233 // later.
1234 if (!force_mv_inter_layer && force_skip_low_temp_var &&
1235 ref_frame == GOLDEN_FRAME &&
1236 frame_mv[this_mode][ref_frame].as_int != 0) {
1237 continue;
1238 }
1239
1240// TODO(kyslov) Refine logic of pruning reference .
1241// For now only LAST_FRAME is used
1242#if 0
1243 if (x->content_state_sb != kVeryHighSad &&
1244 (cpi->sf.short_circuit_low_temp_var >= 2 ||
1245 (cpi->sf.short_circuit_low_temp_var == 1 && bsize == BLOCK_64X64))
1246 && force_skip_low_temp_var && ref_frame == LAST_FRAME && this_mode ==
1247 NEWMV) { continue;
1248 }
1249
1250 // Disable this drop out case if the ref frame segment level feature is
1251 // enabled for this segment. This is to prevent the possibility that we
1252 end
1253 // up unable to pick any mode.
1254 if (!segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME)) {
1255 if (sf->reference_masking &&
1256 !(frame_mv[this_mode][ref_frame].as_int == 0 &&
1257 ref_frame == LAST_FRAME)) {
1258 if (usable_ref_frame < ALTREF_FRAME) {
1259 if (!force_skip_low_temp_var && usable_ref_frame > LAST_FRAME) {
1260 i = (ref_frame == LAST_FRAME) ? GOLDEN_FRAME : LAST_FRAME;
1261 if ((cpi->ref_frame_flags & flag_list[i]))
1262 if (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[i] << 1))
1263 ref_frame_skip_mask |= (1 << ref_frame);
1264 }
1265 } else if (!cpi->rc.is_src_frame_alt_ref &&
1266 !(frame_mv[this_mode][ref_frame].as_int == 0 &&
1267 ref_frame == ALTREF_FRAME)) {
1268 int ref1 = (ref_frame == GOLDEN_FRAME) ? LAST_FRAME :
1269 GOLDEN_FRAME; int ref2 = (ref_frame == ALTREF_FRAME) ? LAST_FRAME :
1270 ALTREF_FRAME; if (((cpi->ref_frame_flags & flag_list[ref1]) &&
1271 (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[ref1] << 1))) ||
1272 ((cpi->ref_frame_flags & flag_list[ref2]) &&
1273 (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[ref2] << 1))))
1274 ref_frame_skip_mask |= (1 << ref_frame);
1275 }
1276 }
1277 if (ref_frame_skip_mask & (1 << ref_frame)) continue;
1278 }
1279#endif
1280
1281 // Select prediction reference frames.
Jerome Jiangc480d822019-05-01 18:30:37 -07001282 for (int i = 0; i < MAX_MB_PLANE; i++) {
kyslove3c05a82019-03-28 11:06:09 -07001283 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
1284 }
1285
1286 mi->ref_frame[0] = ref_frame;
1287 mi->ref_frame[1] = second_ref_frame;
1288 set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
1289
1290 mode_index = mode_idx[ref_frame][INTER_OFFSET(this_mode)];
1291 mode_rd_thresh = best_pickmode.best_mode_skip_txfm
1292 ? rd_threshes[mode_index] << 1
1293 : rd_threshes[mode_index];
1294
kyslov82449d12019-05-02 13:36:50 -07001295 // Increase mode_rd_thresh value for GOLDEN_FRAME for improved encoding
1296 // speed
1297 if (ref_frame == GOLDEN_FRAME && cpi->rc.frames_since_golden > 4)
1298 mode_rd_thresh = mode_rd_thresh << 3;
1299
kyslove3c05a82019-03-28 11:06:09 -07001300 if (rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh,
1301 rd_thresh_freq_fact[mode_index]))
1302 if (frame_mv[this_mode][ref_frame].as_int != 0) continue;
1303
1304 if (this_mode == NEWMV && !force_mv_inter_layer) {
1305 if (search_new_mv(cpi, x, frame_mv, ref_frame, gf_temporal_ref, bsize,
1306 mi_row, mi_col, best_pred_sad, &rate_mv, best_sse_sofar,
1307 &best_rdc))
1308 continue;
1309 }
1310
Jerome Jiangc480d822019-05-01 18:30:37 -07001311 for (PREDICTION_MODE inter_mv_mode = NEARESTMV; inter_mv_mode <= NEWMV;
1312 inter_mv_mode++) {
kyslove3c05a82019-03-28 11:06:09 -07001313 if (inter_mv_mode == this_mode || comp_pred) continue;
1314 if (mode_checked[inter_mv_mode][ref_frame] &&
1315 frame_mv[this_mode][ref_frame].as_int ==
1316 frame_mv[inter_mv_mode][ref_frame].as_int &&
1317 frame_mv[inter_mv_mode][ref_frame].as_int == 0) {
1318 skip_this_mv = 1;
1319 break;
1320 }
1321 }
1322
1323 if (skip_this_mv) continue;
1324
1325 // If use_golden_nonzeromv is false, NEWMV mode is skipped for golden, no
1326 // need to compute best_pred_sad which is only used to skip golden NEWMV.
1327 if (use_golden_nonzeromv && this_mode == NEWMV && ref_frame == LAST_FRAME &&
1328 frame_mv[NEWMV][LAST_FRAME].as_int != INVALID_MV) {
1329 const int pre_stride = xd->plane[0].pre[0].stride;
1330 const uint8_t *const pre_buf =
1331 xd->plane[0].pre[0].buf +
1332 (frame_mv[NEWMV][LAST_FRAME].as_mv.row >> 3) * pre_stride +
1333 (frame_mv[NEWMV][LAST_FRAME].as_mv.col >> 3);
1334 best_pred_sad = cpi->fn_ptr[bsize].sdf(
1335 x->plane[0].src.buf, x->plane[0].src.stride, pre_buf, pre_stride);
1336 x->pred_mv_sad[LAST_FRAME] = best_pred_sad;
1337 }
1338
1339 if (this_mode != NEARESTMV && !comp_pred &&
1340 frame_mv[this_mode][ref_frame].as_int ==
1341 frame_mv[NEARESTMV][ref_frame].as_int)
1342 continue;
1343
1344 mi->mode = this_mode;
1345 mi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int;
1346 mi->mv[1].as_int = 0;
1347 if (reuse_inter_pred) {
1348 if (!this_mode_pred) {
1349 this_mode_pred = &tmp[3];
1350 } else {
1351 this_mode_pred = &tmp[get_pred_buffer(tmp, 3)];
1352 pd->dst.buf = this_mode_pred->data;
1353 pd->dst.stride = bw;
1354 }
1355 }
1356
1357 // TODO(kyslov) bring back filter search
Sachin Kumar Garg22258532019-06-18 16:45:06 +05301358 mi->interp_filters = (filter_ref == SWITCHABLE)
1359 ? av1_broadcast_interp_filter(EIGHTTAP_REGULAR)
1360 : av1_broadcast_interp_filter(filter_ref);
kyslove3c05a82019-03-28 11:06:09 -07001361 av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize,
1362 AOM_PLANE_Y, AOM_PLANE_Y);
Fyodor Kyslov7cfbaa72019-05-31 13:37:29 -07001363#if !_TMP_USE_CURVFIT_
1364 if (use_model_yrd_large) {
1365 model_skip_for_sb_y_large(cpi, bsize, x, xd, &var_y, &sse_y,
1366 &this_early_term);
1367 } else {
1368#endif
1369 model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc.rate, &this_rdc.dist,
1370 &this_rdc.skip, NULL, &var_y, &sse_y);
1371#if !_TMP_USE_CURVFIT_
1372 }
1373#endif
kyslove3c05a82019-03-28 11:06:09 -07001374
1375 if (sse_y < best_sse_sofar) best_sse_sofar = sse_y;
1376
1377 const int skip_ctx = av1_get_skip_context(xd);
1378 const int skip_cost = x->skip_cost[skip_ctx][1];
Fyodor Kyslov1951ed52019-05-21 16:14:19 -07001379 const int no_skip_cost = x->skip_cost[skip_ctx][0];
Fyodor Kyslov7cfbaa72019-05-31 13:37:29 -07001380 if (!this_early_term) {
Fyodor Kyslov1951ed52019-05-21 16:14:19 -07001381#if !_TMP_USE_CURVFIT_
Fyodor Kyslov7cfbaa72019-05-31 13:37:29 -07001382 this_sse = (int64_t)sse_y;
1383 block_yrd(cpi, x, mi_row, mi_col, &this_rdc, &is_skippable, &this_sse,
1384 bsize, mi->tx_size);
Fyodor Kyslov1951ed52019-05-21 16:14:19 -07001385#endif
kyslove3c05a82019-03-28 11:06:09 -07001386
Fyodor Kyslov7cfbaa72019-05-31 13:37:29 -07001387 x->skip = this_rdc.skip;
1388 if (this_rdc.skip) {
kyslove3c05a82019-03-28 11:06:09 -07001389 this_rdc.rate = skip_cost;
Fyodor Kyslov7cfbaa72019-05-31 13:37:29 -07001390 } else {
1391#if !_TMP_USE_CURVFIT_
1392 // on CurvFit this condition is checked inside curvfit modeling
1393 if (RDCOST(x->rdmult, this_rdc.rate, this_rdc.dist) >=
1394 RDCOST(
1395 x->rdmult, 0,
1396 this_sse)) { // this_sse already multiplied by 16 in block_yrd
1397 x->skip = 1;
1398 this_rdc.rate = skip_cost;
1399 this_rdc.dist = this_sse;
1400 } else
Fyodor Kyslov1951ed52019-05-21 16:14:19 -07001401#endif
Fyodor Kyslov7cfbaa72019-05-31 13:37:29 -07001402 {
1403 this_rdc.rate += no_skip_cost;
1404 }
kyslove3c05a82019-03-28 11:06:09 -07001405 }
Fyodor Kyslov7cfbaa72019-05-31 13:37:29 -07001406 } else {
1407 x->skip = 1;
1408 this_rdc.rate = skip_cost;
1409 this_rdc.dist = sse_y << 4;
kyslove3c05a82019-03-28 11:06:09 -07001410 }
1411
1412 // TODO(kyslov) account for UV prediction cost
kyslove3c05a82019-03-28 11:06:09 -07001413 this_rdc.rate += rate_mv;
1414 const int16_t mode_ctx =
1415 av1_mode_context_analyzer(mbmi_ext->mode_context, mi->ref_frame);
1416 this_rdc.rate += cost_mv_ref(x, this_mode, mode_ctx);
1417
1418 this_rdc.rate += ref_costs_single[ref_frame];
1419
1420 this_rdc.rdcost = RDCOST(x->rdmult, this_rdc.rate, this_rdc.dist);
1421 if (cpi->oxcf.rc_mode == AOM_CBR && cpi->oxcf.speed >= 8) {
1422 newmv_diff_bias(xd, this_mode, &this_rdc, bsize,
1423 frame_mv[this_mode][ref_frame].as_mv.row,
1424 frame_mv[this_mode][ref_frame].as_mv.col,
1425 ref_frame == LAST_FRAME);
1426 }
1427
1428 mode_checked[this_mode][ref_frame] = 1;
1429
1430 if (this_rdc.rdcost < best_rdc.rdcost) {
1431 best_rdc = this_rdc;
1432 best_early_term = this_early_term;
1433 best_pickmode.best_mode = this_mode;
1434 best_pickmode.best_pred_filter = mi->interp_filters;
1435 best_pickmode.best_tx_size = mi->tx_size;
1436 best_pickmode.best_ref_frame = ref_frame;
1437 best_pickmode.best_mode_skip_txfm = x->skip;
1438 best_pickmode.best_second_ref_frame = second_ref_frame;
1439 if (reuse_inter_pred) {
1440 free_pred_buffer(best_pickmode.best_pred);
1441 best_pickmode.best_pred = this_mode_pred;
1442 }
1443 } else {
1444 if (reuse_inter_pred) free_pred_buffer(this_mode_pred);
1445 }
1446 if (best_early_term && idx > 0) {
1447 x->skip = 1;
1448 break;
1449 }
1450 }
1451
1452 mi->mode = best_pickmode.best_mode;
1453 mi->interp_filters = av1_broadcast_interp_filter(best_filter);
1454 mi->tx_size = best_pickmode.best_tx_size;
Fyodor Kyslov4dfdb5c2019-05-24 11:06:56 -07001455 memset(mi->inter_tx_size, mi->tx_size, sizeof(mi->inter_tx_size));
kyslove3c05a82019-03-28 11:06:09 -07001456 mi->ref_frame[0] = best_pickmode.best_ref_frame;
1457 mi->mv[0].as_int =
1458 frame_mv[best_pickmode.best_mode][best_pickmode.best_ref_frame].as_int;
1459 mi->ref_frame[1] = best_pickmode.best_second_ref_frame;
kyslov82449d12019-05-02 13:36:50 -07001460
1461 // Perform intra prediction search, if the best SAD is above a certain
1462 // threshold.
1463 mi->angle_delta[PLANE_TYPE_Y] = 0;
1464 mi->angle_delta[PLANE_TYPE_UV] = 0;
1465 mi->filter_intra_mode_info.use_filter_intra = 0;
1466 // TODO(kyslov@) Need to adjust inter_mode_thresh
1467 if (best_rdc.rdcost == INT64_MAX ||
1468 (perform_intra_pred && !x->skip && best_rdc.rdcost > inter_mode_thresh &&
1469 bsize <= cpi->sf.max_intra_bsize)) {
1470 struct estimate_block_intra_args args = { cpi, x, DC_PRED, 1, 0 };
1471 PRED_BUFFER *const best_pred = best_pickmode.best_pred;
1472 TX_SIZE intra_tx_size =
1473 AOMMIN(AOMMIN(max_txsize_lookup[bsize],
1474 tx_mode_to_biggest_tx_size[cpi->common.tx_mode]),
1475 TX_16X16);
1476
1477 if (reuse_inter_pred && best_pred != NULL) {
1478 if (best_pred->data == orig_dst.buf) {
1479 this_mode_pred = &tmp[get_pred_buffer(tmp, 3)];
1480 aom_convolve_copy(best_pred->data, best_pred->stride,
1481 this_mode_pred->data, this_mode_pred->stride, 0, 0, 0,
1482 0, bw, bh);
1483 best_pickmode.best_pred = this_mode_pred;
1484 }
1485 }
1486 pd->dst = orig_dst;
1487
1488 for (int i = 0; i < 4; ++i) {
1489 const PREDICTION_MODE this_mode = intra_mode_list[i];
1490 const THR_MODES mode_index =
1491 mode_idx[INTRA_FRAME][mode_offset(this_mode)];
1492 const int mode_rd_thresh = rd_threshes[mode_index];
1493
1494 if (rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh,
1495 rd_thresh_freq_fact[mode_index])) {
1496 continue;
1497 }
1498
1499 mi->mode = this_mode;
1500 mi->ref_frame[0] = INTRA_FRAME;
1501 mi->ref_frame[1] = NONE_FRAME;
1502
1503 this_rdc.dist = this_rdc.rate = 0;
1504 args.mode = this_mode;
1505 args.skippable = 1;
1506 args.rdc = &this_rdc;
1507 mi->tx_size = intra_tx_size;
1508 av1_foreach_transformed_block_in_plane(xd, bsize, 0, estimate_block_intra,
1509 &args);
1510 // TODO(kyslov@) Need to account for skippable
1511 int mode_cost = 0;
1512 if (av1_is_directional_mode(this_mode) && av1_use_angle_delta(bsize)) {
1513 mode_cost += x->angle_delta_cost[this_mode - V_PRED]
1514 [MAX_ANGLE_DELTA +
1515 mi->angle_delta[PLANE_TYPE_Y]];
1516 }
1517 if (this_mode == DC_PRED && av1_filter_intra_allowed_bsize(cm, bsize)) {
1518 mode_cost += x->filter_intra_cost[bsize][0];
1519 }
1520 this_rdc.rate += ref_costs_single[INTRA_FRAME];
1521 this_rdc.rate += intra_cost_penalty;
1522 this_rdc.rate += mode_cost;
1523 this_rdc.rdcost = RDCOST(x->rdmult, this_rdc.rate, this_rdc.dist);
1524
1525 if (this_rdc.rdcost < best_rdc.rdcost) {
1526 best_rdc = this_rdc;
1527 best_pickmode.best_mode = this_mode;
1528 best_pickmode.best_intra_tx_size = mi->tx_size;
1529 best_pickmode.best_ref_frame = INTRA_FRAME;
1530 best_pickmode.best_second_ref_frame = NONE_FRAME;
1531 mi->uv_mode = this_mode;
1532 mi->mv[0].as_int = INVALID_MV;
1533 mi->mv[1].as_int = INVALID_MV;
1534 }
1535 }
1536
1537 // Reset mb_mode_info to the best inter mode.
1538 if (best_pickmode.best_ref_frame != INTRA_FRAME) {
1539 mi->tx_size = best_pickmode.best_tx_size;
1540 } else {
1541 mi->tx_size = best_pickmode.best_intra_tx_size;
1542 }
1543 }
1544
kyslove3c05a82019-03-28 11:06:09 -07001545 pd->dst = orig_dst;
kyslov82449d12019-05-02 13:36:50 -07001546 mi->mode = best_pickmode.best_mode;
1547 mi->ref_frame[0] = best_pickmode.best_ref_frame;
1548 mi->ref_frame[1] = best_pickmode.best_second_ref_frame;
1549
1550 if (!is_inter_block(mi)) {
1551 mi->interp_filters = av1_broadcast_interp_filter(SWITCHABLE_FILTERS);
1552 }
1553
kyslove3c05a82019-03-28 11:06:09 -07001554 if (reuse_inter_pred && best_pickmode.best_pred != NULL) {
1555 PRED_BUFFER *const best_pred = best_pickmode.best_pred;
1556 if (best_pred->data != orig_dst.buf && is_inter_mode(mi->mode)) {
1557 aom_convolve_copy(best_pred->data, best_pred->stride, pd->dst.buf,
1558 pd->dst.stride, 0, 0, 0, 0, bw, bh);
1559 }
1560 }
1561
1562 store_coding_context(x, ctx, mi->mode);
1563 *rd_cost = best_rdc;
1564}